C#: Remove Html From a String, Optionally Leave Line Breaks In

2/8/2015 10:52:50 PM

This code removes HTML tags from a string. It gives you the option to leave the line breaks in, in which case they are returned as `<br />` tags.

/// <summary>
/// Strips every HTML tag from a string, optionally keeping line breaks as
/// normalized <c>&lt;br /&gt;</c> tags.
/// </summary>
/// <param name="html">The markup to clean. Null or empty input is returned unchanged.</param>
/// <param name="keepLineBreaks">
/// When <c>true</c>, each <c>&lt;br&gt;</c> tag (any casing, with or without a
/// self-closing slash or attributes) and each closing <c>&lt;/p&gt;</c> tag is
/// emitted as <c>&lt;br /&gt;</c> in the result. Note that any
/// <see cref="Environment.NewLine"/> sequence already present in the input is
/// also emitted as <c>&lt;br /&gt;</c>, because the newline is used as the
/// internal placeholder.
/// </param>
/// <returns>The input with all tags removed (except the re-added <c>&lt;br /&gt;</c> tags).</returns>
public static string RemoveHtml(string html, bool keepLineBreaks = false)
{
	if (string.IsNullOrEmpty(html))
	{
		return html;
	}

	if (keepLineBreaks)
	{
		// Swap every line-breaking tag for a newline placeholder so it survives the
		// tag stripping below. The match is case-insensitive and tolerates extra
		// whitespace or attributes, so <BR>, <br  /> and </P> are not lost.
		html = System.Text.RegularExpressions.Regex.Replace(
			html,
			@"<br\b[^>]*>|</p\s*>",
			Environment.NewLine,
			System.Text.RegularExpressions.RegexOptions.IgnoreCase
				| System.Text.RegularExpressions.RegexOptions.CultureInvariant);
	}

	// Remove all remaining tags. The static Regex.Replace caches the parsed pattern,
	// so no Regex instance has to be allocated on each call.
	html = System.Text.RegularExpressions.Regex.Replace(html, "<[^>]*>", "");

	if (keepLineBreaks)
	{
		// Turn the placeholders back into normalized break tags.
		html = html.Replace(Environment.NewLine, "<br />");
	}

	return html;
}