Click here to Skip to main content
15,896,201 members
Articles / Web Development / HTML

MailMergeLib - A Mail Client Library for .NET

Rate me:
Please Sign up or sign in to vote.
4.95/5 (131 votes)
5 Nov 2017MIT9 min read 843.5K   10.2K   542  
MailMergeLib is an SMTP template mail client library written in C# which provides comfortable mail merge capabilities and SMTP fail-over features. It works on .NET Framework and .NET Core.
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using HtmlAgilityPack;

namespace MailMergeLib
{
	/// <summary>
	/// Converts HTML to plain text using the HtmlAgilityPack library.
	/// Processing is not too fast, but as it must be done only once
	/// per mail merge job and email texts are usually small, it's okay.
	/// </summary>
	/// <remarks>
	/// The plain text notation (emphasis markers, list bullets, headline markers)
	/// is inspired by Markdown, see http://daringfireball.net/projects/markdown/
	/// Should eventually be improved ;-)
	/// </remarks>
	public class ParsingHtmlConverter : IHtmlConverter
	{
		private const string CrLf = "\r\n";
		private const string CrLfCrLf = "\r\n\r\n";

		/// <summary>
		/// Matches runs of whitespace. Created once and shared, because building a
		/// compiled regex for every text node is needlessly expensive.
		/// </summary>
		private static readonly Regex WhitespaceRun = new Regex(@"\s+", RegexOptions.Compiled);

		#region IHtmlConverter Members

		/// <summary>
		/// Converts a string with HTML content to plain text.
		/// </summary>
		/// <param name="html">The HTML string to convert. <c>null</c> is treated like an empty string.</param>
		/// <returns>The plain text representation of the HTML content.</returns>
		public string ToPlainText(string html)
		{
			if (html == null)
			{
				// nothing to convert; avoid passing null on to the HTML parser
				return string.Empty;
			}

			var doc = new HtmlDocument();
			doc.LoadHtml(html);
			return ToPlainText(doc);
		}

		#endregion

		/// <summary>
		/// Converts a parsed HTML document to plain text.
		/// </summary>
		/// <param name="doc">The parsed HTML document.</param>
		/// <returns>The plain text collected from all nodes of the document.</returns>
		private string ToPlainText(HtmlDocument doc)
		{
			using (var sw = new StringWriter())
			{
				ConvertToText(doc.DocumentNode, sw);
				sw.Flush();
				return sw.ToString();
			}
		}

		/// <summary>
		/// Converts all child nodes of <paramref name="node"/> in document order.
		/// </summary>
		/// <param name="node">The node whose children are converted.</param>
		/// <param name="outText">The writer receiving the plain text.</param>
		private void ConvertContentToText(HtmlNode node, TextWriter outText)
		{
			foreach (HtmlNode subnode in node.ChildNodes)
			{
				ConvertToText(subnode, outText);
			}
		}

		/// <summary>
		/// Converts a single node (and, recursively, its children) to plain text.
		/// </summary>
		/// <param name="node">The node to convert.</param>
		/// <param name="outText">The writer receiving the plain text.</param>
		private void ConvertToText(HtmlNode node, TextWriter outText)
		{
			switch (node.NodeType)
			{
				case HtmlNodeType.Comment:
					// don't output comments
					break;

				case HtmlNodeType.Document:
					ConvertContentToText(node, outText);
					break;

				case HtmlNodeType.Text:
					WriteTextNode(node, outText);
					break;

				case HtmlNodeType.Element:
					ConvertElement(node, outText);
					break;
			}
		}

		/// <summary>
		/// Writes the visible text of a text node.
		/// </summary>
		/// <param name="node">A node of type <see cref="HtmlNodeType.Text"/>.</param>
		/// <param name="outText">The writer receiving the plain text.</param>
		private static void WriteTextNode(HtmlNode node, TextWriter outText)
		{
			string parentName = node.ParentNode.Name;

			// script, style, head and title text is ignored
			if (parentName == "script" || parentName == "style" || parentName == "head" || parentName == "title")
			{
				return;
			}

			string text = ((HtmlTextNode) node).Text;

			if (parentName != "pre")
			{
				// Remove the characters which only format the HTML source, then
				// collapse the remaining whitespace runs to a single blank.
				// NOTE(review): tabs and line breaks are dropped without inserting a blank,
				// so words separated only by a line break get joined - confirm this is intended.
				text = text.Replace("\t", string.Empty).Replace("\r", string.Empty).Replace("\n", string.Empty);
				text = WhitespaceRun.Replace(text, " ");
			}

			// gracefully handle overlapping closing elements
			if (HtmlNode.IsOverlappedClosingElement(text))
			{
				return;
			}

			text = HtmlEntity.DeEntitize(text);
			if (text.Length > 0)
			{
				outText.Write(text);
			}
		}

		/// <summary>
		/// Writes the plain text decoration for an element and then converts its children.
		/// </summary>
		/// <remarks>
		/// Inline formatting, list items and headlines are handled by rewriting the
		/// element's inner HTML, so that the decoration is emitted by the regular
		/// child node conversion that follows.
		/// </remarks>
		/// <param name="node">A node of type <see cref="HtmlNodeType.Element"/>.</param>
		/// <param name="outText">The writer receiving the plain text.</param>
		private void ConvertElement(HtmlNode node, TextWriter outText)
		{
			switch (node.Name)
			{
				case "p":
					outText.Write(CrLfCrLf);
					break;
				case "br":
				case "td":
				case "ul":
				case "ol":
					outText.Write(CrLf);
					break;
				case "img":
					WriteImage(node, outText);
					break;
				case "a":
					WriteLink(node, outText);
					break;
				case "hr":
					outText.Write("{0}----------{0}", CrLf);
					break;
				case "b":
				case "strong":
					node.InnerHtml = "**" + node.InnerHtml + "**";
					break;
				case "i":
				case "u":
				case "em":
					node.InnerHtml = "_" + node.InnerHtml + "_";
					break;
				case "li":
					node.InnerHtml = "* " + node.InnerHtml + "<br />";
					break;
				case "h1":
				case "h2":
				case "h3":
				case "h4":
				case "h5":
				case "h6":
					// headlines: h1 gets six '#' characters, h6 gets one
					// NOTE(review): this is the reverse of Markdown's convention - confirm it is intended.
					outText.Write(CrLfCrLf);
					int level = node.Name[1] - '0';
					node.InnerHtml = new string('#', 7 - level) + " " + node.InnerHtml + "<br />";
					break;
			}

			if (node.HasChildNodes)
			{
				ConvertContentToText(node, outText);
			}
		}

		/// <summary>
		/// Writes an image as <c>[[src] [alt] ("title")] </c>, leaving out blank attributes.
		/// </summary>
		/// <param name="node">The <c>img</c> element.</param>
		/// <param name="outText">The writer receiving the plain text.</param>
		private static void WriteImage(HtmlNode node, TextWriter outText)
		{
			var parts = new List<string>();
			AddAttributeText(parts, node, "src", "[", "]");
			AddAttributeText(parts, node, "alt", "[", "]");
			AddAttributeText(parts, node, "title", "(\"", "\")");
			outText.Write("[" + string.Join(" ", parts.ToArray()) + "] ");
		}

		/// <summary>
		/// Writes the link target as <c>[href] ("title") </c> in front of the link text,
		/// leaving out blank attributes.
		/// </summary>
		/// <param name="node">The <c>a</c> element.</param>
		/// <param name="outText">The writer receiving the plain text.</param>
		private static void WriteLink(HtmlNode node, TextWriter outText)
		{
			var parts = new List<string>();
			AddAttributeText(parts, node, "href", "[", "]");
			AddAttributeText(parts, node, "title", "(\"", "\")");
			outText.Write(string.Join(" ", parts.ToArray()) + " ");
		}

		/// <summary>
		/// Adds the entity-decoded value of an attribute, wrapped in
		/// <paramref name="prefix"/> and <paramref name="suffix"/>, to <paramref name="parts"/>.
		/// Nothing is added when the attribute is missing or blank.
		/// </summary>
		/// <param name="parts">The list collecting the text fragments.</param>
		/// <param name="node">The element owning the attribute.</param>
		/// <param name="attributeName">The name of the attribute to read.</param>
		/// <param name="prefix">Text written in front of the attribute value.</param>
		/// <param name="suffix">Text written after the attribute value.</param>
		private static void AddAttributeText(List<string> parts, HtmlNode node, string attributeName, string prefix, string suffix)
		{
			HtmlAttribute attribute = node.Attributes[attributeName];
			if (attribute == null)
			{
				return;
			}

			string value = attribute.Value;
			if (value == null || value.Trim().Length == 0)
			{
				return;
			}

			// only the attribute value is entity-decoded, not the surrounding decoration
			parts.Add(prefix + HtmlEntity.DeEntitize(value) + suffix);
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License


Written By
Germany Germany
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions