Click here to Skip to main content
15,881,882 members
Articles / Programming Languages / C#

How to Create a Spam Filter or Automatic Category Sort Algorithm with Your Mail Application

Rate me:
Please Sign up or sign in to vote.
5.00/5 (9 votes)
29 Jul 2012 | MIT | 3 min read | 40K | 1.2K | 19
This article describes automatic category filters in mail applications.
In this article, you will learn how to create a spam filter for your mail application. You will also see how to filter your mail based on whether or not a message is about a particular topic.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using HigLabo.Net;
using HigLabo.Net.Mail;
using HigLabo.Net.Pop3;
using HtmlAgilityPack;
using System.Web;

namespace SpamFilterSample
{
    /// <summary>
    /// Console sample for the spam-filter article. It can classify mailbox messages
    /// with <c>MailSpamFilter</c>, dump mailbox bodies into <c>Spam.txt</c>, and scrape
    /// BBC Sport article text into <c>Sport.txt</c>.
    /// </summary>
    class Program
    {
        // POP3 account credentials used by every mailbox operation in this class.
        // They are empty in the sample; presumably they must be filled in before
        // authentication can succeed.
        private static String UserName = "";
        private static String Password = "";

        // Upper bound on the number of messages downloaded from the mailbox per run.
        private const int MaxMessageCount = 100;

        /// <summary>
        /// Program entry point. Only the BBC Sport data creation is invoked here;
        /// <see cref="FilterMail"/> and <see cref="CreateSpamDataFromMailbox"/> are not
        /// called from this file and have to be wired in manually to be run.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            CreateSportDataFromBbc();
        }

        /// <summary>
        /// Downloads up to <see cref="MaxMessageCount"/> messages, tests each body with
        /// <c>MailSpamFilter</c>, and prints the subjects of those classified as spam.
        /// </summary>
        private static void FilterMail()
        {
            List<MailMessage> spamList = new List<MailMessage>();

            MailSpamFilter f = new MailSpamFilter();
            f.LoadData();

            bool authenticated = ForEachMessage(mg =>
            {
                if (f.Test(mg.BodyText) == MailType.Spam)
                {
                    spamList.Add(mg);
                }
            });
            if (authenticated == false)
            {
                // Make the failure visible instead of silently showing an empty list.
                Console.WriteLine("POP3 authentication failed.");
            }

            Console.WriteLine("Show spam subject list");
            Console.WriteLine("-----------------------------------");
            foreach (MailMessage spam in spamList)
            {
                Console.WriteLine(spam.Subject);
            }
            Console.WriteLine("-----------------------------------");
            Console.WriteLine("Exit press key");
            Console.ReadLine();
        }

        /// <summary>
        /// After confirmation on the console, downloads up to <see cref="MaxMessageCount"/>
        /// messages and writes their bodies, separated by blank lines, to <c>Spam.txt</c>.
        /// The file is left untouched when authentication fails.
        /// </summary>
        private static void CreateSpamDataFromMailbox()
        {
            Console.WriteLine("Update spam data?press y");
            if (Console.ReadLine() != "y") { return; }

            StringBuilder sb = new StringBuilder(1024 * 32);

            bool authenticated = ForEachMessage(mg =>
            {
                sb.AppendLine(mg.BodyText);
                sb.AppendLine();
            });
            if (authenticated == false)
            {
                // Do not replace existing training data with an empty file.
                Console.WriteLine("POP3 authentication failed. Spam.txt was not updated.");
                return;
            }

            // File.WriteAllText overwrites an existing file, so no prior delete is needed.
            File.WriteAllText("Spam.txt", sb.ToString());
        }

        /// <summary>
        /// Connects to the Gmail POP3 server with <see cref="UserName"/> and
        /// <see cref="Password"/>, and passes each of the first
        /// <see cref="MaxMessageCount"/> listed messages to <paramref name="handler"/>
        /// while the connection is still open.
        /// </summary>
        /// <param name="handler">Callback invoked once per downloaded message.</param>
        /// <returns>
        /// <c>true</c> when authentication succeeded (even if the mailbox was empty);
        /// <c>false</c> when it failed, in which case <paramref name="handler"/> is never called.
        /// </returns>
        private static bool ForEachMessage(Action<MailMessage> handler)
        {
            using (Pop3Client cl = new Pop3Client("pop.gmail.com", 995, UserName, Password))
            {
                cl.Ssl = true;
                cl.AuthenticateMode = Pop3AuthenticateMode.Auto;
                var authenticated = cl.Authenticate();
                if (authenticated == true)
                {
                    var l = cl.ExecuteList();
                    for (int i = 0; i < l.Count && i < MaxMessageCount; i++)
                    {
                        handler(cl.GetMessage(l[i].MailIndex));
                    }
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// After confirmation on the console, fetches the BBC Sport front page, follows
        /// the headline links it finds, and writes the combined article text to
        /// <c>Sport.txt</c>. The file is left untouched when no headline links are found.
        /// </summary>
        private static void CreateSportDataFromBbc()
        {
            Console.WriteLine("Update SPORT data?press y");
            if (Console.ReadLine() != "y") { return; }

            HttpClient cl = new HttpClient();
            HttpRequestCommand cm = new HttpRequestCommand("http://www.bbc.co.uk/sport/0/");
            cm.MethodName = HttpMethodName.Get;
            String htmlText = cl.GetBodyText(cm);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(htmlText);

            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(@"//div[@class=""type-a-headline-list-1""]//li//a[@data-published]");

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // e.g. after a page layout change; GetArticleText guards the same way.
            if (nodes == null)
            {
                Console.WriteLine("No headline links were found. Sport.txt was not updated.");
                Console.ReadLine();
                return;
            }

            List<String> urlList = new List<string>();
            foreach (HtmlNode node in nodes)
            {
                // The XPath only guarantees @data-published, so href may be missing.
                HtmlAttribute href = node.Attributes["href"];
                if (href == null) { continue; }
                urlList.Add(href.Value);
            }

            StringBuilder sb = new StringBuilder(1024 * 32);
            foreach (String url in urlList)
            {
                // Collected hrefs are treated as site-relative paths.
                sb.AppendLine(GetArticleText("http://www.bbc.co.uk" + url));
            }

            // File.WriteAllText overwrites an existing file, so no prior delete is needed.
            File.WriteAllText("Sport.txt", sb.ToString());
            Console.WriteLine("Sport.txt is created at " + Environment.CurrentDirectory);
            Console.ReadLine();
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns the HTML-decoded text
        /// of every paragraph inside the article's story body, one paragraph per line.
        /// </summary>
        /// <param name="url">Absolute URL of the article page to fetch.</param>
        /// <returns>
        /// The extracted paragraph text, or an empty string when the page contains no
        /// matching paragraphs.
        /// </returns>
        private static String GetArticleText(String url)
        {
            StringBuilder sb = new StringBuilder(8096);
            HttpClient cl = new HttpClient();
            HttpRequestCommand cm = new HttpRequestCommand(url);
            cm.MethodName = HttpMethodName.Get;
            String htmlText = cl.GetBodyText(cm);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(htmlText);

            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(@"//div[@class=""story-body""]//div[@class=""article""]//p");

            // No matching paragraphs: fall through and return the empty builder.
            if (nodes != null)
            {
                foreach (HtmlNode node in nodes)
                {
                    sb.AppendLine(HttpUtility.HtmlDecode(node.InnerText));
                }
            }
            return sb.ToString();
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License


Written By
CEO TinyBetter, Inc
Japan
I'm the CEO of TinyBetter, Inc. in Japan.

Comments and Discussions