Click here to Skip to main content
15,881,172 members
Articles / Programming Languages / C#

An (almost) RFC 822/2822 Compliant Date Parser

Rate me:
Please Sign up or sign in to vote.
3.56/5 (3 votes)
23 Jun 2006CPOL 39.7K   20   2
Parse an e-mail's DateTime

Introduction

In .NET Framework 1.1, the DateTime type provides Parse and ParseExact methods. Unfortunately, neither of them can reliably turn the date header of an e-mail into a DateTime.

The RFC2822DateTime class can be used in this case.

RFC 822/2822

The date format is defined in RFC 822 (the first RFC for "electronic mail") and in RFC 2822 (which supersedes RFC 822).

The Code

You'll need the System and System.Text.RegularExpressions namespaces (for DateTime and Regex, respectively) to make it work.

C#
/// <summary>
/// Parses RFC 822 / RFC 2822 date-time values (the format used by e-mail
/// "Date:" header fields) into a <see cref="System.DateTime"/>.
/// </summary>
public class RFC2822DateTime 
/*
 * Copyright (c)  vendredi13@007.freesurf.fr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
{
    /// <summary>
    /// Converts an RFC 822 / RFC 2822 date-time string to a
    /// <see cref="System.DateTime"/> expressed in UTC (GMT).
    /// </summary>
    /// <param name="adate">
    /// The header value, e.g. <c>Fri, 23 Jun 2006 10:30:45 +0200 (CEST)</c>.
    /// Parenthesised comments (including nested ones) and redundant white
    /// space are ignored. The day name is optional; the time zone is required.
    /// </param>
    /// <returns>
    /// The parsed instant shifted to UTC. The time zone offset is applied
    /// arithmetically; the <c>Kind</c> of the result is left as produced by
    /// the parser.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="adate"/> is null.
    /// </exception>
    /// <exception cref="System.FormatException">
    /// The text is not a valid date, the day name does not match the date,
    /// or the time zone is not recognised.
    /// </exception>
    public static System.DateTime fromString(string adate)
    {
        if (adate == null)
        {
            throw new ArgumentNullException("adate");
        }

        // Comments may appear anywhere in the value and may nest, so they
        // are removed before any other processing.
        string text = StripComments(adate);

        // Collapse runs of white space (folded header lines included) into
        // single spaces and drop leading/trailing blanks.
        text = Regex.Replace(text, "\\s+", " ").Trim();

        // Split off the optional "Fri," style day-of-week prefix.
        string dayName = "";
        string[] pieces = text.Split(new char[] { ',' }, 2);
        if (pieces.Length == 2)
        {
            dayName = pieces[0].Trim();
            text = pieces[1].Trim();
        }

        try
        {
            // The zone is the last blank-separated token; everything before
            // it is the date and time.
            int pos = text.LastIndexOf(' ');
            if (pos < 1)
            {
                throw new FormatException("probably not a date");
            }

            // Take everything up to (not including) the separating blank.
            string datePart = text.Substring(0, pos);
            string timeZone = text.Substring(pos + 1);

            // RFC dates always use English month names, so parsing must not
            // depend on the culture of the current thread.
            System.DateTime dt = Convert.ToDateTime(
                datePart, System.Globalization.CultureInfo.InvariantCulture);

            // The day name describes the date as written by the sender, so
            // it has to be verified before the value is shifted to UTC.
            if (dayName.Length != 0)
            {
                string expected = dt.DayOfWeek.ToString().Substring(0, 3);
                if (!string.Equals(dayName, expected, System.StringComparison.OrdinalIgnoreCase))
                {
                    throw new FormatException("invalid week of day");
                }
            }

            return dt.Add(GetOffsetToUtc(timeZone));
        }
        catch (Exception e)
        {
            // Report every failure as a FormatException that carries the
            // offending input, keeping the original error as inner exception.
            throw new FormatException(
                string.Format("Invalid date:{0}:{1}", e.Message, adate), e);
        }
    }

    /// <summary>
    /// Removes RFC 2822 comments (parenthesised text, possibly nested) from
    /// <paramref name="text"/>. A backslash inside a comment escapes the
    /// following character, so an escaped parenthesis does not change depth.
    /// </summary>
    private static string StripComments(string text)
    {
        System.Text.StringBuilder kept = new System.Text.StringBuilder(text.Length);
        int depth = 0;

        for (int i = 0; i < text.Length; i++)
        {
            char c = text[i];

            if (depth > 0 && c == '\\' && i + 1 < text.Length)
            {
                // Quoted pair inside a comment: skip the escaped character.
                i++;
            }
            else if (c == '(')
            {
                depth++;
            }
            else if (c == ')' && depth > 0)
            {
                depth--;
            }
            else if (depth == 0)
            {
                kept.Append(c);
            }
        }

        return kept.ToString();
    }

    /// <summary>
    /// Returns the amount of time that must be ADDED to a timestamp written
    /// in <paramref name="timeZone"/> to obtain the corresponding UTC time.
    /// </summary>
    /// <exception cref="System.FormatException">
    /// The zone is neither a numeric offset nor a zone name known here.
    /// </exception>
    private static System.TimeSpan GetOffsetToUtc(string timeZone)
    {
        // Numeric zone: sign followed by exactly four digits (hhmm).
        if (Regex.IsMatch(timeZone, "^[+\\-][0-9]{4}$"))
        {
            System.Globalization.CultureInfo invariant =
                System.Globalization.CultureInfo.InvariantCulture;
            int hours = Convert.ToInt32(timeZone.Substring(1, 2), invariant);
            int minutes = Convert.ToInt32(timeZone.Substring(3, 2), invariant);
            System.TimeSpan offset = new System.TimeSpan(hours, minutes, 0);

            // "+hhmm" means the written time is AHEAD of UTC, so the offset
            // is subtracted to reach UTC; "-hhmm" means it is added.
            return timeZone[0] == '+' ? offset.Negate() : offset;
        }

        // Single-letter military zones, using the table from RFC 822:
        // A..I = -1..-9, K..M = -10..-12, N..Y = +1..+12, Z = UTC
        // ("J" is not used). The value returned is the correction to UTC,
        // i.e. the negated zone offset.
        if (timeZone.Length == 1)
        {
            char letter = timeZone[0];
            if (letter >= 'A' && letter <= 'I')
            {
                return System.TimeSpan.FromHours(letter - 'A' + 1);
            }
            if (letter >= 'K' && letter <= 'M')
            {
                return System.TimeSpan.FromHours(letter - 'K' + 10);
            }
            if (letter >= 'N' && letter <= 'Y')
            {
                return System.TimeSpan.FromHours(-(letter - 'N' + 1));
            }
            if (letter == 'Z')
            {
                return System.TimeSpan.Zero;
            }
        }

        // Named zones defined by RFC 822 / RFC 2822.
        switch (timeZone)
        {
            case "UT":
            case "GMT": return System.TimeSpan.Zero;
            case "EDT": return System.TimeSpan.FromHours(4);
            case "EST":
            case "CDT": return System.TimeSpan.FromHours(5);
            case "CST":
            case "MDT": return System.TimeSpan.FromHours(6);
            case "MST":
            case "PDT": return System.TimeSpan.FromHours(7);
            case "PST": return System.TimeSpan.FromHours(8);
            default: throw new FormatException("invalid time zone");
        }
    }
}

Bugs and Limitations

This code was tested against a mailbox containing 300,000 messages, and no errors were seen.

As noted in the code, nested comments such as (comment (comment inside) outside) are left as an exercise for the reader.

This code is not designed for speed, but it should consume reasonable resources.

History

  • 23rd June, 2006: Initial post 

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
France France
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions

 
GeneralTwo bugs, with corrections supplied Pin
Ross Presser11-May-07 11:51
Ross Presser11-May-07 11:51 
GeneralGreat! Pin
Christian Salvaneschi25-Sep-06 10:10
Christian Salvaneschi25-Sep-06 10:10 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.