Click here to Skip to main content
15,891,513 members
Articles / Programming Languages / C#

Using Regular Expression for Parsing

Rate me:
Please Sign up or sign in to vote.
2.85/5 (4 votes)
16 Mar 2007 CPOL 36.6K   264   21  
The derived highlighter currently supports the RTF and HTML output formats.
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Runtime.InteropServices;
using System.Text.RegularExpressions;

namespace SyntaxEngineClass
{
    /// <summary>
    /// Base class for regular-expression driven syntax analysis engines.
    /// Derived classes register an ordered list of <see cref="SyntaxItem"/> rules and
    /// <see cref="Analyze"/> splits source text into blocks, one per matched rule.
    /// Date: 2007-03-17
    /// Author: ZswangY37
    /// Support: wjhu111#21cn.com
    /// Thanks to kissknife and chinnel on CSDN for their solution:
    /// http://community.csdn.net/Expert/TopicView.asp?id=5400165
    /// </summary>
    class SyntaxEngine
    {
        /// <summary>
        /// One syntax rule: a regular expression, its options, a display name and
        /// an index number.
        /// </summary>
        public class SyntaxItem
        {
            private readonly string FPattern; // regular expression
            private readonly RegexOptions FOptions; // regular expression options
            private readonly string FName; // rule name
            private readonly int FIndex; // index number

            public string Pattern { get { return FPattern; } } // regular expression
            public RegexOptions Options { get { return FOptions; } } // regular expression options
            public string Name { get { return FName; } } // rule name
            public int Index { get { return FIndex; } } // index number

            /// <summary>
            /// Creates a syntax rule.
            /// </summary>
            /// <param name="APattern">Regular expression; expected to be anchored with ^ because
            /// rules are matched against the start of the text that is still unprocessed.</param>
            /// <param name="AOptions">Options used when evaluating the expression.</param>
            /// <param name="AName">Name of the rule.</param>
            /// <param name="AIndex">Index number of the rule.</param>
            public SyntaxItem(string APattern, RegexOptions AOptions,
                string AName, int AIndex)
            {
                FPattern = APattern;
                FOptions = AOptions;
                FName = AName;
                FIndex = AIndex;
            }
        }

        /// <summary>
        /// One block of analyzed text together with the rule that matched it.
        /// </summary>
        public class AnalyzeReslut
        {
            private readonly SyntaxItem FItem; // rule that matched
            private readonly string FBlock; // matched text

            public SyntaxItem Item { get { return FItem; } }
            public string Block { get { return FBlock; } }

            public AnalyzeReslut(SyntaxItem AItem, string ABlock)
            {
                FItem = AItem;
                FBlock = ABlock;
            }
        }

        private List<SyntaxItem> FSyntaxItems = new List<SyntaxItem>();
        private List<AnalyzeReslut> FAnalyzeResluts = new List<AnalyzeReslut>();

        /// <summary>Rules, tried in list order by <see cref="Analyze"/>.</summary>
        public List<SyntaxItem> SyntaxItems { get { return FSyntaxItems; } }

        /// <summary>Blocks produced by the most recent call to <see cref="Analyze"/>.</summary>
        public List<AnalyzeReslut> AnalyzeResluts { get { return FAnalyzeResluts; } }


        /// <summary>
        /// Splits the code into blocks. At every step the rules are tried in order and the
        /// first rule that matches a non-empty piece of text at the very start of the
        /// remaining code wins; that text is recorded in <see cref="AnalyzeResluts"/> and
        /// removed from the front. The recorded blocks, concatenated in order, therefore
        /// always equal the consumed prefix of the input.
        /// </summary>
        /// <param name="ACode">Code to analyze.</param>
        /// <returns>
        /// true when the whole input was split into blocks; false when no rules are
        /// registered, when <paramref name="ACode"/> is null, or when analysis stopped early
        /// because no rule matched the remaining text (the blocks found so far are kept).
        /// </returns>
        public virtual bool Analyze(string ACode)
        {
            if (FSyntaxItems.Count <= 0) return false;
            if (ACode == null) return false;
            AnalyzeResluts.Clear();
            string vCode = ACode;
            bool vFind = true;
            while (vFind && (vCode.Length > 0))
            {
                vFind = false;
                foreach (SyntaxItem vSyntaxItem in FSyntaxItems)
                {
                    // Evaluate the expression once and reuse the match for both the
                    // recorded block and the amount of text to consume.
                    Match vMatch = Regex.Match(vCode, vSyntaxItem.Pattern,
                        vSyntaxItem.Options);

                    // An empty match consumes nothing and would make this loop spin
                    // forever; a match that does not start at position 0 would pull text
                    // out of the middle of the code. Neither counts as a hit.
                    if (!vMatch.Success || vMatch.Length == 0 || vMatch.Index != 0)
                        continue;

                    AnalyzeResluts.Add(new AnalyzeReslut(vSyntaxItem, vMatch.Value));
                    // Drop exactly the matched text, not every occurrence of the pattern.
                    vCode = vCode.Substring(vMatch.Length);
                    vFind = true;
                    break;
                }
            }
            return vCode.Length == 0;
        }
    }

    /// <summary>
    /// Syntax analysis engine preloaded with lexical rules for C#.
    /// Rules are tried in the order they are added, and every rule stores its position
    /// as its index; SyntaxHighlight uses that index to pick the highlight style, so the
    /// order of the rules below must not change.
    /// </summary>
    class CSharpEngine : SyntaxEngine
    {
        public CSharpEngine()
        {
            SyntaxItems.Add(new SyntaxItem(@"^\s+", RegexOptions.None,
                "Whitespace", SyntaxItems.Count));
            SyntaxItems.Add(new SyntaxItem(@"^\/\/[^\n]*[\n]?", RegexOptions.None,
                "LineComment", SyntaxItems.Count));
            // Singleline lets '.' cross line breaks; without it a comment spanning several
            // lines never matched. An unterminated comment runs to the end of the input.
            SyntaxItems.Add(new SyntaxItem(@"^\/\*.*?(?:\*\/|\z)", RegexOptions.Singleline,
                "MultiComment", SyntaxItems.Count));
            // The name is the Chinese word for "directive" (U+6307 U+4EE4), written with
            // escapes so that the source file encoding cannot corrupt it.
            SyntaxItems.Add(new SyntaxItem(@"^#\s*(define|elif|else|endif|endregion|" +
                @"error|if|line|pragma|region|undef|using|warning)\b[^\n]*[\n]?",
                RegexOptions.None, "\u6307\u4EE4", SyntaxItems.Count));
            SyntaxItems.Add(new SyntaxItem(@"^(abstract|event|new|struct|as|explicit|" +
                @"null|switch|base|extern|object|this|bool|false|operator|throw|break|" +
                @"finally|out|true|byte|fixed|override|try|case|float|params|typeof|" +
                @"catch|for|private|uint|char|foreach|protected|ulong|checked|goto|" +
                @"public|unchecked|class|if|readonly|unsafe|const|implicit|ref|ushort|" +
                @"continue|in|return|using|decimal|int|sbyte|virtual|default|interface|" +
                @"sealed|volatile|delegate|internal|short|void|do|is|sizeof|while|" +
                @"double|lock|stackalloc|else|long|static|enum|namespace|string)\b",
                RegexOptions.None, "Reserved", SyntaxItems.Count));
            SyntaxItems.Add(new SyntaxItem(@"^(get|partial|set|value|where|yield)\b",
                RegexOptions.None, "Reserved", SyntaxItems.Count));
            // A run of punctuation characters (a single '|' is now accepted as well).
            // The look-ahead stops the run in front of "//" and "/*" so that a comment
            // directly after punctuation is not swallowed, and in front of ".5" so that
            // a leading-dot literal reaches the Float rule.
            SyntaxItems.Add(new SyntaxItem(
                @"^(?:(?!\/\/|\/\*|\.\d)[-+&|\/=%~!#$^*(){}\[\]\\;:<>?,.])+",
                RegexOptions.None, "Symbol", SyntaxItems.Count));
            // Hexadecimal or decimal integer with optional u/l suffixes. The look-ahead
            // includes \d so the digit run cannot backtrack and split "12.5" into "1" and
            // "2.5"; e, d, m, f and ".digit" hand the literal over to the Float rule.
            SyntaxItems.Add(new SyntaxItem(
                @"^0x[\da-f]+(?:ul?|lu?)?|^\d+(?![\dedmf]|\.\d)(?:ul?|lu?)?",
                RegexOptions.IgnoreCase, "Number", SyntaxItems.Count));
            // Real literal: digits with a fraction, an exponent (sign after the 'e') or
            // an m/d/f suffix.
            SyntaxItems.Add(new SyntaxItem(
                @"^\d*\.\d+(?:e[+-]?\d+)?[mdf]?|^\d+e[+-]?\d+[mdf]?|^\d+[mdf]",
                RegexOptions.IgnoreCase, "Float", SyntaxItems.Count));
            // Verbatim string ("" is an embedded quote) or regular string (a backslash
            // escapes the next character). The alternatives inside each loop cannot match
            // the same character, so matching does not backtrack exponentially.
            SyntaxItems.Add(new SyntaxItem(
                @"^@""(?:[^""]|"""")*""|^""(?:[^""\\]|\\.)*""",
                RegexOptions.None, "string", SyntaxItems.Count));
            SyntaxItems.Add(new SyntaxItem(@"^\'(\\\')*[^\']*\'", RegexOptions.None,
                "Character", SyntaxItems.Count));
            // At least one word character (optionally @-prefixed); the former "^\w*"
            // also matched the empty string and consumed nothing.
            SyntaxItems.Add(new SyntaxItem(@"^@?\w+", RegexOptions.None,
                "Identifier", SyntaxItems.Count));
        }
    }

    /// <summary>
    /// Renders the blocks produced by a SyntaxEngine as highlighted RTF or HTML text.
    /// Each block is styled with the HighlightItem whose position in
    /// <see cref="HighlightItems"/> equals the index of the rule that matched the block.
    /// </summary>
    class SyntaxHighlight
    {
        /// <summary>
        /// Text style applied to one kind of block.
        /// </summary>
        public class HighlightItem
        {
            private readonly Color FForeColor; // foreground colour
            private readonly bool FBold; // bold
            private readonly bool FItalic; // italic
            private readonly bool FUnderline; // underline
            public Color ForeColor { get { return FForeColor; } } // foreground colour
            public bool Bold { get { return FBold; } } // bold
            public bool Italic { get { return FItalic; } } // italic
            public bool Underline { get { return FUnderline; } } // underline
            public HighlightItem(Color AForeColor, bool ABold, bool AItalic, bool AUnderline)
            {
                FForeColor = AForeColor;
                FBold = ABold;
                FItalic = AItalic;
                FUnderline = AUnderline;
            }
        }

        // The engine's own result list (not a copy), so a later Analyze call on the
        // engine is reflected by the next MakeRtf / MakeHtml call.
        private List<SyntaxEngine.AnalyzeReslut> FAnalyzeResluts;
        private Font FDefaultFont;
        private List<HighlightItem> FHighlightItems = new List<HighlightItem>();

        /// <summary>Styles, looked up by rule index.</summary>
        public List<HighlightItem> HighlightItems { get { return FHighlightItems; } }

        /// <summary>
        /// Creates a highlighter for the results of the given engine.
        /// </summary>
        /// <param name="ASyntaxEngine">Engine whose analysis results are rendered.</param>
        /// <param name="ADefaultFont">Font used for the generated text.</param>
        /// <exception cref="ArgumentNullException">ASyntaxEngine is null.</exception>
        public SyntaxHighlight(SyntaxEngine ASyntaxEngine, Font
            ADefaultFont)
        {
            if (ASyntaxEngine == null)
                throw new ArgumentNullException("ASyntaxEngine");
            FAnalyzeResluts = ASyntaxEngine.AnalyzeResluts;
            FDefaultFont = ADefaultFont;
        }

        /// <summary>
        /// Returns the style index for a block: the index of the rule that matched it,
        /// or 0 when that index has no entry in <see cref="HighlightItems"/>.
        /// </summary>
        private int StyleIndexOf(SyntaxEngine.AnalyzeReslut AReslut)
        {
            int vIndex = AReslut.Item.Index;
            if (vIndex < 0 || vIndex >= HighlightItems.Count) vIndex = 0;
            return vIndex;
        }

        /// <summary>
        /// Escapes the characters that have a special meaning in RTF.
        /// </summary>
        /// <param name="AText">Text to escape; null is treated as empty.</param>
        /// <returns>Text that can be embedded in an RTF document.</returns>
        public string TextToRtf(string AText)
        {
            if (AText == null) return "";
            System.Text.StringBuilder Result = new System.Text.StringBuilder(AText.Length);
            foreach (char vChar in AText)
            {
                switch (vChar)
                {
                    case '\\':
                        Result.Append(@"\\");
                        break;
                    case '{':
                        Result.Append(@"\{");
                        break;
                    case '}':
                        Result.Append(@"\}");
                        break;
                    default:
                        if (vChar > (char)127)
                        {
                            // \uN takes a signed 16-bit value, so code units above
                            // 32767 have to be written as negative numbers. The '?'
                            // is the fallback character for readers without Unicode.
                            Result.Append(@"\u");
                            Result.Append(unchecked((short)vChar).ToString(
                                System.Globalization.CultureInfo.InvariantCulture));
                            Result.Append('?');
                        }
                        else Result.Append(vChar);
                        break;
                }
            }
            return Result.ToString();
        }

        [DllImport("user32.dll")]
        private static extern uint GetKBCodePage();
        [DllImport("kernel32.dll")]
        private static extern ushort GetSystemDefaultLangID();

        /// <summary>
        /// Renders the analyzed code as an RTF document.
        /// </summary>
        /// <returns>The RTF text, or an empty string when no highlight items are
        /// defined.</returns>
        public string MakeRtf()
        {
            if (HighlightItems.Count <= 0) return "";
            System.Globalization.CultureInfo vInvariant =
                System.Globalization.CultureInfo.InvariantCulture;
            System.Text.StringBuilder Result = new System.Text.StringBuilder();

            // Header and font table. The font name is escaped because it may contain
            // non-ASCII or RTF-special characters.
            Result.Append(@"{\rtf1\ansi\ansicpg");
            Result.Append(GetKBCodePage().ToString(vInvariant));
            Result.Append(@"\deff0\deflang1033\deflangfe");
            Result.Append(GetSystemDefaultLangID().ToString(vInvariant));
            Result.Append(@"{\fonttbl{\f0\fmodern ");
            Result.Append(TextToRtf(FDefaultFont.Name));
            Result.Append(";}}\r\n");

            // Colour table: entry 0 is the empty "auto" colour, so the colour of
            // highlight item i is referenced as \cf(i + 1).
            Result.Append(@"{\colortbl ;");
            foreach (HighlightItem vHighlightItem in HighlightItems)
                Result.AppendFormat(vInvariant, @"\red{0}\green{1}\blue{2};",
                    vHighlightItem.ForeColor.R, vHighlightItem.ForeColor.G,
                    vHighlightItem.ForeColor.B);
            Result.Append("}\r\n");
            Result.Append(@"\viewkind4\uc1\pard\f0\fs20" + "\r\n");

            // Character formatting is only emitted when it differs from the previous block.
            bool vBold = false, vItalic = false, vUnderline = false;
            foreach (SyntaxEngine.AnalyzeReslut vAnalyzeReslut in
              FAnalyzeResluts)
            {
                int i = StyleIndexOf(vAnalyzeReslut);
                HighlightItem vStyle = HighlightItems[i];
                if (vBold != vStyle.Bold)
                    Result.Append(vStyle.Bold ? @"\b1" : @"\b0");
                if (vItalic != vStyle.Italic)
                    Result.Append(vStyle.Italic ? @"\i1" : @"\i0");
                if (vUnderline != vStyle.Underline)
                    Result.Append(vStyle.Underline ? @"\ul1" : @"\ul0");
                Result.AppendFormat(vInvariant, @"\cf{0} ", i + 1);
                vBold = vStyle.Bold;
                vItalic = vStyle.Italic;
                vUnderline = vStyle.Underline;

                // Raw line breaks are ignored by RTF readers, so every line break
                // (CRLF or bare LF) becomes \par. The CRLF written after \par ends the
                // control word; text written directly after "\par" would be read as part
                // of the control word or lose its first space.
                Result.Append(TextToRtf(vAnalyzeReslut.Block)
                    .Replace("\r\n", "\n")
                    .Replace("\n", "\\par\r\n"));
            }
            Result.Append('}');
            return Result.ToString();
        }

        /// <summary>
        /// Escapes the characters that have a special meaning in HTML.
        /// </summary>
        /// <param name="AText">Text to escape; null is treated as empty.</param>
        /// <returns>Text that can be embedded in an HTML document.</returns>
        private string TextToHtml(string AText)
        {
            if (AText == null) return "";
            System.Text.StringBuilder Result = new System.Text.StringBuilder(AText.Length);
            foreach (char vChar in AText)
            {
                switch (vChar)
                {
                    case '&':
                        Result.Append(@"&amp;");
                        break;
                    case ' ':
                        Result.Append(@"&nbsp;");
                        break;
                    case '<':
                        Result.Append(@"&lt;");
                        break;
                    case '>':
                        Result.Append(@"&gt;");
                        break;
                    case '"':
                        Result.Append(@"&quot;");
                        break;
                    default:
                        if (vChar > (char)127)
                        {
                            // Numeric character reference for everything outside ASCII.
                            Result.Append(@"&#");
                            Result.Append(((int)vChar).ToString(
                                System.Globalization.CultureInfo.InvariantCulture));
                            Result.Append(';');
                        }
                        else Result.Append(vChar);
                        break;
                }
            }
            return Result.ToString();
        }

        /// <summary>
        /// Formats a colour as an HTML colour value.
        /// </summary>
        /// <param name="AColor">Colour to format.</param>
        /// <returns>The colour in #RRGGBB notation.</returns>
        private string ColorToHtml(Color AColor)
        {
            return string.Format("#{0:X2}{1:X2}{2:X2}", AColor.R, AColor.G, AColor.B);
        }

        /// <summary>
        /// Renders the analyzed code as an HTML fragment.
        /// </summary>
        /// <returns>The HTML text, or an empty string when no highlight items are
        /// defined (same contract as <see cref="MakeRtf"/>).</returns>
        public string MakeHtml()
        {
            if (HighlightItems.Count <= 0) return "";
            System.Text.StringBuilder Result = new System.Text.StringBuilder();

            // The size is written in points with the invariant culture: a decimal comma
            // (e.g. "9,75pt") is not valid CSS.
            Result.Append(@"<code><pre style=""font-size:");
            Result.Append(FDefaultFont.SizeInPoints.ToString(
                System.Globalization.CultureInfo.InvariantCulture));
            Result.Append(@"pt;font-family:");
            Result.Append(FDefaultFont.Name);
            Result.Append(@""">");

            foreach (SyntaxEngine.AnalyzeReslut vAnalyzeReslut in
              FAnalyzeResluts)
            {
                HighlightItem vStyle = HighlightItems[StyleIndexOf(vAnalyzeReslut)];
                // CSS declarations use "property:value"; "color=..." is ignored by browsers.
                string vLeft = string.Format(@"<span style=""color:{0}"">",
                    ColorToHtml(vStyle.ForeColor));
                string vRight = "</span>";
                // Closing tags are prepended so the elements close in reverse order.
                if (vStyle.Bold)
                {
                    vLeft += "<b>";
                    vRight = "</b>" + vRight;
                }
                if (vStyle.Italic)
                {
                    vLeft += "<i>";
                    vRight = "</i>" + vRight;
                }
                if (vStyle.Underline)
                {
                    vLeft += "<u>";
                    vRight = "</u>" + vRight;
                }

                Result.Append(vLeft);
                Result.Append(TextToHtml(vAnalyzeReslut.Block));
                Result.Append(vRight);
            }

            Result.Append("</pre></code>");
            return Result.ToString();
        }
    }

    /// <summary>
    /// Highlighter preloaded with a colour scheme for C#. The styles are listed in the
    /// same order as the rules registered by CSharpEngine, because a block's rule index
    /// selects the style at the same position.
    /// </summary>
    class CSharpHighlight : SyntaxHighlight
    {
        public CSharpHighlight(SyntaxEngine ASyntaxEngine, Font
            ADefaultFont)
            : base(ASyntaxEngine, ADefaultFont)
        {
            HighlightItems.AddRange(new HighlightItem[]
            {
                new HighlightItem(Color.White, false, false, false),      //  0: whitespace
                new HighlightItem(Color.Green, false, false, false),      //  1: line comment
                new HighlightItem(Color.Green, false, false, false),      //  2: multi-line comment
                new HighlightItem(Color.Blue, false, false, false),       //  3: preprocessor directive
                new HighlightItem(Color.Black, true, false, false),       //  4: keyword
                new HighlightItem(Color.Black, true, false, false),       //  5: contextual keyword
                new HighlightItem(Color.BlueViolet, false, false, false), //  6: symbol
                new HighlightItem(Color.Red, true, false, false),         //  7: integer number
                new HighlightItem(Color.Red, true, false, false),         //  8: floating-point number
                new HighlightItem(Color.Maroon, false, false, false),     //  9: string
                new HighlightItem(Color.Maroon, false, false, false),     // 10: character
                new HighlightItem(Color.Black, false, false, false)       // 11: identifier
            });
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
China China
zswang

Comments and Discussions