65.9K
CodeProject is changing. Read more.
Home

Using Regular Expression for Parsing

starIconstarIcon
emptyStarIcon
starIcon
emptyStarIconemptyStarIcon

2.85/5 (4 votes)

Mar 17, 2007

CPOL
viewsIcon

36953

downloadIcon

264

The derived highlighters currently support the RTF and HTML formats.

Screenshot - SyntaxDemo.gif

Introduction

This article discusses how to use regular expressions for syntax analysis, and how to render the results of that analysis as HTML or RTF text.

Background

Regular Expression Analysis:

/// <summary>
/// Splits <paramref name="ACode"/> into blocks by repeatedly trying each
/// registered syntax item against the remaining text, and stores the
/// blocks in <c>AnalyzeResluts</c> in the order they were found.
/// </summary>
/// <param name="ACode">The text to analyze.</param>
/// <returns>
/// <c>false</c> when no syntax items are registered or
/// <paramref name="ACode"/> is <c>null</c>; otherwise <c>true</c>, even if
/// analysis stopped early because no syntax item matched the remaining text.
/// </returns>
public virtual bool Analyze(string ACode)
{
    if (FSyntaxItems.Count <= 0) return false;
    if (ACode == null) return false;
    AnalyzeResluts.Clear();
    string vCode = ACode;
    bool vFind = true;
    while (vFind && (vCode.Length > 0))
    {
        vFind = false;
        foreach (SyntaxItem vSyntaxItem in FSyntaxItems)
        {
            // Evaluate the pattern once instead of IsMatch + Match + Replace.
            Match vMatch = Regex.Match(vCode, vSyntaxItem.Pattern,
                vSyntaxItem.Options);

            // A zero-length match consumes nothing; accepting it would leave
            // vCode unchanged and the outer loop would never terminate.
            if (!vMatch.Success || vMatch.Length == 0) continue;

            AnalyzeResluts.Add(new AnalyzeReslut(vSyntaxItem, vMatch.Value));

            // Remove only the block that was just recorded. Regex.Replace
            // would strip every occurrence of the pattern from the remaining
            // text, silently discarding input that was never reported.
            vCode = vCode.Remove(vMatch.Index, vMatch.Length);
            vFind = true;
            break;
        }
    }
    return true;
}
  • SyntaxEngineClass: the base class of the parsing engine; derived classes define a grammar by adding items to the SyntaxItems property
  • SyntaxHighlight: the base class of the highlighting engine; derived classes define colors and font styles by adding items of type HighlightItem

This text is machine translated; if anything is unclear, please refer to the code:

/// <summary>
/// Read-only description of a single syntax rule: a regular expression
/// pattern, the options to evaluate it with, a name and an index.
/// </summary>
public class SyntaxItem
{
    private readonly string _pattern;
    private readonly RegexOptions _options;
    private readonly string _name;
    private readonly int _index;

    /// <summary>
    /// Creates a syntax item from the supplied values; they are stored as given.
    /// </summary>
    /// <param name="APattern">Regular expression pattern of the rule.</param>
    /// <param name="AOptions">Options used when the pattern is evaluated.</param>
    /// <param name="AName">Name of the rule.</param>
    /// <param name="AIndex">Index of the rule.</param>
    public SyntaxItem(string APattern, RegexOptions AOptions,
        string AName, int AIndex)
    {
        this._pattern = APattern;
        this._options = AOptions;
        this._name = AName;
        this._index = AIndex;
    }

    /// <summary>Regular expression pattern passed to the constructor.</summary>
    public string Pattern
    {
        get { return this._pattern; }
    }

    /// <summary>Regular expression options passed to the constructor.</summary>
    public RegexOptions Options
    {
        get { return this._options; }
    }

    /// <summary>Name passed to the constructor.</summary>
    public string Name
    {
        get { return this._name; }
    }

    /// <summary>Index passed to the constructor.</summary>
    public int Index
    {
        get { return this._index; }
    }
}

/// <summary>
/// One entry of an analysis result: a syntax item paired with a block of text.
/// </summary>
public class AnalyzeReslut
{
    private readonly SyntaxItem _item;
    private readonly string _block;

    /// <summary>
    /// Creates a result entry from the supplied values; they are stored as given.
    /// </summary>
    /// <param name="AItem">Syntax item associated with the block.</param>
    /// <param name="ABlock">Text of the block.</param>
    public AnalyzeReslut(SyntaxItem AItem, string ABlock)
    {
        this._item = AItem;
        this._block = ABlock;
    }

    /// <summary>Syntax item passed to the constructor.</summary>
    public SyntaxItem Item
    {
        get { return this._item; }
    }

    /// <summary>Block text passed to the constructor.</summary>
    public string Block
    {
        get { return this._block; }
    }
}

Use the following regular expressions as a reference when writing the rules for other languages:

// Each rule is anchored with '^' so it only matches at the start of the
// text being tested. The index argument is the rule's position in the list
// at the time it is added.

// A run of leading whitespace.
SyntaxItems.Add(new SyntaxItem(@"^\s+", RegexOptions.None,
    "Whitespace", SyntaxItems.Count));
// A "//" comment up to and including the end of the line, if present.
SyntaxItems.Add(new SyntaxItem(@"^\/\/[^\n]*[\n]?", RegexOptions.None,
    "LineComment", SyntaxItems.Count));
// A "/* ... */" comment. Singleline lets '.' match '\n' as well; without it
// a comment that spans more than one line would never be recognised.
SyntaxItems.Add(new SyntaxItem(@"^\/\*.*?\*\/", RegexOptions.Singleline,
    "MultiComment", SyntaxItems.Count));

Every regular expression you add must begin with '^', and it must not be able to match an empty string; otherwise a match of length 0 would send the analysis into an infinite loop.

History

  • 17th March, 2007: Version 1.0