Click here to Skip to main content
11,709,220 members (53,026 online)
Click here to Skip to main content
Add your own
alternative version

Solving complex parsing tasks with RegexTreeer

Sergey Stoyan, 23 Oct 2008 LGPL3 17.7K 285 22
Solving complex parsing tasks by utilizing Regular Expression trees built with RegexTreeer.
//Author: Sergey Stoyan,
//        22 March 2008
//Copyright: (C) 2008, Sergey Stoyan
using System;
using System.IO;
using CliverSoft;

namespace Test
{
    /// <summary>
    /// Sample that parses a product-list page with Cliver.Parser and prints
    /// the captured values to the console.
    /// </summary>
    class Test
    {
        /// <summary>
        /// Reads the sample product page from disk.
        /// </summary>
        internal void Run()
        {
            //text to be parsed
            string page = File.ReadAllText("../../_pages/Products.html");
            // NOTE(review): 'page' is not used after this point in the visible code;
            // presumably it is meant to be passed to process_product_list - confirm.
        }

        /// <summary>
        /// CliverSoft.Parser intended for pages with a product list,
        /// created from the Products.rgx config file.
        /// </summary>
        Cliver.Parser product_parser = new Cliver.Parser("../../_config_files/Products.rgx");

        /// <summary>
        /// Treat the page with CliverSoft.Parser and print the captured values.
        /// </summary>
        /// <param name="page">text to be parsed</param>
        void process_product_list(string page)
        {
            Cliver.GroupCapture gc = product_parser.Parse(page);

            //print the attributes of every captured "Product" group
            foreach (Cliver.GroupCapture product in gc["Product"])
            {
                Console.WriteLine("Brand: " + product.ValueOf("Brand", 0));
                Console.WriteLine("Name: " + product.ValueOf("Name", 0));
                Console.WriteLine("Description: " + product.ValueOf("Description", 0));
                Console.WriteLine("Price: " + product.ValueOf("Price", 0));
            }

            Console.WriteLine(gc.ValueOf("NextPageUrl", 0));

            //example of using the shortcut methods
            Console.WriteLine("\n\nNextPageUrl: " + gc.FirstValueOf("NextPageUrl"));
            Cliver.GroupCapture product2 = gc.LastOf("Product");
            Console.WriteLine("\n\nLast product:");
            foreach (string attribute in product2.Keys)
            {
                Console.WriteLine(attribute + ": " + product2.FirstValueOf(attribute));
            }

            //example of a possible but inefficient and unsafe use:
            //inefficient because complete arrays are fetched to get a single value,
            //unsafe because the intermediate GroupCapture's are not checked for existence
            Console.WriteLine("\n\nName of the first product: " + gc["Product"][0]["Name"][0].Value);

            //example of a more efficient but still unsafe use
            Console.WriteLine("\n\nName of the first product: " + gc.FirstOf("Product").FirstValueOf("Name"));
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.


This article, along with any associated source code and files, is licensed under The GNU Lesser General Public License (LGPLv3)


About the Author

Sergey Stoyan
Architect, CliverSoft
Ukraine
Sergey graduated as an applied mathematician. He specializes in client/server applications, backup systems, data parsing tools, web crawlers and search engines. He works for CliverSoft Co. His favorite languages are C#, C++ and Perl.

You may also be interested in...

| Advertise | Privacy | Terms of Use | Mobile
Web03 | 2.8.150819.1 | Last Updated 23 Oct 2008
Article Copyright 2008 by Sergey Stoyan
Everything else Copyright © CodeProject, 1999-2015
Layout: fixed | fluid