Click here to Skip to main content
15,884,298 members
Articles / Web Development / ASP.NET

Implementation of XML Information Retrieval by LINQ

Rate me:
Please Sign up or sign in to vote.
4.86/5 (4 votes)
31 May 2012CPOL5 min read 23.9K   391   7  
We describe every concept in the XML corpus by its full set of words, grouped by tag. Term frequencies and inverse document frequencies of the words in the given document are used.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Text.RegularExpressions;

namespace IRProj
{
    /// <summary>
    /// Breaks raw text into lower-cased word tokens, dropping delimiters,
    /// blank fragments and stop words.
    /// </summary>
    public class Tokenizer
    {
        /// <summary>
        /// Delimiter set used to cut the input apart: space, tab, braces,
        /// parentheses, colon, semicolon, period and line feed.
        /// The pattern is wrapped in a capturing group, so Regex.Split also
        /// returns the delimiters themselves as array elements; Partition
        /// filters them out again. Built once and shared, since Regex
        /// instances are immutable and safe to reuse across calls.
        /// </summary>
        private static readonly Regex Delimiters =
            new Regex("([ \\t{}():;. \n])", RegexOptions.Compiled);

        /// <summary>
        /// Splits <paramref name="input"/> on the delimiter set and returns the
        /// remaining words in their original order.
        /// </summary>
        /// <param name="input">
        /// Text to tokenize. A null or empty string yields an empty list.
        /// </param>
        /// <returns>
        /// A new list of lower-cased tokens with surrounding whitespace removed,
        /// excluding delimiters, empty fragments and any token for which
        /// StopWordsHandler.IsStopword returns true.
        /// </returns>
        public List<string> Partition(string input)
        {
            List<string> filter = new List<string>();

            // Nothing to tokenize; avoids a NullReferenceException on null input.
            if (string.IsNullOrEmpty(input))
            {
                return filter;
            }

            // Invariant lower-casing keeps the result independent of the current
            // thread culture (e.g. the Turkish dotless-i mapping).
            string[] tokens = Delimiters.Split(input.ToLowerInvariant());

            foreach (string rawToken in tokens)
            {
                // '\r' is not a delimiter, so CRLF input leaves it glued to the
                // preceding word; trimming removes it before lookup and storage.
                string token = rawToken.Trim();

                // Skip blank fragments and the delimiter characters that the
                // capturing group caused Split to emit.
                if (token.Length == 0 || Delimiters.IsMatch(token))
                {
                    continue;
                }

                if (!StopWordsHandler.IsStopword(token))
                {
                    filter.Add(token);
                }
            }

            return filter;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer, PascalSystem, located in Isfahan, Iran
Iran (Islamic Republic of) Iran (Islamic Republic of)
I graduated from Esfahan University (BS in Applied Mathematics)
Master of Computer Science at Saarland University

Comments and Discussions