Click here to Skip to main content
15,896,557 members
Articles / Programming Languages / C#

Lucene.Net – Custom Synonym Analyzer

Rate me:
Please Sign up or sign in to vote.
4.92/5 (25 votes)
10 Sep 2013 | Apache | 4 min read | 114.2K | 3.4K | 98
How to use Lucene.net search to work with synonyms
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Collections.ObjectModel;

namespace Lucene.Net.SynonymEngine
{
    /// <summary>
    /// An <see cref="ISynonymEngine"/> whose synonym groups are read from an XML file.
    /// </summary>
    /// <remarks>
    /// The file is queried with the XPath <c>/synonyms/group</c>; every <c>syn</c>
    /// child element of a <c>group</c> contributes one word to that group.
    /// </remarks>
    public class XmlSynonymEngine : ISynonymEngine
    {
        // Each entry is one group of words that are synonyms of each other.
        private readonly List<ReadOnlyCollection<string>> _synonymGroups =
            new List<ReadOnlyCollection<string>>();

        /// <summary>
        /// Loads every synonym group from the given XML file.
        /// </summary>
        /// <param name="xmlSynonymFilePath">Path (or URL) handed to <see cref="XmlDocument.Load(string)"/>.</param>
        /// <remarks>
        /// Exceptions raised by <see cref="XmlDocument.Load(string)"/> (missing file,
        /// malformed XML, ...) are not caught here and propagate to the caller.
        /// </remarks>
        public XmlSynonymEngine(string xmlSynonymFilePath)
        {
            XmlDocument doc = new XmlDocument();
            doc.Load(xmlSynonymFilePath);

            foreach (XmlNode groupNode in doc.SelectNodes("/synonyms/group"))
            {
                // Only the <syn> child elements are synonyms.
                XmlNodeList synNodes = groupNode.SelectNodes("child::syn");

                List<string> words = new List<string>(synNodes.Count);

                // Enumerate the selected <syn> nodes, NOT the group node itself:
                // enumerating the group would also yield comments and any other
                // child elements, whose text would wrongly be stored as synonyms.
                foreach (XmlNode synNode in synNodes)
                {
                    words.Add(synNode.InnerText.Trim());
                }

                // Groups are exposed read-only so callers cannot alter engine state.
                _synonymGroups.Add(new ReadOnlyCollection<string>(words));
            }
        }

        #region ISynonymEngine Members

        /// <summary>
        /// Returns the first synonym group that contains <paramref name="word"/>.
        /// </summary>
        /// <param name="word">The word to look up; compared with the default (ordinal, case-sensitive) string equality.</param>
        /// <returns>
        /// The read-only group containing <paramref name="word"/> (the word itself is
        /// part of the result), or <c>null</c> when no group contains it.
        /// </returns>
        public IEnumerable<string> GetSynonyms(string word)
        {
            foreach (ReadOnlyCollection<string> synonymGroup in _synonymGroups)
            {
                if (synonymGroup.Contains(word))
                {
                    return synonymGroup;
                }
            }

            // No match: null is kept (rather than an empty sequence) so existing
            // callers that test the result for null keep working.
            return null;
        }

        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Apache License, Version 2.0


Written By
Software Developer
United States
I'm a proud father and a software developer. I'm fascinated by a few particular .Net projects such as Lucene.Net, NHibernate, Quartz.Net, and others. I love learning and studying code to learn how other people solve software problems.

Comments and Discussions