Click here to Skip to main content
Click here to Skip to main content
Articles » Database » Database » General » Downloads
 
Add your own
alternative version

hOOt - full text search engine

, 22 Jun 2013
Smallest full text search engine (lucene replacement) built from scratch using inverted WAH bitmap index, highly compact storage, operating in database and document modes
hoot_v1.0-noexe.zip
hoot_v1.0.zip
Hoot
Properties
SampleApp
newifilter
Properties
Settings.settings
hoot_v1.1-noexe.zip
hoot_v1.1.zip
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
_svn
all-wcprops
entries
text-base
ComHelper.cs.svn-base
FilterLoader.cs.svn-base
FilterReader.cs.svn-base
IFilter.cs.svn-base
Settings.settings
_svn
all-wcprops
entries
text-base
AssemblyInfo.cs.svn-base
Resources.Designer.cs.svn-base
Resources.resx.svn-base
Settings.Designer.cs.svn-base
Settings.settings.svn-base
hoot_v1.2-noexe.zip
hoot_v1.2.zip
Settings.settings
hoot_v1.3-noexe.zip
hoot_v1.3.zip
Settings.settings
hoot_v1.4-noexe.zip
hoot_v1.4.zip
Settings.settings
hoot_v1.5-noexe.zip
hoot_v1.5.zip
Settings.settings
hoot_v2.0.zip
fastJSON
MGIndex
Settings.settings
hoot_v2.1.zip
Settings.settings
hoot_v2.2.1.zip
Settings.settings
hoot_v2.2.zip
Settings.settings
sampleapp.exe_v2.2-noexe.zip
sampleapp.exe_v2.2.1-noexe.zip
sampleapp.exe_v2.2.1.zip
sampleapp.exe_v2.2.zip
sampleapp_exe-noexe.zip
sampleapp_exe.zip
sampleapp_exe_v1.1-noexe.zip
sampleapp_exe_v1.1.zip
sampleapp_exe_v1.2-noexe.zip
sampleapp_exe_v1.2.zip
sampleapp_exe_v1.3-noexe.zip
sampleapp_exe_v1.3.zip
sampleapp_exe_v1.4-noexe.zip
sampleapp_exe_v1.4.zip
sampleapp_exe_v1.5-noexe.zip
sampleapp_exe_v1.5.zip
sampleapp_exe_v2.0-noexe.zip
sampleapp_exe_v2.0.zip
sampleapp_exe_v2.1-noexe.zip
sampleapp_exe_v2.1.zip
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Text;

namespace fastJSON
{
    /// <summary>
    /// This class encodes and decodes JSON strings.
    /// Spec. details, see http://www.json.org/
    /// </summary>
    internal sealed class JsonParser
    {
        enum Token
        {
            None = -1,           // Used to denote no Lookahead available
            Curly_Open,
            Curly_Close,
            Squared_Open,
            Squared_Close,
            Colon,
            Comma,
            String,
            Number,
            True,
            False,
            Null
        }

        readonly char[] json;
        readonly StringBuilder s = new StringBuilder();
        Token lookAheadToken = Token.None;
        int index;
        bool _ignorecase = false;


        internal JsonParser(string json, bool ignorecase)
        {
            this.json = json.ToCharArray();
            _ignorecase = ignorecase;
        }

        public object Decode()
        {
            return ParseValue();
        }

        private Dictionary<string, object> ParseObject()
        {
            Dictionary<string, object> table = new Dictionary<string, object>();

            ConsumeToken(); // {

            while (true)
            {
                switch (LookAhead())
                {

                    case Token.Comma:
                        ConsumeToken();
                        break;

                    case Token.Curly_Close:
                        ConsumeToken();
                        return table;

                    default:
                        {

                            // name
                            string name = ParseString();
                            if (_ignorecase)
                                name = name.ToLower();

                            // :
                            if (NextToken() != Token.Colon)
                            {
                                throw new Exception("Expected colon at index " + index);
                            }

                            // value
                            object value = ParseValue();

                            table[name] = value;
                        }
                        break;
                }
            }
        }

        private List<object> ParseArray()
        {
            List<object> array = new List<object>();
            ConsumeToken(); // [

            while (true)
            {
                switch (LookAhead())
                {

                    case Token.Comma:
                        ConsumeToken();
                        break;

                    case Token.Squared_Close:
                        ConsumeToken();
                        return array;

                    default:
                        array.Add(ParseValue());
                        break;
                }
            }
        }

        private object ParseValue()
        {
            switch (LookAhead())
            {
                case Token.Number:
                    return ParseNumber();

                case Token.String:
                    return ParseString();

                case Token.Curly_Open:
                    return ParseObject();

                case Token.Squared_Open:
                    return ParseArray();

                case Token.True:
                    ConsumeToken();
                    return true;

                case Token.False:
                    ConsumeToken();
                    return false;

                case Token.Null:
                    ConsumeToken();
                    return null;
            }

            throw new Exception("Unrecognized token at index" + index);
        }

        private string ParseString()
        {
            ConsumeToken(); // "

            s.Length = 0;

            int runIndex = -1;

            while (index < json.Length)
            {
                var c = json[index++];

                if (c == '"')
                {
                    if (runIndex != -1)
                    {
                        if (s.Length == 0)
                            return new string(json, runIndex, index - runIndex - 1);

                        s.Append(json, runIndex, index - runIndex - 1);
                    }
                    return s.ToString();
                }

                if (c != '\\')
                {
                    if (runIndex == -1)
                        runIndex = index - 1;

                    continue;
                }

                if (index == json.Length) break;

                if (runIndex != -1)
                {
                    s.Append(json, runIndex, index - runIndex - 1);
                    runIndex = -1;
                }

                switch (json[index++])
                {
                    case '"':
                        s.Append('"');
                        break;

                    case '\\':
                        s.Append('\\');
                        break;

                    case '/':
                        s.Append('/');
                        break;

                    case 'b':
                        s.Append('\b');
                        break;

                    case 'f':
                        s.Append('\f');
                        break;

                    case 'n':
                        s.Append('\n');
                        break;

                    case 'r':
                        s.Append('\r');
                        break;

                    case 't':
                        s.Append('\t');
                        break;

                    case 'u':
                        {
                            int remainingLength = json.Length - index;
                            if (remainingLength < 4) break;

                            // parse the 32 bit hex into an integer codepoint
                            uint codePoint = ParseUnicode(json[index], json[index + 1], json[index + 2], json[index + 3]);
                            s.Append((char)codePoint);

                            // skip 4 chars
                            index += 4;
                        }
                        break;
                }
            }

            throw new Exception("Unexpectedly reached end of string");
        }

        private uint ParseSingleChar(char c1, uint multipliyer)
        {
            uint p1 = 0;
            if (c1 >= '0' && c1 <= '9')
                p1 = (uint)(c1 - '0') * multipliyer;
            else if (c1 >= 'A' && c1 <= 'F')
                p1 = (uint)((c1 - 'A') + 10) * multipliyer;
            else if (c1 >= 'a' && c1 <= 'f')
                p1 = (uint)((c1 - 'a') + 10) * multipliyer;
            return p1;
        }

        private uint ParseUnicode(char c1, char c2, char c3, char c4)
        {
            uint p1 = ParseSingleChar(c1, 0x1000);
            uint p2 = ParseSingleChar(c2, 0x100);
            uint p3 = ParseSingleChar(c3, 0x10);
            uint p4 = ParseSingleChar(c4, 1);

            return p1 + p2 + p3 + p4;
        }

        private long CreateLong(string s)
        {
            long num = 0;
            bool neg = false;
            foreach (char cc in s)
            {
                if (cc == '-')
                    neg = true;
                else if (cc == '+')
                    neg = false;
                else
                {
                    num *= 10;
                    num += (int)(cc - '0');
                }
            }

            return neg ? -num : num;
        }

        private object ParseNumber()
        {
            ConsumeToken();

            // Need to start back one place because the first digit is also a token and would have been consumed
            var startIndex = index - 1;
            bool dec = false;
            do
            {
                if (index == json.Length)
                    break;
                var c = json[index];

                if ((c >= '0' && c <= '9') || c == '.' || c == '-' || c == '+' || c == 'e' || c == 'E')
                {
                    if (c == '.' || c == 'e' || c == 'E')
                        dec = true;
                    if (++index == json.Length)
                        break;                        //throw new Exception("Unexpected end of string whilst parsing number");
                    continue;
                }
                break;
            } while (true);

			if (dec)
			{
				string s = new string(json, startIndex, index - startIndex);
				return double.Parse(s, NumberFormatInfo.InvariantInfo);
			}
			long num;
			return JSON.CreateLong(out num, json, startIndex, index - startIndex);
        }

        private Token LookAhead()
        {
            if (lookAheadToken != Token.None) return lookAheadToken;

            return lookAheadToken = NextTokenCore();
        }

        private void ConsumeToken()
        {
            lookAheadToken = Token.None;
        }

        private Token NextToken()
        {
            var result = lookAheadToken != Token.None ? lookAheadToken : NextTokenCore();

            lookAheadToken = Token.None;

            return result;
        }

        private Token NextTokenCore()
        {
            char c;

            // Skip past whitespace
            do
            {
                c = json[index];

                if (c > ' ') break;
                if (c != ' ' && c != '\t' && c != '\n' && c != '\r') break;

            } while (++index < json.Length);

            if (index == json.Length)
            {
                throw new Exception("Reached end of string unexpectedly");
            }

            c = json[index];

            index++;

            //if (c >= '0' && c <= '9')
            //    return Token.Number;

            switch (c)
            {
                case '{':
                    return Token.Curly_Open;

                case '}':
                    return Token.Curly_Close;

                case '[':
                    return Token.Squared_Open;

                case ']':
                    return Token.Squared_Close;

                case ',':
                    return Token.Comma;

                case '"':
                    return Token.String;

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                case '-':
                case '+':
                case '.':
                    return Token.Number;

                case ':':
                    return Token.Colon;

                case 'f':
                    if (json.Length - index >= 4 &&
                        json[index + 0] == 'a' &&
                        json[index + 1] == 'l' &&
                        json[index + 2] == 's' &&
                        json[index + 3] == 'e')
                    {
                        index += 4;
                        return Token.False;
                    }
                    break;

                case 't':
                    if (json.Length - index >= 3 &&
                        json[index + 0] == 'r' &&
                        json[index + 1] == 'u' &&
                        json[index + 2] == 'e')
                    {
                        index += 3;
                        return Token.True;
                    }
                    break;

                case 'n':
                    if (json.Length - index >= 3 &&
                        json[index + 0] == 'u' &&
                        json[index + 1] == 'l' &&
                        json[index + 2] == 'l')
                    {
                        index += 3;
                        return Token.Null;
                    }
                    break;

            }

            throw new Exception("Could not find token at index " + --index);
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

About the Author

Mehdi Gholam
Architect
United Kingdom United Kingdom
Mehdi first started programming when he was 8 on BBC+128k machine in 6512 processor language, after various hardware and software changes he eventually came across .net and c# which he has been using since v1.0.
He is formally educated as a system analyst Industrial engineer, but his programming passion continues.
 
* Mehdi is the 5th person to get 6 out of 7 Platinums on CodeProject (13th Jan'12)

| Advertise | Privacy | Mobile
Web04 | 2.8.140721.1 | Last Updated 22 Jun 2013
Article Copyright 2011 by Mehdi Gholam
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid