Click here to Skip to main content
15,895,799 members
Articles / Programming Languages / C#

LINQ To Google Image and Google Groups

Rate me:
Please Sign up or sign in to vote.
5.00/5 (17 votes)
8 May 2007 | 10 min read | 91.6K   400   48  
A LINQ Implementation for Google Images/Groups Search
using System;
using System.Collections.Generic;

using System.Text;
using System.Text.RegularExpressions;

using System.Web;
using System.Net;
using System.Linq;
using System.Linq.Expressions;

using MChen.Linq.GoogleSearch.Common;

namespace MChen.Linq.GoogleSearch.Groups
{
    /// <summary>
    /// Search criteria for a Google Groups query, together with the logic that
    /// renders those criteria as an advanced-search URL.
    /// </summary>
    /// <remarks>
    /// AllWords, OrWords, NotWords, Domain and GetWords are not declared in this
    /// class; presumably they are inherited from QueryInfo - confirm against the
    /// base class.
    /// </remarks>
    internal class GroupQueryInfo : QueryInfo
    {
        internal const string ROOT       = @"http://groups.google.com";
        internal const string SEARCH_URL = ROOT + @"/groups";

        // Number of results requested per page (sent as the "num" parameter).
        internal const int ITEM_PER_PG = 10;

        /// <summary>
        /// Builds the search URL for the current criteria.
        /// </summary>
        /// <param name="start">
        /// Zero-based offset of the first result to request; emitted as the
        /// "start" query parameter so successive pages yield different URLs.
        /// </param>
        /// <returns>The complete URL, with every user-supplied term URL-encoded.</returns>
        public override string GetUrl(int start)
        {
            // DateTime.MinValue marks "no bound"; the date parts are then sent empty.
            bool hasFrom = From != DateTime.MinValue;
            bool hasTo   = To != DateTime.MinValue;

            return string.Format(
                @"{0}?as_q={1}&num={14}&scoring={2}&hl=en&as_epq=&as_oq={3}&as_eq={4}&" +
                @"as_ugroup={5}&as_usubject={6}&as_uauthors={7}&lr=&as_qdr=&" +
                @"as_drrb=b&as_mind={8}&as_minm={9}&as_miny={10}&" +
                @"as_maxd={11}&as_maxm={12}&as_maxy={13}&safe=off&start={15}",
                SEARCH_URL,
                HttpUtility.UrlEncode(GetWords(AllWords)),
                (OrderbyDate ? 'd' : 'r'),
                HttpUtility.UrlEncode(GetWords(OrWords)),
                HttpUtility.UrlEncode(GetWords(NotWords)),
                HttpUtility.UrlEncode(Domain),
                HttpUtility.UrlEncode(GetWords(Subject)),
                HttpUtility.UrlEncode(Author),
                hasFrom ? From.Day.ToString() : "",
                hasFrom ? From.Month.ToString() : "",
                hasFrom ? From.Year.ToString() : "",
                hasTo ? To.Day.ToString() : "",
                hasTo ? To.Month.ToString() : "",
                hasTo ? To.Year.ToString() : "",
                ITEM_PER_PG,
                start);
        }

        // Terms that must appear in the subject (sent as as_usubject).
        internal List<string> Subject = new List<string>();

        // Author filter (sent as as_uauthors); empty means no filter.
        internal string Author = "";

        // Date range; DateTime.MinValue means the bound is not set.
        internal DateTime From = DateTime.MinValue;
        internal DateTime To = DateTime.MinValue;

        // true: order by date ("scoring=d"); false: order by relevance/rank ("scoring=r").
        internal bool OrderbyDate = false;

        /// <summary>
        /// Returns the base description followed by one line for each
        /// group-specific criterion that is set, plus the ordering.
        /// </summary>
        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();
            sb.Append(base.ToString());
            if (Subject.Count != 0) sb.AppendFormat("Subject:     {0}\n", GetWords(Subject));
            if (Author != "")       sb.AppendFormat("Author:      {0}\n", Author);
            if (From != DateTime.MinValue) sb.AppendFormat("From:        {0}\n", From);
            if (To != DateTime.MinValue) sb.AppendFormat("To:          {0}\n", To);
            sb.AppendFormat("Order By:    {0}\n", (OrderbyDate ? "Date" : "Relevance"));
            return sb.ToString();
        }
    }

    internal class GroupSearch<T> : Searcher<T, GroupQueryInfo>,
        IQueryable<T>, IOrderedQueryable<T> where T : Result
    {
        // Pattern for one search-result entry. The capture groups are consumed
        // by CreateResult as follows:
        //   1 = href of the result link (appended to GroupQueryInfo.ROOT)
        //   2 = subject markup (HTML tags stripped later)
        //   3 = repeated <img> tags, used to compute the rating
        //   4 = group name
        //   5 = description markup
        //   6 = post date text
        //   7 = initial author
        //   8 = message count
        //   9 = author count
        // NOTE(review): presumably applied by the Searcher base class to the
        // downloaded result page - confirm against Searcher.
        private const string REG_EX =
            @"<br>\W*<font\W*size=\""\+0\"">\W*<a\W*href=\""([^\""]+)\"">(.*)</a>" +
            @"\W*</font>[^<]+(?:<nobr>\W*(<img[^>]+>\W*)+</nobr>)?[^<]+<nobr>" +
            @"<font\W*size=\""\-1\"">Group:</font>\W*<a[^>]+>([^<]+)</a>\W*</nobr>[^<]*" +
            @"((?:[^<]|(?:<[^f]))*)\W*<font\W*class=\""gl\"">" +
            @"([^<]+)by([^<]+)-([^<]+)messages?\W*-([^<]+)authors?";

        // Regex instance built from REG_EX, one per GroupSearch object.
        private Regex _rx = new Regex(REG_EX);

        // Exposes the result-entry regex through the overridden base member.
        protected override Regex _regex {
            get { return _rx; }
        }

        /// <summary>
        /// Removes every <c>&lt;...&gt;</c> tag from <paramref name="input"/>,
        /// leaving only the text between tags.
        /// </summary>
        private static string TrimHtml(string input)
        {
            const string tagPattern = @"<[^>]+>";
            return Regex.Replace(input, tagPattern, "");
        }

        /// <summary>
        /// Converts one regex match of a search-result entry into a Message.
        /// </summary>
        /// <param name="match">A successful match of the result-entry pattern.</param>
        /// <returns>The populated Message.</returns>
        /// <exception cref="FormatException">
        /// A numeric capture (message count, author count, day or year) is not a number.
        /// </exception>
        protected override Result CreateResult(Match match)
        {
            Message msg =  new Message();
            msg.Url     = new Uri(GroupQueryInfo.ROOT + match.Groups[1].Value);
            msg.Subject = TrimHtml(match.Groups[2].Value);
            msg.Domain  = match.Groups[4].Value;
            msg.InitialAuthor = match.Groups[7].Value.Trim();
            msg.Description   = TrimHtml(match.Groups[5].Value).Trim();
            msg.NumberOfAuthors = Int32.Parse(match.Groups[9].Value.Trim());
            msg.NumberOfReply   = Int32.Parse(match.Groups[8].Value.Trim());

            // Rating: only computed when exactly five images were captured;
            // each image whose markup contains "_on_" counts as one point.
            int rating = 0;
            if (match.Groups[3].Captures.Count == 5)
            {
                foreach (Capture cap in match.Groups[3].Captures)
                    if (cap.Value.IndexOf("_on_") != -1) rating++;
            }
            msg.Rating = rating;

            msg.PostDate = ParsePostDate(match.Groups[6].Value);
            return msg;
        }

        /// <summary>
        /// Parses date text of the form "Mon d" or "Mon d yyyy" (split on single
        /// spaces). Falls back to today's date when the text has a different
        /// number of parts or does not start with a known month abbreviation.
        /// </summary>
        private static DateTime ParsePostDate(string text)
        {
            string[] months = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};

            string[] parts = text.Split(' ');
            if (parts.Length != 2 && parts.Length != 3)
                return DateTime.Today;

            // IndexOf yields -1 for an unknown month, so the 1-based month is 0
            // in that case. Passing 0 to the DateTime constructor would throw.
            int month = Array.IndexOf(months, parts[0].Trim()) + 1;
            if (month == 0)
                return DateTime.Today;

            int day = Int32.Parse(parts[1].Trim());

            // A missing or blank third part means the current year.
            int year = DateTime.Today.Year;
            if (parts.Length == 3 && parts[2].Trim() != string.Empty)
                year = Int32.Parse(parts[2].Trim());

            return new DateTime(year, month, day);
        }

        /// <summary>
        /// Creates a search that starts from an empty set of query criteria.
        /// </summary>
        internal GroupSearch()
        {
            GroupQueryInfo emptyCriteria = new GroupQueryInfo();
            this._info = emptyCriteria;
        }

        /// <summary>
        /// Creates a search over the supplied query criteria.
        /// </summary>
        /// <param name="info">Criteria to search with; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="info"/> is null. ArgumentNullException derives from
        /// ArgumentException, so handlers for the latter still catch it.
        /// </exception>
        internal GroupSearch(GroupQueryInfo info)
        {
            if (info == null)
                throw new ArgumentNullException("info", "Parameter is null.");

            _info = info;
        }

        #region IExpressionVisitor members
        /// <summary>
        /// Handles a sequence-operator call. Only "Where" and "OrderBy" with a
        /// single-parameter lambda as the second argument are supported.
        /// </summary>
        /// <param name="mc">The method call expression.</param>
        /// <param name="qinfo">Criteria accumulated so far.</param>
        /// <returns>The updated criteria.</returns>
        /// <exception cref="NotSupportedException">
        /// OrderBy is applied to something other than Message.Rank or Message.PostDate.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The call is any other operator, or does not have the expected shape.
        /// </exception>
        public override GroupQueryInfo VisitSequenceOperatorCall(MethodCallExpression mc, GroupQueryInfo qinfo)
        {
            // When the source argument is a constant GroupSearch<Message>,
            // continue from that search's own criteria.
            if (mc.Arguments.Count == 2 && mc.Arguments[0].NodeType == ExpressionType.Constant)
            {
                GroupSearch<Message> qgrp =
                    ((ConstantExpression)mc.Arguments[0]).Value as GroupSearch<Message>;
                if (qgrp != null) qinfo = qgrp._info;
            }

            string name = mc.Method.Name;
            if ((name == "Where" || name == "OrderBy")
                && mc.Arguments.Count == 2
                && ExpressionUtil.IsLambda(mc.Arguments[1])
                && ExpressionUtil.GetLambda(mc.Arguments[1]).Parameters.Count == 1)
            {
                LambdaExpression lexp = ExpressionUtil.GetLambda(mc.Arguments[1]);

                if (name == "Where")
                    return VisitLambda(lexp, qinfo);

                // OrderBy: the key selector must be Message.Rank or Message.PostDate.
                if (ExpressionUtil.IsMember(lexp.Body))
                {
                    MemberExpression mexp = ExpressionUtil.GetMember(lexp.Body);
                    if (mexp.Member.DeclaringType.IsAssignableFrom(typeof(Message)))
                    {
                        if (mexp.Member.Name == "Rank")
                        {
                            qinfo.OrderbyDate = false;
                            return qinfo;
                        }
                        if (mexp.Member.Name == "PostDate")
                        {
                            qinfo.OrderbyDate = true;
                            return qinfo;
                        }
                    }
                }
                throw new NotSupportedException(
                          "Only order by Rank or PostDate is supported.");
            }

            throw new ArgumentException(
                string.Format("Sequence Call {0} not yet supported.", mc.Method.Name));
        }

        /// <summary>
        /// Visits an AND expression by visiting its left and then its right
        /// operand, accumulating criteria from both.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <paramref name="node"/> is not an AndAlso expression.
        /// </exception>
        public override GroupQueryInfo VisitAndAlso(BinaryExpression node, GroupQueryInfo qinfo)
        {
            if (node.NodeType != ExpressionType.AndAlso)
                throw new ArgumentException("Argument is not AND.", "node");

            bool temp = _insideAnd;
            try
            {
                // Mark the AND context for the duration of the visit, mirroring
                // how VisitOrElse and VisitNot maintain their flags.
                _insideAnd = true;

                qinfo = Visit(node.Left, qinfo);
                qinfo = Visit(node.Right, qinfo);
            }
            finally
            {
                _insideAnd = temp;
            }
            return qinfo;
        }

        /// <summary>
        /// Visits an OR expression. Each operand must be either a method call
        /// or another OR expression; operands are visited left to right with
        /// the "inside OR" flag set.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <paramref name="node"/> is not an OrElse expression, or one of its
        /// operands is neither a method call nor an OrElse expression.
        /// </exception>
        public override GroupQueryInfo VisitOrElse(BinaryExpression node, GroupQueryInfo qinfo)
        {
            if (node.NodeType != ExpressionType.OrElse)
                throw new ArgumentException("Argument is not OR expression.", "node");

            bool previous = _insideOr;
            try
            {
                _insideOr = true;

                Expression[] operands = { node.Left, node.Right };
                foreach (Expression operand in operands)
                {
                    switch (operand.NodeType)
                    {
                        case ExpressionType.Call:
                            qinfo = VisitMethodCall((MethodCallExpression)operand, qinfo);
                            break;

                        case ExpressionType.OrElse:
                            qinfo = VisitOrElse((BinaryExpression)operand, qinfo);
                            break;

                        default:
                            throw new ArgumentException("OR operator must be used on leaf expression nodes.", "node");
                    }
                }
            }
            finally
            {
                // Restore the caller's context even when a visit throws.
                _insideOr = previous;
            }
            return qinfo;
        }

        /// <summary>
        /// Visits a NOT expression. Only negation of a method call is
        /// supported; the call is visited with the "inside NOT" flag set.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The operand is not a method call.
        /// </exception>
        public override GroupQueryInfo VisitNot(UnaryExpression node, GroupQueryInfo qinfo)
        {
            Expression operand = node.Operand;
            if (operand.NodeType != ExpressionType.Call)
            {
                throw new ArgumentException(
                    string.Format("Not operator on {0} not supported.", operand.NodeType));
            }

            bool previous = _insideNot;
            _insideNot = true;
            try
            {
                return VisitMethodCall((MethodCallExpression)operand, qinfo);
            }
            finally
            {
                // Restore the caller's context even when the visit throws.
                _insideNot = previous;
            }
        }

        /// <summary>
        /// Visits an equality comparison between a Message member and a
        /// constant (in either order) and records it as a group or author filter.
        /// </summary>
        /// <param name="node">The equality expression.</param>
        /// <param name="qinfo">Criteria accumulated so far.</param>
        /// <returns>The updated criteria.</returns>
        /// <exception cref="ArgumentException">
        /// The comparison is not member-versus-constant, the constant is null,
        /// or the member is not Group, Domain or InitialAuthor.
        /// </exception>
        public override GroupQueryInfo VisitEquals(BinaryExpression node, GroupQueryInfo qinfo)
        {
            // Must be member == constant or constant == member.
            MemberExpression member = null;
            ConstantExpression constant = null;

            if (ExpressionUtil.IsMember(node.Left))
                member = ExpressionUtil.GetMember(node.Left);
            else if (node.Left.NodeType == ExpressionType.Constant)
                constant = node.Left as ConstantExpression;
            else constant = ExpressionUtil.ProduceConstantExpression<string>(node.Left);

            if (ExpressionUtil.IsMember(node.Right))
                member = ExpressionUtil.GetMember(node.Right);
            else if (node.Right.NodeType == ExpressionType.Constant)
                constant = node.Right as ConstantExpression;
            else constant = ExpressionUtil.ProduceConstantExpression<string>(node.Right);

            if (member == null || constant == null ||
                !member.Member.DeclaringType.IsAssignableFrom(typeof(Message)))
                throw new ArgumentException(
                      "Equals operator must apply to a Message member and a constant.");

            // A null constant cannot be rendered as a search term.
            if (constant.Value == null)
                throw new ArgumentException(
                      "Equals operator does not support comparison with null.");

            switch (member.Member.Name)
            {
                // Both names select the group filter, which is stored in
                // qinfo.Domain. "Domain" is accepted because the error message
                // below has always listed it as supported.
                case "Group":
                case "Domain":
                    qinfo.Domain = constant.Value.ToString();
                    break;
                case "InitialAuthor":
                    qinfo.Author = constant.Value.ToString();
                    break;
                default:
                    throw new ArgumentException(
                          "Only InitialAuthor, Group and Domain fields are supported for Equals operator.");
            }
            return qinfo;
        }

        /// <summary>
        /// Visits an ordinary (non-sequence) method call. The only supported
        /// call is Contains(term) on the Subject or Description member of a
        /// Message.
        /// </summary>
        /// <remarks>
        /// Subject terms are added to the subject list and may not appear under
        /// OR or NOT. Description terms go to NotWords, OrWords or AllWords
        /// depending on whether the call sits under NOT, under OR, or neither.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// Contains is not called with exactly one argument.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// Any other method or member, or a Subject term under OR or NOT.
        /// </exception>
        public override GroupQueryInfo VisitRegularCall(
            MethodCallExpression node, GroupQueryInfo qinfo)
        {
            // Resolve the name of the Message member the method is invoked on, if any.
            string targetName = null;
            if (node.Object != null && ExpressionUtil.IsMember(node.Object))
            {
                MemberExpression target = ExpressionUtil.GetMember(node.Object);
                if (target.Member.DeclaringType.IsAssignableFrom(typeof(Message)))
                    targetName = target.Member.Name;
            }

            if (targetName == null || node.Method.Name != "Contains")
            {
                throw new NotSupportedException(
                    string.Format("Method {0} is not supported.", node.Method.Name));
            }

            if (node.Arguments.Count != 1)
                throw new ArgumentException("Only constant search terms are supported.");

            // Reduce the argument to a constant before looking at the member,
            // so that any failure in that step surfaces first.
            Expression argument = node.Arguments[0];
            ConstantExpression termExpression;
            if (argument.NodeType == ExpressionType.Constant)
                termExpression = argument as ConstantExpression;
            else
                termExpression = ExpressionUtil.ProduceConstantExpression<string>(argument);

            string term = termExpression.Value.ToString();

            switch (targetName)
            {
                case "Subject":
                    if (_insideNot || _insideOr)
                        throw new NotSupportedException(
                            "Subject query can't be part of OR or NOT expression.");
                    qinfo.Subject.Add(term);
                    break;

                case "Description":
                    if (_insideNot)
                        qinfo.NotWords.Add(term);
                    else if (_insideOr)
                        qinfo.OrWords.Add(term);
                    else
                        qinfo.AllWords.Add(term);
                    break;

                default:
                    throw new NotSupportedException(
                        string.Format("Method {0} is not supported.", node.Method.Name));
            }
            return qinfo;
        }

        /// <summary>
        /// Visits a lambda by visiting its body; the parameters are ignored.
        /// </summary>
        public override GroupQueryInfo VisitLambda(LambdaExpression node, GroupQueryInfo qinfo)
        {
            Expression body = node.Body;
            GroupQueryInfo updated = Visit(body, qinfo);
            return updated;
        }

        /// <summary>
        /// Records a date bound produced by a comparison on Message.PostDate.
        /// </summary>
        /// <param name="member">The member side of the comparison.</param>
        /// <param name="constant">The constant side; its value must be a DateTime.</param>
        /// <param name="greaterThan">
        /// true to store the value as the lower bound (From); false to store it
        /// as the upper bound (To).
        /// </param>
        /// <param name="qinfo">Criteria accumulated so far.</param>
        /// <returns>The updated criteria.</returns>
        /// <exception cref="ArgumentException">
        /// Either side is missing, the member is not Message.PostDate, or the
        /// constant value is not a DateTime (including null).
        /// </exception>
        private GroupQueryInfo ProcessCompare(
            MemberExpression member, ConstantExpression constant, 
            bool greaterThan, GroupQueryInfo qinfo)
        {
            // "is DateTime" is false for a null value, so a null constant gets
            // the ArgumentException below instead of a NullReferenceException.
            bool valid = member != null
                && constant != null
                && member.Member.DeclaringType.IsAssignableFrom(typeof(Message))
                && member.Member.Name == "PostDate"
                && constant.Value is DateTime;

            if (!valid)
                throw new ArgumentException(
                      "Compare operator must apply to a Message PostDate and a constant.");

            DateTime bound = (DateTime)constant.Value;
            if (greaterThan)
            {
                qinfo.From = bound;
            }
            else
            {
                qinfo.To = bound;
            }
            return qinfo;
        }

        /// <summary>
        /// Visits a greater-than comparison between a member and a constant.
        /// When the member is on the left the constant is passed on as a lower
        /// bound; when the member is on the right it is passed on as an upper
        /// bound. The <paramref name="equal"/> flag is not used.
        /// </summary>
        public override GroupQueryInfo VisitGreaterThan(BinaryExpression node, bool equal, GroupQueryInfo qinfo)
        {
            MemberExpression dateMember = null;
            ConstantExpression bound = null;

            // Left operand: a member, a literal constant, or something to reduce to a constant.
            Expression lhs = node.Left;
            if (ExpressionUtil.IsMember(lhs))
            {
                dateMember = ExpressionUtil.GetMember(lhs);
            }
            else if (lhs.NodeType == ExpressionType.Constant)
            {
                bound = lhs as ConstantExpression;
            }
            else
            {
                bound = ExpressionUtil.ProduceConstantExpression<DateTime>(lhs);
            }

            // Right operand: same classification; a member here flips the direction.
            Expression rhs = node.Right;
            bool memberOnRight = ExpressionUtil.IsMember(rhs);
            if (memberOnRight)
            {
                dateMember = ExpressionUtil.GetMember(rhs);
            }
            else if (rhs.NodeType == ExpressionType.Constant)
            {
                bound = rhs as ConstantExpression;
            }
            else
            {
                bound = ExpressionUtil.ProduceConstantExpression<DateTime>(rhs);
            }

            return ProcessCompare(dateMember, bound, !memberOnRight, qinfo);
        }

        /// <summary>
        /// Visits a less-than comparison between a member and a constant.
        /// When the member is on the left the constant is passed on as an upper
        /// bound; when the member is on the right it is passed on as a lower
        /// bound. The <paramref name="equal"/> flag is not used.
        /// </summary>
        public override GroupQueryInfo VisitLessThan(BinaryExpression node, bool equal, GroupQueryInfo qinfo)
        {
            MemberExpression dateMember = null;
            ConstantExpression bound = null;

            // Left operand: a member, a literal constant, or something to reduce to a constant.
            Expression lhs = node.Left;
            if (ExpressionUtil.IsMember(lhs))
            {
                dateMember = ExpressionUtil.GetMember(lhs);
            }
            else if (lhs.NodeType == ExpressionType.Constant)
            {
                bound = lhs as ConstantExpression;
            }
            else
            {
                bound = ExpressionUtil.ProduceConstantExpression<DateTime>(lhs);
            }

            // Right operand: same classification; a member here flips the direction.
            Expression rhs = node.Right;
            bool memberOnRight = ExpressionUtil.IsMember(rhs);
            if (memberOnRight)
            {
                dateMember = ExpressionUtil.GetMember(rhs);
            }
            else if (rhs.NodeType == ExpressionType.Constant)
            {
                bound = rhs as ConstantExpression;
            }
            else
            {
                bound = ExpressionUtil.ProduceConstantExpression<DateTime>(rhs);
            }

            return ProcessCompare(dateMember, bound, memberOnRight, qinfo);
        }

        // Visitor context flags. Each is saved on entry to the matching Visit*
        // method and restored in its finally block.

        // Set while visiting the operands of an OR; read by VisitRegularCall.
        private bool _insideOr = false;
        // Saved/restored by VisitAndAlso; no code in this class reads it.
        private bool _insideAnd = false;
        // Set while visiting the operand of a NOT; read by VisitRegularCall.
        private bool _insideNot = false;
        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States United States

Comments and Discussions