Click here to Skip to main content
15,897,187 members
Articles / Programming Languages / C#

LINQ To Google Image and Google Groups

Rate me:
Please Sign up or sign in to vote.
5.00/5 (17 votes)
8 May 2007 · 10 min read · 91.8K   400   48  
A LINQ Implementation for Google Images/Groups Search
using System;
using System.IO;
using System.Collections;
using System.Collections.Generic;

using System.Text;
using System.Text.RegularExpressions;

using System.Web;
using System.Net;
using System.Linq;
using System.Linq.Expressions;

using MChen.Linq.GoogleSearch.Common;

namespace MChen.Linq.GoogleSearch.Images
{
    /// <summary>
    /// Search criteria for a Google Images query. Adds image-specific filters
    /// (size, file format, colour) to the criteria inherited from
    /// <see cref="QueryInfo"/> and renders them as a search URL.
    /// </summary>
    internal class ImageQueryInfo : QueryInfo
    {
        /// <summary>Base address every generated search URL starts with.</summary>
        internal const string SEARCH_URL = "http://images.google.com/images";

        // NOTE(review): presumably the number of results per result page; not used in this class.
        internal const int ITEM_PER_PG = 18;

        /// <summary>Requested image size filter (sent as <c>imgsz</c>).</summary>
        public ImageSize Size { get; set; }

        /// <summary>Requested file format filter (sent as <c>as_filetype</c>).</summary>
        public ImageFormat Format { get; set; }

        /// <summary>Requested colour filter (sent as <c>imgc</c>).</summary>
        public ImageColor Color { get; set; }

        #region private methods
        /// <summary>
        /// Maps <see cref="Size"/> to the value of the <c>imgsz</c> query parameter.
        /// Returns an empty string when no size filter applies.
        /// </summary>
        private string GetImageSize()
        {
            switch (Size)
            {
                case ImageSize.Large: return "xxlarge";
                case ImageSize.Medium: return "small|medium|large|xlarge";
                case ImageSize.Small: return "icon";
                default: return "";    // ImageSize.Any and unknown values: no filter
            }
        }

        /// <summary>
        /// Maps <see cref="Color"/> to the value of the <c>imgc</c> query parameter.
        /// Returns an empty string when no colour filter applies.
        /// </summary>
        private string GetImageColor()
        {
            switch (Color)
            {
                case ImageColor.BlackWhite: return "mono";
                case ImageColor.Color: return "color";
                case ImageColor.Greyscale: return "gray";
                default: return "";    // ImageColor.Any and unknown values: no filter
            }
        }

        /// <summary>
        /// Maps <see cref="Format"/> to the value of the <c>as_filetype</c> query
        /// parameter. Returns an empty string when no format filter applies.
        /// </summary>
        private string GetImageFormat()
        {
            switch (Format)
            {
                case ImageFormat.GIF: return "gif";
                case ImageFormat.JPG: return "jpg";
                // Previously returned "gif", which made a PNG filter search for GIF files.
                case ImageFormat.PNG: return "png";
                default: return "";    // ImageFormat.Any and unknown values: no filter
            }
        }
        #endregion

        /// <summary>
        /// Builds the search URL for the result page beginning at <paramref name="start"/>.
        /// </summary>
        /// <param name="start">Value written to the <c>start</c> query parameter.</param>
        /// <returns>The complete URL, with the word lists and domain URL-encoded.</returns>
        public override string GetUrl(int start)
        {
            // GetWords, GetDomain and the word lists are inherited from QueryInfo.
            return String.Format(
                "{0}?hl=en&as_q={1}&as_oq={2}&as_eq={3}&imgsz={4}&as_filetype={5}&imgc={6}&as_sitesearch={7}&start={8}",
                SEARCH_URL,
                HttpUtility.UrlEncode(GetWords(AllWords)),
                HttpUtility.UrlEncode(GetWords(OrWords)),
                HttpUtility.UrlEncode(GetWords(NotWords)),
                GetImageSize(),
                GetImageFormat(),
                GetImageColor(),
                HttpUtility.UrlEncode(GetDomain()),
                start);
        }

        /// <summary>
        /// Returns the base class description followed by one line each for the
        /// size, format and colour filters.
        /// </summary>
        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();
            sb.Append(base.ToString());
            sb.AppendFormat("Image Size:  {0}\n", Size);
            sb.AppendFormat("Image Format:{0}\n", Format);
            sb.AppendFormat("Image Color: {0}\n", Color);
            return sb.ToString();
        }
    }

    /// <summary>
    /// Google Images query source. Translates the supported subset of LINQ
    /// expression nodes (Where / OrderBy, AND, OR, NOT, equality tests and
    /// <c>RelatesTo</c> calls) into an <see cref="ImageQueryInfo"/>, and builds
    /// <see cref="Image"/> results from regular-expression matches.
    /// </summary>
    internal class ImageSearch<T> : Searcher<T, ImageQueryInfo>,
        IQueryable<T>, IOrderedQueryable<T> where T : Result
    {
        // Matches one "dyn.Img(...)" call; every quoted argument is stored as a
        // separate capture of the "param" group.
        private const string REG_EX = @"dyn\.Img\((?:\""(?<param>[^\""]*)\"",?\w*)+\);";

        // CreateResult reads captures up to index 14, so at least 15 are required.
        private const int MIN_PARAM_COUNT = 15;

        private readonly Regex _rx = new Regex(REG_EX);

        /// <summary>
        /// Strips the escaped bold tags (<c>\x3cb\x3e</c> and <c>\x3c/b\x3e</c>)
        /// from a description.
        /// </summary>
        private static string EncodeDescription(string desc)
        {
            return desc.Replace(@"\x3cb\x3e", "").Replace(@"\x3c/b\x3e", "");
        }

        /// <summary>
        /// Converts a file-extension string to an <see cref="ImageFormat"/>,
        /// falling back to <see cref="ImageFormat.Any"/> when it is not recognised.
        /// </summary>
        private static ImageFormat ParseFormat(string extension)
        {
            try
            {
                // Invariant upper-casing: culture-sensitive ToUpper() turns "gif"
                // into "GİF" under Turkish cultures, which never matches an enum name.
                return (ImageFormat)Enum.Parse(typeof(ImageFormat), extension.ToUpperInvariant());
            }
            catch (ArgumentException)
            {
                // Empty text or a name that is not an ImageFormat member.
                return ImageFormat.Any;
            }
            catch (OverflowException)
            {
                // Numeric text outside the range of the enum's underlying type.
                return ImageFormat.Any;
            }
        }

        /// <summary>Regular expression used to locate results.</summary>
        protected override Regex _regex
        {
            get { return _rx; }
        }

        /// <summary>
        /// Builds an <see cref="Image"/> from the captured arguments of one match.
        /// </summary>
        /// <param name="m">A match produced by <see cref="_regex"/>.</param>
        /// <returns>The populated image.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The match holds fewer captured arguments than this method reads.
        /// </exception>
        protected override Result CreateResult(Match m)
        {
            CaptureCollection vals = m.Groups[1].Captures;
            if (vals.Count < MIN_PARAM_COUNT)
                throw new ArgumentOutOfRangeException("m",
                    string.Format("Expected at least {0} captured parameters but found {1}.",
                                  MIN_PARAM_COUNT, vals.Count));

            Image img = new Image();
            img.Url = new Uri(vals[3].Value);
            img.Domain = vals[11].Value;
            img.ThumbnailWidth = Int32.Parse(vals[4].Value);
            img.ThumbnailHeight = Int32.Parse(vals[5].Value);
            img.Description = EncodeDescription(vals[6].Value);
            // Colour and size are copied from the query, not read from the match.
            img.Color = _info.Color;
            img.Size = _info.Size;
            img.ThumbnailURL =
                string.Format("{0}?q=tbn:{1}", vals[14].Value, vals[2].Value);
            img.Format = ParseFormat(vals[10].Value);

            // Parameter 9 is expected to look like "<width> x <height> - <size><unit char>".
            // It is optional: text in any other shape leaves Width/Height/FileSize unset.
            string[] splits = vals[9].Value.Split(' ');
            if (splits.Length == 5 && splits[1] == "x" && splits[3] == "-" &&
                splits[4].Length > 0)
            {
                int width, height, fileSize;
                // The last token carries a one-character suffix that is dropped before parsing.
                string sizeDigits = splits[4].Remove(splits[4].Length - 1);
                if (Int32.TryParse(splits[0], out width) &&
                    Int32.TryParse(splits[2], out height) &&
                    Int32.TryParse(sizeDigits, out fileSize))
                {
                    img.Width = width;
                    img.Height = height;
                    img.FileSize = fileSize;
                }
            }
            return img;
        }

        /// <summary>Creates a search with empty criteria.</summary>
        internal ImageSearch()
        {
            _info = new ImageQueryInfo();
        }

        /// <summary>Creates a search over existing criteria.</summary>
        /// <param name="info">Criteria to use; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
        internal ImageSearch(ImageQueryInfo info)
        {
            // ArgumentNullException derives from ArgumentException, so callers that
            // catch the latter are unaffected.
            if (info == null)
                throw new ArgumentNullException("info", "Parameter is null.");

            _info = info;
        }

        #region IExpressionVisitor members
        /// <summary>
        /// True when the call has exactly two arguments and the second is a
        /// lambda taking a single parameter.
        /// </summary>
        private static bool HasSingleParameterLambda(MethodCallExpression mc)
        {
            return mc.Arguments.Count == 2 &&
                   ExpressionUtil.IsLambda(mc.Arguments[1]) &&
                   ExpressionUtil.GetLambda(mc.Arguments[1]).Parameters.Count == 1;
        }

        /// <summary>
        /// Handles sequence operators. Only <c>Where</c> and <c>OrderBy</c> (by
        /// <c>Rank</c>) are accepted.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The operator, its arguments or the ordering key is not supported.
        /// </exception>
        public override ImageQueryInfo VisitSequenceOperatorCall(MethodCallExpression mc, ImageQueryInfo qinfo)
        {
            // When the source is a constant ImageSearch<Image>, continue from its criteria.
            if (mc.Arguments.Count == 2 && mc.Arguments[0].NodeType == ExpressionType.Constant)
            {
                ImageSearch<Image> qimg =
                    ((ConstantExpression)mc.Arguments[0]).Value as ImageSearch<Image>;
                if (qimg != null) qinfo = qimg._info;
            }

            switch (mc.Method.Name)
            {
                case "Where":
                    if (!HasSingleParameterLambda(mc)) break;
                    return VisitLambda(ExpressionUtil.GetLambda(mc.Arguments[1]), qinfo);

                case "OrderBy":
                    if (!HasSingleParameterLambda(mc)) break;

                    // Ordering by Rank is accepted and leaves the criteria unchanged.
                    LambdaExpression lexp = ExpressionUtil.GetLambda(mc.Arguments[1]);
                    if (ExpressionUtil.IsMember(lexp.Body))
                    {
                        MemberExpression mexp = ExpressionUtil.GetMember(lexp.Body);
                        if (mexp.Member.DeclaringType.IsAssignableFrom(typeof(Image)) &&
                            mexp.Member.Name == "Rank") return qinfo;
                    }
                    throw new ArgumentException(
                              "Only order by Rank is supported.", "mc");

                default:
                    break;
            }
            throw new ArgumentException(
                string.Format("Sequence Call {0} not yet supported.", mc.Method.Name));
        }

        /// <summary>Handles <c>&amp;&amp;</c> by visiting the left and then the right operand.</summary>
        /// <exception cref="ArgumentException"><paramref name="node"/> is not an AndAlso node.</exception>
        public override ImageQueryInfo VisitAndAlso(BinaryExpression node, ImageQueryInfo qinfo)
        {
            if (node.NodeType != ExpressionType.AndAlso)
                throw new ArgumentException("Argument is not AND.", "node");

            bool temp = _insideAnd;
            try
            {
                // Set the flag for the duration of the visit, mirroring VisitOrElse
                // and VisitNot. Nothing in this class currently reads _insideAnd.
                _insideAnd = true;
                qinfo = Visit(node.Left, qinfo);
                qinfo = Visit(node.Right, qinfo);
            }
            finally
            {
                _insideAnd = temp;
            }
            return qinfo;
        }

        /// <summary>
        /// Visits one operand of an OR expression, which must be either a method
        /// call or a nested OR.
        /// </summary>
        private ImageQueryInfo VisitOrOperand(Expression operand, ImageQueryInfo qinfo)
        {
            switch (operand.NodeType)
            {
                case ExpressionType.Call:
                    return VisitMethodCall((MethodCallExpression)operand, qinfo);
                case ExpressionType.OrElse:
                    return VisitOrElse((BinaryExpression)operand, qinfo);
                default:
                    throw new ArgumentException("OR operator must be used on leaf expression nodes.", "node");
            }
        }

        /// <summary>
        /// Handles <c>||</c>. Both operands must be method calls or nested OR
        /// expressions; they are visited with <c>_insideOr</c> set.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <paramref name="node"/> is not an OrElse node, or an operand has an
        /// unsupported node type.
        /// </exception>
        public override ImageQueryInfo VisitOrElse(BinaryExpression node, ImageQueryInfo qinfo)
        {
            if (node.NodeType != ExpressionType.OrElse)
                throw new ArgumentException("Argument is not OR expression.", "node");

            bool temp = _insideOr;
            try
            {
                _insideOr = true;
                qinfo = VisitOrOperand(node.Left, qinfo);
                qinfo = VisitOrOperand(node.Right, qinfo);
            }
            finally
            {
                _insideOr = temp;
            }
            return qinfo;
        }

        /// <summary>
        /// Handles <c>!</c>. The operand must be a method call; it is visited with
        /// <c>_insideNot</c> set.
        /// </summary>
        /// <exception cref="ArgumentException">The operand is not a method call.</exception>
        public override ImageQueryInfo VisitNot(UnaryExpression node, ImageQueryInfo qinfo)
        {
            bool temp = _insideNot;
            try
            {
                _insideNot = true;
                if (node.Operand.NodeType == ExpressionType.Call)
                {
                    qinfo = VisitMethodCall((MethodCallExpression)node.Operand, qinfo);
                }
                else
                {
                    throw new ArgumentException(
                        string.Format("Not operator on {0} not supported.", node.Operand.NodeType));
                }
            }
            finally
            {
                _insideNot = temp;
            }
            return qinfo;
        }

        /// <summary>
        /// Sorts one side of an equality test into either the member slot or the
        /// constant slot. Operands that are neither a member nor a constant are
        /// converted via <c>ExpressionUtil.ProduceConstantExpression</c>, as
        /// <c>int</c> when their static type is Int32 and as <c>string</c> otherwise.
        /// </summary>
        private static void ClassifyOperand(
            Expression operand, ref MemberExpression member, ref ConstantExpression constant)
        {
            if (ExpressionUtil.IsMember(operand))
            {
                member = ExpressionUtil.GetMember(operand);
            }
            else if (operand.NodeType == ExpressionType.Constant)
            {
                constant = (ConstantExpression)operand;
            }
            else if (operand.Type.Name == "Int32")
            {
                constant = ExpressionUtil.ProduceConstantExpression<int>(operand);
            }
            else
            {
                constant = ExpressionUtil.ProduceConstantExpression<string>(operand);
            }
        }

        /// <summary>
        /// Returns the constant's value as an Int32, or throws an
        /// <see cref="ArgumentException"/> carrying <paramref name="message"/> when
        /// the value is null or of any other type.
        /// </summary>
        private static int RequireInt32(ConstantExpression constant, string message)
        {
            if (!(constant.Value is int))
                throw new ArgumentException(message);
            return (int)constant.Value;
        }

        /// <summary>
        /// Handles <c>==</c> between an <see cref="Image"/> member and a constant
        /// (in either order). Supported members: Domain, Format, Size and Color.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The operands are not a member/constant pair, the member is not
        /// supported, or the constant has the wrong type or is null.
        /// </exception>
        public override ImageQueryInfo VisitEquals(BinaryExpression node, ImageQueryInfo qinfo)
        {
            MemberExpression member = null;
            ConstantExpression constant = null;

            ClassifyOperand(node.Left, ref member, ref constant);
            ClassifyOperand(node.Right, ref member, ref constant);

            if (member == null || constant == null ||
                !member.Member.DeclaringType.IsAssignableFrom(typeof(Image)))
                throw new ArgumentException(
                      "Equals operator must apply to a Image member and a constant.");

            switch (member.Member.Name)
            {
                case "Domain":
                    // A null constant used to surface as a NullReferenceException.
                    if (constant.Value == null)
                        throw new ArgumentException("Image domain is not valid.");
                    qinfo.Domain = constant.Value.ToString();
                    break;
                case "Format":
                    qinfo.Format = (ImageFormat)RequireInt32(constant, "Image format is not valid.");
                    break;
                case "Size":
                    qinfo.Size = (ImageSize)RequireInt32(constant, "Image size is not valid.");
                    break;
                case "Color":
                    qinfo.Color = (ImageColor)RequireInt32(constant, "Image color is not valid.");
                    break;
                default:
                    // The message used to name a non-existent "Type" field; the member is Format.
                    throw new ArgumentException(
                          "Only Size, Format, Color and Domain fields are supported for Equals operator.");
            }
            return qinfo;
        }

        /// <summary>
        /// Handles ordinary method calls. Only <c>RelatesTo(term)</c> is supported:
        /// the term is added to NotWords inside a NOT, to OrWords inside an OR,
        /// and to AllWords otherwise.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The method is not <c>RelatesTo</c>, it does not have exactly one
        /// argument, or the search term is null.
        /// </exception>
        public override ImageQueryInfo VisitRegularCall(
            MethodCallExpression node, ImageQueryInfo qinfo)
        {
            if (node.Method.Name != "RelatesTo")
            {
                throw new ArgumentException(
                    string.Format("Method {0} is not supported.", node.Method.Name));
            }

            if (node.Arguments.Count != 1)
                throw new ArgumentException("Only constant search terms are supported.");

            // Non-constant arguments are converted to a constant first.
            Expression argument = node.Arguments[0];
            ConstantExpression cont = argument.NodeType == ExpressionType.Constant
                ? (ConstantExpression)argument
                : ExpressionUtil.ProduceConstantExpression<string>(argument);

            // A null term used to surface as a NullReferenceException.
            if (cont == null || cont.Value == null)
                throw new ArgumentException("Search term must not be null.");

            string term = cont.Value.ToString();
            if (_insideNot) qinfo.NotWords.Add(term);
            else if (_insideOr) qinfo.OrWords.Add(term);
            else qinfo.AllWords.Add(term);

            return qinfo;
        }

        /// <summary>Visits the body of a lambda.</summary>
        public override ImageQueryInfo VisitLambda(LambdaExpression node, ImageQueryInfo qinfo)
        {
            return Visit(node.Body, qinfo);
        }

        // Flags recording which logical operator is currently being visited.
        private bool _insideOr = false;
        private bool _insideAnd = false;
        private bool _insideNot = false;
        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States

Comments and Discussions