Click here to Skip to main content
15,896,154 members
Articles / Programming Languages / C#

LINQ To Google Image and Google Groups

Rate me:
Please Sign up or sign in to vote.
5.00/5 (17 votes)
8 May 2007 | 10 min read | 91.7K   400   48
A LINQ Implementation for Google Images/Groups Search
using System;
using System.IO;
using System.Collections;
using System.Collections.Generic;

using System.Text;
using System.Text.RegularExpressions;

using System.Web;
using System.Net;
using System.Linq;
using System.Linq.Expressions;

namespace MChen.Linq.Google
{
    /// <summary>
    /// Holds the criteria of one image search and renders them as a request
    /// URL built on <see cref="SEARCH_URL"/>.
    /// </summary>
    internal class ImageQueryInfo
    {
        /// <summary>Base address that every URL produced by <see cref="GetUrl"/> starts with.</summary>
        internal const string   SEARCH_URL    = "http://images.google.com/images";
        /// <summary>Not referenced inside this class; presumably the number of results per page (confirm against callers).</summary>
        internal const int      ITEM_PER_PG   = 18;

        /// <summary>Terms sent in the <c>as_q</c> query parameter.</summary>
        public List<string> AllWords = new List<string>();
        /// <summary>Terms sent in the <c>as_oq</c> query parameter.</summary>
        public List<string> OrWords = new List<string>();
        /// <summary>Terms sent in the <c>as_eq</c> query parameter.</summary>
        public List<string> NotWords = new List<string>();

        /// <summary>Size filter; translated into the <c>imgsz</c> query parameter.</summary>
        public ImageSize Size { get; set; }
        /// <summary>File-type filter; translated into the <c>as_filetype</c> query parameter.</summary>
        public ImageFormat Format { get; set; }
        /// <summary>Colour filter; translated into the <c>imgc</c> query parameter.</summary>
        public ImageColor Color { get; set; }

        /// <summary>Value of the <c>as_sitesearch</c> query parameter; <c>null</c> is sent as an empty string.</summary>
        public string Domain { get; set; }

        #region private methods
        /// <summary>Maps <see cref="Size"/> to its <c>imgsz</c> value; unlisted values map to an empty string.</summary>
        private string GetImageSize()
        {
            switch (Size)
            {
                case ImageSize.Large:  return "xxlarge";
                case ImageSize.Medium: return "small|medium|large|xlarge";
                case ImageSize.Small:  return "icon";
                default:               return "";
            }
        }

        /// <summary>Maps <see cref="Color"/> to its <c>imgc</c> value; unlisted values map to an empty string.</summary>
        private string GetImageColor()
        {
            switch (Color)
            {
                case ImageColor.BlackWhite: return "mono";
                case ImageColor.Color:      return "color";
                case ImageColor.Greyscale:  return "gray";
                default:                    return "";
            }
        }

        /// <summary>Maps <see cref="Format"/> to its <c>as_filetype</c> value; unlisted values map to an empty string.</summary>
        private string GetImageFormat()
        {
            switch (Format)
            {
                case ImageFormat.GIF: return "gif";
                case ImageFormat.JPG: return "jpg";
                // Previously returned "gif", which turned every PNG search into a GIF search.
                case ImageFormat.PNG: return "png";
                default:              return "";
            }
        }

        /// <summary>Returns <see cref="Domain"/>, or an empty string when it is <c>null</c>.</summary>
        private string GetDomain()
        {
            return Domain ?? "";
        }

        /// <summary>
        /// Concatenates the words of <paramref name="list"/>, each followed by a
        /// single space (so a non-empty result ends with a space).
        /// </summary>
        private string GetWords(List<string> list)
        {
            StringBuilder joined = new StringBuilder();
            foreach (string word in list)
            {
                joined.Append(word).Append(' ');
            }
            return joined.ToString();
        }

        /// <summary>Appends one "label: word, word, " line (terminated by a newline) to <paramref name="target"/>.</summary>
        private static void AppendWordLine(StringBuilder target, string label, List<string> words)
        {
            target.Append(label);
            foreach (string word in words)
            {
                target.AppendFormat("{0}, ", word);
            }
            target.Append("\n");
        }
        #endregion

        /// <summary>
        /// Builds the search URL for the current criteria.
        /// </summary>
        /// <param name="start">Value placed verbatim in the <c>start</c> query parameter.</param>
        /// <returns>
        /// <see cref="SEARCH_URL"/> followed by the query string. The word lists and
        /// the domain are URL-encoded; the size, format and colour tokens are
        /// inserted as-is.
        /// </returns>
        public string GetUrl(int start)
        {
            return String.Format(
                "{0}?hl=en&as_q={1}&as_oq={2}&as_eq={3}&imgsz={4}&as_filetype={5}&imgc={6}&as_sitesearch={7}&start={8}",
                SEARCH_URL,
                HttpUtility.UrlEncode(GetWords(AllWords)),
                HttpUtility.UrlEncode(GetWords(OrWords)),
                HttpUtility.UrlEncode(GetWords(NotWords)),
                GetImageSize(),
                GetImageFormat(),
                GetImageColor(),
                HttpUtility.UrlEncode(GetDomain()),
                start);
        }

        /// <summary>Returns a multi-line, human-readable dump of every criterion.</summary>
        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();
            AppendWordLine(sb, "All:         ", AllWords);
            AppendWordLine(sb, "Or:          ", OrWords);
            AppendWordLine(sb, "Not:         ", NotWords);
            sb.AppendFormat("Image Size:  {0}\n", Size);
            sb.AppendFormat("Image Format:{0}\n", Format);
            sb.AppendFormat("Image Color: {0}\n", Color);
            // Label was misspelled "Domian" in the emitted text.
            sb.AppendFormat("Domain:      {0}\n", Domain);
            return sb.ToString();
        }
    }

    /// <summary>
    /// Queryable sequence that, when enumerated, downloads result pages for an
    /// <see cref="ImageQueryInfo"/> and parses them into <c>Image</c> objects.
    /// </summary>
    /// <typeparam name="T">
    /// Element type. Each parsed page is a <c>List&lt;Image&gt;</c> cast to
    /// <c>IList&lt;T&gt;</c>, so enumeration throws
    /// <see cref="InvalidCastException"/> when that cast is not valid.
    /// </typeparam>
    internal class ImageQuery<T> : IQueryable<T>, IOrderedQueryable<T>
    {
        private const string REG_EX = @"dyn\.Img\((?:\""(?<param>[^\""]*)\"",?\w*)+\);";

        // The parser reads captures up to index 14, so a usable match needs 15 of them.
        private const int MIN_PARAM_COUNT = 15;

        internal Regex _rx = new Regex(REG_EX);
        internal ImageQueryInfo _info;

        /// <summary>
        /// Removes the literal escape sequences <c>\x3cb\x3e</c> and
        /// <c>\x3c/b\x3e</c> (escaped bold open/close tags) from a description.
        /// </summary>
        private static string EncodeDescription(string desc)
        {
            return desc.Replace(@"\x3cb\x3e", "").Replace(@"\x3c/b\x3e", "");
        }

        /// <summary>
        /// Converts <paramref name="text"/> (upper-cased) to an <c>ImageFormat</c>;
        /// anything <see cref="Enum.Parse(Type, string)"/> rejects yields
        /// <c>ImageFormat.Any</c>.
        /// </summary>
        private static ImageFormat ParseFormat(string text)
        {
            try
            {
                // Invariant casing: culture-specific upper-casing (e.g. Turkish 'i')
                // would stop otherwise valid names from matching.
                return (ImageFormat)Enum.Parse(typeof(ImageFormat), text.ToUpperInvariant());
            }
            catch (ArgumentException)
            {
                return ImageFormat.Any;
            }
            catch (OverflowException)
            {
                return ImageFormat.Any;
            }
        }

        /// <summary>Builds one <c>Image</c> from the captured parameters of a single match.</summary>
        /// <param name="vals">Captured parameters; must hold at least <see cref="MIN_PARAM_COUNT"/> entries.</param>
        /// <param name="rank">Value stored in the image's <c>Rank</c>.</param>
        private Image CreateImage(CaptureCollection vals, int rank)
        {
            Image img = new Image();
            img.Rank            = rank;
            img.Url             = new Uri(vals[3].Value);
            img.Domain          = vals[11].Value;
            img.ThumbnailWidth  = Int32.Parse(vals[4].Value);
            img.ThumbnailHeight = Int32.Parse(vals[5].Value);
            img.Description     = EncodeDescription(vals[6].Value);
            // Colour and size are copied from the query criteria, not parsed from the page.
            img.Color           = _info.Color;
            img.Size            = _info.Size;
            img.ThumbnailURL    =
                string.Format("{0}?q=tbn:{1}", vals[14].Value, vals[2].Value);
            img.Format          = ParseFormat(vals[10].Value);

            // Parameter 9 is only used when it has the shape "<w> x <h> - <size><c>":
            // five space-separated tokens whose last one loses its final character
            // (presumably a unit suffix) before being parsed.
            string[] splits = vals[9].Value.Split(' ');
            if (splits.Length == 5 && splits[1] == "x" && splits[3] == "-"
                && splits[4].Length > 1)
            {
                img.Width    = Int32.Parse(splits[0]);
                img.Height   = Int32.Parse(splits[2]);
                img.FileSize = Int32.Parse(splits[4].Remove(splits[4].Length - 1));
            }
            return img;
        }

        /// <summary>
        /// Downloads the result page for <paramref name="start"/> and parses every
        /// regex match in it into an <c>Image</c>.
        /// </summary>
        /// <param name="start">Passed to <see cref="ImageQueryInfo.GetUrl"/>; also the base for each image's rank.</param>
        /// <returns>The parsed images, in page order; empty when nothing matched.</returns>
        private IList<T> RequestForPage(int start)
        {
            WebRequest request = WebRequest.Create(_info.GetUrl(start));

            // Dispose the response as well as the reader so the connection is released.
            string content;
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                content = reader.ReadToEnd();
            }

            List<Image> list = new List<Image>();
            int position = 1;
            foreach (Match m in _rx.Matches(content))
            {
                CaptureCollection vals = m.Groups[1].Captures;
                if (vals.Count < MIN_PARAM_COUNT)
                {
                    // Too few parameters to index safely; skip rather than throw.
                    continue;
                }
                list.Add(CreateImage(vals, start + position));
                position++;
            }
            return (IList<T>)list;
        }

        /// <summary>Creates a query over the given criteria.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is <c>null</c>.</exception>
        public ImageQuery(ImageQueryInfo info) {
            if (info == null)
                throw new ArgumentNullException("info", "Parameter is null.");

            _info = info;
        }

        #region IQueryable<T> Members
        /// <summary>
        /// Builds a new query whose criteria come from
        /// <c>ImageQueryBuilder.BuildQuery(expression)</c>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="expression"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">The expression's type is not assignable to <c>IQueryable&lt;S&gt;</c>.</exception>
        public IQueryable<S> CreateQuery<S>(System.Linq.Expressions.Expression expression)
        {
            if (expression == null)
            {
                throw new ArgumentNullException("expression");
            }
            if (!typeof(IQueryable<S>).IsAssignableFrom(expression.Type))
            {
                throw new ArgumentException("Expression type is not assignable to the requested IQueryable type.", "expression");
            }
            return new ImageQuery<S>(ImageQueryBuilder.BuildQuery(expression));
        }

        /// <summary>Not implemented; always throws <see cref="NotImplementedException"/>.</summary>
        public TResult Execute<TResult>(System.Linq.Expressions.Expression expression)
        {
            throw new NotImplementedException("The method or operation is not implemented.");
        }

        #endregion

        #region IEnumerable<T> Members

        /// <summary>
        /// Lazily yields results page by page, requesting the next page at the
        /// offset equal to the number of items yielded so far, and stops at the
        /// first page that produces no items.
        /// </summary>
        public IEnumerator<T> GetEnumerator()
        {
            // Start at offset 0 so the first result is included and ranks begin at 1;
            // starting at 1 skipped the first hit and numbered the next one 2.
            int offset = 0;
            while (true)
            {
                IList<T> batch = RequestForPage(offset);
                if (batch.Count == 0)
                {
                    yield break;
                }

                foreach (T item in batch)
                {
                    offset++;
                    yield return item;
                }
            }
        }

        #endregion

        #region IEnumerable Members

        /// <summary>Non-generic enumeration; forwards to <see cref="GetEnumerator"/>.</summary>
        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        #endregion

        #region IQueryable Members

        /// <summary>Not implemented; always throws <see cref="NotImplementedException"/>.</summary>
        public IQueryable CreateQuery(System.Linq.Expressions.Expression expression)
        {
            throw new NotImplementedException("The method or operation is not implemented.");
        }

        /// <summary>The element type of the sequence, <c>typeof(T)</c>.</summary>
        public Type ElementType
        {
            get { return typeof(T); }
        }

        /// <summary>Not implemented; always throws <see cref="NotImplementedException"/>.</summary>
        public object Execute(System.Linq.Expressions.Expression expression)
        {
            throw new NotImplementedException("The method or operation is not implemented.");
        }

        /// <summary>A constant expression wrapping this query instance.</summary>
        public System.Linq.Expressions.Expression Expression
        {
            get { return Expression.Constant(this); }
        }

        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Web Developer
United States

Comments and Discussions