using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Net;
using System.Linq;
using System.Linq.Expressions;
using MChen.Linq.GoogleSearch.Common;
namespace MChen.Linq.GoogleSearch.Groups
{
// Search criteria for a Google Groups query. Extends QueryInfo with subject,
// author, posting-date range and result ordering, and renders them as an
// advanced-search URL.
internal class GroupQueryInfo : QueryInfo
{
    internal const string ROOT = @"http://groups.google.com";
    internal const string SEARCH_URL = ROOT + @"/groups";
    // Number of results per page; GetUrl requests the same amount through the
    // literal "num=10" in its template.
    internal const int ITEM_PER_PG = 10;

    // Words that must appear in the subject (sent as as_usubject).
    internal List<string> Subject = new List<string>();
    // Author filter (sent as as_uauthors); empty string means "no filter".
    internal string Author = "";
    // Posting-date range; DateTime.MinValue marks a bound as unset.
    internal DateTime From = DateTime.MinValue;
    internal DateTime To = DateTime.MinValue;
    // true: order by date ("scoring=d"); false: order by relevance/rank ("scoring=r").
    internal bool OrderbyDate = false;

    // Renders the criteria as a Google Groups advanced-search URL.
    // NOTE(review): 'start' is never read in this method, so every call returns
    // the same URL regardless of the requested offset — confirm whether paging
    // is handled by the caller or is missing here.
    public override string GetUrl(int start)
    {
        const string template =
            @"{0}?as_q={1}&num=10&scoring={2}&hl=en&as_epq=&as_oq={3}&as_eq={4}&" +
            @"as_ugroup={5}&as_usubject={6}&as_uauthors={7}&lr=&as_qdr=&" +
            @"as_drrb=b&as_mind={8}&as_minm={9}&as_miny={10}&" +
            @"as_maxd={11}&as_maxm={12}&as_maxy={13}&safe=off";

        string allWords = HttpUtility.UrlEncode(GetWords(AllWords));
        char scoring = OrderbyDate ? 'd' : 'r';
        string anyWords = HttpUtility.UrlEncode(GetWords(OrWords));
        string excludedWords = HttpUtility.UrlEncode(GetWords(NotWords));
        string group = HttpUtility.UrlEncode(Domain);
        string subjectWords = HttpUtility.UrlEncode(GetWords(Subject));
        string author = HttpUtility.UrlEncode(Author);
        string[] lower = DateParts(From);
        string[] upper = DateParts(To);

        return string.Format(
            template,
            SEARCH_URL,
            allWords,
            scoring,
            anyWords,
            excludedWords,
            group,
            subjectWords,
            author,
            lower[0], lower[1], lower[2],
            upper[0], upper[1], upper[2]);
    }

    // Returns { day, month, year } as strings, or three empty strings when the
    // date is the DateTime.MinValue "unset" marker.
    private static string[] DateParts(DateTime date)
    {
        if (date == DateTime.MinValue)
        {
            return new string[] { "", "", "" };
        }
        return new string[]
        {
            date.Day.ToString(),
            date.Month.ToString(),
            date.Year.ToString()
        };
    }

    // Human-readable dump: the base description followed by one line for each
    // group-specific criterion that is set, and always the ordering.
    public override string ToString()
    {
        StringBuilder text = new StringBuilder(base.ToString());

        if (Subject.Count > 0)
        {
            text.AppendFormat("Subject: {0}\n", GetWords(Subject));
        }
        if (Author != string.Empty)
        {
            text.AppendFormat("Author: {0}\n", Author);
        }
        if (From != DateTime.MinValue)
        {
            text.AppendFormat("From: {0}\n", From);
        }
        if (To != DateTime.MinValue)
        {
            text.AppendFormat("To: {0}\n", To);
        }

        string ordering = OrderbyDate ? "Date" : "Relevance";
        text.AppendFormat("Order By: {0}\n", ordering);
        return text.ToString();
    }
}
internal class GroupSearch<T> : Searcher<T, GroupQueryInfo>,
IQueryable<T>, IOrderedQueryable<T> where T : Result
{
// Pattern matching one entry of the Google Groups result page.
// Capture groups, as consumed by CreateResult:
//   1 = href of the thread link (appended to GroupQueryInfo.ROOT)
//   2 = link text (subject, may contain markup)
//   3 = <img> tags of the rating block (one capture per image)
//   4 = group name
//   5 = description snippet (may contain markup)
//   6 = posting date text
//   7 = initial author
//   8 = number of messages
//   9 = number of authors
private const string REG_EX =
    @"<br>\W*<font\W*size=\""\+0\"">\W*<a\W*href=\""([^\""]+)\"">(.*)</a>" +
    @"\W*</font>[^<]+(?:<nobr>\W*(<img[^>]+>\W*)+</nobr>)?[^<]+<nobr>" +
    @"<font\W*size=\""\-1\"">Group:</font>\W*<a[^>]+>([^<]+)</a>\W*</nobr>[^<]*" +
    @"((?:[^<]|(?:<[^f]))*)\W*<font\W*class=\""gl\"">" +
    @"([^<]+)by([^<]+)-([^<]+)messages?\W*-([^<]+)authors?";

// The pattern is constant, so the Regex is built once and shared by all
// instances instead of being re-parsed for every searcher that is created.
private static readonly Regex _rx = new Regex(REG_EX);

// Regex handed to the base class for extracting results from a page.
protected override Regex _regex
{
    get { return _rx; }
}
// Removes every HTML tag ("<...>") from the input and returns the remaining text.
// Uses the static Regex.Replace overload, which caches the parsed pattern,
// rather than constructing a new Regex on each call.
private static string TrimHtml(string input)
{
    return Regex.Replace(input, @"<[^>]+>", "");
}
// Builds a Message from one regex match of the result page.
// The post date falls back to today's date whenever the date text does not
// start with a recognised three-letter month name (or has an unexpected
// number of space-separated parts).
protected override Result CreateResult(Match match)
{
    string[] months = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };

    Message msg = new Message();
    msg.Url = new Uri(GroupQueryInfo.ROOT + match.Groups[1].Value);
    msg.Subject = TrimHtml(match.Groups[2].Value);
    msg.Domain = match.Groups[4].Value;
    msg.InitialAuthor = match.Groups[7].Value.Trim();
    msg.Description = TrimHtml(match.Groups[5].Value).Trim();
    msg.NumberOfAuthors = Int32.Parse(match.Groups[9].Value.Trim());
    msg.NumberOfReply = Int32.Parse(match.Groups[8].Value.Trim());

    //rating: only evaluated when exactly five images were captured; each
    //image whose tag contains "_on_" counts as one point
    int rating = 0;
    if (match.Groups[3].Captures.Count == 5)
    {
        foreach (Capture cap in match.Groups[3].Captures)
        {
            if (cap.Value.IndexOf("_on_") != -1) rating++;
        }
    }
    msg.Rating = rating;

    //postdate: "<Mon> <day>" or "<Mon> <day> <year>"; an empty third part
    //means the current year
    DateTime postDate = DateTime.Today;
    string[] parts = match.Groups[6].Value.Split(' ');
    if (parts.Length == 2 || parts.Length == 3)
    {
        // Array.IndexOf returns -1 for an unknown name, so after the +1 an
        // unrecognised month is 0 (not -1). Testing for 0 keeps such text
        // from reaching the DateTime constructor with an invalid month.
        int month = Array.IndexOf(months, parts[0].Trim()) + 1;
        if (month != 0)
        {
            int day = Int32.Parse(parts[1].Trim());
            int year = DateTime.Today.Year;
            if (parts.Length == 3 && parts[2].Trim() != string.Empty)
            {
                year = Int32.Parse(parts[2].Trim());
            }
            postDate = new DateTime(year, month, day);
        }
    }
    msg.PostDate = postDate;

    return msg;
}
// Creates a searcher that starts from an empty GroupQueryInfo.
internal GroupSearch()
    : this(new GroupQueryInfo())
{
}
// Creates a searcher over an existing query description.
// Throws ArgumentNullException (an ArgumentException) when info is null.
internal GroupSearch(GroupQueryInfo info)
{
    if (info == null)
        throw new ArgumentNullException("info", "Parameter is null.");
    _info = info;
}
#region IExpressionVisitor members
// Translates a sequence operator call into query settings.
// Supported: Where(source, x => ...) and OrderBy(source, x => x.Rank | x.PostDate).
// Throws NotSupportedException for an OrderBy on any other key and
// ArgumentException for any other operator or argument shape.
public override GroupQueryInfo VisitSequenceOperatorCall(MethodCallExpression mc, GroupQueryInfo qinfo)
{
    //pass through the Constant expression: when the source is an existing
    //GroupSearch<Message>, continue from its query info
    if (mc.Arguments.Count == 2 && mc.Arguments[0].NodeType == ExpressionType.Constant)
    {
        GroupSearch<Message> qgrp =
            ((ConstantExpression)mc.Arguments[0]).Value as GroupSearch<Message>;
        if (qgrp != null) qinfo = qgrp._info;
    }

    //check "Where" and "OrderBy"
    switch (mc.Method.Name)
    {
        case "Where":
        {
            LambdaExpression predicate = GetSingleParameterLambda(mc);
            if (predicate != null)
            {
                return VisitLambda(predicate, qinfo);
            }
            break;
        }
        case "OrderBy":
        {
            LambdaExpression keySelector = GetSingleParameterLambda(mc);
            if (keySelector == null)
            {
                break;
            }
            if (ExpressionUtil.IsMember(keySelector.Body))
            {
                MemberExpression mexp = ExpressionUtil.GetMember(keySelector.Body);
                if (mexp.Member.DeclaringType.IsAssignableFrom(typeof(Message)))
                {
                    if (mexp.Member.Name == "Rank")
                    {
                        qinfo.OrderbyDate = false;
                        return qinfo;
                    }
                    if (mexp.Member.Name == "PostDate")
                    {
                        qinfo.OrderbyDate = true;
                        return qinfo;
                    }
                }
            }
            // The accepted keys are Rank and PostDate, so the message names those.
            throw new NotSupportedException(
                "Only order by Rank or PostDate is supported.");
        }
        default:
            break;
    }
    throw new ArgumentException(
        string.Format("Sequence Call {0} not yet supported.", mc.Method.Name));
}

// Returns the call's second argument as a lambda when the call has exactly two
// arguments and that lambda takes exactly one parameter; otherwise null.
private static LambdaExpression GetSingleParameterLambda(MethodCallExpression mc)
{
    if (mc.Arguments.Count != 2 || !ExpressionUtil.IsLambda(mc.Arguments[1]))
    {
        return null;
    }
    LambdaExpression lambda = ExpressionUtil.GetLambda(mc.Arguments[1]);
    return lambda.Parameters.Count == 1 ? lambda : null;
}
// Visits an AND (&&) node by visiting its left and then its right operand.
// _insideAnd is set while the operands are visited and restored afterwards,
// in the same way VisitOrElse and VisitNot handle their flags.
// Throws ArgumentException when the node is not an AndAlso expression.
public override GroupQueryInfo VisitAndAlso(BinaryExpression node, GroupQueryInfo qinfo)
{
    if (node.NodeType != ExpressionType.AndAlso)
        throw new ArgumentException("Argument is not AND.", "node");

    bool wasInsideAnd = _insideAnd;
    try
    {
        _insideAnd = true;
        //simply visit left and right
        qinfo = Visit(node.Left, qinfo);
        qinfo = Visit(node.Right, qinfo);
    }
    finally
    {
        _insideAnd = wasInsideAnd;
    }
    return qinfo;
}
// Visits an OR (||) node. Each operand must be either a method call or
// another OR node; anything else raises ArgumentException. _insideOr is true
// while the operands are visited and is restored to its prior value afterwards.
public override GroupQueryInfo VisitOrElse(BinaryExpression node, GroupQueryInfo qinfo)
{
    if (node.NodeType != ExpressionType.OrElse)
        throw new ArgumentException("Argument is not OR expression.", "node");

    bool previous = _insideOr;
    _insideOr = true;
    try
    {
        // left operand first, then right
        qinfo = VisitOrOperand(node.Left, qinfo);
        qinfo = VisitOrOperand(node.Right, qinfo);
        return qinfo;
    }
    finally
    {
        _insideOr = previous;
    }
}

// Dispatches one operand of an OR node: method calls go to VisitMethodCall,
// nested OR nodes recurse into VisitOrElse, everything else is rejected.
private GroupQueryInfo VisitOrOperand(Expression operand, GroupQueryInfo qinfo)
{
    switch (operand.NodeType)
    {
        case ExpressionType.Call:
            return VisitMethodCall((MethodCallExpression)operand, qinfo);
        case ExpressionType.OrElse:
            return VisitOrElse((BinaryExpression)operand, qinfo);
        default:
            throw new ArgumentException("OR operator must be used on leaf expression nodes.", "node");
    }
}
// Visits a NOT (!) node. Only negation of a method call is supported; any
// other operand raises ArgumentException. _insideNot is true while the call
// is visited and is restored to its prior value afterwards.
public override GroupQueryInfo VisitNot(UnaryExpression node, GroupQueryInfo qinfo)
{
    Expression operand = node.Operand;
    if (operand.NodeType != ExpressionType.Call)
    {
        throw new ArgumentException(
            string.Format("Not operator on {0} not supported.", operand.NodeType));
    }

    bool previous = _insideNot;
    _insideNot = true;
    try
    {
        return VisitMethodCall((MethodCallExpression)operand, qinfo);
    }
    finally
    {
        _insideNot = previous;
    }
}
// Visits an equality node of the form "member == constant" or
// "constant == member", where member belongs to Message.
//   Group / Domain  -> qinfo.Domain
//   InitialAuthor   -> qinfo.Author
// Throws ArgumentException when the operands do not have that shape, when the
// constant evaluates to null, or when the member is not one of the above.
public override GroupQueryInfo VisitEquals(BinaryExpression node, GroupQueryInfo qinfo)
{
    MemberExpression member = null;
    ConstantExpression constant = null;

    // Classify left, then right. A side that is neither a member access nor
    // a constant is evaluated into a string constant.
    foreach (Expression side in new Expression[] { node.Left, node.Right })
    {
        if (ExpressionUtil.IsMember(side))
            member = ExpressionUtil.GetMember(side);
        else if (side.NodeType == ExpressionType.Constant)
            constant = side as ConstantExpression;
        else
            constant = ExpressionUtil.ProduceConstantExpression<string>(side);
    }

    // A null constant value is rejected here so it surfaces as the documented
    // ArgumentException rather than a NullReferenceException on ToString().
    if (member == null || constant == null || constant.Value == null ||
        !member.Member.DeclaringType.IsAssignableFrom(typeof(Message)))
        throw new ArgumentException(
            "Equals operator must apply to a Message member and a constant.");

    string value = constant.Value.ToString();
    switch (member.Member.Name)
    {
        // CreateResult stores the group name in Message.Domain, and the error
        // text below lists Domain as supported, so both names map to the
        // group filter.
        case "Group":
        case "Domain":
            qinfo.Domain = value;
            break;
        case "InitialAuthor":
            qinfo.Author = value;
            break;
        default:
            throw new ArgumentException(
                "Only InitialAuthor, Group and Domain fields are supported for Equals operator.");
    }
    return qinfo;
}
// Visits an instance method call. The only call accepted is Contains(term) on
// a Message member:
//   Subject.Contains(term)     -> added to qinfo.Subject (not allowed inside OR / NOT)
//   Description.Contains(term) -> added to NotWords inside NOT, OrWords inside OR,
//                                 otherwise AllWords
// Any other method or member raises NotSupportedException; a Contains call
// without exactly one argument raises ArgumentException.
public override GroupQueryInfo VisitRegularCall(
    MethodCallExpression node, GroupQueryInfo qinfo)
{
    // Resolve the name of the Message member the method is invoked on, if any.
    string memberName = null;
    if (node.Object != null && ExpressionUtil.IsMember(node.Object))
    {
        MemberExpression target = ExpressionUtil.GetMember(node.Object);
        if (target.Member.DeclaringType.IsAssignableFrom(typeof(Message)))
        {
            memberName = target.Member.Name;
        }
    }

    if (memberName == null || node.Method.Name != "Contains")
    {
        throw new NotSupportedException(
            string.Format("Method {0} is not supported.", node.Method.Name));
    }

    if (node.Arguments.Count != 1)
        throw new ArgumentException("Only constant search terms are supported.");

    // A non-constant argument is evaluated into a string constant first.
    Expression argument = node.Arguments[0];
    ConstantExpression constant;
    if (argument.NodeType == ExpressionType.Constant)
        constant = argument as ConstantExpression;
    else
        constant = ExpressionUtil.ProduceConstantExpression<string>(argument);
    string term = constant.Value.ToString();

    switch (memberName)
    {
        case "Subject":
            if (_insideNot || _insideOr)
                throw new NotSupportedException(
                    "Subject query can't be part of OR or NOT expression.");
            qinfo.Subject.Add(term);
            break;

        case "Description":
            if (_insideNot)
                qinfo.NotWords.Add(term);
            else if (_insideOr)
                qinfo.OrWords.Add(term);
            else
                qinfo.AllWords.Add(term);
            break;

        default:
            throw new NotSupportedException(
                string.Format("Method {0} is not supported.", node.Method.Name));
    }
    return qinfo;
}
// Visits a lambda by visiting its body; the parameters are not inspected.
public override GroupQueryInfo VisitLambda(LambdaExpression node, GroupQueryInfo qinfo)
{
    Expression body = node.Body;
    GroupQueryInfo result = Visit(body, qinfo);
    return result;
}
// Applies a date comparison to the query: the constant becomes the lower
// bound (From) when greaterThan is true, otherwise the upper bound (To).
// Throws ArgumentException unless member is Message.PostDate and the constant
// holds a DateTime.
private GroupQueryInfo ProcessCompare(
    MemberExpression member, ConstantExpression constant,
    bool greaterThan, GroupQueryInfo qinfo)
{
    bool isPostDate = member != null
        && member.Member.DeclaringType.IsAssignableFrom(typeof(Message))
        && member.Member.Name == "PostDate";

    // "is DateTime" is false for a null value, so a null constant is reported
    // through the ArgumentException below instead of failing on GetType().
    if (!isPostDate || constant == null || !(constant.Value is DateTime))
        throw new ArgumentException(
            "Compare operator must apply to a Message PostDate and a constant.");

    DateTime bound = (DateTime)constant.Value;
    if (greaterThan)
    {
        qinfo.From = bound;
    }
    else
    {
        qinfo.To = bound;
    }
    return qinfo;
}
// Visits "member > constant" / "constant > member" (and the >= forms).
// With the member on the left the constant is a lower bound (From); with the
// member on the right it is an upper bound (To). The 'equal' flag is not used.
public override GroupQueryInfo VisitGreaterThan(BinaryExpression node, bool equal, GroupQueryInfo qinfo)
{
    return VisitDateComparison(node, true, qinfo);
}

// Visits "member < constant" / "constant < member" (and the <= forms).
// With the member on the left the constant is an upper bound (To); with the
// member on the right it is a lower bound (From). The 'equal' flag is not used.
public override GroupQueryInfo VisitLessThan(BinaryExpression node, bool equal, GroupQueryInfo qinfo)
{
    return VisitDateComparison(node, false, qinfo);
}

// Shared implementation of the two comparison visitors. Classifies each side
// as a member access or a constant (a side that is neither is evaluated into a
// DateTime constant) and forwards to ProcessCompare.
// lowerBoundWhenMemberOnLeft: whether the constant is a lower bound when the
// member is the left operand; the meaning flips when the member is on the right.
private GroupQueryInfo VisitDateComparison(
    BinaryExpression node, bool lowerBoundWhenMemberOnLeft, GroupQueryInfo qinfo)
{
    bool isLowerBound = lowerBoundWhenMemberOnLeft;
    MemberExpression member = null;
    ConstantExpression constant = null;

    if (ExpressionUtil.IsMember(node.Left))
        member = ExpressionUtil.GetMember(node.Left);
    else if (node.Left.NodeType == ExpressionType.Constant)
        constant = node.Left as ConstantExpression;
    else
        constant = ExpressionUtil.ProduceConstantExpression<DateTime>(node.Left);

    if (ExpressionUtil.IsMember(node.Right))
    {
        member = ExpressionUtil.GetMember(node.Right);
        // operands are swapped, so the bound direction is reversed
        isLowerBound = !lowerBoundWhenMemberOnLeft;
    }
    else if (node.Right.NodeType == ExpressionType.Constant)
        constant = node.Right as ConstantExpression;
    else
        constant = ExpressionUtil.ProduceConstantExpression<DateTime>(node.Right);

    return ProcessCompare(member, constant, isLowerBound, qinfo);
}
// Traversal state, all initially false (the default for bool fields).
// True while the operands of an OR node are being visited (see VisitOrElse).
private bool _insideOr;
// Saved and restored around the operands of an AND node (see VisitAndAlso).
private bool _insideAnd;
// True while the operand of a NOT node is being visited (see VisitNot).
private bool _insideNot;
#endregion
}
}