#define WITHOUT_NETWORK_CREDENTIAL // with or without proxy server support
#define WITH_LOGGING // with or without logging to Console
#define WITHOUT_MORE_LOGGING // with or without more logging messages
#define WITHOUT_MEMBER_LIMIT // with or without limited number of members
using System;
using System.Collections.Generic;
using System.Diagnostics; // StopWatch
using System.Drawing;
using System.Globalization; // NumberStyles
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace CPVanity {
public class CPSite {
// constants
// Root URL of the site; downloadPage prefixes it to relative paths and the URL getters build on it.
private static string baseURL="http://www.codeproject.com";
// Value assigned to HttpWebRequest.Timeout in CreateWebRequest (that property is expressed in milliseconds).
private const int WEB_TIMEOUT=15000;
// variables
// Member this instance was constructed for; used to build the article, member and reputation-graph URLs.
private uint memberID;
// Most recently downloaded HTML. Written by GetArticlePage, GetWhoIsWhoPage and GetHonors;
// read by GetName, GetAdornedName, GetAverageRating and GetArticles.
private string page;
// Result list; replaced with a fresh list on every GetArticles call.
private List<Article> articles;
// NOTE(review): not referenced anywhere in the visible code - possibly a leftover; confirm before removing.
private Dictionary<string, Regex> regexesOneRep=new Dictionary<string, Regex>();
// Lazily created in GetTopMembers: regexOneRep1 selects one reputation table cell,
// regexOneRep2 extracts the bold number inside that cell.
private Regex regexOneRep1;
private Regex regexOneRep2;
// Lazily created in GetTopMembers: regexCounter1 selects one counter table row,
// regexCounter2 extracts the counter's link text and its numeric value.
private Regex regexCounter1;
private Regex regexCounter2;
// Created in the constructor; in the visible code it is only touched again (proxy setup)
// when WITH_NETWORK_CREDENTIAL is defined. The downloads themselves use HttpWebRequest.
private WebClient client;
// Number of Who's Who pages requested per sort type; incremented by GetWhoIsWhoPage
// and reset by ClearWhoIsWhoPageCounters.
public int ArticlesPageCount { get; private set; }
public int MessagesPageCount { get; private set; }
public int ReputationPageCount { get; private set; }
/// <summary>
/// Creates a site accessor bound to a single member.
/// </summary>
/// <param name="memberID">Member whose pages this instance will fetch.</param>
public CPSite(uint memberID) {
    client=new WebClient();
    this.memberID=memberID;
#if WITH_NETWORK_CREDENTIAL
    // Optional proxy support, based on a suggestion by Don Kackman
    // http://www.codeproject.com/KB/dotnet/CPVanity.aspx?msg=3416457#xx3416457xx
    Uri proxyUri=new Uri("http://proxy.sitename.com:8080");
    CredentialCache credentials=new CredentialCache();
    credentials.Add(proxyUri, "Basic", new NetworkCredential("username", "password"));
    client.Proxy=new WebProxy(proxyUri, true, null, credentials.GetCredential(proxyUri, "Basic"));
#endif
}
/// <summary>
/// Writes one time-stamped line to the console. Compiles to an empty method
/// unless WITH_LOGGING is defined.
/// </summary>
/// <param name="s">Message text; the current local time (HH:mm:ss) is prepended.</param>
private static void log(string s) {
#if WITH_LOGGING
    Console.WriteLine(DateTime.Now.ToString("HH:mm:ss ")+s);
#endif
}
/// <summary>
/// Verbose-level logging: forwards to <c>log</c> only when WITH_MORE_LOGGING is defined,
/// otherwise does nothing.
/// </summary>
/// <param name="s">Message text.</param>
private static void logMore(string s) {
#if WITH_MORE_LOGGING
    log(s);
#endif
}
/// <summary>
/// Logs a title followed by the full text of an exception, one log entry per non-empty line.
/// </summary>
/// <param name="title">Heading logged before the exception text.</param>
/// <param name="exc">Exception whose <c>ToString()</c> output is logged.</param>
private static void log(string title, Exception exc) {
    log(title);
    // Split on both CR and LF and drop the empty pieces that CRLF pairs produce.
    string[] lines=exc.ToString().Split(new char[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string line in lines) {
        log(line);
    }
}
/// <summary>Root URL of the site all requests are made against.</summary>
public string BaseURL {
    get { return baseURL; }
}
/// <summary>
/// Checks reachability by resolving the site's host name through DNS.
/// </summary>
/// <exception cref="Exception">
/// Thrown when the lookup fails; the original failure is logged and attached as the inner exception.
/// </exception>
public void Ping() {
    string failure="Failed to connect to "+baseURL;
    try {
        Dns.GetHostEntry("www.codeproject.com");
    } catch (Exception exc) {
        log(failure, exc);
        throw new Exception(failure, exc);
    }
}
/// <summary>
/// Downloads the 225x90 site logo image.
/// </summary>
/// <returns>The logo bitmap, or null when <c>DownloadImage</c> could not fetch it.</returns>
public Bitmap GetBob() {
    // An earlier location of the same image was /SiteRes/CP/Img/Std/logo225x90.gif
    string logoUrl=baseURL+"/App_Themes/Std/Img/logo225x90.gif";
    return DownloadImage(logoUrl);
}
/// <summary>
/// Downloads the reputation graph image of the member this instance was created for.
/// </summary>
/// <returns>The graph bitmap, or null when <c>DownloadImage</c> returns null.</returns>
public Bitmap GetReputationGraph() {
    string graphUrl=baseURL+"/script/Reputation/ReputationGraph.aspx?mid="+memberID;
    return DownloadImage(graphUrl);
}
/// <summary>Builds the absolute URL of the site's reputation page (Membership/Reputation.aspx).</summary>
/// <returns>The URL as a string; nothing is downloaded.</returns>
public static string GetReputationFaqUrl() {
    const string faqPath="/script/Membership/Reputation.aspx";
    return baseURL+faqPath;
}
/// <summary>
/// Downloads the member's article list page and remembers it in the <c>page</c> field,
/// where GetName, GetAdornedName, GetAverageRating and GetArticles read it.
/// </summary>
/// <returns>The downloaded HTML.</returns>
public string GetArticlePage() {
    string relativeUrl="script/Articles/MemberArticles.aspx?amid="+memberID;
    page=downloadPage(relativeUrl);
    return page;
}
/// <summary>
/// Builds the absolute URL of a member's profile page.
/// </summary>
/// <param name="memberID">
/// Member number; it is formatted with "D7", i.e. zero-padded to at least seven digits
/// (6556 becomes 0006556).
/// </param>
/// <returns>For example http://www.codeproject.com/script/Membership/view.aspx?mid=0006556</returns>
public static string GetMemberPageUrl(uint memberID) {
    string paddedId=memberID.ToString("D7");
    return baseURL+"/script/Membership/view.aspx?mid="+paddedId;
}
/// <summary>
/// Downloads the profile page of the given member. Unlike GetArticlePage, this method
/// does not store the result in the <c>page</c> field.
/// </summary>
/// <param name="memberID">Member number of the profile to fetch.</param>
/// <returns>The downloaded HTML.</returns>
public string GetMemberPage(uint memberID) {
    return downloadPage(GetMemberPageUrl(memberID));
}
/// <summary>
/// Resets the three per-sort-type page counters that GetWhoIsWhoPage increments.
/// </summary>
public void ClearWhoIsWhoPageCounters() {
    ArticlesPageCount=0;
    MessagesPageCount=0;
    ReputationPageCount=0;
}
/// <summary>
/// Downloads one Who's Who listing page and stores it in the <c>page</c> field.
/// </summary>
/// <param name="type">
/// Sort order: 'A' = article count, 'M' = message count, 'R' = reputation. The matching
/// page counter is incremented. Any other value sends an empty sort key and touches no counter.
/// </param>
/// <param name="pageNumber">Page to fetch (the original note gives the range as [1,4]).</param>
/// <returns>The downloaded HTML.</returns>
public string GetWhoIsWhoPage(char type, int pageNumber) {
    string sortKey="";
    if (type=='A') {
        sortKey="ArticleCount";
        ArticlesPageCount++;
    } else if (type=='M') {
        sortKey="MessageCount";
        MessagesPageCount++;
    } else if (type=='R') {
        sortKey="Reputation";
        ReputationPageCount++;
    }
    string relativeUrl="script/Membership/Profiles.aspx?ml_ob="+sortKey+"&pgnum="+pageNumber;
    page=downloadPage(relativeUrl);
    return page;
}
/// <summary>
/// Downloads an image and returns it as a bitmap that does not depend on the network stream.
/// </summary>
/// <param name="URL">Absolute URL of the image.</param>
/// <returns>
/// The decoded bitmap, or null when the request fails, the body cannot be decoded as an image,
/// or the server reports a body shorter than 1000 bytes.
/// </returns>
public Bitmap DownloadImage(string URL) {
    try {
        HttpWebRequest req=CreateWebRequest(URL);
        // Dispose the response on every path (including the early return) so the
        // underlying connection is released.
        using (HttpWebResponse resp=(HttpWebResponse)req.GetResponse()) {
            log("resp.StatusCode="+resp.StatusCode);
            log("resp.ContentLength="+resp.ContentLength);
            // Reputation graphs for nonexisting memberID's return a short web page
            // containing an error string. ContentLength is -1 when the server did not
            // send the header; in that case try to decode instead of giving up, since a
            // non-image body makes the Bitmap constructor throw and is handled below.
            long contentLength=resp.ContentLength;
            if (contentLength>=0 && contentLength<1000) return null;
            using (Stream stream=resp.GetResponseStream())
            using (Bitmap streamBound=new Bitmap(stream)) {
                // A Bitmap created from a stream needs that stream for its whole lifetime,
                // so hand out an independent copy before the stream is closed.
                return new Bitmap(streamBound);
            }
        }
    } catch (Exception exc) {
        log("Could not access "+URL, exc);
        return null;
    }
}
/// <summary>
/// Downloads a page as text.
/// </summary>
/// <param name="URL">
/// Absolute URL, or a path relative to the site root (anything not starting with "http"
/// gets the base URL and a slash prepended).
/// </param>
/// <returns>The page text, with every line terminated by <c>Environment.NewLine</c>.</returns>
/// <exception cref="Exception">
/// Thrown when the download fails; the failure is logged and attached as the inner exception.
/// </exception>
private string downloadPage(string URL) {
    // Ordinal: this is a protocol prefix check, not a linguistic comparison.
    if (!URL.StartsWith("http", StringComparison.Ordinal)) URL=baseURL+"/"+URL;
    try {
        log("begin downloadPage "+URL);
        HttpWebRequest req=CreateWebRequest(URL);
        // The response owns the connection; dispose it even when reading fails.
        using (HttpWebResponse resp=(HttpWebResponse)req.GetResponse())
        using (Stream stream=resp.GetResponseStream())
        using (StreamReader reader=new StreamReader(stream)) {
            StringBuilder sb=new StringBuilder(300000);
            string line;
            // Read line by line so that line endings are normalized, as callers expect.
            while ((line=reader.ReadLine())!=null) {
                sb.AppendLine(line);
            }
            log("downloadPage done; len="+(sb.Length/1024)+"KB");
            return sb.ToString();
        }
    } catch (Exception exc) {
        log("Could not access "+URL, exc);
        throw new Exception("Could not access "+URL, exc);
    }
}
/// <summary>
/// Creates an HTTP request for the given URL with the class-wide timeout applied and,
/// when WITH_NETWORK_CREDENTIAL is defined, an authenticating proxy.
/// </summary>
/// <param name="URL">Absolute URL to request.</param>
/// <returns>The configured, not yet sent request.</returns>
private static HttpWebRequest CreateWebRequest(string URL) {
    HttpWebRequest request=(HttpWebRequest)WebRequest.Create(URL);
    request.Timeout=WEB_TIMEOUT;
#if WITH_NETWORK_CREDENTIAL
    // Optional proxy support, as suggested by Don Kackman
    // http://www.codeproject.com/KB/dotnet/CPVanity.aspx?msg=3416457#xx3416457xx
    Uri proxyUri=new Uri("http://proxy.sitename.com:8080");
    CredentialCache credentials=new CredentialCache();
    credentials.Add(proxyUri, "Basic", new NetworkCredential("username", "password"));
    request.Proxy=new WebProxy(proxyUri, true, null, credentials.GetCredential(proxyUri, "Basic"));
#endif
    return request;
}
/// <summary>
/// Extracts the member name from the "Articles by NAME (" text of the page held in
/// the <c>page</c> field.
/// </summary>
/// <returns>
/// The trimmed text between "Articles by " and the next opening parenthesis,
/// or an empty string when the page contains no such text.
/// </returns>
public string GetName() {
    // The name is captured directly. Searching the matched text for the last "by"
    // would cut into names that themselves contain "by" (e.g. "Bobby", "Ruby").
    Match match=Regex.Match(page, @"Articles by ([^\(]*)\(");
    string s="";
    if (match.Success) {
        log("s="+match.Value);
        s=match.Groups[1].Value.Trim();
    }
    log("GetName returns: "+s);
    return s;
}
/// <summary>
/// Extracts the member name, including any markup decorating it, from the first
/// single-line h1 heading of the page held in the <c>page</c> field.
/// </summary>
/// <returns>
/// The heading content without a leading "Articles by " and without the trailing
/// " (...)" statistics, trimmed; an empty string when no heading is found.
/// </returns>
public string GetAdornedName() {
    log("GetAdornedName");
    // looking for:
    // <h1>Articles by <sup class="Error">C</sup>Pallini (Articles: 3, Answers: 389, Tip/Trick: 1)</h1>
    const string openTag="<h1>";
    const string closeTag="</h1>";
    const string prefix="articles by ";
    Match match=Regex.Match(page, "<h1>.*?</h1>");
    string s="";
    if (match.Success) {
        s=match.Value;
        // strip h1 tags
        s=s.Substring(openTag.Length, s.Length-openTag.Length-closeTag.Length);
        // Ordinal, case-insensitive: the result must not depend on the current culture.
        if (s.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) s=s.Substring(prefix.Length);
        // Drop the trailing " (Articles: ..., ...)" statistics.
        int idx=s.LastIndexOf(" (", StringComparison.Ordinal);
        if (idx>0) s=s.Substring(0, idx);
        s=s.Trim();
    }
    log("GetAdornedName returns: "+s);
    return s;
}
/// <summary>
/// Downloads the member's profile page (replacing the content of the <c>page</c> field)
/// and collects the URLs of the images located under /script/Membership/Images/.
/// </summary>
/// <returns>
/// Absolute image URLs in page order; images whose path contains "help.gif" are left out.
/// </returns>
public List<string> GetHonors() {
    page=GetMemberPage(memberID);
    // <img title="..." src="/script/Membership/Images/....gif" alt="" style="border-width:0px;" />
    MatchCollection srcMatches=Regex.Matches(page, @"src=""([a-z\./]*)""", RegexOptions.IgnoreCase);
    log("GetHonors: match count="+srcMatches.Count);
    List<string> honors=new List<string>();
    foreach (Match srcMatch in srcMatches) {
        string path=srcMatch.Groups[1].Value;
        if (!path.StartsWith("/script/Membership/Images/")) continue;
        if (path.Contains("help.gif")) continue;
        string imageUrl=baseURL+path;
        log(imageUrl);
        honors.Add(imageUrl);
    }
    return honors;
}
/// <summary>
/// Reads the "Average article rating" figure from the page held in the <c>page</c> field.
/// </summary>
/// <returns>
/// The rating text exactly as it appears on the page (digits and dots),
/// or an empty string when the page has no such entry.
/// </returns>
public string GetAverageRating() {
    // example: <h4>Average article rating: 3.68</h4>
    const string marker="rating: ";
    string rating="";
    Match match=Regex.Match(page, @">Average article rating: [0-9\.]*");
    if (match.Success) {
        string found=match.Value;
        int start=found.LastIndexOf(marker)+marker.Length;
        rating=found.Substring(start).Trim();
    }
    log("Average Rating=" + rating);
    return rating;
}
/// <summary>
/// Parses every article entry of the page held in the <c>page</c> field into an
/// <c>Article</c>. The result also replaces the <c>articles</c> field.
/// </summary>
/// <returns>
/// One item per matched entry, in page order. A statistic that is missing or cannot be
/// parsed keeps its default (0, empty string, or DateTime.MinValue).
/// </returns>
public List<Article> GetArticles() {
    // Each entry starts at <span id="ctl00_MC_AR_ctl00_MAS"> or <span id="ctl00_MC_TR_ctl00_MAS">
    articles=new List<Article>();
    const string entryPattern="<span id=\"ctl\\d*_MC_.R_ctl\\d*_MAS[\\s\\S]*?</div>\\s*?</span>";
    MatchCollection entries=Regex.Matches(page, entryPattern,
        RegexOptions.Multiline | RegexOptions.ExplicitCapture);
    log("Match Count="+entries.Count);

    // The label patterns deliberately omit the first letter so that either case matches.
    Regex viewsRx=new Regex("iew.*?: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex ratingRx=new Regex("ating: [0-9./]*", RegexOptions.ExplicitCapture);
    Regex votesRx=new Regex("otes: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex bookmarksRx=new Regex("ookmark.*?: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex downloadsRx=new Regex("ownload.*?: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex popularityRx=new Regex("opularity: [0-9.]*", RegexOptions.ExplicitCapture);
    // No ExplicitCapture here: groups 1 (href) and 2 (title) are read below.
    Regex linkRx=new Regex("_AT\" href=\"([-a-zA-Z_/\\#0-9.:]*?)\">([\\s\\S]*?)<");
    Regex updatedRx=new Regex("pdate.*?: [0-9 a-zA-Z]*", RegexOptions.ExplicitCapture);

    foreach (Match entry in entries) {
        string html=entry.Value;
        if (html.Contains("CP Vanity")) log("match="+html);

        // views
        int views=0;
        Match viewsHit=viewsRx.Match(html);
        if (viewsHit.Success) {
            string viewsText=afterColon(viewsHit);
            bool OK=int.TryParse(viewsText, NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture, out views);
            log("OK="+OK+" \""+viewsText+"\"");
        }

        // rating
        string rating="";
        Match ratingHit=ratingRx.Match(html);
        if (ratingHit.Success) {
            rating=afterColon(ratingHit);
            if (rating.StartsWith("0")) rating=""; // tips without any vote
        }

        // votes
        int votes=parseCount(votesRx.Match(html));

        // bookmarks
        Match bookmarksHit=bookmarksRx.Match(html);
        log("Bookmark Match="+bookmarksHit.Value+" "+bookmarksHit.Success);
        int bookmarks=parseCount(bookmarksHit);

        // downloads
        Match downloadsHit=downloadsRx.Match(html);
        log("Download Match="+downloadsHit.Value+" "+downloadsHit.Success);
        int downloads=parseCount(downloadsHit);

        // popularity
        float popularity=0;
        Match popularityHit=popularityRx.Match(html);
        if (popularityHit.Success) {
            float.TryParse(afterColon(popularityHit), NumberStyles.AllowDecimalPoint,
                CultureInfo.InvariantCulture, out popularity);
        }

        // URL and title
        string URL="";
        string title="";
        Match linkHit=linkRx.Match(html);
        if (linkHit.Success) {
            URL=linkHit.Groups[1].Value;
            if (URL.StartsWith("/")) URL=CPSite.baseURL+URL;
            title=linkHit.Groups[2].Value;
            log("title="+title);
        }

        // updated
        DateTime updated=DateTime.MinValue;
        Match updatedHit=updatedRx.Match(html);
        if (updatedHit.Success) {
            log(updatedHit.Value);
            string updatedText=afterColon(updatedHit);
            bool OK=DateTime.TryParse(updatedText, CultureInfo.InvariantCulture,
                DateTimeStyles.None, out updated);
            log("OK="+OK+" \""+updatedText+"\"");
            log("updated="+updated);
        }

        articles.Add(new Article(views, rating, votes, popularity, URL, title,
            updated, bookmarks, downloads));
    }
    return articles;
}

// Returns the part of a "label: value" match that follows the last colon and the space after it.
private static string afterColon(Match m) {
    return m.Value.Substring(m.Value.LastIndexOf(":")+2);
}

// Parses the integer (thousands separators allowed) of a "label: value" match; 0 when there is no match or no number.
private static int parseCount(Match m) {
    int count=0;
    if (m.Success) {
        int.TryParse(afterColon(m), NumberStyles.AllowThousands,
            CultureInfo.InvariantCulture, out count);
    }
    return count;
}
/// <summary>
/// Parses one Who's Who listing page and returns the members found on it, keyed by name.
/// </summary>
/// <param name="page">
/// HTML of the listing page. This parameter hides the <c>page</c> field; only the
/// argument is parsed.
/// </param>
/// <param name="excludeDebatorFromTotal">
/// Only used when a member's total is 0 after parsing: the total is then computed by summing
/// reputation components 0..6, and component index 2 is skipped when this flag is true
/// (presumably index 2 is the Debator component - confirm against Member.GetReputationComponentValue).
/// </param>
/// <returns>
/// The parsed members. When a name occurs more than once, only the first occurrence is kept.
/// A member whose name cannot be found is stored under the placeholder name "name".
/// </returns>
public Dictionary<string, Member> GetTopMembers(string page, bool excludeDebatorFromTotal) {
    Dictionary<string, Member> members=new Dictionary<string, Member>();
    // Sample of the reputation markup being scraped:
    // <h4><a name="43"/></a>43. Vikram A Punathambekar</h4>
    // ...
    // <td class="small-text" valign="top" align="left">
    // <b><span id="ctl00_MC_MR_ctl34_P_S"><table cellspacing="4">
    // <td class="MemberSilver" title="Contributes articles: Silver"><div class="medium-text"><b>1,620</b></div>Author</td>
    // <td class="MemberSilver" title="Answers questions: Silver"><div class="medium-text"><b>2,867</b></div>Authority</td>
    // <td class="MemberNoStatus" title="Posts questions: No Status"><div class="medium-text"><b>31</b></div>Enquirer</td>
    // <td class="MemberGold" title="Organises and retags content: Gold"><div class="medium-text"><b>1,866</b></div>Organiser</td>
    // </tr><tr>
    // <td class="MemberBronze" title="General participation with the site: Bronze"><div class="medium-text"><b>278</b></div>Participant</td>
    // <td class="MemberPlatinum" title="Activity in the non-programming forums: Platinum"><div class="medium-text"><b>18,068</b></div>Debator</td>
    // <td class="MemberPlatinum" title="Total: Platinum"><div class="medium-text"><b>24,730</b></div>Total</td><td colspan="1"></td>
    // </tr></table></span></b>
    Stopwatch sw=new Stopwatch();
    sw.Start();
    Regex regexUntag=new Regex("<[\\s\\S]*?>", RegexOptions.Compiled);
    // <h2 id="ctl00_MC_MR_ctl02_P_Name">Nishant Sivakumar</h2>
    Regex regexName=new Regex("<h2[\\s\\S]*?>([\\s\\S]*)</h2>", RegexOptions.Multiline);
    // One fragment per member: from <table class="member-profile"> up to the closing </hr>
    MatchCollection matches=Regex.Matches(page, "member-profile[\\s\\S]*?</hr>",
        RegexOptions.Multiline | RegexOptions.ExplicitCapture);
    log("Match Count="+matches.Count);
    foreach (Match match in matches) { // enumerate all members on this page
        string memberHtml=match.Value;
        bool MVP=memberHtml.Contains("Images/MVP");

        // --- name ---
        string name="name";
        Match m1=regexName.Match(memberHtml);
        if (m1.Success) {
            logMore("group0="+m1.Groups[0].Value);
            logMore("group1="+m1.Groups[1].Value);
            name=regexUntag.Replace(m1.Groups[1].Value, "");
            name=name.Replace("\r", "").Replace("\n", "").Trim();
            log("name='"+name+"' MVP="+MVP);
        }
        if (members.ContainsKey(name)) continue; // keep the first occurrence only

        // --- member number ---
        // This local hides the memberID field; it is the ID of the member being parsed.
        uint memberID=0;
        const string memberNoLabel="Member No. ";
        int i=memberHtml.IndexOf(memberNoLabel, StringComparison.Ordinal);
        if (i>0) {
            i+=memberNoLabel.Length;
            // Look at up to 10 characters, but never past the end of the fragment.
            string sMemberID=memberHtml.Substring(i, Math.Min(10, memberHtml.Length-i));
            int j=sMemberID.IndexOfAny(" &\r\n".ToCharArray());
            if (j>0) sMemberID=sMemberID.Substring(0, j);
            uint.TryParse(sMemberID,
                NumberStyles.AllowThousands|NumberStyles.AllowTrailingWhite,
                CultureInfo.InvariantCulture, out memberID);
        }
        Member member=new Member();
        member.Name=name;
        member.MemberID=memberID;

        // --- reputation cells ---
        if (regexOneRep1==null) {
            regexOneRep1=new Regex("<td class=\"member[\\s\\S]*?</td>",
                RegexOptions.Multiline|RegexOptions.IgnoreCase);
            regexOneRep2=new Regex("<b>([\\s\\S]*)</b></div>", RegexOptions.Multiline);
        }
        MatchCollection matches1=regexOneRep1.Matches(memberHtml);
        logMore("matches1.Count="+matches1.Count);
        foreach (Match match1 in matches1) {
            string s1=match1.Value;
            // Invariant lower-casing keeps the keyword tests independent of the current culture.
            string s1LC=s1.ToLowerInvariant();
            logMore("s1="+s1);
            Match match2=regexOneRep2.Match(s1);
            if (!match2.Success) continue;
            string s2=match2.Value;
            logMore("s2="+s2+" group1="+match2.Groups[1].Value);
            int rep=0;
            int.TryParse(match2.Groups[1].Value, NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture, out rep);
            // Status color: 0 = none, 1 = bronze, 2 = silver, 3 = gold, 4 = platinum.
            int color=0;
            if (s1LC.Contains("memberbronze")||s1LC.Contains("box bronze")) color=1;
            else if (s1LC.Contains("membersilver")||s1LC.Contains("box silver")) color=2;
            else if (s1LC.Contains("membergold")||s1LC.Contains("box gold")) color=3;
            else if (s1LC.Contains("memberplatinum")||s1LC.Contains("box platinum")) color=4;
            logMore(name+"="+rep+" color="+color+" s2="+s2);
            // "authority" must be tested before "author", which it contains.
            if (s1LC.Contains("authority")) {
                member.Authority=rep;
                member.AuthorityColor=color;
            } else if (s1LC.Contains("author")) {
                member.Author=rep;
                member.AuthorColor=color;
            } else if (s1LC.Contains("debator")) {
                member.Debator=rep;
                member.DebatorColor=color;
            } else if (s1LC.Contains("editor")) {
                member.Editor=rep;
                member.EditorColor=color;
            } else if (s1LC.Contains("enquirer")) {
                member.Enquirer=rep;
                member.EnquirerColor=color;
            } else if (s1LC.Contains("organiser")) {
                member.Organiser=rep;
                member.OrganiserColor=color;
            } else if (s1LC.Contains("participant")) {
                member.Participant=rep;
                member.ParticipantColor=color;
            } else if (s1LC.Contains("total")) {
                member.Total=rep;
                member.TotalColor=color;
            }
        }
        // No total on the page (or a total of 0): derive it from the components.
        if (member.Total==0) {
            int total=0;
            for (int ii=0; ii<7; ii++) {
                if (ii!=2 || !excludeDebatorFromTotal) total+=member.GetReputationComponentValue(ii);
            }
            member.Total=total;
        }

        // --- counters ---
        if (regexCounter1==null) {
            // <td><b><a id="ctl00_MC_MR_ctl00_P_ArticleContributionLink" href="/sc...">Articles</a></b>
            // </td><td nowrap="nowrap">37 (Legend)</td></tr>
            regexCounter1=new Regex("<td><b><a[\\s\\S]*?</tr>",
                RegexOptions.Multiline|RegexOptions.IgnoreCase);
            regexCounter2=new Regex("\">([\\s\\S]*?)</a>[\\s\\S]*>\\s*([\\d,]+)",
                RegexOptions.Multiline|RegexOptions.IgnoreCase);
        }
        MatchCollection matches2=regexCounter1.Matches(memberHtml);
        logMore("matches2.Count="+matches2.Count);
        foreach (Match match2 in matches2) {
            Match match2a=regexCounter2.Match(match2.Value);
            string counterName=match2a.Groups[1].ToString();
            string counterValue=match2a.Groups[2].ToString();
            logMore("counter: "+counterName+"="+counterValue);
            int val=0;
            // The site always writes ',' as the thousands separator, so parse with the
            // invariant culture, as every other number in this class is parsed.
            int.TryParse(counterValue, NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture, out val);
            // Labels are matched without their first letter so that either case is accepted.
            if (counterName.Contains("rticle")) member.ArticleCount=val;
            if (counterName.Contains("essage")) member.MessageCount=val;
            if (counterName.Contains("uestions")) member.QuestionCount=val;
            if (counterName.Contains("nswers")) member.AnswerCount=val;
            if (counterName.Contains("ips")) member.TipCount=val;
            if (counterName.Contains("logs")) member.BlogCount=val;
            if (counterName.Contains("omments")) member.CommentCount=val;
        }
        member.MVP=MVP;
        members.Add(name, member);
#if WITH_MEMBER_LIMIT
        if (members.Count>=3) break;
#endif
    }
    sw.Stop();
    log(sw.ElapsedMilliseconds.ToString()+" msecs in GetTopMembers");
    return members;
}
}
}