#define WITHOUT_NETWORK_CREDENTIAL // with or without proxy server support
#define WITHOUT_LOGGING // with or without logging to Console
#define WITHOUT_MORE_LOGGING // with or without more logging messages
using System;
using System.Collections.Generic;
using System.Diagnostics; // StopWatch
using System.Globalization; // NumberStyles
using System.Net;
using System.Text.RegularExpressions;
namespace CPVanity
{
public class CPSite
{
// constants
// Root URL of the site; relative page paths are appended to it by the download helpers.
private const string baseURL = "http://www.codeproject.com";
// Value assigned to HttpWebRequest.Timeout in CreateWebRequest (that property is in milliseconds).
private const int WEB_TIMEOUT = 15000;
// variables
// Member this instance was constructed for.
private uint memberID;
// Page text stored by the download methods (e.g. GetArticlePage, GetWhoIsWhoPage, GetHonors);
// GetName, GetAdornedName, GetAverageRating and GetArticles parse this field.
private string page;
// List most recently built by GetArticles().
private List<Article> articles;
// NOTE(review): not referenced anywhere in the visible code - possibly dead.
private Dictionary<string, Regex> regexesOneRep = new Dictionary<string, Regex>();
// Created on first use inside GetTopUsers(): regexOneRep1 matches one reputation table cell,
// regexOneRep2 captures the bold number inside that cell.
private Regex regexOneRep1;
private Regex regexOneRep2;
// Counters incremented by GetWhoIsWhoPage (one per sort order) and reset by ClearWhoIsWhoPageCounters.
public int ArticlesPageCount { get; private set; }
public int MessagesPageCount { get; private set; }
public int ReputationPageCount { get; private set; }
// Creates an accessor bound to one member; the ID is used by every member-specific request.
public CPSite(uint memberID)
{
    // "this." is required: the parameter hides the field of the same name
    this.memberID = memberID;
}
// Read-only view of the member ID passed to the constructor.
public uint MemberId
{
    get { return this.memberID; }
}
// Writes one line to the console, prefixed with the local time (HH:mm:ss).
// The body is compiled only when WITH_LOGGING is defined; otherwise this is an empty method.
private static void log(string s)
{
#if WITH_LOGGING
    string stamped = DateTime.Now.ToString("HH:mm:ss ") + s;
    Console.WriteLine(stamped);
#endif
}
// Verbose-level logging: forwards to log(string) only when WITH_MORE_LOGGING is defined,
// otherwise the method body is empty.
private static void logMore(string s)
{
#if WITH_MORE_LOGGING
    CPSite.log(s);
#endif
}
// Logs a title line followed by the exception's ToString() text, one log call per
// non-empty line (the text is split on both CR and LF, empty fragments are dropped).
private static void log(string title, Exception exc)
{
    log(title);
    char[] lineBreaks = new char[] { '\n', '\r' };
    string[] lines = exc.ToString().Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    foreach (string line in lines)
    {
        log(line);
    }
}
// Exposes the site root URL constant to callers holding an instance.
public string BaseURL
{
    get { return CPSite.baseURL; }
}
#if !WINDOWS_PHONE
// Checks that the site's host name can be resolved through DNS.
// Note: this performs a name lookup only; no HTTP request is sent.
// Throws Exception (wrapping the original error) when the lookup fails.
public void Ping()
{
    try
    {
        // Take the host from baseURL so the two can never drift apart.
        string host = new Uri(baseURL).Host;
        Dns.GetHostEntry(host);
    }
    catch (Exception exc)
    {
        string message = "Failed to connect to " + baseURL;
        log(message, exc);
        throw new Exception(message, exc);
    }
}
#endif
#if WINDOWS_PHONE
// Returns the address of the site logo image (logo225x90.gif).
public Uri GetBob()
{
    // an earlier location was baseURL + "/SiteRes/CP/Img/Std/logo225x90.gif"
    string logoUrl = baseURL + "/App_Themes/Std/Img/logo225x90.gif";
    return new Uri(logoUrl);
}
// Returns the address of the reputation graph image for this instance's member.
public Uri GetReputationGraph()
{
    string graphUrl = baseURL + "/script/Reputation/ReputationGraph.aspx?mid=" + memberID;
    return new Uri(graphUrl);
}
#else
// Downloads the site logo image (logo225x90.gif); the result is whatever downloadImage
// returns, which is null when the download fails.
public Bitmap GetBob()
{
    // an earlier location was baseURL + "/SiteRes/CP/Img/Std/logo225x90.gif"
    string logoUrl = baseURL + "/App_Themes/Std/Img/logo225x90.gif";
    return downloadImage(logoUrl);
}
// Downloads the reputation graph image for this instance's member; the result is whatever
// downloadImage returns, which is null when the download fails or the response is too short.
public Bitmap GetReputationGraph()
{
    string graphUrl = baseURL + "/script/Reputation/ReputationGraph.aspx?mid=" + memberID;
    return downloadImage(graphUrl);
}
#endif
// Returns the absolute URL of the site's reputation information page.
public static string GetReputationFaqUrl()
{
    const string faqPath = "/script/Membership/Reputation.aspx";
    return baseURL + faqPath;
}
#if WINDOWS_PHONE
// Starts an asynchronous download of this member's article list page.
// callback receives the page text on success; failure receives an error message.
public void GetArticlePage(Action<string> callback, Action<string> failure)
{
    Debug.Assert(callback != null);
    string relativeUrl = "script/Articles/MemberArticles.aspx?amid=" + memberID;
    downloadPage(relativeUrl, callback, failure);
}
#else
// Downloads this member's article list page, stores it in the page field for the
// parsing methods, and returns it. Download errors propagate from downloadPage.
public string GetArticlePage()
{
    string html = downloadPage("script/Articles/MemberArticles.aspx?amid=" + memberID);
    page = html;
    return html;
}
#endif
// Profile page URL for this instance's member (see GetMemberPageUrl for the format).
public string MemberPageUrl
{
    get { return GetMemberPageUrl(this.memberID); }
}
// Builds the profile page URL for a member.
// The ID is formatted with "D7", i.e. left-padded with zeros to at least seven digits,
// e.g. 6556 gives .../script/Membership/view.aspx?mid=0006556
public static string GetMemberPageUrl(uint memberID)
{
    string paddedId = memberID.ToString("D7");
    return baseURL + "/script/Membership/view.aspx?mid=" + paddedId;
}
#if WINDOWS_PHONE
// Starts an asynchronous download of a member's profile page.
// callback receives the page text on success; failure receives an error message.
//
// Fix: this used to request the MemberArticles page (copy of GetArticlePage), whereas the
// synchronous variant and GetHonors - which looks for /script/Membership/Images/ badges -
// expect the profile page returned by GetMemberPageUrl.
public void GetMemberPage(uint memberID, Action<string> callback, Action<string> failure)
{
    Debug.Assert(callback != null);
    // GetMemberPageUrl yields an absolute URL, which downloadPage passes through unchanged.
    downloadPage(GetMemberPageUrl(memberID), callback, failure);
}
#else
// Downloads and returns the profile page of the given member.
// Unlike GetArticlePage this does not store the result in the page field.
public string GetMemberPage(uint memberID)
{
    return downloadPage(GetMemberPageUrl(memberID));
}
#endif
// Resets the three counters that GetWhoIsWhoPage increments.
public void ClearWhoIsWhoPageCounters()
{
    // assignments run right to left: Reputation, then Articles, then Messages
    MessagesPageCount = ArticlesPageCount = ReputationPageCount = 0;
}
#if WINDOWS_PHONE
// Starts an asynchronous download of a Who is Who page.
// type selects the sort order and the counter to bump: 'A' = article count,
// 'M' = message count, 'R' = reputation. Any other value sends an empty sort key
// and bumps no counter.
// pageNumber: the original note gives the range as [1,4].
public void GetWhoIsWhoPage(char type, int pageNumber, Action<string> callback, Action<string> failure)
{
    Debug.Assert(callback != null);
    string sortKey;
    if (type == 'A')
    {
        sortKey = "ArticleCount";
        ArticlesPageCount++;
    }
    else if (type == 'M')
    {
        sortKey = "MessageCount";
        MessagesPageCount++;
    }
    else if (type == 'R')
    {
        sortKey = "Reputation";
        ReputationPageCount++;
    }
    else
    {
        sortKey = "";
    }
    string relativeUrl = "script/Membership/Profiles.aspx?ml_ob=" + sortKey + "&pgnum=" + pageNumber;
    downloadPage(relativeUrl, callback, failure);
}
#else
// Downloads a Who is Who page, stores it in the page field and returns it.
// type selects the sort order and the counter to bump: 'A' = article count,
// 'M' = message count, 'R' = reputation. Any other value sends an empty sort key
// and bumps no counter.
// pageNumber: the original note gives the range as [1,4].
public string GetWhoIsWhoPage(char type, int pageNumber)
{
    string sortKey;
    if (type == 'A')
    {
        sortKey = "ArticleCount";
        ArticlesPageCount++;
    }
    else if (type == 'M')
    {
        sortKey = "MessageCount";
        MessagesPageCount++;
    }
    else if (type == 'R')
    {
        sortKey = "Reputation";
        ReputationPageCount++;
    }
    else
    {
        sortKey = "";
    }
    string html = downloadPage("script/Membership/Profiles.aspx?ml_ob=" + sortKey + "&pgnum=" + pageNumber);
    page = html;
    return html;
}
#endif
#if WINDOWS_PHONE
// Starts an asynchronous string download. URLs not starting with "http" are treated as
// paths relative to baseURL. The two callbacks travel with the request as its user state
// and are invoked from client_DownloadStringCompleted.
private void downloadPage(string URL, Action<string> callback, Action<string> failure)
{
    string target = URL.StartsWith("http") ? URL : baseURL + "/" + URL;
    var client = new WebClient();
    client.DownloadStringCompleted += client_DownloadStringCompleted;
    Uri address = new Uri(target);
    var state = new WebClientCallbacks(callback, failure);
    client.DownloadStringAsync(address, state);
}
// Completion handler for the downloads started by downloadPage.
// On success the text is stored in the page field and passed to the Success callback;
// on failure the page field is cleared and the Failure callback gets a user-facing message.
// The handler always detaches itself from the WebClient afterwards.
//
// Changes from the previous version:
//  * a failed or cancelled request is detected through e.Error / e.Cancelled instead of
//    waiting for e.Result to throw, so an ordinary network failure no longer trips
//    Debug.Assert(false) in debug builds;
//  * the unused exception variable is gone.
// An exception escaping the Success callback is still reported through Failure, as before,
// and still asserts because at that point it indicates a defect rather than a network problem.
void client_DownloadStringCompleted(object sender, DownloadStringCompletedEventArgs e)
{
    Debug.Assert(e.UserState is WebClientCallbacks);
    var callback = (WebClientCallbacks)e.UserState;
    bool failed = e.Cancelled || e.Error != null;
    try
    {
        if (!failed)
        {
            try
            {
                page = e.Result;
                callback.Success(page);
            }
            catch (Exception)
            {
                Debug.Assert(false);
                failed = true;
            }
        }
        if (failed)
        {
            page = "";
            callback.Failure("Unable to reach codeproject. Make sure you have a data connection.");
        }
    }
    finally
    {
        ((WebClient)sender).DownloadStringCompleted -= client_DownloadStringCompleted;
    }
}
#endif
#if !WINDOWS_PHONE
// Downloads an image and returns it as a Bitmap, or null when the request fails or the
// response is shorter than 1000 bytes (reputation graphs for nonexistent member IDs come
// back as a short web page containing an error string).
//
// Fixes:
//  * the HttpWebResponse is now disposed on every path (it used to leak, including on the
//    early "too short" return), which releases the underlying connection;
//  * the image bytes are copied into a MemoryStream that stays alive with the Bitmap.
//    A Bitmap built on a stream needs that stream to remain open for the Bitmap's
//    lifetime; the old code disposed the network stream before returning the Bitmap.
public static Bitmap downloadImage(string URL)
{
    try
    {
        HttpWebRequest req = CreateWebRequest(URL);
        using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
        {
            log("resp.StatusCode=" + resp.StatusCode);
            log("resp.ContentLength=" + resp.ContentLength);
            if (resp.ContentLength < 1000) return null;
            // Deliberately not disposed: the returned Bitmap keeps reading from it.
            MemoryStream imageData = new MemoryStream();
            using (Stream stream = resp.GetResponseStream())
            {
                byte[] chunk = new byte[8192];
                int read;
                while ((read = stream.Read(chunk, 0, chunk.Length)) > 0)
                {
                    imageData.Write(chunk, 0, read);
                }
            }
            imageData.Position = 0;
            return new Bitmap(imageData);
        }
    }
    catch (Exception exc)
    {
        log("Could not access " + URL, exc);
        return null;
    }
}
// Downloads a page synchronously and returns its text.
// URLs not starting with "http" are treated as paths relative to baseURL.
// The body is read line by line and re-joined with Environment.NewLine, so line endings
// in the result are normalised (kept as-is because the parsers were written against it).
// Throws Exception wrapping the original error when the request or read fails.
//
// Fixes: the HttpWebResponse is now disposed (it used to leak the connection), the prefix
// test is ordinal instead of culture-sensitive, and the dead "#if false" WebClient variant
// has been removed.
private string downloadPage(string URL)
{
    if (!URL.StartsWith("http", StringComparison.Ordinal)) URL = baseURL + "/" + URL;
    try
    {
        log("begin downloadPage " + URL);
        HttpWebRequest req = CreateWebRequest(URL);
        using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
        using (Stream stream = resp.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            // pre-sized because the downloaded pages are large
            StringBuilder sb = new StringBuilder(300000);
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                sb.AppendLine(line);
            }
            log("downloadPage done; len=" + (sb.Length / 1024) + "KB");
            return sb.ToString();
        }
    }
    catch (Exception exc)
    {
        log("Could not access " + URL, exc);
        throw new Exception("Could not access " + URL, exc);
    }
}
// Creates an HTTP request for the URL with the class-wide timeout applied.
// When WITH_NETWORK_CREDENTIAL is defined the request is additionally routed through a
// proxy with basic credentials; the proxy address and credentials below are placeholders.
private static HttpWebRequest CreateWebRequest(string URL)
{
    var request = (HttpWebRequest)WebRequest.Create(URL);
    request.Timeout = WEB_TIMEOUT;
#if WITH_NETWORK_CREDENTIAL
    // as suggested by Don Kackman
    // http://www.codeproject.com/KB/dotnet/CPVanity.aspx?msg=3416457#xx3416457xx
    Uri proxyUri = new Uri("http://proxy.sitename.com:8080");
    CredentialCache credentials = new CredentialCache();
    credentials.Add(proxyUri, "Basic", new NetworkCredential("username", "password"));
    request.Proxy = new WebProxy(proxyUri, true, null, credentials.GetCredential(proxyUri, "Basic"));
#endif
    return request;
}
#endif
// Extracts the member name from the stored page: the text between "Articles by " and the
// next "(", trimmed. Returns "" when the pattern is absent or no page has been stored yet.
//
// Fix: the name is now taken from a capture group. The old code located it with
// LastIndexOf("by"), which cut any name that itself contains "by"
// ("Articles by Toby Smith (" produced "Smith").
public string GetName()
{
    string name = "";
    if (!string.IsNullOrEmpty(page))
    {
        Match match = Regex.Match(page, @"Articles by ([^\(]*)\(");
        if (match.Success)
        {
            log("s=" + match.Value);
            name = match.Groups[1].Value.Trim();
        }
    }
    log("GetName returns: " + name);
    return name;
}
// Extracts the member name including any markup around it from the first single-line
// h1 element of the stored page, e.g. for
//   <h1>Articles by <sup class="Error">C</sup>Pallini (Articles: 3, Answers: 389, Tip/Trick: 1)</h1>
// the result is: <sup class="Error">C</sup>Pallini
// Returns "" when no such heading exists or no page has been stored yet.
//
// Fixes: the "articles by " prefix test is now an ordinal case-insensitive comparison
// instead of a culture-sensitive ToLower() (which breaks under e.g. Turkish casing rules),
// the heading content comes from a capture group rather than index arithmetic, and a
// missing page no longer throws.
public string GetAdornedName()
{
    log("GetAdornedName");
    const string prefix = "articles by ";
    string s = "";
    if (!string.IsNullOrEmpty(page))
    {
        Match match = Regex.Match(page, "<h1>(.*?)</h1>");
        if (match.Success)
        {
            s = match.Groups[1].Value;
            if (s.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) s = s.Substring(prefix.Length);
            // drop the trailing " (Articles: ..., Answers: ...)" summary
            int idx = s.LastIndexOf(" (", StringComparison.Ordinal);
            if (idx > 0) s = s.Substring(0, idx);
            s = s.Trim();
        }
    }
    log("GetAdornedName returns: " + s);
    return s;
}
#if WINDOWS_PHONE
// Asynchronously fetches this member's page and reports the honor image URLs found on it.
// callback receives the list; failure receives an error message if the download fails.
public void GetHonors(Action<List<string>> callback, Action<string> failure)
{
    Debug.Assert(callback != null);
    Action<string> onPageLoaded = delegate(string html)
    {
        GetHonors(html, callback, failure);
    };
    GetMemberPage(memberID, onPageLoaded, failure);
}
// Collects honor image URLs from the given page text and hands them to callback.
// An image counts when its src path starts with /script/Membership/Images/ and does not
// contain "help.gif"; paths are made absolute with baseURL.
// The failure parameter is not used here. The page parameter hides the field of that name.
private void GetHonors(string page, Action<List<string>> callback, Action<string> failure)
{
    // sample: <img title="..." src="/script/Membership/Images/....gif" alt="" style="border-width:0px;" />
    // only src values made of letters, dots and slashes are matched
    MatchCollection hits = Regex.Matches(page, @"src=""([a-z\./]*)""", RegexOptions.IgnoreCase);
    log("GetHonors: match count=" + hits.Count);
    List<string> result = new List<string>();
    foreach (Match hit in hits)
    {
        string path = hit.Groups[1].Value;
        bool isHonorImage = path.StartsWith("/script/Membership/Images/") && !path.Contains("help.gif");
        if (!isHonorImage) continue;
        string absolute = baseURL + path;
        log(absolute);
        result.Add(absolute);
    }
    callback(result);
}
#else
// Downloads this member's profile page (storing it in the page field) and returns the
// absolute URLs of the honor images on it. An image counts when its src path starts with
// /script/Membership/Images/ and does not contain "help.gif".
public List<string> GetHonors()
{
    page = GetMemberPage(memberID);
    // sample: <img title="..." src="/script/Membership/Images/....gif" alt="" style="border-width:0px;" />
    // only src values made of letters, dots and slashes are matched
    MatchCollection hits = Regex.Matches(page, @"src=""([a-z\./]*)""", RegexOptions.IgnoreCase);
    log("GetHonors: match count=" + hits.Count);
    List<string> result = new List<string>();
    foreach (Match hit in hits)
    {
        string path = hit.Groups[1].Value;
        bool isHonorImage = path.StartsWith("/script/Membership/Images/") && !path.Contains("help.gif");
        if (!isHonorImage) continue;
        string absolute = baseURL + path;
        log(absolute);
        result.Add(absolute);
    }
    return result;
}
#endif
// Returns the average article rating shown on the stored page as text (e.g. "3.68"),
// or "" when the page has no such line.
public string GetAverageRating()
{
    // example: <h4>Average article rating: 3.68</h4>
    const string marker = "rating: ";
    string rating = "";
    Match match = Regex.Match(page, @">Average article rating: [0-9\.]*");
    if (match.Success)
    {
        string text = match.Value;
        int start = text.LastIndexOf(marker) + marker.Length;
        rating = text.Substring(start).Trim();
    }
    log("Average Rating=" + rating);
    return rating;
}
// Parses the stored article list page into Article objects.
// Each article is one <span id="ctl.._MC_.R_ctl.._CAR ...> ... </div></span> fragment; the
// individual figures are picked out of the fragment with small regexes of the form
// "label: value". A figure that is missing or unparsable keeps its default
// (0, "" or DateTime.MinValue). The result is also kept in the articles field.
public List<Article> GetArticles()
{
    articles = new List<Article>();
    MatchCollection fragments = Regex.Matches(page,
        "<span id=\"ctl\\d*_MC_.R_ctl\\d*_CAR[\\s\\S]*?</div>\\s*?</span>",
        RegexOptions.Multiline | RegexOptions.ExplicitCapture);
    log("Match Count=" + fragments.Count);

    Regex viewsRegex = new Regex("iew.*?: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex ratingRegex = new Regex("ating: [0-9./]*", RegexOptions.ExplicitCapture);
    Regex votesRegex = new Regex("otes: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex bookmarksRegex = new Regex("ookmark.*?: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex downloadsRegex = new Regex("ownload.*?: [0-9,]*", RegexOptions.ExplicitCapture);
    Regex popularityRegex = new Regex("opularity: [0-9.]*", RegexOptions.ExplicitCapture);
    Regex linkRegex = new Regex("_Title\" href=\"([-a-zA-Z_/\\#0-9.:]*?)\">([\\s\\S]*?)<");
    Regex updatedRegex = new Regex("pdated.*?: <b>[0-9 a-zA-Z]*</b>", RegexOptions.ExplicitCapture);

    foreach (Match fragment in fragments)
    {
        string article = fragment.Value;
        if (article.Contains("CP Vanity")) log("match=" + article);

        // views
        int views = 0;
        Match viewsMatch = viewsRegex.Match(article);
        if (viewsMatch.Success)
        {
            string viewsText = textAfterLastColon(viewsMatch.Value);
            bool viewsParsed = int.TryParse(viewsText, NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture, out views);
            log("OK=" + viewsParsed + " \"" + viewsText + "\"");
        }

        // rating
        string rating = "";
        Match ratingMatch = ratingRegex.Match(article);
        if (ratingMatch.Success)
        {
            rating = textAfterLastColon(ratingMatch.Value);
            if (rating.StartsWith("0")) rating = ""; // tips without any vote
        }

        // votes
        int votes = parseCountAfterColon(votesRegex.Match(article));

        // bookmarks
        Match bookmarksMatch = bookmarksRegex.Match(article);
        log("Bookmark Match=" + bookmarksMatch.Value + " " + bookmarksMatch.Success);
        int bookmarks = parseCountAfterColon(bookmarksMatch);

        // downloads
        Match downloadsMatch = downloadsRegex.Match(article);
        log("Download Match=" + downloadsMatch.Value + " " + downloadsMatch.Success);
        int downloads = parseCountAfterColon(downloadsMatch);

        // popularity
        float popularity = 0;
        Match popularityMatch = popularityRegex.Match(article);
        if (popularityMatch.Success)
        {
            float.TryParse(textAfterLastColon(popularityMatch.Value), NumberStyles.AllowDecimalPoint,
                CultureInfo.InvariantCulture, out popularity);
        }

        // URL and title
        string URL = "";
        string title = "";
        Match linkMatch = linkRegex.Match(article);
        if (linkMatch.Success)
        {
            URL = linkMatch.Groups[1].Value;
            if (URL.StartsWith("/")) URL = baseURL + URL; // site-relative link
            title = linkMatch.Groups[2].Value;
            log("title=" + title);
        }

        // updated
        DateTime updated = DateTime.MinValue;
        Match updatedMatch = updatedRegex.Match(article);
        if (updatedMatch.Success)
        {
            log(updatedMatch.Value);
            string rawDate = textAfterLastColon(updatedMatch.Value);
            string dateText = rawDate.Replace("<b>", "").Replace("</b>", "");
            bool dateParsed = DateTime.TryParse(dateText, CultureInfo.InvariantCulture,
                DateTimeStyles.None, out updated);
            log("OK=" + dateParsed + " \"" + rawDate + "\"");
            log("updated=" + updated);
        }

        articles.Add(new Article(views, rating, votes, popularity, URL, title,
            updated, bookmarks, downloads));
    }
    return articles;
}

// Returns what follows the last ": " of a "label: value" match (colon plus one space skipped).
private static string textAfterLastColon(string labelled)
{
    return labelled.Substring(labelled.LastIndexOf(":") + 2);
}

// Reads an integer that may contain thousands separators from a "label: value" match;
// yields 0 when the match failed or the value is not a number.
private static int parseCountAfterColon(Match m)
{
    int count = 0;
    if (m.Success)
    {
        int.TryParse(textAfterLastColon(m.Value), NumberStyles.AllowThousands,
            CultureInfo.InvariantCulture, out count);
    }
    return count;
}
// Parses a Who is Who listing page into User records keyed by display name.
// page: HTML of the listing (this parameter hides the instance field of the same name).
// excludeDebatorFromTotal: when a member has no "Total" cell (Total stays 0), the total is
//   summed from reputation components 0..6; component 2 is skipped if this flag is set
//   (presumably the Debator score - confirm against User.GetReputationComponentValue).
// Returns one entry per distinct name; when a name repeats, the first occurrence wins.
// A profile chunk whose name cannot be parsed is stored under the placeholder "name".
//
// Fixes in this version:
//  * the member number was read with Substring(i, 10), which throws when fewer than ten
//    characters follow "Member No. "; the length is now clamped to what is available;
//  * keyword matching uses ToLowerInvariant() instead of the culture-sensitive ToLower();
//  * the disabled "#if false" / "#else" variants were removed.
public Dictionary<string, User> GetTopUsers(string page, bool excludeDebatorFromTotal)
{
    Dictionary<string, User> users = new Dictionary<string, User>();
    // Expected markup per member (abridged):
    //   <table class="member-profile"> ...
    //   <h2 id="ctl00_MC_MR_ctl02_P_Name">Nishant Sivakumar</h2>
    //   <td class="member-rep-box platinum" title="Contributes articles: Platinum">
    //     <div class="medium-text"><b>14,615</b></div>
    //     <a href="/script/Membership/Reputation.aspx#Author">Author</a></td>
    // An older layout used classes such as "MemberSilver" and carried a separate cell
    //   <td class="MemberPlatinum" title="Total: Platinum"><div class="medium-text"><b>24,730</b></div>Total</td>
    Stopwatch sw = new Stopwatch();
    sw.Start();
    Regex regexUntag = new Regex("<[\\s\\S]*?>", RegexOptions.Compiled);
    // <h2 id="ctl00_MC_MR_ctl02_P_Name">Nishant Sivakumar</h2>
    Regex regexName = new Regex("<h2[\\s\\S]*?>([\\s\\S]*)</h2>", RegexOptions.Multiline);
    // one chunk per member: from "member-profile" up to the next "</hr>"
    MatchCollection matches = Regex.Matches(page, "member-profile[\\s\\S]*?</hr>",
        RegexOptions.Multiline | RegexOptions.ExplicitCapture);
    log("Match Count=" + matches.Count);
    foreach (Match match in matches)
    {
        string s = match.Value;
        bool MVP = s.Contains("Images/MVP");
        string name = "name";
        uint userMemberID = 0;
        Match m1 = regexName.Match(s);
        if (m1.Success)
        {
            logMore("group0=" + m1.Groups[0].Value);
            logMore("group1=" + m1.Groups[1].Value);
            // strip any tags and line breaks from the heading text
            name = regexUntag.Replace(m1.Groups[1].Value, "");
            name = name.Replace("\r", "");
            name = name.Replace("\n", "");
            name = name.Trim();
            log("name='" + name + "' MVP=" + MVP);
        }
        if (users.ContainsKey(name)) continue;

        int i = s.IndexOf("Member No. ", StringComparison.Ordinal);
        if (i > 0)
        {
            i += 11; // length of "Member No. "
            // take at most ten characters, fewer if the chunk ends sooner
            string sMemberID = s.Substring(i, Math.Min(10, s.Length - i));
            int j = sMemberID.IndexOfAny(" &\r\n".ToCharArray());
            if (j > 0) sMemberID = sMemberID.Substring(0, j);
            uint.TryParse(sMemberID,
                NumberStyles.AllowThousands | NumberStyles.AllowTrailingWhite,
                CultureInfo.InvariantCulture, out userMemberID);
        }

        User user = new User();
        user.Name = name;
        user.MemberID = userMemberID;
        if (regexOneRep1 == null)
        {
            // built once per instance and reused on later calls
            regexOneRep1 = new Regex("<td class=\"member[\\s\\S]*?</td>",
                RegexOptions.Multiline | RegexOptions.IgnoreCase);
            regexOneRep2 = new Regex("<b>([\\s\\S]*)</b></div>", RegexOptions.Multiline);
        }
        MatchCollection matches1 = regexOneRep1.Matches(s);
        logMore("matches1.Count=" + matches1.Count);
        int maxColor = 0;
        foreach (Match match1 in matches1)
        {
            string s1 = match1.Value;
            string s1LC = s1.ToLowerInvariant();
            logMore("s1=" + s1);
            Match match2 = regexOneRep2.Match(s1);
            if (!match2.Success) continue;
            string s2 = match2.Value;
            logMore("s2=" + s2 + " group1=" + match2.Groups[1].Value);
            int rep = 0;
            int color = 0;
            int.TryParse(match2.Groups[1].Value, NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture, out rep);
            // status level: 0 = none, 1 = bronze, 2 = silver, 3 = gold, 4 = platinum
            // (both the old "MemberXxx" and the new "member-rep-box xxx" class names)
            if (s1LC.Contains("memberbronze") || s1LC.Contains("box bronze")) color = 1;
            else if (s1LC.Contains("membersilver") || s1LC.Contains("box silver")) color = 2;
            else if (s1LC.Contains("membergold") || s1LC.Contains("box gold")) color = 3;
            else if (s1LC.Contains("memberplatinum") || s1LC.Contains("box platinum")) color = 4;
            if (color > maxColor) maxColor = color;
            logMore(name + "=" + rep + " color=" + color + " s2=" + s2);
            // "authority" must be tested before "author", which is a prefix of it
            if (s1LC.Contains("authority"))
            {
                user.Authority = rep;
                user.AuthorityColor = color;
            }
            else if (s1LC.Contains("author"))
            {
                user.Author = rep;
                user.AuthorColor = color;
            }
            else if (s1LC.Contains("debator"))
            {
                user.Debator = rep;
                user.DebatorColor = color;
            }
            else if (s1LC.Contains("editor"))
            {
                user.Editor = rep;
                user.EditorColor = color;
            }
            else if (s1LC.Contains("enquirer"))
            {
                user.Enquirer = rep;
                user.EnquirerColor = color;
            }
            else if (s1LC.Contains("organiser"))
            {
                user.Organiser = rep;
                user.OrganiserColor = color;
            }
            else if (s1LC.Contains("participant"))
            {
                user.Participant = rep;
                user.ParticipantColor = color;
            }
            else if (s1LC.Contains("total"))
            {
                user.Total = rep;
                user.TotalColor = color;
            }
        }
        if (user.Total == 0)
        {
            // no Total cell on the page: add up the individual components
            int total = 0;
            for (int ii = 0; ii < 7; ii++)
            {
                if (ii != 2 || !excludeDebatorFromTotal) total += user.GetReputationComponentValue(ii);
            }
            user.Total = total;
            // TotalColor is intentionally left unset here (maxColor is computed but not applied)
        }
        user.MVP = MVP;
        users.Add(name, user);
    }
    sw.Stop();
    log(sw.ElapsedMilliseconds.ToString() + " msecs in GetTopUsers");
    return users;
}
}
}