|
using System;
using System.Linq;
using System.Net;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Documents;
using System.Windows.Ink;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Animation;
using System.Windows.Shapes;
using HtmlAgilityPack;
using System.Diagnostics;
using System.Collections.Generic;
namespace CPPostsAnalyzerWP7.Models
{
/// <summary>
/// Downloads pages 1 through <c>maxPage</c> of the CodeProject forum message listing
/// (Messages.aspx) for one member id, parses each post row, and reports every
/// parsed post plus a final per-forum summary through events.
/// </summary>
public class PostsFetcher
{
    /// <summary>Highest listing page number that is requested.</summary>
    private const int maxPage = 4;

    private readonly string memberId;

    // Running totals accumulated across all pages of the current fetch.
    private readonly Dictionary<ForumType, int> forumTypeCountMap = new Dictionary<ForumType, int>();
    private readonly Dictionary<string, int> forumPostsCountMap = new Dictionary<string, int>();

    // Page number that the next call to LoadNextPageAsync will request.
    private int nextPage = 1;

    /// <summary>
    /// Creates a fetcher for the given member id.
    /// </summary>
    /// <param name="memberId">
    /// Member id as text; surrounding whitespace is trimmed. A null value is treated as
    /// empty so that <see cref="Fetch"/> reports it through <see cref="FetchCompleted"/>
    /// like any other invalid id.
    /// </param>
    public PostsFetcher(string memberId)
    {
        this.memberId = (memberId ?? String.Empty).Trim();
    }

    /// <summary>Raised once for every post row that was parsed.</summary>
    public event EventHandler<PostInfoEventArgs> PostFetched;

    /// <summary>
    /// Raised once per fetch: either with <c>Error</c> set, or with the forum-type and
    /// forum-name summaries filled in after the last page was processed.
    /// </summary>
    public event EventHandler<FetchCompletedEventArgs> FetchCompleted;

    private void FirePostFetched(PostInfoEventArgs e)
    {
        // Copy to a local so the null check and the invocation see the same delegate.
        var handler = this.PostFetched;
        if (handler != null)
        {
            handler(this, e);
        }
    }

    private void FireFetchCompleted(FetchCompletedEventArgs e)
    {
        var handler = this.FetchCompleted;
        if (handler != null)
        {
            handler(this, e);
        }
    }

    /// <summary>
    /// Starts fetching. If the member id is not an integer, <see cref="FetchCompleted"/>
    /// is raised immediately with an <see cref="ArgumentException"/> and nothing is loaded.
    /// </summary>
    public void Fetch()
    {
        int parsedId;
        if (!Int32.TryParse(this.memberId, out parsedId))
        {
            FireFetchCompleted(new FetchCompletedEventArgs() { Error = new ArgumentException("Invalid memberId.") });
            return;
        }

        // Start from a clean slate so a repeated Fetch does not resume from a stale
        // page number or add on top of the previous run's counts.
        nextPage = 1;
        forumTypeCountMap.Clear();
        forumPostsCountMap.Clear();

        LoadNextPageAsync();
    }

    // Requests the page numbered nextPage and advances the counter; HtmlLoaded is the callback.
    private void LoadNextPageAsync()
    {
        HtmlWeb.LoadAsync(String.Format("http://www.codeproject.com/script/Forums/Messages.aspx?fmid={0}&fid=0&pgnum={1}", this.memberId, nextPage++), HtmlLoaded);
    }

    // Callback for a finished page load: parses the page, then either requests the
    // next page or raises FetchCompleted with the accumulated summaries.
    private void HtmlLoaded(object sender, HtmlDocumentLoadCompleted e)
    {
        if (e.Error != null)
        {
            FireFetchCompleted(new FetchCompletedEventArgs() { Error = e.Error });
            return;
        }

        try
        {
            ParseHtml(e);
        }
        catch (Exception ex)
        {
            FireFetchCompleted(new FetchCompletedEventArgs() { Error = ex });
            // Stop here: the fetch has already been reported as failed, so it must not
            // also continue to the next page or raise a second completion event.
            return;
        }

        if (nextPage <= maxPage)
        {
            LoadNextPageAsync();
            return;
        }

        var args = new FetchCompletedEventArgs();
        foreach (var item in forumTypeCountMap)
        {
            args.ForumTypeSummaries.Add(new PostSummaryInfo() { ForumType = item.Key, Count = item.Value, ForumName = String.Empty });
        }
        foreach (var item in forumPostsCountMap)
        {
            args.PostSummaries.Add(new PostSummaryInfo() { ForumType = ForumType.Unknown, Count = item.Value, ForumName = item.Key });
        }
        FireFetchCompleted(args);
    }

    // Parses one loaded page: locates the first table with cellspacing="4", turns each
    // recognizable row into a PostInfo, updates the counters and raises PostFetched.
    // Rows that do not match the expected layout are skipped.
    private void ParseHtml(HtmlDocumentLoadCompleted e)
    {
        var tableNode = e.Document.DocumentNode.DescendantNodes().FirstOrDefault(
            n => String.Equals(n.Name, "table", StringComparison.OrdinalIgnoreCase)
                && n.Attributes["cellspacing"] != null
                && n.Attributes["cellspacing"].Value == "4");
        if (tableNode == null)
        {
            return;
        }

        foreach (var rowNode in tableNode.Descendants("tr"))
        {
            var aNode = rowNode.Descendants("a").FirstOrDefault();
            if (aNode == null)
            {
                continue;
            }

            // Materialize once; the query would otherwise be re-run for the count and the indexing.
            var divNodes = rowNode.Descendants("div").Where(
                n => n.Attributes["class"] != null
                    && n.Attributes["class"].Value == "small-text subdue").ToArray();
            if (divNodes.Length != 2)
            {
                // Not a post row. Skipping avoids counting a post whose ForumName was
                // never assigned (a null key would make the dictionary lookup throw).
                continue;
            }

            string displayName;
            string timeString;
            if (!TrySplitNameAndTime(divNodes[0].InnerText.Trim(), out displayName, out timeString))
            {
                continue;
            }

            string[] forumLines = divNodes[1].InnerText.Trim()
                .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (forumLines.Length < 1 || forumLines.Length > 2)
            {
                continue;
            }

            // With a single line there is no forum name, only the "(type)" text.
            string forumNameInput = forumLines.Length == 1 ? "(Untitled)" : forumLines[0];
            string forumTypeInput = forumLines[forumLines.Length - 1];

            ForumType forumType;
            if (!TryParseForumType(forumTypeInput, out forumType))
            {
                continue;
            }

            PostInfo postInfo = new PostInfo();
            postInfo.ThreadName = aNode.InnerText.Trim();
            postInfo.DisplayName = displayName;
            postInfo.TimeString = timeString;
            postInfo.ForumName = forumNameInput.Trim();
            postInfo.ForumType = forumType;

            IncrementCount(forumTypeCountMap, postInfo.ForumType);
            IncrementCount(forumPostsCountMap, postInfo.ForumName);
            FirePostFetched(new PostInfoEventArgs() { PostInfo = postInfo });
        }
    }

    // Splits "... by <name> at <time>" into its name and time parts. Returns false when
    // "by" is missing or the last "at" does not come after it.
    private static bool TrySplitNameAndTime(string nameAndTime, out string displayName, out string timeString)
    {
        displayName = null;
        timeString = null;

        int byPos = nameAndTime.IndexOf("by", StringComparison.Ordinal);
        int atPos = nameAndTime.LastIndexOf("at", StringComparison.Ordinal);

        // atPos < byPos + 2 also covers atPos == -1; it prevents a negative Substring length.
        if (byPos == -1 || atPos < byPos + 2)
        {
            return false;
        }

        displayName = nameAndTime.Substring(byPos + 2, atPos - byPos - 2).Trim();
        timeString = nameAndTime.Substring(atPos + 2).Trim();
        return true;
    }

    // Extracts the text between the first '(' and the first ')' and maps it to a ForumType
    // (case-insensitive). Returns false when either bracket is missing; text that is not
    // a ForumType name yields ForumType.Unknown.
    private static bool TryParseForumType(string forumTypeInput, out ForumType forumType)
    {
        forumType = ForumType.Unknown;

        int leftBracketPos = forumTypeInput.IndexOf('(');
        int rightBracketPos = forumTypeInput.IndexOf(')');
        if (leftBracketPos == -1 || rightBracketPos == -1)
        {
            return false;
        }

        if (rightBracketPos > leftBracketPos)
        {
            string enumLine = forumTypeInput.Substring(leftBracketPos + 1, rightBracketPos - leftBracketPos - 1).Trim();
            try
            {
                forumType = (ForumType)Enum.Parse(typeof(ForumType), enumLine, true);
            }
            catch (ArgumentException)
            {
                // Unrecognized forum type text: deliberately keep ForumType.Unknown.
            }
        }

        return true;
    }

    // Adds one to the count stored under key, starting from zero when the key is new.
    private static void IncrementCount<TKey>(Dictionary<TKey, int> counts, TKey key)
    {
        int current;
        counts.TryGetValue(key, out current); // leaves current at 0 when the key is absent
        counts[key] = current + 1;
    }
}
}
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
Nish Nishant is a technology enthusiast from Columbus, Ohio. He has over 20 years of software industry experience in various roles including Chief Technology Officer, Senior Solution Architect, Lead Software Architect, Principal Software Engineer, and Engineering/Architecture Team Leader. Nish is a 14-time recipient of the Microsoft Visual C++ MVP Award.
Nish authored C++/CLI in Action for Manning Publications in 2005, and co-authored Extending MFC Applications with the .NET Framework for Addison Wesley in 2003. In addition, he has over 140 published technology articles on CodeProject.com and another 250+ blog articles on his WordPress blog. Nish is experienced in technology leadership, solution architecture, software architecture, cloud development (AWS and Azure), REST services, software engineering best practices, CI/CD, mentoring, and directing all stages of software development.
Nish's Technology Blog:
voidnish.wordpress.com