|
using System;
using System.Collections.Generic;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using System.Threading;
using System.IO;
using System.Net;
namespace CodeProjectArticleEditor
{
class CPExtractor
{
    // --------------------------------------------
    // CodeProjectArticleEditor > CPExtractor.cs
    // --------------------------------------------
    // CodeProject Article Editor
    // Huseyin Atasoy
    // atasoyweb.net [at] gmail.com
    // www.atasoyweb.net
    // September 2012
    // --------------------------------------------

    // Drives a WebBrowser control created by this class through the CodeProject
    // login, article-list, article-view and logout pages and extracts data from
    // the loaded documents. Every operation is asynchronous: a public method
    // starts a navigation and records its purpose in _navigatedFor; the
    // DocumentCompleted handler then processes the page and raises the matching
    // After_* callback through the owner form's Invoke.

    // Pages requested by this class. Each already carries a query string, so the
    // extra parameter added by withUniqueParameter() is appended with '&'.
    private const string LoginUrl = "https://www.codeproject.com/script/Membership/LogOn.aspx?rp=/";
    private const string LogoutUrl = "http://www.codeproject.com/script/Membership/LogOff.aspx?rp=%2f";
    private const string ArticleHtmlUrl = "http://www.codeproject.com/script/Articles/ViewHtml.aspx?aid=";
    private const string MemberArticlesUrl = "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=";

    // Name of the dummy query parameter that receives a fresh GUID on every request.
    private const string UniqueParameterName = "yenilemesiicin";

    /// <summary>
    /// Raised after the login page has been processed. When the user turns out to be
    /// logged in already, <see cref="After_Login"/> is raised before this callback.
    /// </summary>
    public Action After_LoadLoginPage = null;

    /// <summary>
    /// Raised after the page that follows the login attempt has been processed,
    /// whether the attempt succeeded or not (check <see cref="isLoggedIn"/>).
    /// </summary>
    public Action After_Login = null;

    /// <summary>Raised after the member's article list has been extracted.</summary>
    public Action After_LoadArticleList = null;

    /// <summary>
    /// Raised after an article page has been processed.
    /// <see cref="lastLoadedArticleContent"/> is null when no article content was found.
    /// </summary>
    public Action After_LoadAnArticle = null;

    /// <summary>Raised after the logout page has been loaded.</summary>
    public Action After_LoadLogoutPage = null;

    private bool _isLoggedIn = false;
    /// <summary>True once a member id has been extracted after a login attempt.</summary>
    public bool isLoggedIn
    {
        get { return _isLoggedIn; }
    }

    private string _loginInfo = null;
    /// <summary>Never assigned inside this class; kept for callers that read it.</summary>
    public string loginInfo
    {
        get { return _loginInfo; }
    }

    private string _memberID;
    /// <summary>Text after the last '=' of the "MyProfile" link's href.</summary>
    public string memberID
    {
        get { return _memberID; }
    }

    private string _memberFullName;
    /// <summary>Inner text of the "MyProfile" link.</summary>
    public string memberFullName
    {
        get { return _memberFullName; }
    }

    private string _memberName;
    /// <summary>Part of <see cref="memberFullName"/> before the first space (or all of it).</summary>
    public string memberName
    {
        get { return _memberName; }
    }

    private string _avgArRating;
    /// <summary>
    /// Trimmed text of the "ArticleRatingDiv" element of the article list page,
    /// or an empty string when that element is missing or has no text.
    /// </summary>
    public string avgArRating
    {
        get { return _avgArRating; }
    }

    private int _articleCount;
    /// <summary>Number of articles found on the last loaded article list page.</summary>
    public int articleCount
    {
        get { return _articleCount; }
    }

    private string _lastLoadedArticleContent;
    /// <summary>
    /// Inner HTML of the "ArticleContent" element of the last loaded article, with
    /// downloaded images rewritten to relative paths; null when no content was found.
    /// </summary>
    public string lastLoadedArticleContent
    {
        get { return _lastLoadedArticleContent; }
    }

    /// <summary>Purpose of the navigation whose completion is currently awaited.</summary>
    public enum NavigatedFor
    {
        Nothing,
        LoadLoginPage,
        Login,
        LoadArticleList,
        LoadAnArticle,
        LoadLogoutPage
    }

    private NavigatedFor _navigatedFor = NavigatedFor.Nothing;
    /// <summary>Current navigation state.</summary>
    public NavigatedFor navigatedFor
    {
        get { return _navigatedFor; }
    }

    /// <summary>One entry of the member's article list.</summary>
    public struct Article
    {
        public string id;   // Numeric id taken from the article link
        public string name; // Inner text of the title link
        public string info; // Extra details joined with " | "

        public Article(string id, string name, string info)
        {
            this.id = id;
            this.name = name;
            this.info = info;
        }
    }

    private List<Article> _articles;
    /// <summary>Articles extracted from the last loaded article list page (null before that).</summary>
    public List<Article> articles
    {
        get { return _articles; }
    }

    private WebBrowser webBrowser;
    private frmMain actionInvoker;

    /// <summary>Creates the extractor and the WebBrowser control it drives.</summary>
    /// <param name="actionInvoker">Form whose Invoke is used to raise the After_* callbacks.</param>
    public CPExtractor(frmMain actionInvoker)
    {
        this.actionInvoker = actionInvoker;
        ClickSoundDisabler.disableClickSound();
        webBrowser = new WebBrowser();
        webBrowser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(webBrowser_DocumentCompleted);
    }

    private string email, password;

    /// <summary>
    /// Starts the login sequence: loads the login page, submits the credentials and,
    /// on success, loads the member's article list.
    /// </summary>
    public void login(string email, string password)
    {
        this.email = email;
        this.password = password;
        _navigatedFor = NavigatedFor.LoadLoginPage;
        webBrowser.Navigate(withUniqueParameter(LoginUrl));
    }

    /// <summary>Navigates to the logout page.</summary>
    public void logout()
    {
        _navigatedFor = NavigatedFor.LoadLogoutPage;
        webBrowser.Navigate(withUniqueParameter(LogoutUrl));
    }

    private string lastLoadedArticleId;
    private string articlePathToSaveContents;

    /// <summary>Loads the HTML view of one article of <see cref="articles"/>.</summary>
    /// <param name="index">Index into <see cref="articles"/>.</param>
    /// <param name="articlePath">Directory under which the article's images are saved.</param>
    public void loadArticle(int index, string articlePath)
    {
        _navigatedFor = NavigatedFor.LoadAnArticle;
        lastLoadedArticleId = _articles[index].id;
        this.articlePathToSaveContents = articlePath;
        webBrowser.Navigate(withUniqueParameter(ArticleHtmlUrl + lastLoadedArticleId));
    }

    // Appends "&yenilemesiicin=<new GUID>" so that every request has a unique URL
    // (presumably to keep the browser control from serving a cached page).
    // The '&' matters: without it the GUID becomes part of the preceding parameter's value.
    private static string withUniqueParameter(string url)
    {
        return url + "&" + UniqueParameterName + "=" + Guid.NewGuid().ToString();
    }

    // Raises a callback through the owner form, if one is assigned.
    private void raise(Action callback)
    {
        if (callback != null)
        {
            actionInvoker.Invoke(callback);
        }
    }

    // Dispatches a completed navigation to the handler of the current state.
    private void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        switch (_navigatedFor)
        {
            case NavigatedFor.Nothing:
                return;
            case NavigatedFor.LoadLogoutPage:
                // NOTE(review): unlike the other states this one is not reset to Nothing,
                // so any further DocumentCompleted event raises the callback again.
                // Left as is because callers may read navigatedFor inside the callback - confirm.
                _isLoggedIn = false;
                raise(After_LoadLogoutPage);
                break;
            case NavigatedFor.LoadLoginPage:
                handleLoginPageLoaded();
                break;
            case NavigatedFor.Login:
                handleLoginResult();
                break;
            case NavigatedFor.LoadArticleList:
                handleArticleListLoaded();
                break;
            case NavigatedFor.LoadAnArticle:
                handleArticleLoaded();
                break;
        }
    }

    // Login page is loaded: either the user is already logged in, or the form is filled and submitted.
    private void handleLoginPageLoaded()
    {
        if (extractMemberId())
        {
            MessageBox.Show(null, "You are already logged in. Please don't forget to logout before you exit the program...", "Already Logged In", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
            _navigatedFor = NavigatedFor.Login;
            // No further navigation happens, so process the current page as the login result.
            handleLoginResult();
        }
        else
        {
            try
            {
                // The state is switched first: the click below starts the next navigation.
                _navigatedFor = NavigatedFor.Login;
                HtmlDocument document = webBrowser.Document;
                document.GetElementById(getElementId("_MC_MemberLogOn_CurrentEmail")).SetAttribute("value", email);
                document.GetElementById(getElementId("_MC_MemberLogOn_CurrentPassword")).SetAttribute("value", password);
                document.GetElementById(getElementId("_MC_MemberLogOn_SignInButton")).InvokeMember("click");
            }
            catch (Exception)
            {
                // Deliberate best effort: if the form could not be found or submitted,
                // give up on the login and stop reacting to page loads.
                _navigatedFor = NavigatedFor.Nothing;
            }
        }
        raise(After_LoadLoginPage);
    }

    // Page after the login attempt: show the site's error, or continue with the article list.
    private void handleLoginResult()
    {
        string errorElementId = getElementId("LogonError");
        if (errorElementId.Length > 1)
        {
            _isLoggedIn = false;
            _navigatedFor = NavigatedFor.Nothing;
            // The id comes from a text search over the HTML, so the element lookup may still fail.
            HtmlElement errorElement = webBrowser.Document.GetElementById(errorElementId);
            string message = (errorElement != null) ? errorElement.InnerText : null;
            if (string.IsNullOrEmpty(message))
            {
                message = "Login failed.";
            }
            MessageBox.Show(null, message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        else if (extractMemberId())
        {
            _isLoggedIn = true;
            _navigatedFor = NavigatedFor.LoadArticleList;
            webBrowser.Navigate(withUniqueParameter(MemberArticlesUrl + _memberID));
        }
        else
        {
            _isLoggedIn = false;
            _navigatedFor = NavigatedFor.Nothing;
        }
        raise(After_Login);
    }

    // Article list page is loaded: read the average rating and every listed article.
    private void handleArticleListLoaded()
    {
        HtmlDocument document = webBrowser.Document;
        _avgArRating = readTrimmedText(document, getElementId("ArticleRatingDiv")) ?? "";

        _articleCount = 0;
        _articles = new List<Article>();
        string idPrefix = getElementId("_MC_AR_ctl");

        // Rows are numbered with two digits; numbering may start at 00 or at 01.
        int rowNumber = 0;
        HtmlElement link = document.GetElementById(idPrefix + rowNumber.ToString("00") + "_CAR_Title");
        if (link == null)
        {
            rowNumber = 1;
            link = document.GetElementById(idPrefix + rowNumber.ToString("00") + "_CAR_Title");
        }

        while (link != null)
        {
            string rowPrefix = idPrefix + rowNumber.ToString("00");
            string articleName = link.InnerText;

            // Each detail is optional; a missing element simply contributes nothing.
            string articleInfo = "";
            string submitted = readTrimmedText(document, rowPrefix + "_CAR_SbD");
            if (submitted != null)
            {
                articleInfo = Regex.Replace(submitted, "[ ]{2,}", " | ");
            }
            string bookmarks = readTrimmedText(document, rowPrefix + "_CAR_BookmarkCountSpan");
            if (bookmarks != null)
            {
                articleInfo += " | " + bookmarks;
            }
            string downloads = readTrimmedText(document, rowPrefix + "_CAR_DownloadCountSpan");
            if (downloads != null)
            {
                articleInfo += " | " + downloads;
            }

            // The article id is the first run of digits enclosed by slashes in the link.
            string linkToTheArticle = link.GetAttribute("href");
            string articleID = Regex.Match(linkToTheArticle, "/([0-9]{1,12})/").Groups[1].Value;
            _articles.Add(new Article(articleID, articleName, articleInfo));

            rowNumber++;
            link = document.GetElementById(idPrefix + rowNumber.ToString("00") + "_CAR_Title");
        }

        _articleCount = _articles.Count;
        _navigatedFor = NavigatedFor.Nothing;
        raise(After_LoadArticleList);
    }

    // Article page is loaded: fetch its images in the background, or report that there is no content.
    private void handleArticleLoaded()
    {
        _navigatedFor = NavigatedFor.Nothing;
        if (webBrowser.Document.GetElementById("ArticleContent") != null)
        {
            downloadRelatedImages(); // Raises After_LoadAnArticle when finished
        }
        else
        {
            _lastLoadedArticleContent = null;
            raise(After_LoadAnArticle);
        }
    }

    // Returns the trimmed inner text of an element, or null when the element
    // does not exist or has no text.
    private static string readTrimmedText(HtmlDocument document, string elementId)
    {
        HtmlElement element = document.GetElementById(elementId);
        if (element == null)
        {
            return null;
        }
        string text = element.InnerText;
        return (text == null) ? null : text.Trim();
    }

    // Added on 05/10/2012 to be able to find element IDs using a suffix...
    // Searches the body HTML (case-insensitively) for "ctl" + 1-3 digits + 1-64 id characters
    // + suffix and returns the first match, or "" when nothing matches or no body is loaded.
    private string getElementId(string suffix)
    {
        HtmlDocument document = webBrowser.Document;
        if (document == null || document.Body == null)
        {
            return "";
        }
        string html = document.Body.InnerHtml;
        if (string.IsNullOrEmpty(html))
        {
            return "";
        }
        // The suffix is literal text, so it is escaped before it becomes part of the pattern.
        Match m = Regex.Match(html, "(ctl[0-9]{1,3}[A-Za-z0-9_]{1,64}" + Regex.Escape(suffix) + ")", RegexOptions.IgnoreCase);
        return m.Success ? m.Groups[1].Value : "";
    }

    // Downloads every image of the loaded article into "<articlePath>\<articleId>\" on a worker
    // thread, then (on the browser's thread) points the downloaded images' src attributes at the
    // local copies, stores the article HTML and raises After_LoadAnArticle.
    private void downloadRelatedImages()
    {
        HtmlElementCollection documentImages = webBrowser.Document.Images;
        string[] images = new string[documentImages.Count];
        for (int i = 0; i < images.Length; i++)
        {
            images[i] = documentImages[i].GetAttribute("src");
        }

        // Snapshot the fields: another loadArticle() call must not redirect a running download.
        string articleId = lastLoadedArticleId;
        string basePath = articlePathToSaveContents ?? "";

        Thread worker = new Thread(() =>
        {
            try
            {
                // One directory for both creating and saving, whether or not
                // the base path ends with a separator.
                string imageDirectory = Path.Combine(basePath, articleId);
                try
                {
                    Directory.CreateDirectory(imageDirectory);
                }
                catch (Exception)
                {
                    // Best effort: if the directory is unusable the downloads below fail one by one.
                }

                using (WebClient webClient = new WebClient())
                {
                    webClient.Proxy = null; // To prevent it from trying to determine proxy settings of IE
                    for (int i = 0; i < images.Length; i++)
                    {
                        try
                        {
                            string src = images[i];
                            Uri uri = new Uri(src);
                            string imageFileName = Path.GetFileName(uri.LocalPath);
                            webClient.DownloadFile(src, Path.Combine(imageDirectory, imageFileName));
                            images[i] = articleId + "/" + imageFileName; // To make src attribute relative
                        }
                        catch (Exception ex)
                        {
                            // Best effort: an image that cannot be downloaded keeps its original src.
                            System.Diagnostics.Debug.WriteLine("Image could not be downloaded: " + ex.Message);
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: the article is still delivered, with the original image sources.
            }

            webBrowser.Invoke((Action)(() =>
            {
                HtmlDocument document = webBrowser.Document;
                HtmlElement content = (document != null) ? document.GetElementById("ArticleContent") : null;
                if (content == null)
                {
                    // The document changed while the images were being downloaded.
                    _lastLoadedArticleContent = null;
                    return;
                }
                HtmlElementCollection currentImages = document.Images;
                int count = Math.Min(images.Length, currentImages.Count);
                for (int i = 0; i < count; i++) // Change src attributes
                {
                    currentImages[i].SetAttribute("src", images[i]);
                }
                _lastLoadedArticleContent = content.InnerHtml;
            }));

            raise(After_LoadAnArticle);
        });
        worker.Start();
    }

    // Reads the member id and names from the "MyProfile" link of the loaded page.
    // Returns false when the link is missing or incomplete, i.e. nobody is logged in.
    private bool extractMemberId()
    {
        HtmlDocument document = webBrowser.Document;
        if (document == null)
        {
            return false;
        }
        HtmlElement profileLink = document.GetElementById(getElementId("MyProfile"));
        if (profileLink == null)
        {
            return false;
        }
        string href = profileLink.GetAttribute("href");
        if (href == null)
        {
            return false;
        }

        // The member id is whatever follows the last '=' of the profile URL.
        _memberID = href.Substring(href.LastIndexOf('=') + 1);
        _memberFullName = profileLink.InnerText;
        if (_memberID.Length < 1 || string.IsNullOrEmpty(_memberFullName))
        {
            return false;
        }

        int firstSpace = _memberFullName.IndexOf(' ');
        _memberName = (firstSpace >= 0) ? _memberFullName.Substring(0, firstSpace) : _memberFullName;
        return true;
    }
}
}
// By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
// If a file you wish to view isn't highlighted, and is a text file (not binary), please
// let us know and we'll add colourisation support for it.