Click here to Skip to main content
15,896,111 members
Articles / Web Development / HTML

A Really Vain "How are my articles doing" Web Spider

Rate me:
Please Sign up or sign in to vote.
4.56/5 (43 votes)
4 Feb 2013CPOL6 min read 92.1K   984   74  
A simple web spider to fetch CodeProject articles.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using Microsoft.Win32;

namespace VainWebSpider
{
    #region frmMain CLASS
    /// <summary>
    /// Main form of the application. On load it starts a BackgroundWorker
    /// which creates a new
    /// <see cref="WebScreenScraper">WebScreenScraper</see> for the currently
    /// selected codeproject user. If the WebScreenScraper data signifies that
    /// the user has some articles, the article data is fetched from the
    /// WebScreenScraper and displayed in a DataGridView.
    /// </summary>
    public partial class frmMain : Form
    {
        #region Instance Fields
        //instance fields
        //whether the form is considered shown; starts as true and is
        //updated by nfIcon_DoubleClick
        private Boolean formShown = true;
        //the scraper for the current user; a new one is created in bgw_DoWork
        private WebScreenScraper wss;
        //the article data returned by wss.getWebData(); used as the
        //DataSource of dgArticles
        private DataTable dt;
        #endregion
        #region Contructor
        /// <summary>
        /// Creates the main form and initialises its components by
        /// calling InitializeComponent()
        /// </summary>
        public frmMain()
        {
            this.InitializeComponent();
        }
        #endregion

        #region Private Methods

        /// <summary>
        /// User double clicked the system tray icon, so toggle the form:
        /// if it is visible it is hidden, if it is hidden it is shown.
        /// </summary>
        /// <param name="sender">The notify icon</param>
        /// <param name="e">The event arguments</param>
        private void nfIcon_DoubleClick(object sender, EventArgs e)
        {
            //decide on the form's real visibility rather than on the cached
            //formShown flag, because the form is also shown/hidden by other
            //handlers (the show/hide menu items, the results link), and a
            //stale flag would turn this double click into a no-op
            bool currentlyVisible = this.Visible;
            if (currentlyVisible)
            {
                this.Hide();
            }
            else
            {
                this.Show();
            }
            //keep the flag in step with what was just done
            formShown = !currentlyVisible;
        }

        /// <summary>
        /// Shows the form, and records that it is now shown
        /// </summary>
        /// <param name="sender">The show menu</param>
        /// <param name="e">The event arguments</param>
        private void showFormToolStripMenuItem_Click(object sender, EventArgs e)
        {
            this.Show();
            //keep the formShown flag in step with the form, so that code
            //which consults the flag does not see a stale value
            formShown = true;
        }

        /// <summary>
        /// Hides the form, and records that it is now hidden
        /// </summary>
        /// <param name="sender">The hide menu</param>
        /// <param name="e">The event arguments</param>
        private void hideFormToolStripMenuItem_Click(object sender, EventArgs e)
        {
            this.Hide();
            //keep the formShown flag in step with the form, so that code
            //which consults the flag does not see a stale value
            formShown = false;
        }

        /// <summary>
        /// Asks the user to confirm that they wish to quit, and exits the
        /// application only if they answer Yes.
        /// </summary>
        /// <param name="sender">The exit menu</param>
        /// <param name="e">The event arguments</param>
        private void exitToolStripMenuItem_Click(object sender, EventArgs e)
        {
            DialogResult answer = MessageBox.Show(
                "Are you sure you want to quit.\r\n" +
                "There may be client connected at present",
                "Exit",
                MessageBoxButtons.YesNo,
                MessageBoxIcon.Question);

            //anything other than Yes leaves the application running
            if (answer != DialogResult.Yes)
            {
                return;
            }
            Application.Exit();
        }
        
        /// <summary>
        /// BackgroundWorker work method. Creates a new
        /// <see cref="WebScreenScraper">WebScreenScraper </see> for the
        /// <see cref="Program">Program </see> UserID, subscribes to its
        /// StartParse event and fetches the initial data. If the
        /// WebScreenScraper then reports that the user has articles, the
        /// article data is fetched and stored in the dt field.
        /// </summary>
        /// <param name="sender">BackgroundWorker</param>
        /// <param name="e">DoWorkEventArgs</param>
        private void bgw_DoWork(object sender, DoWorkEventArgs e)
        {
            //a fresh scraper for every run, with its StartParse event hooked up
            this.wss = new WebScreenScraper(Program.UserID);
            this.wss.StartParse += wss_StartParse;

            //fetch the initial article summary area only
            this.wss.getInitialData();

            //no articles means there is no table to fetch
            if (!this.wss.HasArticles)
            {
                return;
            }
            this.dt = this.wss.getWebData();
        }


        /// <summary>
        /// Runs when the BackgroundWorker has finished bgw_DoWork, and updates
        /// the GUI with the outcome: an error message if the work failed, a
        /// "no articles" message if the user has none, otherwise the author
        /// summary and (when there is at least one row) the article grid.
        /// </summary>
        /// <param name="sender">BackgroundWorker</param>
        /// <param name="e">RunWorkerCompletedEventArgs, whose Error property
        /// holds any exception thrown by bgw_DoWork</param>
        private void bgw_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            //the work is over whatever the outcome, so the progress
            //indicators always go and the user panel always comes back
            lblProgress.Visible = false;
            prgBar.Visible = false;
            pnlUser.Visible = true;

            //an exception thrown inside bgw_DoWork is not rethrown here, it is
            //handed over in e.Error. In that case wss may be null or belong to
            //an earlier run, so it must not be examined
            if (e.Error != null || wss == null)
            {
                lblCurrentUser.Text = "Unable To Fetch Articles";
                setArticleGridVisible(false);
                Program.ErrorBox("Problem fetching CodeProject articles for user ("
                                + Program.UserID + ")"
                                + (e.Error != null ? "\r\n" + e.Error.Message : string.Empty));
                return;
            }

            //there are no articles to show, so update GUI to show this
            if (!wss.HasArticles)
            {
                lblCurrentUser.Text = "Unknown Or Unpublished Author";
                setArticleGridVisible(false);
                Program.InfoBox("There are no CodeProject articles avaialble for user ("
                                + Program.UserID + ")");
                return;
            }

            pnlGridMainFill.Visible = true;
            this.Invalidate();
            Application.DoEvents();

            lblCurrentUser.Text = wss.AuthorName + ": " + wss.NoOfArticles + " articles available";

            //check there is at least 1 article, before showing the
            //article DataGridView; dt is also checked for null in case
            //getWebData() did not return a table
            if (dt != null && dt.Rows.Count > 0)
            {
                dgArticles.Columns.Clear();
                dgArticles.DataSource = dt;
                alterColumns();
                resizeColumns();
                setArticleGridVisible(true);
            }
            //known author, but no articles to show
            else
            {
                setArticleGridVisible(false);
            }
        }

        /// <summary>
        /// Shows or hides the article DataGridView together with the results
        /// panel, then repaints the form
        /// </summary>
        /// <param name="visible">true to show them, false to hide them</param>
        private void setArticleGridVisible(bool visible)
        {
            dgArticles.Visible = visible;
            pnlResults.Visible = visible;
            this.Invalidate();
            Application.DoEvents();
        }
    



        /// <summary>
        /// Alter the article DataGridView columns. The auto mapped "ArticleURL"
        /// column is removed (if present), then an image column and a
        /// DataGridViewLinkColumn bound to "ArticleURL" are added, in that
        /// order, after the remaining columns.
        /// </summary>
        private void alterColumns()
        {
            //the column may not be in existence when the request to remove it
            //is made, so test for it rather than catching (and hiding) whatever
            //exception the removal might throw
            if (dgArticles.Columns.Contains("ArticleURL"))
            {
                //remove existing ArticleURL column
                dgArticles.Columns.Remove("ArticleURL");
            }

            //create a new image column
            DataGridViewImageColumn imgs = new DataGridViewImageColumn();
            imgs.Image = global::VaneWebSpider.FormResources.LinkIcon;
            imgs.DisplayIndex = 0;
            imgs.Width = 40;
            dgArticles.Columns.Add(imgs);

            //create a new hyperlink column, bound to the same data as the
            //column that was removed
            DataGridViewLinkColumn links = new DataGridViewLinkColumn();
            links.HeaderText = "ArticleURL";
            links.DataPropertyName = "ArticleURL";
            links.ActiveLinkColor = Color.Blue;
            links.LinkBehavior = LinkBehavior.SystemDefault;
            links.LinkColor = Color.Blue;
            links.SortMode = DataGridViewColumnSortMode.Automatic;
            links.TrackVisitedState = true;
            links.VisitedLinkColor = Color.Blue;
            links.DisplayIndex = 1;
            links.Width = 300;
            dgArticles.Columns.Add(links);
        }

        /// <summary>
        /// Gives the first four article DataGridView columns a fixed width
        /// </summary>
        private void resizeColumns()
        {
            const int fixedWidth = 60;
            const int lastFixedColumn = 3;

            //columns 0 to 3 are the Views, Ratings, Votes and Popularity
            //columns, in that order
            for (int column = 0; column <= lastFixedColumn; column++)
            {
                dgArticles.Columns[column].Width = fixedWidth;
            }
        }

        ///// <summary>
        ///// Puts all the GUI components into a EndParse state
        ///// </summary>
        ///// <param name="sender"><see cref="WebScreenScraper">
        ///// The WebScreenScraper</param>
        ///// <param name="e">EventArgs</param>
        //private void wss_EndParse(object sender, EventArgs e)
        //{
        //    lblProgress.Visible = false;
        //    prgBar.Visible = false;
        //    pnlUser.Visible = true;
        //    pnlGridMainFill.Visible = true;
        //    this.Invalidate();
        //    Application.DoEvents();
        //}

        /// <summary>
        /// Puts all the GUI components into a StartParse state, by showing
        /// the progress label and progress bar
        /// </summary>
        /// <param name="sender">The
        /// <see cref="WebScreenScraper">WebScreenScraper</see></param>
        /// <param name="e">EventArgs</param>
        private void wss_StartParse(object sender, EventArgs e)
        {
            //without a live window handle there is nothing to update, and
            //InvokeRequired cannot tell us which thread we are on
            if (this.IsDisposed || !this.IsHandleCreated)
            {
                return;
            }

            //the event is raised from bgw_DoWork, which the BackgroundWorker
            //runs off the thread that owns this form's controls, so the update
            //normally has to be marshalled across
            if (this.InvokeRequired)
            {
                this.Invoke(new MethodInvoker(showParseProgress));
            }
            //already on the owning thread, so update directly rather than
            //silently doing nothing
            else
            {
                showParseProgress();
            }
        }

        /// <summary>
        /// Shows the progress label and progress bar, then repaints the form.
        /// Must be called on the thread that owns the form's controls.
        /// </summary>
        private void showParseProgress()
        {
            lblProgress.Visible = true;
            prgBar.Visible = true;
            this.Invalidate();
            Application.DoEvents();
        }

        /// <summary>
        /// If the cell of the DataGridView clicked was in the link column
        /// call startProcess, passing it the correct URL to navigate to
        /// </summary>
        /// <param name="sender">The article DataGridView</param>
        /// <param name="e">Identifies the column and row that was clicked</param>
        private void dgArticles_CellContentClick(object sender, DataGridViewCellEventArgs e)
        {
            //header cells are reported with an index of -1, and have no
            //article behind them
            if (e.RowIndex < 0 || e.ColumnIndex < 0)
            {
                return;
            }

            //the link column is recognised by its type rather than by a fixed
            //index; alterColumns() adds it after the auto-generated columns
            //and it is the only DataGridViewLinkColumn added there
            if (!(dgArticles.Columns[e.ColumnIndex] is DataGridViewLinkColumn))
            {
                return;
            }

            //a cell with no value has no URL to navigate to
            object cellValue = dgArticles[e.ColumnIndex, e.RowIndex].Value;
            if (cellValue == null)
            {
                return;
            }

            startProcess(@"http://www.codeproject.com" + cellValue.ToString());
        }

        /// <summary>
        /// Attempts to start the process named by the parameter supplied, but
        /// only when it looks like a URL, that is, it starts with www or http.
        /// Anything else is ignored. A failure to start the process is
        /// reported in an error box.
        /// </summary>
        /// <param name="target">The process to start</param>
        private void startProcess(string target)
        {
            bool looksLikeUrl = target != null
                && (target.StartsWith("www") || target.StartsWith("http"));

            //only URLs are ever started
            if (!looksLikeUrl)
            {
                return;
            }

            try
            {
                System.Diagnostics.Process.Start(target);
            }
            catch (Exception)
            {
                Program.ErrorBox("Problem with starting process " + target);
            }
        }

        /// <summary>
        /// Hides the user and grid panels, then creates a new BackgroundWorker
        /// wired to bgw_DoWork(..) and bgw_RunWorkerCompleted(..) and starts
        /// it, passing the <see cref="Program">Program classes </see>UserID
        /// as the argument
        /// </summary>
        /// <param name="sender">frmMain</param>
        /// <param name="e">EventArgs</param>
        private void frmMain_Load(object sender, EventArgs e)
        {
            pnlUser.Visible = false;
            pnlGridMainFill.Visible = false;

            BackgroundWorker worker = new BackgroundWorker();
            worker.DoWork += new DoWorkEventHandler(bgw_DoWork);
            worker.RunWorkerCompleted +=
                new RunWorkerCompletedEventHandler(bgw_RunWorkerCompleted);
            worker.RunWorkerAsync(Program.UserID);
        }

        /// <summary>
        /// Allows the user to specify a new UserId to fetch codeproject articles for by the
        /// use of a <see cref="InputBoxDialog">InputBoxDialog </see>
        /// The value entered must be a positive number. When it is, the
        /// <see cref="Program">Program </see> UserID is updated and a new
        /// BackgroundWorker is started to fetch that user's articles.
        /// </summary>
        /// <param name="sender">lnkChangeUser</param>
        /// <param name="e">LinkLabelLinkClickedEventArgs</param>
        private void lnkChangeUser_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
        {
            //get the new userId
            string stringEntered = Program.InputBox("Enter a new user ID to examine", "Enter a new user ID", "");

            //check for empty (or no value at all)
            if (string.IsNullOrEmpty(stringEntered))
            {
                Program.ErrorBox("You must enter a value for the userId");
                return;
            }

            //its not a number that was entered, tell them off. TryParse is
            //used so that only a bad number produces this message, not some
            //unrelated exception from starting the worker
            long uId;
            if (!long.TryParse(stringEntered, out uId))
            {
                Program.ErrorBox("The value you entered was not valid\r\n" +
                                "The user ID must be a number");
                return;
            }

            if (uId <= 0)
            {
                Program.ErrorBox("User ID must be a postive value");
                return;
            }

            //update the Program held property, then fetch the new users
            //articles in the same way as frmMain_Load does. The
            //RunWorkerCompleted handler must be subscribed as well, as it is
            //what updates the GUI once the fetch has finished
            Program.UserID = uId;
            BackgroundWorker bgw = new BackgroundWorker();
            bgw.DoWork += new DoWorkEventHandler(bgw_DoWork);
            bgw.RunWorkerCompleted += new RunWorkerCompletedEventHandler(bgw_RunWorkerCompleted);
            bgw.RunWorkerAsync(Program.UserID);
        }

        /// <summary>
        /// The form has closed, so hide the notify icon and then
        /// exit the application
        /// </summary>
        /// <param name="sender">frmMain</param>
        /// <param name="e">FormClosedEventArgs</param>
        private void frmMain_FormClosed(object sender, FormClosedEventArgs e)
        {
            //the icon is hidden first, then the application is exited
            this.nfIcon.Visible = false;
            Application.Exit();
        }

        /// <summary>
        /// Create a new <see cref="frmPie">frmPie</see> object, hide this form
        /// while the frmPie is shown as a dialog, and show this form again
        /// once the dialog has closed
        /// </summary>
        /// <param name="sender">lnkResults</param>
        /// <param name="e">LinkLabelLinkClickedEventArgs</param>
        private void lnkResults_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
        {
            //a form shown with ShowDialog is not disposed when it closes,
            //so dispose of it here once it is finished with
            using (frmPie fPie = new frmPie())
            {
                fPie.GridIsUse = dgArticles;
                fPie.AuthorString = lblCurrentUser.Text;
                this.Hide();
                try
                {
                    fPie.ShowDialog(this);
                }
                finally
                {
                    //always bring this form back, even if the dialog failed,
                    //otherwise the application is left with no visible window
                    this.Show();
                }
            }
        }
        #endregion
    }
    #endregion
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer (Senior)
United Kingdom United Kingdom
I currently hold the following qualifications (amongst others, I also studied Music Technology and Electronics, for my sins)

- MSc (Passed with distinctions), in Information Technology for E-Commerce
- BSc Hons (1st class) in Computer Science & Artificial Intelligence

Both of these at Sussex University UK.

Award(s)

I am lucky enough to have won a few awards for Zany Crazy code articles over the years

  • Microsoft C# MVP 2016
  • Codeproject MVP 2016
  • Microsoft C# MVP 2015
  • Codeproject MVP 2015
  • Microsoft C# MVP 2014
  • Codeproject MVP 2014
  • Microsoft C# MVP 2013
  • Codeproject MVP 2013
  • Microsoft C# MVP 2012
  • Codeproject MVP 2012
  • Microsoft C# MVP 2011
  • Codeproject MVP 2011
  • Microsoft C# MVP 2010
  • Codeproject MVP 2010
  • Microsoft C# MVP 2009
  • Codeproject MVP 2009
  • Microsoft C# MVP 2008
  • Codeproject MVP 2008
  • And numerous codeproject awards which you can see over at my blog

Comments and Discussions