Click here to Skip to main content
15,881,413 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
Hi,
I am crawling a page using HtmlAgilityPack, and I have written the code for it. Now I would like to display the URL that is currently being processed while the crawl is running. At the moment my code only binds the last URL, once the whole process has completed, but I need each and every URL to be shown at runtime. Can anyone help me find a solution?

In the backend I can see each and every URL being bound to lblCurrentPageURL, but on the front end it is not displayed.

Here is my Code:

C#
// URLs recorded so far: btnSubmit_Click adds the input URL, GetURL appends each new link,
// and CheckDuplicate searches this list.
public List<string> lstResults = new List<string>();
        // Running number prefixed to each line appended to outputurl1; starts at 1.
        public int counter = 1;
        // Root URL read from txtInput1 in btnSubmit_Click; GetURL uses its host to decide whether to recurse.
        public string strInputURL;
        // Not referenced by any of the methods shown here.
        public int count;

        /// <summary>
        /// Page load handler. Currently empty: no work is done when the page loads.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
        }
        /// <summary>
        /// Loads the page at <paramref name="strGetURL"/>, records every link on it that has not
        /// been seen before, and recursively crawls the links that belong to the input URL's site.
        /// </summary>
        /// <remarks>
        /// Progress is written to the page's labels and to the output text box while the crawl runs.
        /// NOTE(review): the accompanying question reports that only the final values reach the
        /// browser; presumably the page is rendered once, after the click handler returns — confirm.
        /// NOTE(review): recursion depth is unbounded, so a very large site could exhaust the stack.
        /// </remarks>
        /// <param name="strGetURL">Absolute URL of the page to download and scan.</param>
        /// <param name="strParentURL">
        /// URL of the page on which <paramref name="strGetURL"/> was found. Kept for caller
        /// compatibility; links are resolved against the page they actually appear on.
        /// </param>
        public void GetURL(string strGetURL, string strParentURL)
        {
            try
            {
                var page = new HtmlWeb().Load(strGetURL);

                // NOTE(review): this writes the character length of the URL into the "Links Depth"
                // label, exactly as the original code did — confirm that is the intended value.
                lblLinkDepthCount.Text = Convert.ToString(strGetURL.Length);

                // Select only anchors that carry an href, so the attribute lookup below can never be null.
                var aTags = page.DocumentNode.SelectNodes("//a[@href]");
                if (aTags == null)
                {
                    return;
                }

                // Loop-invariant: the host of the URL the crawl started from.
                string strRootHost = new Uri(strInputURL).Host;

                foreach (var aTag in aTags)
                {
                    string strHref = aTag.Attributes["href"].Value;
                    lblLinkDepthCount.Text = Convert.ToString(strHref.Length);

                    string strURLTmp;
                    try
                    {
                        // A relative link is relative to the page it was found on, not to that page's parent.
                        strURLTmp = GetAbsoluteURL(strHref, strGetURL);
                    }
                    catch (UriFormatException)
                    {
                        // One malformed href must not abort the scan of the remaining links.
                        continue;
                    }

                    if (CheckDuplicate(strURLTmp))
                    {
                        continue;
                    }

                    lstResults.Add(strURLTmp);
                    lblScannedPageCount.Text = Convert.ToString(lstResults.Count - 1);
                    outputurl1.Text += counter + ". " + strURLTmp + "\n";
                    counter++;
                    lblCurrentPageURL.Text = strURLTmp;

                    if (IsCrawlableLink(strURLTmp, strRootHost))
                    {
                        GetURL(strURLTmp, strGetURL);
                    }
                }
            }
            catch (Exception ex)
            {
                // Crawling stays best-effort (a page that fails to load is skipped),
                // but the failure is traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning("GetURL failed for '{0}': {1}", strGetURL, ex);
            }
        }

        /// <summary>
        /// Returns true when <paramref name="strURL"/> is an http/https URL whose host is
        /// <paramref name="strRootHost"/> or one of its subdomains.
        /// </summary>
        private static bool IsCrawlableLink(string strURL, string strRootHost)
        {
            Uri linkUri;
            if (!Uri.TryCreate(strURL, UriKind.Absolute, out linkUri))
            {
                return false;
            }

            // Skip mailto:, javascript:, ftp: and similar links that cannot be loaded as pages.
            bool isWebScheme = linkUri.Scheme == Uri.UriSchemeHttp || linkUri.Scheme == Uri.UriSchemeHttps;
            if (!isWebScheme)
            {
                return false;
            }

            // Compare the parsed host rather than searching the whole URL text, so a host name
            // that merely appears in a path or query string does not count as the same site.
            return linkUri.Host.Equals(strRootHost, StringComparison.OrdinalIgnoreCase)
                || linkUri.Host.EndsWith("." + strRootHost, StringComparison.OrdinalIgnoreCase);
        }


        /// <summary>
        /// Click handler for the submit button: reads the URL typed into txtInput1 and, when it is
        /// a valid absolute http/https URL, records it and starts the crawl from it.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void btnSubmit_Click(object sender, EventArgs e)
        {
            string strCandidate = (txtInput1.Text ?? string.Empty).Trim();

            // The URL comes from the user and is fetched on the server, so accept only absolute
            // http/https URLs; empty, malformed, or non-web input (e.g. file://) is ignored
            // instead of being handed to the loader.
            Uri startUri;
            bool isWebUrl = Uri.TryCreate(strCandidate, UriKind.Absolute, out startUri)
                && (startUri.Scheme == Uri.UriSchemeHttp || startUri.Scheme == Uri.UriSchemeHttps);
            if (!isWebUrl)
            {
                return;
            }

            strInputURL = strCandidate;
            lstResults.Add(strInputURL);

            // The start page is its own parent.
            GetURL(strInputURL, strInputURL);
        }

        /// <summary>
        /// Reports whether <paramref name="strURL"/> has already been recorded in lstResults.
        /// </summary>
        /// <param name="strURL">The URL to look up.</param>
        /// <returns>true if the list is non-empty and contains the URL; otherwise false.</returns>
        public bool CheckDuplicate(string strURL)
        {
            bool alreadyRecorded = lstResults.Any() && lstResults.Contains(strURL);
            return alreadyRecorded;
        }

        /// <summary>
        /// Resolves <paramref name="strRelativeURL"/> against <paramref name="strbaseURL"/> and
        /// returns the resulting absolute URI as a string.
        /// </summary>
        /// <param name="strRelativeURL">A relative or absolute link.</param>
        /// <param name="strbaseURL">The absolute URL the link is resolved against.</param>
        /// <returns>The AbsoluteUri of the combined URI.</returns>
        public static string GetAbsoluteURL(string strRelativeURL, string strbaseURL)
        {
            Uri baseUri = new Uri(strbaseURL);
            Uri resolvedUri = new Uri(baseUri, strRelativeURL);
            return resolvedUri.AbsoluteUri;
        }



XML
<%-- Site-map crawler form: a URL input and submit button, followed by an UpdatePanel holding the result box and progress labels. --%>
<div id="main">
        <h1>Find Your Site Map Here, it's FREE!</h1>
        <form id="form1" runat="server">
                <div class="floatLeft">
                <asp:Label ID="lblURL1" runat="server" Text="Enter the URL Here" />
                <asp:TextBox ID="txtInput1" runat="server" />
                <br />
                <%-- NOTE(review): this button is declared outside the UpdatePanel below, which uses UpdateMode="Conditional" and declares no Triggers; presumably the click causes a full postback rather than a partial update - confirm. --%>
                <asp:Button ID="btnSubmit" runat="server" Text="Submit" OnClick="btnSubmit_Click" />
                <br />
            <asp:ScriptManager EnablePartialRendering="true" ID="ScriptManager1" runat="server"></asp:ScriptManager>
            <asp:UpdatePanel runat="server" ID="UpdatePanel" UpdateMode="Conditional">
                    <ContentTemplate>
                        <%-- outputurl1 receives the numbered list of discovered URLs; it is nested inside Label1. --%>
                        <asp:Label ID="Label1" runat="server">Result
                <asp:TextBox ID="outputurl1" TextMode="multiline" Columns="50" Rows="5" runat="server" /></asp:Label>

                        <%-- Progress labels written by the code-behind (GetURL) while crawling. --%>
                        <asp:Label CssClass="" ID="lblLinkDepth" runat="server" Text="Links Depth : " />
                        <asp:Label CssClass="" ID="lblLinkDepthCount" runat="server" />
                        <br />
                        <asp:Label CssClass="" ID="lblCurrentPage" runat="server" Text="Current Page :" />
                        <asp:Label CssClass="" ID="lblCurrentPageURL" runat="server" />
                        <br />
                        <asp:Label CssClass="" ID="lblScannedPage" runat="server" Text="Scanned Pages : " />
                        <asp:Label CssClass="" ID="lblScannedPageCount" runat="server" />
                        <br />
                        <%--<asp:Label CssClass="" ID="lblTotalPages" runat="server" Text="Total Pages  :" />
                            <asp:Label CssClass="" ID="lblTotalPagesCount" runat="server" />
                            <br />--%>
                        </ContentTemplate>
            </asp:UpdatePanel>
                        </div>
        </form>
    </div>
Posted
Updated 23-Sep-14 1:03am
v4

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900