Click here to Skip to main content
Rate this: bad
good
Please Sign up or sign in to vote.
Hello,
 
I'm developing a C# Windows application that downloads data from a main URL, extracts the inner URLs from that data, and then starts a thread to fetch the data for each inner URL.
 
This works, but the main URL data extraction is stuck until all of the inner URL data has been fetched.
 
For example, if a main URL has 50 inner URLs, it processes all 50 inner URLs and only then goes to the next main URL.
But I want both threads to run in parallel.
 
Below is the code : http://msdn.microsoft.com/en-IN/library/system.net.httpwebrequest.begingetresponse(v=vs.95).aspx
 
/// <summary>
/// Carries everything one in-flight web request needs between asynchronous
/// callbacks: the request/response pair, the response stream, a raw read
/// buffer and the text accumulated so far.
/// </summary>
public class RequestState
{
    // Size, in bytes, of the read buffer allocated for every request.
    const int BUFFER_SIZE = 1024;

    // Result text and completion flag exposed alongside the request state.
    public string _urlHtml = "";
    public bool isCompleted = false;

    // Text accumulated from the response; starts out empty.
    public StringBuilder requestData = new StringBuilder("");

    // Scratch buffer handed to the stream's asynchronous reads.
    public byte[] BufferRead = new byte[BUFFER_SIZE];

    // Request, response and response stream; all unset until assigned by the caller.
    public HttpWebRequest request = null;
    public HttpWebResponse response;
    public Stream streamResponse = null;

    /// <summary>
    /// Creates a state object with an empty text accumulator, a freshly
    /// allocated read buffer and no request, response or stream attached.
    /// </summary>
    public RequestState()
    {
        // All members are set up by their field initializers.
    }
}
 
/// <summary>
/// Downloads the body of one URL through the asynchronous
/// <see cref="HttpWebRequest"/> API (BeginGetResponse / BeginRead).
/// The caller starts a download with <see cref="GetAsyncHtml"/>, waits until
/// <see cref="isCompleted"/> becomes true and then reads <see cref="_urlHtml"/>.
/// The result fields are per instance, so use one agent per download.
/// </summary>
public class HttPagent
{
    /// <summary>
    /// Set every time any agent finishes a download (successfully or not).
    /// It is static and this class never resets it.
    /// </summary>
    public static ManualResetEvent allDone = new ManualResetEvent(false);

    const int BUFFER_SIZE = 1024;

    /// <summary>
    /// Downloaded text, or "MZon-GetDataERROR" followed by the error message
    /// when the download failed. Empty until the download has finished.
    /// </summary>
    public volatile string _urlHtml = "";

    /// <summary>
    /// True once the download has finished, on success and on every failure
    /// path. Volatile because it is written by a callback thread and polled
    /// by the thread that started the download.
    /// </summary>
    public volatile bool isCompleted = false;

    /// <summary>
    /// Request state extended with a stateful UTF-8 decoder, so a multi-byte
    /// character split across two buffer reads is decoded correctly instead
    /// of being turned into replacement characters.
    /// </summary>
    private sealed class DecodingRequestState : RequestState
    {
        public readonly Decoder Utf8Decoder;
        public readonly char[] DecodedChars;

        public DecodingRequestState()
        {
            Utf8Decoder = Encoding.UTF8.GetDecoder();
            // Large enough for the worst case of one completely filled read buffer.
            DecodedChars = new char[Encoding.UTF8.GetMaxCharCount(BufferRead.Length)];
        }
    }

    /// <summary>
    /// Starts an asynchronous download of <paramref name="url"/> and returns
    /// immediately. Never throws: any failure is reported through
    /// <see cref="_urlHtml"/> and <see cref="isCompleted"/>.
    /// </summary>
    /// <param name="url">Absolute HTTP or HTTPS address to download.</param>
    public void GetAsyncHtml(string url)
    {
        DecodingRequestState myRequestState = null;
        try
        {
            var uri = new Uri(url);

            // Create a HttpWebRequest object to the desired URL.
            var myHttpWebRequest1 = (HttpWebRequest)WebRequest.Create(uri);

            // The state object travels with the request through both callbacks.
            myRequestState = new DecodingRequestState();
            myRequestState.request = myHttpWebRequest1;

            // Start the asynchronous request.
            myHttpWebRequest1.BeginGetResponse(RespCallback, myRequestState);
        }
        catch (Exception e)
        {
            Fail(myRequestState, e);
        }
    }

    /// <summary>
    /// Completion callback for BeginGetResponse: stores the response and its
    /// stream in the state object and starts the first asynchronous read.
    /// </summary>
    private void RespCallback(IAsyncResult asynchronousResult)
    {
        RequestState myRequestState = (RequestState)asynchronousResult.AsyncState;
        try
        {
            HttpWebRequest myHttpWebRequest2 = myRequestState.request;
            myRequestState.response = (HttpWebResponse)myHttpWebRequest2.EndGetResponse(asynchronousResult);

            // Read the response into a Stream object.
            Stream responseStream = myRequestState.response.GetResponseStream();
            myRequestState.streamResponse = responseStream;

            if (responseStream == null)
            {
                // Nothing to read; still signal completion so the caller does not wait forever.
                Complete(myRequestState);
                return;
            }

            responseStream.BeginRead(myRequestState.BufferRead, 0, myRequestState.BufferRead.Length,
                                     new AsyncCallback(ReadCallBack), myRequestState);
        }
        catch (Exception e)
        {
            // Catch everything: an exception escaping a callback would leave
            // isCompleted false and the waiting caller stuck.
            Console.WriteLine(e.Message);
            Fail(myRequestState, e);
        }
    }

    /// <summary>
    /// Completion callback for BeginRead: appends the decoded chunk and
    /// schedules the next read, or publishes the result at end of stream.
    /// </summary>
    private void ReadCallBack(IAsyncResult asyncResult)
    {
        var myRequestState = (DecodingRequestState)asyncResult.AsyncState;
        try
        {
            Stream responseStream = myRequestState.streamResponse;
            int read = responseStream.EndRead(asyncResult);

            if (read > 0)
            {
                int charCount = myRequestState.Utf8Decoder.GetChars(myRequestState.BufferRead, 0, read,
                                                                    myRequestState.DecodedChars, 0);
                myRequestState.requestData.Append(myRequestState.DecodedChars, 0, charCount);

                responseStream.BeginRead(myRequestState.BufferRead, 0, myRequestState.BufferRead.Length,
                                         new AsyncCallback(ReadCallBack), myRequestState);
                return;
            }

            // End of stream: flush any bytes the decoder is still holding.
            int tailCount = myRequestState.Utf8Decoder.GetChars(myRequestState.BufferRead, 0, 0,
                                                                myRequestState.DecodedChars, 0, true);
            myRequestState.requestData.Append(myRequestState.DecodedChars, 0, tailCount);

            if (myRequestState.requestData.Length > 1)
            {
                _urlHtml = myRequestState.requestData.ToString();
            }

            Complete(myRequestState);
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
            Fail(myRequestState, e);
        }
    }

    /// <summary>
    /// Publishes an error result in the same "MZon-GetDataERROR" format on
    /// every failure path and then signals completion.
    /// </summary>
    private void Fail(RequestState state, Exception error)
    {
        // A failed HTTP exchange can still carry a response that holds a connection.
        var webError = error as WebException;
        if (webError != null && webError.Response != null)
        {
            webError.Response.Close();
        }

        _urlHtml = "MZon-GetDataERROR" + error.Message;
        Complete(state);
    }

    /// <summary>
    /// Releases the stream and response (when present) and signals completion.
    /// </summary>
    private void Complete(RequestState state)
    {
        try
        {
            if (state != null)
            {
                if (state.streamResponse != null)
                {
                    state.streamResponse.Close();
                }
                if (state.response != null)
                {
                    state.response.Close();
                }
            }
        }
        finally
        {
            // Set the flag before the event so a thread woken by allDone
            // already observes isCompleted == true.
            isCompleted = true;
            allDone.Set();
        }
    }
}
 
 

Button click
 
// Configure the main-page worker completely before publishing it in _thMain,
// then start it as a highest-priority background thread.
var mainPageWorker = new Thread(MainPageThreadfunction);
mainPageWorker.Priority = ThreadPriority.Highest;
mainPageWorker.IsBackground = true;
_thMain = mainPageWorker;
_thMain.Start();
 

Thread functions
 
        /// <summary>
        /// Main page thread function: downloads search pages 0 to 4 one after
        /// another and starts one thread per inner URL found on each page.
        /// The loop moves on to the next page without waiting for the inner
        /// threads it has started.
        /// </summary>
        private void MainPageThreadfunction()
        {
            var uri = "https://www.TESTSITE.com/search?page=";

            for (int i = 0; i < 5; i++)
            {
                // Get page data
                var link = uri + i;
                var oPagent = new HttPagent();
                oPagent.GetAsyncHtml(link);

                // Wait for the download by sleeping between checks. Spinning on
                // Application.DoEvents() here kept this highest-priority thread
                // permanently busy, competing with the inner threads and the
                // download callbacks for CPU time.
                while (!oPagent.isCompleted)
                {
                    Thread.Sleep(10);
                }

                var data = oPagent._urlHtml;

                // GET INNER LINKS CODE HERE... I USED REGULAR EXPRESSION
                MatchCollection mc = Regex.Matches(data, "REGULAREXPRESSION",
                                                   RegexOptions.IgnoreCase);

                foreach (Match match in mc)
                {
                    // Declared inside the loop body so each lambda captures its own URL.
                    var URL = match.Groups["URL"].Value;
                    var _thInner = new Thread(() => InnerPageThreadfunction(URL));
                    _thInner.Start();
                }
            }
        }
 
      
        /// <summary>
        /// Thread entry point for one inner URL. The body is a placeholder:
        /// the extraction logic is not part of this snippet.
        /// </summary>
        /// <param name="url">Inner-page URL passed in by the thread that starts this function.</param>
        private void InnerPageThreadfunction(string url)
        {
 
//Inner thread url extract code here
}
 

Please check the code and let me know where I'm going wrong.
 
Thanks
Kapil
Posted 25-Mar-13 19:47pm
Edited 25-Mar-13 19:53pm
v4
Comments
Sergey Alexandrovich Kryukov at 26-Mar-13 1:14am
   
If you are using threads already, why would you also use IAsyncResult? With threads, you can always use blocking calls...
—SA
KapilWaghe at 26-Mar-13 2:35am
   
The page I'm requesting is very large. That's why I'm using IAsyncResult. It avoids the timeout error for me. I want to run my two threads in parallel, so that the main thread gets the URL list and the inner threads read each URL's HTML text.
Sergey Alexandrovich Kryukov at 26-Mar-13 2:40am
   
Using asynchronous API cannot make anything smaller. I see no sense...
—SA

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

  Print Answers RSS
0 OriginalGriff 8,284
1 Sergey Alexandrovich Kryukov 7,327
2 DamithSL 5,614
3 Manas Bhardwaj 4,986
4 Maciej Los 4,920


Advertise | Privacy | Mobile
Web04 | 2.8.1411023.1 | Last Updated 26 Mar 2013
Copyright © CodeProject, 1999-2014
All Rights Reserved. Terms of Service
Layout: fixed | fluid

CodeProject, 503-250 Ferrand Drive Toronto Ontario, M3C 3G8 Canada +1 416-849-8900 x 100