OK, so I was able to get it to work. I created a simple form that has a ListBox and a button on it. When the form loads, it goes to the http://www.un.org/depts/dhl/resguide/r1.htm page and pulls out all of the links. Then, when you select a link in the list (assuming you pick one of the PDF links) and click the button, it goes through the whole process of redirecting, acquiring cookies, and then writing the file out to a temporary file (D:\temp.pdf). Here's the code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
namespace UNDocs
{
    /// <summary>
    /// Form that lists every link found on <see cref="StartingPage"/> and, when the
    /// button is clicked, follows the META-refresh redirects for the selected link,
    /// acquires the session cookies and saves the resulting document to disk.
    /// </summary>
    public partial class Form1 : Form
    {
        private const string StartingPage = @"http://www.un.org/depts/dhl/resguide/r1.htm";

        // NOTE(review): the login credentials are embedded in this URL; presumably a
        // shared guest account — confirm before reusing this pattern elsewhere.
        private const string CookieOriginator = @"http://daccess-dds-ny.un.org/prod/ods_mother.nsf?Login&Username=freeods2&Password=1234";

        // Location the downloaded document is written to.
        private const string OutputPath = @"D:\temp.pdf";

        // The patterns never change, so build each Regex once instead of per call.
        private static readonly Regex LinkRegex = new Regex("href=\"(?<link>.*?)\"", RegexOptions.Multiline);
        private static readonly Regex UrlBaseRegex = new Regex("(?<base>http://.*?)/");
        private static readonly Regex MetaRedirectRegex = new Regex("<META.*URL=(?<URL>.*)\"");

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Downloads the starting page and adds every href value found in it to the list box.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            string html = GetHTML(StartingPage);
            foreach (Match match in GetLinks(html))
            {
                listBox1.Items.Add(match.Groups["link"].Value);
            }
        }

        /// <summary>
        /// Downloads the document behind the currently selected link and writes it to
        /// <see cref="OutputPath"/>. Does nothing when no item is selected.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // Without a selection there is nothing to download; the original code
            // crashed with a NullReferenceException here.
            if (listBox1.SelectedItem == null)
            {
                return;
            }

            string selectedLink = listBox1.SelectedItem.ToString();

            // First hop: the selected page answers with a META refresh whose URL is
            // appended to the scheme-and-host part of the selected link.
            string tempPage = GetURLBase(selectedLink) + GetPageToRedirectTo(selectedLink, StartingPage);

            // The cookies are requested with the intermediate page as referer.
            CookieContainer cookies = GetCookies(CookieOriginator, tempPage);

            // Second hop: the intermediate page redirects to the actual document.
            string finalPage = GetPageToRedirectTo(tempPage);
            byte[] pdf = GetBytesFromHTTP(finalPage, cookies);
            WriteFile(OutputPath, pdf);
        }

        /// <summary>
        /// Finds every <c>href="..."</c> attribute in <paramref name="s"/>.
        /// </summary>
        /// <param name="s">HTML text to scan.</param>
        /// <returns>The matches; the URL of each is in the group named "link".</returns>
        public MatchCollection GetLinks(string s)
        {
            return LinkRegex.Matches(s);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> as text with an empty referer and a fresh cookie container.
        /// </summary>
        public string GetHTML(string url)
        {
            return GetHTML(url, "");
        }

        /// <summary>
        /// Downloads <paramref name="url"/> as text using the given referer and a fresh cookie container.
        /// </summary>
        public string GetHTML(string url, string Referer)
        {
            return GetHTML(url, Referer, new CookieContainer());
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the response body as text.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="Referer">Value for the Referer request header.</param>
        /// <param name="cookies">Cookie container attached to the request; may be null to send the request without one.</param>
        /// <returns>The complete response body.</returns>
        public string GetHTML(string url, string Referer, CookieContainer cookies)
        {
            HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
            myRequest.Referer = Referer;
            myRequest.CookieContainer = cookies;

            using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the raw response body.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="cookies">Cookies to send with the request.</param>
        /// <returns>The response body, decompressed if the server sent it gzip- or deflate-encoded.</returns>
        public byte[] GetBytesFromHTTP(string url, CookieContainer cookies)
        {
            HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
            myRequest.CookieContainer = cookies;

            // Let the framework advertise gzip/deflate AND undo it on the way back.
            // Adding the Accept-Encoding header by hand, as before, left a compressed
            // response untouched, so the saved file could be gzip data instead of a PDF.
            myRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            byte[] buffer = new byte[4096];
            using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (MemoryStream memoryStream = new MemoryStream())
            {
                int count;
                while ((count = responseStream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    memoryStream.Write(buffer, 0, count);
                }

                return memoryStream.ToArray();
            }
        }

        /// <summary>
        /// Extracts the "http://host" prefix of <paramref name="url"/> (everything before
        /// the first slash that follows "http://").
        /// </summary>
        /// <returns>The prefix, or an empty string when the URL does not match that shape.</returns>
        private string GetURLBase(string url)
        {
            return UrlBaseRegex.Match(url).Groups["base"].Value;
        }

        /// <summary>
        /// Same as <see cref="GetPageToRedirectTo(string, string)"/> with an empty referer.
        /// </summary>
        private string GetPageToRedirectTo(string url)
        {
            return GetPageToRedirectTo(url, "");
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and extracts the target of its META refresh tag.
        /// </summary>
        /// <param name="url">Page expected to contain a <c>&lt;META ... URL=...&gt;</c> redirect.</param>
        /// <param name="Referer">Value for the Referer request header.</param>
        /// <returns>The redirect target as written in the page, or an empty string when no such tag is found.</returns>
        private string GetPageToRedirectTo(string url, string Referer)
        {
            // A null cookie container reproduces the original request, which never set one.
            string pageSource = GetHTML(url, Referer, null);
            return MetaRedirectRegex.Match(pageSource).Groups["URL"].Value;
        }

        /// <summary>
        /// Requests <paramref name="url"/> and returns the cookies the server handed back.
        /// </summary>
        /// <param name="url">Address that issues the cookies.</param>
        /// <param name="Referer">Value for the Referer request header.</param>
        /// <returns>The container holding whatever cookies the response set.</returns>
        private CookieContainer GetCookies(string url, string Referer)
        {
            HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
            myRequest.Referer = Referer;
            myRequest.CookieContainer = new CookieContainer();

            // Only the cookies matter; the body is discarded and the response released.
            using (myRequest.GetResponse())
            {
            }

            return myRequest.CookieContainer;
        }

        /// <summary>
        /// Writes <paramref name="fileContents"/> to <paramref name="FileName"/>, creating
        /// the file or overwriting an existing one.
        /// </summary>
        public void WriteFile(string FileName, byte[] fileContents)
        {
            File.WriteAllBytes(FileName, fileContents);
        }
    }
}
(that was fun to figure out!)