Imports System.IO
Imports System.Net
Imports System.Text.RegularExpressions
Imports ICSharpCode.SharpZipLib
''' <summary>
''' This is a class similar to <c>System.Net.WebClient</c>, but with:
''' - Autodetection of content encoding
''' - Proxy support
''' - Authentication support
''' - Cookie retention
''' - HTTP compression (via SharpZipLib)
''' - IfModifiedSince
''' </summary>
''' <remarks>
''' Jeff Atwood
''' http://www.codinghorror.com/
''' </remarks>
Friend Class WebClientEx
Private Const _AcceptedEncodings As String = "gzip,deflate"
Private _DefaultEncoding As System.Text.Encoding
Private _DetectedEncoding As System.Text.Encoding
Private _ForcedEncoding As System.Text.Encoding
Private _DetectedContentType As String
Private _ContentLocation As String
Private _ResponseBytes As Byte()
Private _AuthenticationRequired As Boolean
Private _ProxyAuthenticationRequired As Boolean
Private _ProxyUrl As String
Private _ProxyUser As String
Private _ProxyPassword As String
Private _AuthenticationUser As String
Private _AuthenticationPassword As String
Private _KeepCookies As Boolean
Private _RequestTimeoutMilliseconds As Integer
Private _PersistedCookies As CookieContainer
'-- http://www.zytrax.com/tech/web/browser_ids.htm
Private _HttpUserAgent As String
Public Sub New()
'-- sets default values
Clear()
End Sub
#Region " Properties"
''' <summary>
''' this is the string encoding that was autodetected from the HTML content
''' </summary>
Public ReadOnly Property DetectedEncoding() As System.Text.Encoding
Get
Return _DetectedEncoding
End Get
End Property
''' <summary>
''' Bypass detection of content encoding and force a specific encoding
''' </summary>
Public Property ForcedEncoding() As System.Text.Encoding
Get
Return _ForcedEncoding
End Get
Set(ByVal Value As System.Text.Encoding)
_ForcedEncoding = Value
End Set
End Property
''' <summary>
''' if the correct string encoding type cannot be detected, or detection is disabled
''' this is the default string encoding that will be used.
''' </summary>
Public Property DefaultEncoding() As System.Text.Encoding
Get
Return _DefaultEncoding
End Get
Set(ByVal Value As System.Text.Encoding)
_DefaultEncoding = Value
End Set
End Property
''' <summary>
''' this is the string encoding that was autodetected from the HTML content
''' </summary>
Public ReadOnly Property ResponseContentType() As String
Get
Return _DetectedContentType
End Get
End Property
''' <summary>
''' Returns true if the last HTTP response was in a non-text format
''' </summary>
Public ReadOnly Property ResponseIsBinary() As Boolean
Get
'-- if we truly have no content-type, we're kinda hosed, but
'-- let's assume the response is text data to be on the safe side
If _DetectedContentType = "" Then
Return False
Else
Return _DetectedContentType.IndexOf("text") = -1
End If
End Get
End Property
''' <summary>
''' Returns a string representation, with encoding, of the last HTTP response data
''' </summary>
Public ReadOnly Property ResponseString() As String
Get
If Me.ResponseIsBinary Then
Return "(" & _ResponseBytes.Length & " bytes of binary data)"
Else
If _ForcedEncoding Is Nothing Then
Return _DetectedEncoding.GetString(_ResponseBytes)
Else
Return _ForcedEncoding.GetString(_ResponseBytes)
End If
End If
End Get
End Property
''' <summary>
''' Returns the raw bytestream representing the last HTTP response data
''' </summary>
Public ReadOnly Property ResponseBytes() As Byte()
Get
Return _ResponseBytes
End Get
End Property
''' <summary>
''' Returns the actual location of the downloaded content, which can
''' be different than the requested URL, eg, http://web.com/IsThisAfolderOrNot
''' </summary>
Public ReadOnly Property ContentLocation() As String
Get
Return _ContentLocation
End Get
End Property
''' <summary>
''' Browser ID string to send with web requests
''' note that many popular websites will serve alternate content based on this value
''' </summary>
''' <remarks>
''' defaults to browser ID string of vanilla IE6 as seen in XP SP2
''' </remarks>
Public Property BrowserIdString() As String
Get
Return _HttpUserAgent
End Get
Set(ByVal Value As String)
_HttpUserAgent = Value
End Set
End Property
''' <summary>
''' how long, in milliseconds, to wait for HTTP content to arrive before timing out
''' </summary>
Public Property TimeoutMilliseconds() As Integer
Get
Return _RequestTimeoutMilliseconds
End Get
Set(ByVal Value As Integer)
_RequestTimeoutMilliseconds = Value
End Set
End Property
''' <summary>
''' URL of the web proxy to use
''' if left blank, no Proxy will be used; if provided, will ALWAYS be used!
''' </summary>
Public Property ProxyUrl() As String
Get
Return _ProxyUrl
End Get
Set(ByVal Value As String)
_ProxyUrl = Value
End Set
End Property
''' <summary>
''' username to use for Proxy authentication
''' if left blank, the current user's credentials will be sent
''' </summary>
Public Property ProxyUser() As String
Get
Return _ProxyUser
End Get
Set(ByVal Value As String)
_ProxyUser = Value
End Set
End Property
''' <summary>
''' password to use for Proxy authentication
''' if left blank, the current user's credentials will be sent
''' </summary>
Public Property ProxyPassword() As String
Get
Return _ProxyPassword
End Get
Set(ByVal Value As String)
_ProxyPassword = Value
End Set
End Property
''' <summary>
''' the Proxy requires authentication
''' if credentials are not explicitly provided, the current user's credentials will automatically be sent
''' </summary>
Public Property ProxyAuthenticationRequired() As Boolean
Get
Return _ProxyAuthenticationRequired
End Get
Set(ByVal Value As Boolean)
_ProxyAuthenticationRequired = Value
End Set
End Property
''' <summary>
''' username for authentication to the target URL
''' if left blank, the current user's credentials will be sent
''' </summary>
Public Property AuthenticationUser() As String
Get
Return _AuthenticationUser
End Get
Set(ByVal Value As String)
_AuthenticationUser = Value
End Set
End Property
''' <summary>
''' password for authentication to the target URL
''' if left blank, the current user's credentials will be sent
''' </summary>
Public Property AuthenticationPassword() As String
Get
Return _AuthenticationPassword
End Get
Set(ByVal Value As String)
_AuthenticationPassword = Value
End Set
End Property
''' <summary>
''' the target URL requires authentication
''' if credentials are not explicitly provided, the current user's credentials will automatically be sent
''' </summary>
Public Property AuthenticationRequired() As Boolean
Get
Return _AuthenticationRequired
End Get
Set(ByVal Value As Boolean)
_AuthenticationRequired = Value
End Set
End Property
''' <summary>
''' Retains cookies for all subsequent HTTP requests from this object
''' </summary>
Public Property KeepCookies() As Boolean
Get
Return _KeepCookies
End Get
Set(ByVal Value As Boolean)
_KeepCookies = Value
End Set
End Property
#End Region
''' <summary>
''' The Content-Encoding entity-header field is used as a modifier to the media-type.
''' When present, its value indicates what additional content codings have been applied
''' to the entity-body, and thus what decoding mechanisms must be applied in order to
''' obtain the media-type referenced by the Content-Type header field. Content-Encoding
''' is primarily used to allow a document to be compressed without losing the identity
''' of its underlying media type.
''' </summary>
''' <remarks>
''' http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.5
''' </remarks>
Private Enum HttpContentEncoding
None
Gzip
Compress
Deflate
Unknown
End Enum
''' <summary>
''' attempt to convert this charset string into a named .NET text encoding
''' </summary>
Private Function CharsetToEncoding(ByVal Charset As String) As System.Text.Encoding
If Charset = "" Then Return Nothing
Try
Return System.Text.Encoding.GetEncoding(Charset)
Catch ex As System.ArgumentException
Return Nothing
End Try
End Function
''' <summary>
''' try to determine string encoding using Content-Type HTTP header and
''' raw HTTP content bytes
''' "Content-Type: text/html; charset=us-ascii"
''' <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
''' </summary>
Private Function DetectEncoding(ByVal ContentTypeHeader As String, ByVal ResponseBytes() As Byte) As System.Text.Encoding
Dim s As String
Dim encoding As System.Text.Encoding
'-- first try the header
s = Regex.Match(ContentTypeHeader, "charset=([^;""'/>]+)", _
RegexOptions.IgnoreCase).Groups(1).ToString.ToLower
encoding = CharsetToEncoding(s)
'-- if we can't get it from header, try the body bytes forced to ASCII
If encoding Is Nothing Then
s = Regex.Match(System.Text.Encoding.ASCII.GetString(ResponseBytes), _
"<meta[^>]+content-type[^>]+charset=([^;""'/>]+)", _
RegexOptions.IgnoreCase).Groups(1).ToString.ToLower
encoding = CharsetToEncoding(s)
If encoding Is Nothing Then
Return _DefaultEncoding
End If
End If
Return encoding
End Function
''' <summary>
''' returns a collection of bytes from a Url
''' </summary>
''' <param name="Url">URL to retrieve</param>
Public Sub GetUrlData(ByVal Url As String, ByVal ifModifiedSince As DateTime)
Dim wreq As HttpWebRequest = DirectCast(WebRequest.Create(Url), HttpWebRequest)
'-- do we need to use a proxy to get to the web?
If _ProxyUrl <> "" Then
Dim wp As New WebProxy(_ProxyUrl)
If _ProxyAuthenticationRequired Then
If _ProxyUser <> "" And _ProxyPassword <> "" Then
wp.Credentials = New NetworkCredential(_ProxyUser, _ProxyPassword)
Else
wp.Credentials = CredentialCache.DefaultCredentials
End If
wreq.Proxy = wp
End If
End If
'-- does the target website require credentials?
If _AuthenticationRequired Then
If _AuthenticationUser <> "" And _AuthenticationPassword <> "" Then
wreq.Credentials = New NetworkCredential(_AuthenticationUser, _AuthenticationPassword)
Else
wreq.Credentials = CredentialCache.DefaultCredentials
End If
End If
wreq.Method = "GET"
wreq.Timeout = _RequestTimeoutMilliseconds
wreq.UserAgent = _HttpUserAgent
wreq.Headers.Add("Accept-Encoding", _AcceptedEncodings)
'-- note that, if present, this will trigger a 304 exception
'-- if the URL being retrieved is not newer than the specified
'-- date/time
If ifModifiedSince <> DateTime.MinValue Then
wreq.IfModifiedSince = ifModifiedSince
End If
'-- sometimes we need to transfer cookies to another URL;
'-- this keeps them around in the object
If KeepCookies Then
If _PersistedCookies Is Nothing Then
_PersistedCookies = New CookieContainer
End If
wreq.CookieContainer = _PersistedCookies
End If
'-- download the target URL into a byte array
Dim wresp As HttpWebResponse = DirectCast(wreq.GetResponse, HttpWebResponse)
'-- convert response stream to byte array
Dim ebr As New ExtendedBinaryReader(wresp.GetResponseStream)
_ResponseBytes = ebr.ReadToEnd()
'-- determine if body bytes are compressed, and if so,
'-- decompress the bytes
Dim ContentEncoding As HttpContentEncoding
If wresp.Headers.Item("Content-Encoding") Is Nothing Then
ContentEncoding = HttpContentEncoding.None
Else
Select Case wresp.Headers.Item("Content-Encoding").ToLower
Case "gzip"
ContentEncoding = HttpContentEncoding.Gzip
Case "deflate"
ContentEncoding = HttpContentEncoding.Deflate
Case Else
ContentEncoding = HttpContentEncoding.Unknown
End Select
_ResponseBytes = Decompress(_ResponseBytes, ContentEncoding)
End If
'-- sometimes URL is indeterminate, eg, "http://website.com/myfolder"
'-- in that case the folder and file resolution MUST be done on
'-- the server, and returned to the client as ContentLocation
_ContentLocation = wresp.Headers("Content-Location")
If _ContentLocation Is Nothing Then
_ContentLocation = ""
End If
'-- if we have string content, determine encoding type
'-- (must cast to prevent Nothing)
_DetectedContentType = wresp.Headers("Content-Type")
If _DetectedContentType Is Nothing Then
_DetectedContentType = ""
End If
If Me.ResponseIsBinary Then
_DetectedEncoding = Nothing
Else
If _ForcedEncoding Is Nothing Then
_DetectedEncoding = DetectEncoding(_DetectedContentType, _ResponseBytes)
End If
End If
End Sub
''' <summary>
''' decompresses a compressed array of bytes via the specified HTTP compression type
''' </summary>
''' <returns>decompressed array of bytes</returns>
Private Function Decompress(ByVal b() As Byte, ByVal CompressionType As HttpContentEncoding) As Byte()
Dim s As Stream
Select Case CompressionType
Case HttpContentEncoding.Deflate
s = New Zip.Compression.Streams.InflaterInputStream(New MemoryStream(b), _
New Zip.Compression.Inflater(True))
Case HttpContentEncoding.Gzip
s = New GZip.GZipInputStream(New MemoryStream(b))
Case Else
Return b
End Select
Dim ms As New MemoryStream
Const chunkSize As Integer = 2048
Dim sizeRead As Integer
Dim unzipBytes(chunkSize) As Byte
While True
sizeRead = s.Read(unzipBytes, 0, chunkSize)
If sizeRead > 0 Then
ms.Write(unzipBytes, 0, sizeRead)
Else
Exit While
End If
End While
s.Close()
Return ms.ToArray
End Function
''' <summary>
''' download URL contents to a file, using HTTP compression if possible
''' </summary>
Public Sub DownloadFile(ByVal Url As String, ByVal FilePath As String)
DownloadFile(Url, FilePath, DateTime.MinValue)
End Sub
''' <summary>
''' download URL contents to a file, using HTTP compression if possible
''' URL contents will only be downloaded if newer than the specified date
''' </summary>
Public Sub DownloadFile(ByVal Url As String, ByVal FilePath As String, ByVal ifModifiedSince As DateTime)
GetUrlData(Url, ifModifiedSince)
SaveResponseToFile(FilePath)
End Sub
Private Sub SaveResponseToFile(ByVal FilePath As String)
Dim fs As FileStream
Dim bw As BinaryWriter
Try
fs = New FileStream(FilePath, FileMode.OpenOrCreate)
bw = New BinaryWriter(fs)
bw.Write(_ResponseBytes)
bw.Close()
Finally
If Not fs Is Nothing Then fs.Close()
End Try
Return
End Sub
''' <summary>
''' download URL contents to an array of bytes, using HTTP compression if possible
''' </summary>
Public Function DownloadBytes(ByVal Url As String) As Byte()
Return DownloadBytes(Url, DateTime.MinValue)
End Function
''' <summary>
''' download URL contents to an array of bytes, using HTTP compression if possible
''' URL contents will only be downloaded if newer than the specified date
''' </summary>
Public Function DownloadBytes(ByVal Url As String, ByVal ifModifiedSince As DateTime) As Byte()
GetUrlData(Url, ifModifiedSince)
Return _ResponseBytes
End Function
''' <summary>
''' download URL contents to a string (with appropriate encoding), using HTTP compression if possible
''' </summary>
Public Function DownloadString(ByVal Url As String) As String
GetUrlData(Url, DateTime.MinValue)
Return Me.ResponseString
End Function
''' <summary>
''' download URL contents to a string (with appropriate encoding), using HTTP compression if possible
''' URL contents will only be downloaded if newer than the specified date
''' </summary>
Public Function DownloadString(ByVal Url As String, ByVal ifModifiedSince As DateTime) As String
GetUrlData(Url, ifModifiedSince)
Return Me.ResponseString
End Function
''' <summary>
''' clears any downloaded content and resets all properties to default values
''' </summary>
Public Sub Clear()
ClearDownload()
_DefaultEncoding = System.Text.Encoding.GetEncoding("Windows-1252")
_ForcedEncoding = Nothing
_AuthenticationRequired = False
_ProxyAuthenticationRequired = False
_ProxyUrl = ""
_ProxyUser = ""
_ProxyPassword = ""
_AuthenticationUser = ""
_AuthenticationPassword = ""
_KeepCookies = False
_RequestTimeoutMilliseconds = 60000
_HttpUserAgent = _
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"
End Sub
''' <summary>
''' clears any downloaded content
''' </summary>
Public Sub ClearDownload()
_ResponseBytes = Nothing
_DetectedEncoding = Nothing
_DetectedContentType = ""
_ContentLocation = ""
_PersistedCookies = Nothing
End Sub
#Region " ExtendedBinaryReader"
''' <summary>
''' Extends the <c>System.IO.BinaryReader</c> class by a <c>ReadToEnd</c>
''' method that can be used to read a whole file.
''' </summary>
''' <remarks>
''' http://dotnet.mvps.org/dotnet/faqs/?id=readfile&lang=en
''' </remarks>
Public Class ExtendedBinaryReader
Inherits BinaryReader
''' <summary>
''' Creates a new instance of the <c>ExtendedBinaryReader</c> class.
''' </summary>
''' <param name="Input">A stream.</param>
Public Sub New(ByVal Input As Stream)
MyBase.New(Input)
End Sub
''' <summary>
''' Creates a new instance of the <c>ExtendedBinaryReader</c> class.
''' </summary>
''' <param name="Input">The provided stream.</param>
''' <param name="Encoding">The character encoding.</param>
Public Sub New(ByVal Input As Stream, ByVal Encoding As System.Text.Encoding)
MyBase.New(Input, Encoding)
End Sub
''' <summary>
''' Reads the whole data in the base stream and returns it in an
''' array of bytes.
''' </summary>
''' <returns>The streams whole binary data.</returns>
Public Function ReadToEnd() As Byte()
Return ReadToEnd(Short.MaxValue)
End Function
''' <summary>
''' Reads the whole data in the base stream and returns it in an
''' array of bytes.
''' </summary>
''' <param name="InitialLength">The initial buffer length.</param>
''' <returns>The stream's whole binary data.</returns>
' Based on an implementation by Jon Skeet [MVP]
' (<URL:http://www.yoda.arachsys.com/csharp/readbinary.html>).
Public Function ReadToEnd(ByVal InitialLength As Integer) As Byte()
' If an unhelpful initial length was passed, just use 32K.
If InitialLength < 1 Then
InitialLength = Short.MaxValue
End If
Dim Buffer(InitialLength - 1) As Byte
Dim Read As Integer
Dim Chunk As Integer = _
Me.BaseStream.Read(Buffer, Read, Buffer.Length - Read)
Do While Chunk > 0
Read = Read + Chunk
' If the end of the buffer is reached, check to see if there is
' any more data.
If Read = Buffer.Length Then
Dim NextByte As Integer = Me.BaseStream.ReadByte()
' If the end of the stream is reached, we are done.
If NextByte = -1 Then
Return Buffer
End If
' Nope. Resize the buffer, put in the byte we have just
' read, and continue.
Dim NewBuffer(Buffer.Length * 2 - 1) As Byte
System.Buffer.BlockCopy( _
Buffer, _
0, _
NewBuffer, _
0, _
Buffer.Length _
)
'Array.Copy(Buffer, NewBuffer, Buffer.Length)
NewBuffer(Read) = CByte(NextByte)
Buffer = NewBuffer
Read = Read + 1
End If
Chunk = Me.BaseStream.Read(Buffer, Read, Buffer.Length - Read)
Loop
' The buffer is now too big. Shrink it.
Dim ReturnBuffer(Read - 1) As Byte
System.Buffer.BlockCopy(Buffer, 0, ReturnBuffer, 0, Read)
'Array.Copy(Buffer, ReturnBuffer, Read)
Return ReturnBuffer
End Function
End Class
#End Region
End Class