Click here to Skip to main content
15,881,938 members
Articles / Desktop Programming / Win32

Making a Search Engine

Rate me:
Please Sign up or sign in to vote.
4.94/5 (51 votes)
3 May 2013 · CPOL · 6 min read · 239.6K · 27.6K · 124
This article discusses the making of a search engine.
Imports System.Security.Cryptography
Imports System.Text
Imports System.Net

''' <summary>
''' Helper routines for the crawler: MD5 hashing, downloading a page's source,
''' classifying a URL by protocol, and stripping the fragment from a link.
''' </summary>
Public Module func
    ''' <summary>
    ''' Computes the MD5 hash of <paramref name="input"/> (encoded as UTF-8) and
    ''' returns it as a lowercase hexadecimal string (two hex digits per hash byte).
    ''' </summary>
    ''' <param name="input">The text to hash. Must not be Nothing.</param>
    ''' <returns>The hexadecimal representation of the hash.</returns>
    ''' <exception cref="ArgumentNullException">
    ''' Thrown by the encoder when <paramref name="input"/> is Nothing.
    ''' </exception>
    Public Function GetMd5Hash(ByVal input As String) As String
        ' MD5 implements IDisposable; Using releases it even if hashing throws.
        Using md5hash As MD5 = MD5.Create()
            ' Convert the input string to a byte array and compute the hash.
            Dim data As Byte() = md5hash.ComputeHash(Encoding.UTF8.GetBytes(input))
            ' Two hex characters are produced per byte, so presize the builder.
            Dim sBuilder As New StringBuilder(data.Length * 2)
            For Each b As Byte In data
                sBuilder.Append(b.ToString("x2"))
            Next
            Return sBuilder.ToString()
        End Using
    End Function

    ''' <summary>
    ''' Downloads the body of <paramref name="url"/> with an HTTP GET request.
    ''' </summary>
    ''' <param name="url">Absolute http/https address of the page to fetch.</param>
    ''' <returns>
    ''' The response body when the server answers 200 OK; otherwise String.Empty.
    ''' Any exception (invalid URL, non-HTTP scheme, timeout, network or protocol
    ''' error) is logged to the console and also results in String.Empty.
    ''' </returns>
    Public Function get_SouceCode(ByVal url As String) As String
        Try
            ' Create the request; the cast fails (and is caught below) for non-HTTP schemes.
            Dim requestScrape As HttpWebRequest = CType(WebRequest.Create(url), HttpWebRequest)
            With requestScrape
                .UserAgent = "Kshitij Crawler"
                .Method = "GET"
                .Timeout = 10000 ' milliseconds
            End With
            Console.WriteLine(String.Format("get_SouceCode::->Waiting for source code {0} ", url))
            ' Using guarantees the response (and its connection) is released on every
            ' path, including when reading the body throws.
            Using responseScrape As HttpWebResponse = CType(requestScrape.GetResponse(), HttpWebResponse)
                ' The status must be inspected while the response is still open:
                ' some .NET implementations throw ObjectDisposedException when
                ' StatusCode is read after Close().
                If responseScrape.StatusCode <> HttpStatusCode.OK Then
                    ' The body would be discarded anyway, so do not download it.
                    Return String.Empty
                End If
                Using sr As New IO.StreamReader(responseScrape.GetResponseStream())
                    Return sr.ReadToEnd()
                End Using
            End Using
        Catch ex As Exception
            ' Deliberate best-effort: the crawler treats any failure as "no content".
            Console.WriteLine(String.Format("get_SouceCode::->error with {0} >> {1} ", url, ex.Message))
            Return String.Empty
        End Try
    End Function

    ''' <summary>
    ''' Classifies <paramref name="url"/> by its protocol prefix.
    ''' </summary>
    ''' <param name="url">The link to classify; may be Nothing.</param>
    ''' <returns>
    ''' URL_Protocol.https or URL_Protocol.http when the link starts with the
    ''' corresponding lowercase prefix; URL_Protocol.none when the link is Nothing,
    ''' contains a "#", or has any other prefix.
    ''' </returns>
    Public Function url_type(ByVal url As String) As URL_Protocol
        ' Links containing a fragment ("#") are rejected outright: in-page anchors
        ' are not treated as separate pages by the crawler.
        If url Is Nothing OrElse url.Contains("#") Then
            Return URL_Protocol.none
        ElseIf url.StartsWith("https://", StringComparison.Ordinal) Then
            ' Ordinal comparison: URL prefixes are not linguistic text, so the
            ' result must not depend on the current culture.
            Return URL_Protocol.https
        ElseIf url.StartsWith("http://", StringComparison.Ordinal) Then
            Return URL_Protocol.http
        Else
            Return URL_Protocol.none
        End If
    End Function

    ''' <summary>
    ''' Removes the fragment part of a link in place: everything from the first
    ''' "#" onward is cut off. Links without "#" (and Nothing) are left unchanged.
    ''' </summary>
    ''' <param name="url">The link to clean; updated by reference.</param>
    Public Sub sanitize_link(ByRef url As String)
        If url Is Nothing Then
            Return
        End If
        ' A single ordinal character search replaces the Contains + IndexOf(String)
        ' pair, which scanned twice and used two different comparison rules.
        Dim hashIndex As Integer = url.IndexOf("#"c)
        If hashIndex >= 0 Then
            url = url.Substring(0, hashIndex)
        End If
    End Sub
End Module

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Student
India India
I just love coding, but because of my studies it has become very tough for me to manage both.

Comments and Discussions