Click here to Skip to main content
14,546,383 members
Rate this:
Please Sign up or sign in to vote.
See more:
I need to convert an HTML table to a DataTable in C#. I used HtmlAgilityPack, but it does not convert the table correctly because of the rowspan attributes. The code I am currently using is:

 /// <summary>
    /// Downloads the page at http://example.com, extracts the table markup with
    /// <c>getTableCode</c> and copies it into a <see cref="DataTable"/>.
    /// </summary>
    /// <remarks>
    /// Columns are created from every <c>th</c> found under a <c>tr</c>; one data row is added
    /// for every <c>tr</c> that has at least one direct <c>td</c> child.
    /// Known limitation: cells carrying a <c>rowspan</c> attribute are not repeated in the rows
    /// they span, so the cells of those following rows land in the wrong columns. The
    /// <c>//tr[td]</c> query also matches rows that belong to nested tables.
    /// </remarks>
    /// <returns>
    /// The populated table. The table is returned without rows (and possibly without columns)
    /// when the markup has no matching header cells or data rows.
    /// </returns>
    private static DataTable convertHtmlTableToDataTable()
    {
        string urlContent;

        // WebClient is IDisposable; release it as soon as the download has finished.
        using (WebClient webClient = new WebClient())
        {
            urlContent = webClient.DownloadString("http://example.com");
        }

        // NOTE(review): getTableCode is defined elsewhere; presumably it returns only the
        // <table>...</table> fragment of the page - confirm.
        string tableCode = getTableCode(urlContent);

        // Normalise non-breaking spaces (entity form and raw U+00A0) to ordinary spaces.
        string htmlCode = tableCode.Replace("&nbsp;", " ").Replace("\u00A0", " ");

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(htmlCode);

        DataTable table = new DataTable();

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var headers = doc.DocumentNode.SelectNodes("//tr/th");
        if (headers == null)
        {
            return table;
        }

        foreach (HtmlNode header in headers)
        {
            table.Columns.Add(header.InnerText);
        }

        var rows = doc.DocumentNode.SelectNodes("//tr[td]");
        if (rows == null)
        {
            return table;
        }

        foreach (var row in rows)
        {
            // The tr[td] predicate guarantees at least one direct td child here.
            table.Rows.Add(row.SelectNodes("td").Select(td => td.InnerText).ToArray());
        }

        return table;
    }

And this is part of the HTML table:

 <table class="tabel" cellspacing="0" border="0">
    <caption style="font-family:Verdana; font-size:20px;">SEMGRP</caption>
    <tr>
        <th class="celula" >Ora</th>
        <th  class="latime_celula celula">Luni</th>
        <th  class="latime_celula celula">Marti</th>
        <th  class="latime_celula celula">Miercuri</th>
        <th  class="latime_celula celula">Joi</th>
        <th  class="latime_celula celula">Vineri</th>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">8-9</td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   
                                            Curs    
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a> 
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                            <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">
                                            Curs    
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a> 
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=12&tip=s">Sala</a>  
                                            <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula"> </td>
        <td class="celula"> </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">
                                        Curs
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">9-10</td>
        <td class="celula"> </td>
        <td class="celula"> </td>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">10-11</td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=303&tip=p">Prof</a>
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=331&tip=p">Prof</a>
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=14&tip=s">Sala</a>  
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=330&tip=p">Prof</a>   
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a> 
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula"> </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=10&tip=s">Sala</a>  <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">11-12</td>
        <td class="celula"> </td>
    </tr>
    <tr>


I tried several solutions, but none of them worked well.

What I have tried:

Thanks for any help in advance.
Posted
Updated 22-Apr-17 21:58pm
Rate this:
Please Sign up or sign in to vote.

Solution 1

This seems to be a good library.
Cross framework (WinForms/WPF/PDF/Metro/Mono/etc.), Multipurpose (UI Controls / Image generation / PDF generation / etc.), 100% managed (C#), High performance HTML Rendering library: HTML Renderer - Home[^]
   
Rate this:
Please Sign up or sign in to vote.

Solution 2

/// <summary>
/// Downloads the page at http://example.com, extracts the table markup with
/// <c>getTableCode</c> and copies the first (outer) table into a <see cref="DataTable"/>,
/// repeating cells that carry a <c>rowspan</c> attribute in every row they span.
/// </summary>
/// <remarks>
/// Only rows that are direct children of the outer table (or of its <c>thead</c>/<c>tbody</c>)
/// are read, so rows of tables nested inside a cell do not become data rows; their text is
/// still part of the enclosing cell's <c>InnerText</c>.
/// <c>colspan</c> is not handled. Cells beyond the number of header columns are ignored, and
/// columns for which a row supplies no cell are left at their default value.
/// </remarks>
/// <returns>
/// The populated table. The table is returned empty when the markup contains no table,
/// no header cells or no data rows.
/// </returns>
private static DataTable convertHtmlTableToDataTable()
{
    string urlContent;

    // WebClient is IDisposable; release it as soon as the download has finished.
    using (WebClient webClient = new WebClient())
    {
        urlContent = webClient.DownloadString("http://example.com");
    }

    // NOTE(review): getTableCode is defined elsewhere; presumably it returns only the
    // <table>...</table> fragment of the page - confirm.
    string tableCode = getTableCode(urlContent);

    // Normalise non-breaking spaces (entity form and raw U+00A0) to ordinary spaces.
    string htmlCode = tableCode.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(htmlCode);

    DataTable table = new DataTable();

    // Work relative to the outer table so that nested tables are not mistaken for rows.
    HtmlNode outerTable = doc.DocumentNode.SelectSingleNode("//table");
    if (outerTable == null)
    {
        return table;
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var headers = outerTable.SelectNodes("./tr/th|./thead/tr/th|./tbody/tr/th");
    if (headers == null)
    {
        return table;
    }

    foreach (HtmlNode header in headers)
    {
        table.Columns.Add(header.InnerText);
    }

    var rows = outerTable.SelectNodes("./tr[td]|./tbody/tr[td]");
    if (rows == null)
    {
        return table;
    }

    int columnCount = table.Columns.Count;

    // Per column: how many further rows are still covered by a spanning cell, and its text.
    int[] rowsStillSpanned = new int[columnCount];
    string[] spannedText = new string[columnCount];

    foreach (HtmlNode row in rows)
    {
        // The tr[td] predicate guarantees at least one direct td child here.
        var cells = row.SelectNodes("./td");
        object[] values = new object[columnCount];
        int nextCell = 0;

        for (int column = 0; column < columnCount; column++)
        {
            if (rowsStillSpanned[column] > 0)
            {
                // This column is occupied by a cell that started in an earlier row.
                values[column] = spannedText[column];
                rowsStillSpanned[column]--;
                continue;
            }

            if (nextCell >= cells.Count)
            {
                // The row has fewer cells than columns; leave the remainder unset.
                continue;
            }

            HtmlNode cell = cells[nextCell];
            nextCell++;

            string text = cell.InnerText;
            values[column] = text;

            // A missing or unparsable rowspan falls back to 1 (no spanning).
            int rowspan = cell.GetAttributeValue("rowspan", 1);
            if (rowspan > 1)
            {
                rowsStillSpanned[column] = rowspan - 1;
                spannedText[column] = text;
            }
        }

        table.Rows.Add(values);
    }

    return table;
}

And this is part of the HTML table:

<table class="tabel" cellspacing="0" border="0"><caption style="font-family:Verdana; font-size:20px;">SEMGRP</caption><tbody><tr><th class="celula">Ora</th><th class="latime_celula celula">Luni</th><th class="latime_celula celula">Marti</th><th class="latime_celula celula">Miercuri</th><th class="latime_celula celula">Joi</th><th class="latime_celula celula">Vineri</th></tr><tr><td class="celula" nowrap="nowrap">8-9</td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">
                                        Curs
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a>
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                        <br>
                                    </td></tr></tbody></table>
    </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">
                                        Curs
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a>
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=12&tip=s">Sala</a>
                                        <br>
                                    </td></tr></tbody></table>
    </td><td class="celula"> </td><td class="celula"> </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">
                                    Curs
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td></tr><tr><td class="celula" nowrap="nowrap">9-10</td><td class="celula"> </td><td class="celula"> </td></tr><tr><td class="celula" nowrap="nowrap">10-11</td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=303&tip=p">Prof</a>
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=331&tip=p">Prof</a>
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=14&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=330&tip=p">Prof</a>
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td><td class="celula"> </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=10&tip=s">Sala</a>  <br>
                                    </td></tr></tbody></table>
    </td></tr><tr><td class="celula" nowrap="nowrap">11-12</td><td class="celula"> </td></tr><tr></tr></tbody></table>
   

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)




CodeProject, 503-250 Ferrand Drive Toronto Ontario, M3C 3G8 Canada +1 416-849-8900 x 100