Click here to Skip to main content
15,894,291 members
Please Sign up or sign in to vote.
1.00/5 (1 vote)
See more:
I need to convert an HTML table to a DataTable in C#. I used HtmlAgilityPack, but it does not convert the table correctly because of the rowspan attributes. The code I am currently using is:

 /// <summary>
 /// Downloads a page, extracts its table markup via <c>getTableCode</c> and copies the
 /// header cells and data cells into a <see cref="DataTable"/>.
 /// </summary>
 /// <remarks>
 /// Cells are copied positionally: a cell that is covered by a <c>rowspan</c> from an
 /// earlier row is not accounted for, so the remaining values of such a row end up
 /// shifted to the left. Rows of nested tables also match <c>//tr[td]</c> and are added
 /// as rows of their own.
 /// </remarks>
 /// <returns>
 /// A table with one column per <c>th</c> and one row per <c>tr</c> that directly contains
 /// <c>td</c> cells; the table is returned without rows (or without columns) when the
 /// markup contains no matching nodes.
 /// </returns>
 private static DataTable convertHtmlTableToDataTable()
    {
        string urlContent;

        // WebClient is IDisposable; release it as soon as the download has finished.
        using (WebClient webClient = new WebClient())
        {
            urlContent = webClient.DownloadString("http://example.com");
        }

        string tableCode = getTableCode(urlContent);

        // The posted literal rendered as a space-for-space replacement, which does nothing
        // (presumably the entity was lost when the page was rendered -- TODO confirm).
        // Normalise both the "&nbsp;" entity and the raw U+00A0 character to a plain space.
        string htmlCode = tableCode.Replace("&nbsp;", " ").Replace("\u00A0", " ");

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(htmlCode);
        DataTable table = new DataTable();

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var headers = doc.DocumentNode.SelectNodes("//tr/th");
        if (headers == null)
        {
            return table;
        }

        foreach (HtmlNode header in headers)
        {
            table.Columns.Add(header.InnerText);
        }

        var rows = doc.DocumentNode.SelectNodes("//tr[td]");
        if (rows == null)
        {
            return table;
        }

        foreach (var row in rows)
        {
            table.Rows.Add(row.SelectNodes("td").Select(td => td.InnerText).ToArray());
        }
        return table;
    }

And this is part of the HTML table:

 <pre lang="HTML"> <table class="tabel" cellspacing="0" border="0">
    <caption style="font-family:Verdana; font-size:20px;">SEMGRP</caption>
    <tr>
        <th class="celula" >Ora</th>
        <th  class="latime_celula celula">Luni</th>
        <th  class="latime_celula celula">Marti</th>
        <th  class="latime_celula celula">Miercuri</th>
        <th  class="latime_celula celula">Joi</th>
        <th  class="latime_celula celula">Vineri</th>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">8-9</td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   
                                            Curs    
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a> 
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                            <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">
                                            Curs    
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a> 
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=12&tip=s">Sala</a>  
                                            <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula">&nbsp;</td>
        <td class="celula">&nbsp;</td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">
                                        Curs
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">9-10</td>
        <td class="celula">&nbsp;</td>
        <td class="celula">&nbsp;</td>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">10-11</td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=303&tip=p">Prof</a>
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=331&tip=p">Prof</a>
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=14&tip=s">Sala</a>  
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=330&tip=p">Prof</a>   
                                        <br /><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a> 
                                        <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
        <td class="celula">&nbsp;</td>
        <td class="celula" rowspan="2">
                                <table border="0" align="center">
                                    <tr>
                                        <td nowrap="nowrap" align="center">   Curs
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                        <br />
                                        <a class="link_celula" href="afis_n0.php?id_tip=10&tip=s">Sala</a>  <br />
                                        </td>
                                    </tr>
                                </table>
        </td>
    </tr>
    <tr>
        <td class="celula" nowrap="nowrap">11-12</td>
        <td class="celula">&nbsp;</td>
    </tr>
    <tr>


I tried several solutions, but none of them worked well.

What I have tried:

Thanks for any help in advance.
Posted
Updated 22-Apr-17 21:58pm

This seems to be a good library.
Cross framework (WinForms/WPF/PDF/Metro/Mono/etc.), Multipurpose (UI Controls / Image generation / PDF generation / etc.), 100% managed (C#), High performance HTML Rendering library: HTML Renderer - Home[^]
 
Share this answer
 
/// <summary>
/// Downloads a page, extracts its table markup via <c>getTableCode</c> and converts it to a
/// <see cref="DataTable"/>, expanding <c>rowspan</c> and <c>colspan</c> so that every value
/// lands in the column it visually belongs to.
/// </summary>
/// <remarks>
/// A cell with <c>rowspan="n"</c> has its text repeated in the same column of the following
/// n-1 rows; a cell with <c>colspan="n"</c> fills n adjacent columns. Rows that belong to a
/// table nested inside a cell are not emitted as rows of their own; their text is already
/// part of the enclosing cell's <c>InnerText</c>. Cells beyond the number of header columns
/// are ignored, and columns a row does not reach are left as <see cref="System.DBNull"/>.
/// </remarks>
/// <returns>
/// A table with one column per <c>th</c> and one row per data row of the outer table; the
/// table is returned without rows (or without columns) when no matching nodes exist.
/// </returns>
private static DataTable convertHtmlTableToDataTable()
{
    string urlContent;

    // WebClient is IDisposable; release it as soon as the download has finished.
    using (WebClient webClient = new WebClient())
    {
        urlContent = webClient.DownloadString("http://example.com");
    }

    string tableCode = getTableCode(urlContent);

    // The posted literal rendered as a space-for-space replacement, which does nothing
    // (presumably the entity was lost when the page was rendered -- TODO confirm).
    // Normalise both the "&nbsp;" entity and the raw U+00A0 character to a plain space.
    string htmlCode = tableCode.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(htmlCode);
    DataTable table = new DataTable();

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var headers = doc.DocumentNode.SelectNodes("//tr/th");
    if (headers == null)
    {
        return table;
    }

    foreach (HtmlNode header in headers)
    {
        table.Columns.Add(header.InnerText);
    }

    var rows = doc.DocumentNode.SelectNodes("//tr[td]");
    if (rows == null)
    {
        return table;
    }

    // The table that owns the header row; null when the fragment has no <table> element.
    HtmlNode mainTable = headers[0].Ancestors("table").FirstOrDefault();

    int columnCount = table.Columns.Count;

    // Per column: the text of a cell that still spans downwards, and for how many more rows.
    string[] carriedText = new string[columnCount];
    int[] carriedRows = new int[columnCount];

    foreach (HtmlNode row in rows)
    {
        // "//tr[td]" also matches rows of tables nested inside cells; keep only rows whose
        // nearest enclosing table is the one that holds the headers.
        if (row.Ancestors("table").FirstOrDefault() != mainTable)
        {
            continue;
        }

        object[] values = new object[columnCount];
        int column = 0;

        foreach (HtmlNode cell in row.SelectNodes("td"))
        {
            // Columns still covered by a rowspan from an earlier row come first.
            while (column < columnCount && carriedRows[column] > 0)
            {
                values[column] = carriedText[column];
                carriedRows[column]--;
                column++;
            }

            if (column >= columnCount)
            {
                break;
            }

            string text = cell.InnerText;
            int rowSpan = cell.GetAttributeValue("rowspan", 1);
            int colSpan = cell.GetAttributeValue("colspan", 1);

            // Treat zero or negative spans as a plain single cell.
            if (rowSpan < 1)
            {
                rowSpan = 1;
            }

            if (colSpan < 1)
            {
                colSpan = 1;
            }

            for (int i = 0; i < colSpan && column < columnCount; i++, column++)
            {
                values[column] = text;
                carriedText[column] = text;
                carriedRows[column] = rowSpan - 1;
            }
        }

        // Spans that cover columns to the right of the row's last own cell.
        for (; column < columnCount; column++)
        {
            if (carriedRows[column] > 0)
            {
                values[column] = carriedText[column];
                carriedRows[column]--;
            }
        }

        table.Rows.Add(values);
    }

    return table;
}

And this is part of the HTML table:

HTML
<table class="tabel" cellspacing="0" border="0"><caption style="font-family:Verdana; font-size:20px;">SEMGRP</caption><tbody><tr><th class="celula">Ora</th><th class="latime_celula celula">Luni</th><th class="latime_celula celula">Marti</th><th class="latime_celula celula">Miercuri</th><th class="latime_celula celula">Joi</th><th class="latime_celula celula">Vineri</th></tr><tr><td class="celula" nowrap="nowrap">8-9</td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">
                                        Curs
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a>
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                        <br>
                                    </td></tr></tbody></table>
    </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">
                                        Curs
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=287&tip=p">Prof</a>
                                        <br>
                                        <a class="link_celula" href="afis_n0.php?id_tip=12&tip=s">Sala</a>
                                        <br>
                                    </td></tr></tbody></table>
    </td><td class="celula"> </td><td class="celula"> </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">
                                    Curs
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td></tr><tr><td class="celula" nowrap="nowrap">9-10</td><td class="celula"> </td><td class="celula"> </td></tr><tr><td class="celula" nowrap="nowrap">10-11</td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=303&tip=p">Prof</a>
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=331&tip=p">Prof</a>
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=14&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=330&tip=p">Prof</a>
                                    <br><a class="link_celula" href="afis_n0.php?id_tip=9&tip=s">Sala</a>
                                    <br>
                                    </td></tr></tbody></table>
    </td><td class="celula"> </td><td class="celula" rowspan="2">




                            <table border="0" align="center"><tbody><tr><td nowrap="nowrap" align="center">   Curs
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=293&tip=p">Prof</a>
                                    <br>
                                    <a class="link_celula" href="afis_n0.php?id_tip=10&tip=s">Sala</a>  <br>
                                    </td></tr></tbody></table>
    </td></tr><tr><td class="celula" nowrap="nowrap">11-12</td><td class="celula"> </td></tr><tr></tr></tbody></table>
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900