Click here to Skip to main content
15,891,513 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
I am using the iTextSharp DLL. While converting HTML to PDF, I get an error like "Expected > Tag missing". Can anyone please help me resolve this bug?


C#
/// <summary>
/// Builds an HTML string for the Word document at <paramref name="_strFileName"/> by wrapping
/// the output of <c>WordDocumentDisplayRenderer</c> in html/head/body tags.
/// </summary>
/// <param name="_strFileName">Path of the Word document to open.</param>
/// <returns>
/// NOTE(review): the snippet as posted has no return statement; presumably <c>strReturn</c>
/// is returned after the try block — confirm against the full source.
/// </returns>
private string ReadWordDocumentEx(string _strFileName)
        {
            string strReturn = string.Empty;
            // NOTE(review): this try has no catch/finally in the posted snippet, so the
            // method is truncated here and will not compile as shown.
            try
            {

                StringBuilder sbWordToXML = new StringBuilder();
                // NOTE(review): the second argument is `true`; if this is the Open XML SDK
                // overload it requests an editable document — confirm write access is needed,
                // since nothing visible here modifies the document.
                using (WordprocessingDocument doc =
                WordprocessingDocument.Open(_strFileName, true))
                {
                    // coreProperties is only referenced by the commented-out <title> line below.
                    CoreProperties coreProperties = CoreProperties.FromCoreFileProperties(doc.CoreFilePropertiesPart);
                    sbWordToXML.Append("<html>");
                    sbWordToXML.Append("<head>");
                    // sbWordToXML.Append("<title>" + coreProperties.Title + " - " + Path.GetFileName(_strFileName) + " - WordVisualizer</title>");
                    WordDocumentDisplayRenderer objWordDocumentDisplayRenderer = new WordDocumentDisplayRenderer();
                    // The renderer appends directly into sbWordToXML (passed by ref); style output goes inside <head>.
                    objWordDocumentDisplayRenderer.RenderStyles(ref sbWordToXML, doc);
                    sbWordToXML.Append("</head>");
                    sbWordToXML.Append("<body>");
                    // Renderer output is appended verbatim. Whether it is well-formed markup cannot
                    // be determined from this snippet — TODO verify, since a strict HTML-to-PDF
                    // parser downstream may reject unclosed tags.
                    objWordDocumentDisplayRenderer.RenderDocument(ref sbWordToXML, doc);
                    // The 'Document' div is appended empty, after the rendered content.
                    // NOTE(review): if the rendered document was meant to sit inside this div,
                    // the order of these calls is wrong — confirm intent.
                    sbWordToXML.Append("   <div id='Document'>");
                    sbWordToXML.Append("   </div>");
                    sbWordToXML.Append("  </body>");
                    sbWordToXML.Append("</html>");
                    strReturn = sbWordToXML.ToString();
                }
}


Above is my conversion method for turning the document text into HTML format.

Below is the code I use to convert that HTML to PDF format. Could you help me with this bug?

C#
// Bind a PDF writer to the destination file; the FileStream is handed to the writer here.
// NOTE(review): nothing in this fragment calls pdfDocument.Close(). Confirm the enclosing
// method closes the document, otherwise the PDF is never finalized and the file stays open.
PdfWriter.GetInstance(pdfDocument, new FileStream(_strDestFileName, FileMode.Create));

                // The document must be open before any content is added to it.
                pdfDocument.Open();

                // Parse the HTML exactly once. The previous code ran HTMLWorker.Parse and then
                // passed the same, already fully consumed StringReader to ParseToList: that second
                // pass saw no input, and with a fresh reader it would have added every element to
                // the PDF a second time.
                // NOTE(review): the parser is strict about markup; an "Expected > ..." error most
                // likely means strHTMLText is not well-formed (e.g. an unclosed tag) — verify the
                // generated HTML rather than this code.
                using (StringReader htmlReader = new StringReader(strHTMLText))
                {
                    // The worker is constructed over pdfDocument, so Parse feeds the parsed
                    // elements to that document.
                    iTextSharp.text.html.simpleparser.HTMLWorker htmlWorker = new iTextSharp.text.html.simpleparser.HTMLWorker(pdfDocument);
                    htmlWorker.Parse(htmlReader);
                }

                blnReturn = true;
                return blnReturn;



[edit]Code moved from reply to question - OriginalGriff[/edit]
Posted
Updated 8-Dec-11 22:24pm
v3
Comments
Sergey Alexandrovich Kryukov 9-Dec-11 2:45am    
How do you know that your input file is really well-formed? What parser do you use and what other parsers say, a web browser, for example?
--SA
Member 8459644 9-Dec-11 3:32am    
/// <summary>
/// Builds an HTML string for the Word document at <paramref name="_strFileName"/> by wrapping
/// the output of <c>WordDocumentDisplayRenderer</c> in html/head/body tags.
/// </summary>
/// <param name="_strFileName">Path of the Word document to open.</param>
/// <returns>
/// NOTE(review): the snippet as posted has no return statement; presumably <c>strReturn</c>
/// is returned after the try block — confirm against the full source.
/// </returns>
private string ReadWordDocumentEx(string _strFileName)
{
string strReturn = string.Empty;
// NOTE(review): this try has no catch/finally in the posted snippet, so the
// method is truncated here and will not compile as shown.
try
{

StringBuilder sbWordToXML = new StringBuilder();
// NOTE(review): the second argument is `true`; if this is the Open XML SDK
// overload it requests an editable document — confirm write access is needed,
// since nothing visible here modifies the document.
using (WordprocessingDocument doc =
WordprocessingDocument.Open(_strFileName, true))
{
// coreProperties is only referenced by the commented-out <title> line below.
CoreProperties coreProperties = CoreProperties.FromCoreFileProperties(doc.CoreFilePropertiesPart);
sbWordToXML.Append("<html>");
sbWordToXML.Append("<head>");
// sbWordToXML.Append("<title>" + coreProperties.Title + " - " + Path.GetFileName(_strFileName) + " - WordVisualizer</title>");
WordDocumentDisplayRenderer objWordDocumentDisplayRenderer = new WordDocumentDisplayRenderer();
// The renderer appends directly into sbWordToXML (passed by ref); style output goes inside <head>.
objWordDocumentDisplayRenderer.RenderStyles(ref sbWordToXML, doc);
sbWordToXML.Append("</head>");
sbWordToXML.Append("<body>");
// Renderer output is appended verbatim. Whether it is well-formed markup cannot
// be determined from this snippet — TODO verify, since a strict HTML-to-PDF
// parser downstream may reject unclosed tags.
objWordDocumentDisplayRenderer.RenderDocument(ref sbWordToXML, doc);
// The 'Document' div is appended empty, after the rendered content.
// NOTE(review): if the rendered document was meant to sit inside this div,
// the order of these calls is wrong — confirm intent.
sbWordToXML.Append(" <div id='Document'>");
sbWordToXML.Append(" </div>");
sbWordToXML.Append(" </body>");
sbWordToXML.Append("</html>");
strReturn = sbWordToXML.ToString();
}
}

Above is my conversion method for turning the document text into HTML format.

Below is the code I use to convert that HTML to PDF format. Could you help me with this bug?

// Bind a PDF writer to the destination file; the FileStream is handed to the writer here.
// NOTE(review): nothing in this fragment calls pdfDocument.Close(). Confirm the enclosing
// method closes the document, otherwise the PDF is never finalized and the file stays open.
PdfWriter.GetInstance(pdfDocument, new FileStream(_strDestFileName, FileMode.Create));

// The document must be open before any content is added to it.
pdfDocument.Open();

// Parse the HTML exactly once. The previous code ran HTMLWorker.Parse and then passed the
// same, already fully consumed StringReader to ParseToList: that second pass saw no input,
// and with a fresh reader it would have added every element to the PDF a second time.
// The removed declaration also used a lower-cased type name (itextsharp.text.ielement),
// which does not match the case-sensitive iTextSharp.text.IElement.
// NOTE(review): the parser is strict about markup; an "Expected > ..." error most likely
// means strHTMLText is not well-formed (e.g. an unclosed tag) — verify the generated HTML
// rather than this code.
using (StringReader htmlReader = new StringReader(strHTMLText))
{
    // The worker is constructed over pdfDocument, so Parse feeds the parsed elements to
    // that document.
    iTextSharp.text.html.simpleparser.HTMLWorker htmlWorker = new iTextSharp.text.html.simpleparser.HTMLWorker(pdfDocument);
    htmlWorker.Parse(htmlReader);
}

blnReturn = true;
return blnReturn;

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900