Click here to Skip to main content
15,881,248 members
Please Sign up or sign in to vote.
1.00/5 (1 vote)
See more:
I am converting a Word file to a text file. During the conversion, each page break in the Word document ends up as a special symbol in the text file. Does anyone know how to delete all page breaks from the Word content via C#? My code is below.


C#
/// <summary>
/// Ribbon button handler that appends the text of the active Word document
/// to a text file named after the document (document name + ".txt").
/// </summary>
/// <remarks>
/// Main-story paragraphs are written one per line, skipping paragraphs whose
/// style is a structural marker (see <see cref="IsExcludedStyle"/>), and runs of
/// title-page paragraphs are wrapped in <c>&lt;tp&gt;</c>/<c>&lt;/tp&gt;</c>.
/// Footnote and endnote stories are written verbatim after that.
/// The output path is relative, so the file lands in the process's current directory.
/// </remarks>
/// <param name="sender">The ribbon control that raised the event (unused).</param>
/// <param name="e">Event data supplied by the ribbon (unused).</param>
private void button1_Click(object sender, RibbonControlEventArgs e)
{
    Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
    string filename = doc.Name;

    // NOTE(review): the instance is never used here; kept because the constructor
    // may have side effects that are not visible from this method - confirm.
    ReBitsCls c = new ReBitsCls(doc);

    // 'true' = append to an existing file instead of overwriting it.
    using (StreamWriter writer = new StreamWriter(filename + ".txt", true))
    {
        foreach (Word.Range range in doc.StoryRanges)
        {
            Word.WdStoryType storyType = range.StoryType;

            if (storyType == Word.WdStoryType.wdMainTextStory)
            {
                WriteMainStory(writer, range);
            }
            else if (storyType == Word.WdStoryType.wdFootnotesStory
                  || storyType == Word.WdStoryType.wdEndnotesStory)
            {
                // The story type must be compared, not the Range object itself;
                // comparing the Range to an enum value is never true.
                writer.Write(range.Text);
            }
        }
    }
}

/// <summary>
/// Writes every non-excluded paragraph of the main text story to <paramref name="writer"/>,
/// removing page-break characters and adding the title-page <c>&lt;tp&gt;</c> markup.
/// </summary>
/// <param name="writer">Destination text writer.</param>
/// <param name="story">The main text story range of the document.</param>
private static void WriteMainStory(StreamWriter writer, Word.Range story)
{
    foreach (Word.Paragraph para in story.Paragraphs)
    {
        Word.Style style = para.get_Style() as Word.Style;
        string styleName = style == null ? null : style.NameLocal;

        if (IsExcludedStyle(styleName))
        {
            continue;
        }

        if (styleName == "11.28 TPTitle")
        {
            writer.Write("<tp>");
        }

        // Word exposes manual page/section breaks as a form-feed character (U+000C)
        // in Range.Text; a plain text file has no use for it, so drop it.
        string text = para.Range.Text;
        if (text != null)
        {
            writer.Write(text.Replace("\f", string.Empty));
        }
        writer.Write("\n");

        if (IsTitlePageStyle(styleName))
        {
            // Close the group when the following paragraph is not part of the title page,
            // or when there is no following paragraph at all.
            Word.Paragraph next = para.Next();
            Word.Style nextStyle = next == null ? null : (next.Range.get_Style() as Word.Style);
            string nextStyleName = nextStyle == null ? null : nextStyle.NameLocal;

            if (!IsTitlePageStyle(nextStyleName))
            {
                writer.Write("</tp>");
            }
        }
    }
}

/// <summary>
/// Returns true for paragraph styles that mark structure (icons, figure/table/appendix
/// begin-end markers, end-matter sections) and must not be exported.
/// </summary>
/// <param name="styleName">Local style name; may be null.</param>
private static bool IsExcludedStyle(string styleName)
{
    switch (styleName)
    {
        case "05.07 Icon":
        case "08.09 FigBegin":
        case "08.10 FigEnd":
        case "08.13 TableBegin":
        case "08.14 TableEnd":
        case "08.39 AppxBegin":
        case "08.40 AppxEnd":
        case "14.00 EMSect":
            return true;
        default:
            return false;
    }
}

/// <summary>
/// Returns true for paragraph styles that belong to the title-page group
/// wrapped in <c>&lt;tp&gt;</c>...<c>&lt;/tp&gt;</c>.
/// </summary>
/// <param name="styleName">Local style name; may be null.</param>
private static bool IsTitlePageStyle(string styleName)
{
    switch (styleName)
    {
        case "11.18 AuthEd":
        case "11.20 Affil":
        case "11.28 TPTitle":
        case "11.29 TPSubtitle":
        case "11.30 TPText":
        case "11.31 PubName":
        case "11.32 PubLocs":
        case "11.33 PubDate":
            return true;
        default:
            return false;
    }
}
Posted
Comments
Sergey Alexandrovich Kryukov 24-Oct-14 23:54pm    
Not clear. Text files don't have a concept of a page break, so how could it be ignored or not ignored?
—SA
Maciej Los 25-Oct-14 15:17pm    
You hit a 10, Sergey!
Sergey Alexandrovich Kryukov 25-Oct-14 23:19pm    
Thank you, Maciej.
—SA
Member 10242311 25-Oct-14 1:17am    
Yes, I know a text file doesn't have page breaks. When I convert a Word file that contains page breaks to text, each page break shows up in the text file as "FF" highlighted in black. The same kind of problem occurs with tables: the table data appears as "BEL" data1 "BEL" data2 "BEL" data3 "BEL". Footnotes have a similar problem: "STX" is shown instead of the footnote number.
BillWoodruff 25-Oct-14 1:33am    
What is your goal here: to get all the text from a Word file into a text file except for page-breaks, tables, etc. ?

1 solution

There is no functionality called 'page break' in text files!

If you want to save the text as a text file, simply use the SaveAs[^] method, but note that it exports the entire text.

I'd suggest to use Range.Information[^] to recognize what kind of Range it is. If it is a wdInHeaderFooter[^], do not export it!
Please, read it: How to: Programmatically Define and Select Ranges in Documents[^]
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900