Click here to Skip to main content
15,885,906 members
Articles / Programming Languages / C#

Implementing a TextReader to extract the contents of various files using IFilter

Rate me:
Please Sign up or sign in to vote.
4.89/5 (14 votes)
9 Feb 2011 · Eclipse · 3 min read · 146.2K · 4.1K · 82
A solution that can extract the contents of various file types using an IFilter implementation. Special thanks to Eyal Post and his article 'Using IFilter in C#'.
using System;
using System.Text;
using Eclipse.IndexingService;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.IO;

namespace IFilterTest
{
    
    
    /// <summary>
    /// Unit tests for <see cref="FilterReader"/>. Each test builds a reader from
    /// either a file on disk or an in-memory sample document, calls
    /// <c>Init()</c>, and checks the text returned by <c>ReadToEnd()</c>.
    /// </summary>
    [TestClass()]
    public class FilterReaderTest
    {
        // Sample documents, reloaded from the test project's resources before each test.
        private byte[] pdf;
        private byte[] doc;
        private byte[] docx;
        private byte[] htm; // gb2312 (per the original author's note)

        /// <summary>
        /// Gets or sets the test context which provides information about and
        /// functionality for the current test run. Set by the test framework.
        /// </summary>
        public TestContext TestContext { get; set; }

        #region Additional test attributes

        /// <summary>
        /// Runs once after all tests in this class have run and asks
        /// <c>FilterLoader</c> to release its class cache.
        /// </summary>
        [ClassCleanup()]
        public static void MyClassCleanup()
        {
            FilterLoader.ReleaseClassCache();
        }

        /// <summary>
        /// Runs before each test and loads the sample documents from resources.
        /// </summary>
        [TestInitialize()]
        public void MyTestInitialize()
        {
            pdf = Properties.Resources.This_is_a_simple_text_pdf;
            doc = Properties.Resources.This_is_a_simple_text_doc;
            docx = Properties.Resources.This_is_a_simple_text_docx;
            htm = Properties.Resources.This_is_a_simple_text_htm;
        }

        #endregion

        /// <summary>
        /// Writes 100,000 random characters (code points 0..126) to a temporary
        /// UTF-8 text file and verifies that reading the file back through
        /// <see cref="FilterReader"/> returns exactly the same string.
        /// </summary>
        [TestMethod()]
        public void ReadToEndTest()
        {
#if DEBUG
            var fileName = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), Guid.NewGuid() + ".txt");
#else
            var fileName = Path.Combine(TestContext.TestDeploymentDir, Guid.NewGuid() + ".txt");
#endif

            var tmpStr = new StringBuilder();
            var random = new Random();
            for (int i = 0; i < 100000; i++)
            {
                // Upper bound is exclusive, so the generated characters are 0..126.
                var c = (char) random.Next(0, 127);
                tmpStr.Append(c);
            }
            var expected = tmpStr.ToString();

            try
            {
                File.WriteAllText(fileName, expected, Encoding.UTF8);

                // NOTE(review): second argument is presumably a buffer size - confirm against FilterReader.
                using (var target = new FilterReader(fileName, 0x10000))
                {
                    target.Init();
                    var actual = target.ReadToEnd();
                    Assert.AreEqual(expected, actual);
                }
            }
            finally
            {
                // Remove the temporary file even when the reader or the assertion throws;
                // the reader has already been disposed by the time this runs.
                File.Delete(fileName);
            }
        }

        /// <summary>
        /// Exercises the (bytes, extension, int) constructor with the .doc, .pdf and
        /// .htm samples and checks the extracted text of each.
        /// </summary>
        [TestMethod()]
        public void FilterReaderConstructorTest5()
        {
            // Each reader gets its own using block so it is disposed before the next one is created.
            using (var target = new FilterReader(doc, ".doc", 0x50))
            {
                target.Init();
                var actual = target.ReadToEnd();
                var expected = " This is a simple text\r\n\r\nIFilter is wonderful\r\n";
                Assert.AreEqual(expected, actual);
            }

            using (var target = new FilterReader(pdf, ".pdf", 0x2))
            {
                target.Init();
                var actual = target.ReadToEnd();
                var expected = "\r\nThis is a simple text IFilter is wonderful ";
                Assert.AreEqual(expected, actual);
            }

            using (var target = new FilterReader(htm, ".htm", 0x50))
            {
                target.Init();
                var actual = target.ReadToEnd();
                // The HTML sample is only checked for the presence of both phrases.
                Assert.IsTrue(actual.IndexOf("This is a simple text") != -1);
                Assert.IsTrue(actual.IndexOf("IFilter  is wonderful") != -1);
            }
        }

        /// <summary>
        /// Exercises the (bytes, extension) constructor with the .doc sample and
        /// checks that some text is extracted.
        /// </summary>
        [TestMethod()]
        public void FilterReaderConstructorTest4()
        {
            using (var target = new FilterReader(doc, ".doc"))
            {
                target.Init();
                var actual = target.ReadToEnd();
                Assert.IsFalse(string.IsNullOrEmpty(actual));
            }
        }

        /// <summary>
        /// Exercises the (bytes, int) constructor with the HTML sample and checks
        /// that some text is extracted.
        /// </summary>
        [TestMethod()]
        public void FilterReaderConstructorTest3()
        {
            using (var target = new FilterReader(htm, 0x100))
            {
                target.Init();
                var actual = target.ReadToEnd();
                Assert.IsFalse(string.IsNullOrEmpty(actual));
            }
        }

        /// <summary>
        /// Exercises the (bytes) constructor with the HTML sample and checks that
        /// some text is extracted.
        /// </summary>
        [TestMethod()]
        public void FilterReaderConstructorTest()
        {
            using (var target = new FilterReader(htm))
            {
                target.Init();
                var actual = target.ReadToEnd();
                Assert.IsFalse(string.IsNullOrEmpty(actual));
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Eclipse Public License 1.0


Written By
Technical Lead HP
China China
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions