Click here to Skip to main content
15,897,226 members
Articles / Programming Languages / C#

Steganography 13 - Hiding Binary Data in HTML Documents

Rate me:
Please Sign up or sign in to vote.
4.94/5 (45 votes)
13 Mar 2008 | CPOL | 5 min read | 181.8K | 1.7K | 52
Some ideas on how to hide binary data in text documents
/* This class has been written by
 * Corinna John (Hannover, Germany)
 * cj@binary-universe.net
 * 
 * You may do with this code whatever you like,
 * except selling it or claiming any rights/ownership.
 * 
 * Please send me a little feedback about what you're
 * using this code for and what changes you'd like to
 * see in later versions. (And please excuse my bad English.)
 * 
 * WARNING: This is experimental code.
 * Exception handling has been omitted,
 * to keep the code readable to people who want
 * to understand the algorithm.
 * Please do not expect "Release Quality".
 * */

#region Using directives

using System;
using System.IO;
using System.Data;
using System.Text;
using System.Collections;
using System.Collections.Specialized;

#endregion

namespace SteganoHtml {
    public class HtmlUtility {
        /// <summary>Creates a new HtmlUtility. The class declares no fields, so there is nothing to set up.</summary>
        public HtmlUtility() {}

        /// <summary>Counts the key attribute couples in an HTML document</summary>
        /// <remarks>
        /// A couple is a key attribute (column "firstAttribute" of the key table) together with
        /// its corresponding attribute (column "secondAttribute") inside the same tag.
        /// Hide stores exactly one bit per couple, so the returned value is a number of bits.
        /// Every attribute is counted in one couple at most, exactly as Hide uses it.
        /// NOTE(review): this was originally documented as a byte count; callers that need
        /// bytes presumably have to divide by 8 - confirm against the caller.
        /// </remarks>
        /// <param name="sourceFileName">Path and name of the HTML document</param>
        /// <param name="keyTable">DataTable with the key attributes</param>
        /// <returns>Count of attribute couples (one bit each) that can carry data in the specified document</returns>
        public int GetCapacity(String sourceFileName, DataTable keyTable) {
            //read the carrier document; the using block closes the file even if reading fails
            String htmlDocument;
            using (StreamReader reader = new StreamReader(sourceFileName, Encoding.Default)) {
                htmlDocument = reader.ReadToEnd();
            }

            int countCarrierCouples = 0;
            HtmlTagCollection tags = FindTags(htmlDocument);

            foreach (HtmlTag tag in tags) {
                foreach (HtmlAttribute attribute in tag.Attributes) {
                    if (attribute.Handled) {
                        //already part of a counted couple
                        continue;
                    }

                    //find key row for this attribute (single quotes are doubled for the filter expression)
                    DataRow[] rows = keyTable.Select("firstAttribute = '" + attribute.Name.Replace("'", "''") + "'");
                    if (rows.Length == 0) {
                        continue;
                    }

                    //find corresponding attribute
                    HtmlAttribute secondAttribute = FindAttribute(rows[0]["secondAttribute"].ToString(), tag.Attributes);
                    if (secondAttribute == null) {
                        continue;
                    }

                    countCarrierCouples++;

                    //mark both attributes as handled, so that neither of them is counted
                    //again as a member of another couple - Hide does the same
                    attribute.Handled = true;
                    secondAttribute.Handled = true;
                }
            }

            return countCarrierCouples;
        }

        /// <summary>Writes a couple of attributes to a StringBuilder; the order of the two attributes encodes one bit</summary>
        /// <remarks>
        /// Bit set: key attribute first, corresponding attribute second.
        /// Bit not set: corresponding attribute first, key attribute second.
        /// An attribute with an empty value is written as its bare name, any other as name=value.
        /// </remarks>
        /// <param name="messageByte">Current byte</param>
        /// <param name="bitIndex">Position of the bit to encode in [messageByte]</param>
        /// <param name="firstAttribute">Key attribute</param>
        /// <param name="secondAttribute">Corresponding attribute</param>
        /// <param name="insertTextBuilder">Receives the new HTML text</param>
        private void HideBit(int messageByte, int bitIndex, HtmlAttribute firstAttribute, HtmlAttribute secondAttribute, StringBuilder insertTextBuilder) {
            String keyText = (firstAttribute.Value.Length > 0)
                ? String.Format("{0}={1}", firstAttribute.Name, firstAttribute.Value)
                : firstAttribute.Name;

            String partnerText = (secondAttribute.Value.Length > 0)
                ? String.Format("{0}={1}", secondAttribute.Name, secondAttribute.Value)
                : secondAttribute.Name;

            //the bit decides which of the two texts comes first
            bool bitIsSet = GetBit(messageByte, bitIndex);
            String leadingText = bitIsSet ? keyText : partnerText;
            String trailingText = bitIsSet ? partnerText : keyText;

            insertTextBuilder.AppendFormat(@" {0} {1}", leadingText, trailingText);
        }

        /// <summary>Hide a message in an HTML document</summary>
        /// <remarks>
        /// Every tag is rebuilt from its name and its attributes. For each couple of key
        /// attributes found in a tag, the order in which the two attributes are written
        /// encodes one message bit (see HideBit); all other attributes are copied.
        /// The bits of each message byte are used starting with bit 0.
        /// Tags behind the one in which the message ends are left untouched.
        /// The result is saved with the encoding the carrier document was read with.
        /// </remarks>
        /// <param name="sourceFileName">Path and name of the HTML document</param>
        /// <param name="destinationFileName">Path and name to save the resulting HTML document</param>
        /// <param name="message">The message to hide; it is read from position 0</param>
        /// <param name="keyTable">DataTable with the key attributes</param>
        public void Hide(String sourceFileName, String destinationFileName, Stream message, DataTable keyTable) {
            //read the carrier document; the using block closes the file even if reading fails
            String htmlDocument;
            Encoding documentEncoding;
            using (StreamReader reader = new StreamReader(sourceFileName, Encoding.Default)) {
                htmlDocument = reader.ReadToEnd();
                //the reader may have switched to another encoding because of a byte order mark;
                //remember it, so that the document can be saved the same way it was read
                documentEncoding = reader.CurrentEncoding;
            }

            message.Position = 0;

            //list the HTML tags
            HtmlTagCollection tags = FindTags(htmlDocument);

            StringBuilder insertTextBuilder = new StringBuilder();
            DataRow[] rows;
            HtmlAttribute secondAttribute;
            int offset = 0;      //difference between positions in the original and in the changed document
            int bitIndex = 7;    //7 = current byte is used up, the next couple fetches a new byte
            int messageByte = 0;

            foreach (HtmlTag tag in tags) {

                //begin the new text of the tag
                insertTextBuilder.Length = 0;
                insertTextBuilder.AppendFormat("<{0}", tag.Name);

                foreach (HtmlAttribute attribute in tag.Attributes) {

                    if (!attribute.Handled) { //attribute has not been used, yet

                        //find key row for this attribute
                        rows = keyTable.Select(String.Format("firstAttribute = '{0}'", attribute.QueryFormattedName));
                        if (rows.Length > 0) {

                            //find corresponding attribute
                            secondAttribute = FindAttribute(rows[0]["secondAttribute"].ToString(), tag.Attributes);
                            if (secondAttribute != null) {

                                if (bitIndex == 7) {
                                    //get next message byte
                                    //(ReadByte returns -1 at the end of the stream; the current tag
                                    //is still completed, then the loop over the tags stops)
                                    bitIndex = 0;
                                    messageByte = message.ReadByte();
                                } else {
                                    //next bit
                                    bitIndex++;
                                }

                                //change the attributes' order
                                HideBit(messageByte, bitIndex, attribute, secondAttribute, insertTextBuilder);

                                //mark both attributes as handled
                                attribute.Handled = true;
                                secondAttribute.Handled = true;
                            }
                        }

                        if (!attribute.Handled) {
                            //the attribute is not a primary key attribute. Is it a secondary key attribute?
                            bool copyAttribute;
                            rows = keyTable.Select(String.Format("secondAttribute = '{0}'", attribute.QueryFormattedName));

                            if (rows.Length > 0) {
                                //if the corresponding first attribute does not exist in this tag or has already been used,
                                //this attribute will not be used and must be copied.
                                //otherwise it is written later, together with its first attribute.
                                HtmlAttribute firstAttribute = FindAttribute(rows[0]["firstAttribute"].ToString(), tag.Attributes);
                                copyAttribute = (firstAttribute == null) || firstAttribute.Handled;
                            } else {
                                //this attribute is not part of the key and must be copied.
                                copyAttribute = true;
                            }

                            if (copyAttribute) {
                                //copy unused attribute; an attribute without a value is written
                                //as its bare name (the same way HideBit does it), not as "name="
                                if (attribute.Value.Length > 0) {
                                    insertTextBuilder.AppendFormat(
                                        @" {0}={1}",
                                        attribute.Name, attribute.Value);
                                } else {
                                    insertTextBuilder.AppendFormat(@" {0}", attribute.Name);
                                }

                                attribute.Handled = true;
                            }
                        }
                    }
                }

                //replace old tag with new tag

                //earlier replacements may have moved this tag
                tag.BeginPosition += offset;
                tag.EndPosition += offset;

                //the new text is never empty, it contains at least "<" and the tag's name
                String insertText = insertTextBuilder.ToString();
                int oldLength = tag.EndPosition - tag.BeginPosition;
                htmlDocument = htmlDocument.Remove(tag.BeginPosition, oldLength);
                htmlDocument = htmlDocument.Insert(tag.BeginPosition, insertText);

                offset += (insertText.Length - oldLength);

                if (messageByte < 0) {
                    break; //finished
                }
            }

            //save the new document; the using block closes the file even if writing fails
            using (StreamWriter writer = new StreamWriter(destinationFileName, false, documentEncoding)) {
                writer.Write(htmlDocument);
            }
        }

        /// <summary>Derives one bit from the order of two attributes and stores it in the current message byte</summary>
        /// <remarks>
        /// The bit is set if the key attribute stands before the corresponding attribute.
        /// When the bit with index 7 has been stored, the byte is written to [message]
        /// and a fresh byte (0) is returned.
        /// </remarks>
        /// <param name="firstAttributePosition">Position of the key attribute in the source document</param>
        /// <param name="secondAttributePosition">Position of the corresponding attribute in the source document</param>
        /// <param name="messageByte">Current message byte</param>
        /// <param name="bitIndex">Current bit index</param>
        /// <param name="message">Message stream</param>
        /// <returns>New message byte</returns>
        private byte ExtractBit(int firstAttributePosition, int secondAttributePosition, byte messageByte, int bitIndex, Stream message) {
            bool keyComesFirst = firstAttributePosition < secondAttributePosition;
            byte updatedByte = SetBit(messageByte, bitIndex, keyComesFirst);

            if (bitIndex != 7) {
                //byte not complete, yet
                return updatedByte;
            }

            //byte complete: save it and start the next one
            message.WriteByte(updatedByte);
            return 0;
        }

        /// <summary>Extract a hidden message from an HTML document</summary>
        /// <remarks>
        /// For every couple of key attributes one bit is read from the order of the two
        /// attributes (see ExtractBit). The first complete byte is not part of the message:
        /// it is the message's length (0 - 255). Extraction stops as soon as that many
        /// bytes have been written to [message], or when the document ends.
        /// </remarks>
        /// <param name="sourceFileName">Path and name of the HTML document</param>
        /// <param name="message">Empty stream for the message; it must support seeking and SetLength</param>
        /// <param name="keyTable">DataTable with the key attributes</param>
        public void Extract(String sourceFileName, Stream message, DataTable keyTable) {
            //read the carrier document; the using block closes the file even if reading fails
            String htmlDocument;
            using (StreamReader reader = new StreamReader(sourceFileName, Encoding.Default)) {
                htmlDocument = reader.ReadToEnd();
            }

            //list the HTML tags
            HtmlTagCollection tags = FindTags(htmlDocument);

            DataRow[] rows;
            HtmlAttribute secondAttribute;
            int attributePosition, secondAttributePosition;

            int messageLength = 0;
            bool lengthKnown = false; //a separate flag, because 0 is a valid length
            bool finished = false;
            int bitIndex = 0;
            byte messageByte = 0;

            foreach (HtmlTag tag in tags) {
                foreach (HtmlAttribute attribute in tag.Attributes) {

                    if (!attribute.Handled) { //attribute has not been used, yet

                        //find key row for this attribute
                        rows = keyTable.Select(String.Format("firstAttribute = '{0}'", attribute.QueryFormattedName));
                        if (rows.Length > 0) {

                            //find corresponding attribute
                            secondAttribute = FindAttribute(rows[0]["secondAttribute"].ToString(), tag.Attributes);
                            if (secondAttribute != null) {

                                //attribute names are identifiers, not linguistic text: compare ordinal.
                                //NOTE(review): this finds the first occurrence of the name's text behind the
                                //beginning of the tag, which may also be a part of another name or value.
                                attributePosition = htmlDocument.IndexOf(attribute.Name, tag.BeginPosition, StringComparison.Ordinal);
                                secondAttributePosition = htmlDocument.IndexOf(secondAttribute.Name, tag.BeginPosition, StringComparison.Ordinal);

                                //compare the attributes' positions
                                messageByte = ExtractBit(attributePosition, secondAttributePosition, messageByte, bitIndex, message);

                                //next bit
                                if (bitIndex == 7) {
                                    bitIndex = 0;

                                    if ((!lengthKnown) && (message.Length == 1)) {
                                        //the first byte is the length: read it and empty the stream again
                                        message.Position = 0;
                                        messageLength = message.ReadByte();
                                        lengthKnown = true;
                                        message.SetLength(0);
                                        message.Position = 0;
                                    }

                                    //a length of 0 is complete at once, instead of
                                    //taking the next byte for the length
                                    if (lengthKnown && (message.Length == messageLength)) {
                                        finished = true;
                                        break;
                                    }

                                } else {
                                    bitIndex++;
                                }

                                //mark both attributes as handled
                                attribute.Handled = true;
                                secondAttribute.Handled = true;
                            }
                        }

                        if (!attribute.Handled) {
                            rows = keyTable.Select(String.Format("secondAttribute = '{0}'", attribute.QueryFormattedName));
                            if (rows.Length == 0) {
                                //attribute is not part of the key
                                attribute.Handled = true;
                            }
                        }
                    }
                }

                if (finished) {
                    break;
                }
            }
        }

        /// <summary>Looks up the first attribute that has a specific name and has not been handled, yet</summary>
        /// <param name="name">Name of the attribute</param>
        /// <param name="attributes">Attributes of a tag</param>
        /// <returns>The attribute found in [attributes], or null if there is no unhandled attribute with that name</returns>
        private HtmlAttribute FindAttribute(String name, HtmlAttributeCollection attributes) {
            foreach (HtmlAttribute candidate in attributes) {
                if (candidate.Handled) {
                    //already used or copied
                    continue;
                }
                if (candidate.Name == name) {
                    return candidate;
                }
            }
            return null;
        }

        /// <summary>List all opening HTML tags of a document</summary>
        /// <remarks>
        /// Everything from a '&lt;' up to (but not including) the next '&gt;' counts as a tag,
        /// unless the '&lt;' is followed by '/'. The search stops at a '&lt;' without a closing '&gt;'.
        /// </remarks>
        /// <param name="htmlDocument">Text of the HTML document</param>
        /// <returns>List with one HtmlTag per tag found, created from the tag's text, the index of its '&lt;' and the index of its '&gt;'</returns>
        private HtmlTagCollection FindTags(String htmlDocument) {
            HtmlTagCollection tags = new HtmlTagCollection();
            int searchFrom = 0; //a tag may begin with the very first character

            while (searchFrom < htmlDocument.Length) {
                int indexStart = htmlDocument.IndexOf('<', searchFrom);
                if (indexStart < 0) {
                    break; //no more tags
                }

                int indexEnd = htmlDocument.IndexOf('>', indexStart + 1);
                if (indexEnd < 0) {
                    //unterminated tag: stop here, otherwise the search would start over
                    //at the beginning of the document again and again
                    break;
                }

                //indexStart + 1 is a valid index, because indexEnd lies behind indexStart
                if (htmlDocument[indexStart + 1] != '/') {
                    //found the end of an opening tag
                    String text = htmlDocument.Substring(indexStart, indexEnd - indexStart);
                    tags.Add(new HtmlTag(text, indexStart, indexEnd));
                }

                searchFrom = indexEnd + 1;
            }

            return tags;
        }

        /// <summary>Tests whether a bit of a byte value is set</summary>
        /// <param name="b">The byte value</param>
        /// <param name="position">The position (0-7) of the bit; bit 0 is the lowest bit</param>
        /// <returns>true if the bit is set, false if it is not set</returns>
        private bool GetBit(int b, int position) {
            //the mask is cut down to one byte, so only the lowest eight bits can be tested
            byte mask = (byte)(1 << position);
            int maskedValue = b & mask;
            return maskedValue != 0;
        }

        /// <summary>Returns a copy of a byte with one bit set or cleared</summary>
        /// <param name="b">The byte value</param>
        /// <param name="position">The position (0-7) of the bit; bit 0 is the lowest bit</param>
        /// <param name="newBitValue">true to set the bit in position [position], false to clear it</param>
        /// <returns>The new byte value</returns>
        private byte SetBit(byte b, int position, bool newBitValue) {
            int mask = (byte)(1 << position);
            int changedValue = newBitValue
                ? (b | mask)    //switch the bit on
                : (b & ~mask);  //switch the bit off
            return (byte)changedValue;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer
Germany
Corinna lives in Hanover/Germany and works as a C# developer.

Comments and Discussions