65.9K
CodeProject is changing. Read more.
Home

utf8::ostream

starIconstarIconstarIcon
emptyStarIcon
starIcon
emptyStarIcon

3.91/5 (10 votes)

Feb 15, 2004

BSD
viewsIcon

70353

Lightweight utf8 generator

Introduction

The proposed template class is a lightweight and fast UTF-8 generator implemented as an output stream. Its main purpose is to provide an easy way to generate XML/HTML "on the fly".

Example:

html::viewer hv;
utf8::oxstream out;
// Narrow (char) literals are markup and are written to the stream as-is;
// wide (wchar_t) literals are text values, which oxstream XML-escapes and
// encodes as UTF-8.
out << "<HTML>"
    << "<TITLE>" << L"Hello world!" << "</TITLE>"
    << "<BODY>" << L"Hello world!" << "</BODY>"
    << "</HTML>";   // close the document (was a second opening tag)
// data() returns the accumulated bytes, zero-terminated.
hv.show(out.data());

Implementation

The main class is a template declared as:

template <class T, bool X> class ostream_t : public T {}

Where T is a class that implements the output (write) buffer.

And X is a boolean parameter. If it is true, the stream converts characters that have a special XML/HTML meaning, e.g. '<', '>', etc., into the corresponding entities.

Class T is an output buffer and must provide implementations of two write methods:

  void push(unsigned char c);
  void push(const unsigned char *pc, size_t sz);

For implementation details, see the source of the byte_buffer class below.

Source Code

The source code is pretty compact, and you are free to grab it from here and paste it anywhere you want:

//
// This file is part of 
// Terra Informatica Lightweight Embeddable HTMEngine control SDK
// Created by Andrew Fedoniouk @ TerraInformatica.com
//
namespace aux 
{
  // byte_buffer class is an in-memory dynamic buffer implementation.
  // byte_buffer is an in-memory, growable byte buffer.
  //
  // Bytes are appended with push(); data() exposes them as a zero-terminated
  // array and length() reports how many bytes were pushed (terminator
  // excluded). The buffer owns its storage: copies are deep, so two
  // byte_buffer objects never share (and never double-delete) one array.
  class byte_buffer 
  {
    unsigned char*  _body;       // owned storage, _allocated bytes long
    size_t          _allocated;  // capacity of _body in bytes
    size_t          _size;       // number of bytes currently stored

    // Makes room for `size` more bytes and returns the position where they
    // have to be written. Does not change _size.
    unsigned char *reserve(size_t size)
    {
      const size_t needed = _size + size;
      if( needed > _allocated ) 
      {
        // Grow geometrically, but at least to what is needed right now.
        size_t capacity = _allocated * 2;
        if(capacity < needed) 
           capacity = needed;
        // Allocate first and commit afterwards: if new[] throws, the
        // members still describe the old, valid buffer.
        unsigned char *newbody = new unsigned char[capacity];
        memcpy(newbody,_body,_size);
        delete[] _body;
        _body = newbody;
        _allocated = capacity;
      }
      return _body + _size;
    }  
   public:    
    // Creates an empty buffer with an initial capacity of 256 bytes.
    byte_buffer()
      : _body(new unsigned char[256]), _allocated(256), _size(0) {}

    // Deep copy: the new object gets its own storage.
    byte_buffer(const byte_buffer& other)
      : _body(new unsigned char[other._allocated]),
        _allocated(other._allocated),
        _size(other._size)
    {
      memcpy(_body, other._body, _size);
    }

    // Deep copy assignment. The replacement storage is allocated before the
    // old one is released, so a failed allocation leaves *this untouched.
    byte_buffer& operator=(const byte_buffer& other)
    {
      if( this != &other )
      {
        unsigned char *fresh = new unsigned char[other._allocated];
        memcpy(fresh, other._body, other._size);
        delete[] _body;
        _body = fresh;
        _allocated = other._allocated;
        _size = other._size;
      }
      return *this;
    }

    ~byte_buffer()                { delete[] _body;  }    

    // Returns the stored bytes followed by a '\0' terminator. The pointer
    // is only valid until the next push() or until the buffer is destroyed.
    const unsigned char * data()  {  
             if(_size == _allocated) reserve(1); // room for the terminator
             _body[_size] = '\0'; return _body; 
    }    

    // Number of bytes pushed so far (the terminator is not counted).
    size_t length() const         { return _size; }    

    // Appends a single byte.
    void push(unsigned char c)    { *reserve(1) = c; ++_size; }

    // Appends sz bytes starting at pc. pc must not point into this buffer.
    void push(const unsigned char *pc, size_t sz) 
    {
      if( sz == 0 ) return;       // nothing to copy; pc may even be null
      memcpy(reserve(sz),pc,sz);
      _size += sz;
    }  
  };
}
namespace utf8
{
  // UTF-8 output stream.
  // Class T must provide two methods:
  //   void push(unsigned char c)
  //   void push(const unsigned char *pc, size_t sz)
  // bool X - true  - XML markup characters ('<', '>', '&', '"', '\'')
  //                  found in wide strings are replaced by entities.
  //          false - no conversion at all.
template <class T, bool X = true>
  // UTF-8 generator layered on top of a byte sink T.
  //
  // T must provide
  //   void push(unsigned char c)
  //   void push(const unsigned char *pc, size_t sz)
  // When X is true, the characters < > & " ' found in wide strings are
  // written as XML entities; narrow strings are never converted.
  // The constructor writes the UTF-8 byte order mark into the sink.
  class ostream_t : public T
  {
  public:
    ostream_t()
    { 
      // utf8 byte order mark
      static const unsigned char BOM[] = { 0xEF, 0xBB, 0xBF };
      T::push(BOM, sizeof(BOM));
    }    

    // Writes a narrow string byte-for-byte (no encoding, no escaping).
    // Intended for ascii-7 markup output. A null pointer is ignored.
    ostream_t& operator << (const char* str) 
    { 
      if (str)
        T::push((const unsigned char*)str, strlen(str));
      return *this; 
    }    

    // Writes a wide string as UTF-8; use this for value output.
    // - With X == true, XML markup characters are replaced by entities.
    // - A UTF-16 surrogate pair (16-bit wchar_t) is combined into a single
    //   code point and written as one 4-byte sequence.
    // - Unpaired surrogates and values above U+10FFFF cannot be represented
    //   in UTF-8 and are written as U+FFFD (replacement character).
    // A null pointer is ignored.
    ostream_t& operator << (const wchar_t* wstr)
    {
      if (!wstr)
        return *this;

      for (const wchar_t *pc = wstr; *pc; ++pc)
      {
        unsigned int c = static_cast<unsigned int>(*pc);

        if (X)
        {
          const char *entity = 0;
          switch (c)
          {
              case '<':  entity = "&lt;";   break;
              case '>':  entity = "&gt;";   break;
              case '&':  entity = "&amp;";  break;
              case '"':  entity = "&quot;"; break;
              case '\'': entity = "&apos;"; break;
          }
          if (entity)
          {
            *this << entity;
            continue;
          }
        }

        if (c >= 0xD800u && c <= 0xDBFFu)
        {
          // High surrogate: pc[1] is at worst the terminating zero, so
          // peeking at it is always in bounds.
          const unsigned int low = static_cast<unsigned int>(pc[1]);
          if (low >= 0xDC00u && low <= 0xDFFFu)
          {
            c = 0x10000u + ((c - 0xD800u) << 10) + (low - 0xDC00u);
            ++pc;                       // the low surrogate is consumed
          }
          else
            c = 0xFFFDu;                // unpaired high surrogate
        }
        else if ((c >= 0xDC00u && c <= 0xDFFFu) || c > 0x10FFFFu)
        {
          c = 0xFFFDu;                  // stray low surrogate / out of range
        }

        utf8_encode_(c);
      }
      return *this;
    }

  private:
    // Pushes the UTF-8 encoding of a single code point.
    // Precondition: cp <= 0x10FFFF.
    void utf8_encode_(unsigned int cp)
    {
      if (cp < 0x80u) {
        T::push(static_cast<unsigned char>(cp));
      } else if (cp < 0x800u) {
        T::push(static_cast<unsigned char>(0xC0u | (cp >> 6)));
        T::push(static_cast<unsigned char>(0x80u | (cp & 0x3Fu)));
      } else if (cp < 0x10000u) {
        T::push(static_cast<unsigned char>(0xE0u | (cp >> 12)));
        T::push(static_cast<unsigned char>(0x80u | ((cp >> 6) & 0x3Fu)));
        T::push(static_cast<unsigned char>(0x80u | (cp & 0x3Fu)));
      } else {
        // Four-byte sequences start with 11110xxx, i.e. 0xF0 (not 0xE0).
        T::push(static_cast<unsigned char>(0xF0u | (cp >> 18)));
        T::push(static_cast<unsigned char>(0x80u | ((cp >> 12) & 0x3Fu)));
        T::push(static_cast<unsigned char>(0x80u | ((cp >> 6) & 0x3Fu)));
        T::push(static_cast<unsigned char>(0x80u | (cp & 0x3Fu)));
      }
    }
  };  
 // Raw ASCII/UNICODE -> UTF-8 converter writing into an aux::byte_buffer.
 // X is false, so wide strings are encoded without XML character conversion.
  typedef ostream_t<aux::byte_buffer,false> ostream;
 // ASCII/UNICODE -> UTF-8 converter with XML support, writing into an
 // aux::byte_buffer. X is true, so markup characters in wide strings are
 // replaced by entities.
  typedef ostream_t<aux::byte_buffer,true> oxstream;
}

We are using this code in the HTMEngine SDK to create HTML dialogs and popup windows. We hope it will be useful in other places where you need dynamic XML/HTML creation.