Click here to Skip to main content
Click here to Skip to main content

utf8::ostream

By c-smile, 27 Feb 2004
 

Introduction

The proposed template class is a lightweight and fast UTF-8 generator: an output stream. Its main purpose is to provide an easy way to generate XML/HTML "on the fly".

Example:

// Build a small HTML document in memory and hand it to the viewer.
// Narrow literals are written verbatim (markup); wide literals are UTF-8
// encoded and, with oxstream, XML-escaped (text content).
html::viewer hv;
utf8::oxstream out; 
out << "<HTML>"
    << "<TITLE>" << L"Hello world!" << "</TITLE>"
    << "<BODY>" << L"Hello world!" << "</BODY>"
    << "</HTML>";   // closing tag: the document must end with </HTML>
hv.show(out.data());

Implementation

The main class is a template declared as

template <class T, bool X> class ostream_t : public T {}

Here T is a class that implements the output (write) buffer.

X is a boolean parameter. If it is true, the stream escapes characters that have a special meaning in XML/HTML, e.g. '<', '>', '&'.

Class T is an output buffer and must provide two write methods:

  void push(unsigned char c);
  void push(const unsigned char *pc, size_t sz);

For implementation details, see the source of the byte_buffer class below.

Source Code

The source code is quite compact; feel free to copy it from here and paste it wherever you need it:

//
// This file is part of 
// Terra Informatica Lightweight Embeddable HTMEngine control SDK
// Created by Andrew Fedoniouk @ TerraInformatica.com
//
namespace aux 
{
  // byte_buffer is an in-memory, growable byte buffer.
  //
  // It owns a heap array and appends bytes at the end, growing the array
  // (at least doubling it) whenever the pending write does not fit.
  // Copying a byte_buffer makes an independent deep copy of the bytes.
  class byte_buffer 
  {
    unsigned char*  _body;       // heap storage, _allocated bytes long
    size_t          _allocated;  // capacity of _body in bytes
    size_t          _size;       // number of bytes currently in use

    // Makes sure at least `size` more bytes fit after the current content
    // and returns a pointer to the first free byte. _size is not changed;
    // the caller bumps it after writing.
    unsigned char *reserve(size_t size)
    {
      const size_t needed = _size + size;
      if( needed > _allocated ) 
      {
        size_t capacity = _allocated * 2;
        if( capacity < needed ) 
           capacity = needed;
        // Allocate first and commit afterwards, so a throwing `new`
        // leaves the buffer (and its recorded capacity) untouched.
        unsigned char *newbody = new unsigned char[capacity];
        memcpy(newbody,_body,_size);
        delete[] _body;
        _body = newbody;
        _allocated = capacity;
      }
      return _body + _size;
    }  
   public:    
    // Creates an empty buffer with an initial capacity of 256 bytes.
    byte_buffer()
      : _body(new unsigned char[256]), _allocated(256), _size(0)
    {}

    // Deep copy: the new buffer gets its own storage, so destroying one
    // copy never frees memory still used by the other.
    byte_buffer(const byte_buffer& other)
      : _body(new unsigned char[other._allocated]),
        _allocated(other._allocated),
        _size(other._size)
    {
      memcpy(_body, other._body, other._size);
    }

    // Deep-copy assignment; self-assignment is a no-op. The new storage
    // is allocated before the old one is released.
    byte_buffer& operator=(const byte_buffer& other)
    {
      if( this != &other )
      {
        unsigned char *fresh = new unsigned char[other._allocated];
        memcpy(fresh, other._body, other._size);
        delete[] _body;
        _body      = fresh;
        _allocated = other._allocated;
        _size      = other._size;
      }
      return *this;
    }

    ~byte_buffer()                { delete[] _body;  }    

    // Returns the content followed by a terminating '\0' byte. The
    // terminator is not counted by length(). The pointer stays valid
    // only until the next push().
    const unsigned char * data()  {  
             if(_size == _allocated) reserve(1); 
             _body[_size] = '\0'; return _body; 
    }    

    // Number of bytes pushed so far.
    size_t length() const         { return _size; }    

    // Appends a single byte.
    void push(unsigned char c)    { *reserve(1) = c; ++_size; }

    // Appends `sz` bytes starting at `pc`. An empty range is ignored,
    // which also keeps a null `pc` away from memcpy.
    void push(const unsigned char *pc, size_t sz) 
    {
      if( sz == 0 ) return;
      memcpy(reserve(sz),pc,sz); 
      _size += sz; 
    }  
  };
}
namespace utf8
{
  // UTF-8 output stream.
  //
  // class T is the output buffer and must provide two methods:
  //   void push(unsigned char c)
  //   void push(const unsigned char *pc, size_t sz)
  // bool X - true  - wide-character output is XML-escaped
  //                  ('<', '>', '&', '"' and '\'' become entities);
  //          false - no conversion at all.
  //
  // Narrow (const char*) strings are always written verbatim; they are
  // meant for 7-bit ASCII markup. Wide strings are encoded as UTF-8.
  template <class T, bool X = true>
  class ostream_t : public T
  {
    // Writes the XML entity for `cp` if it is one of the five markup
    // characters. Returns true when an entity was written.
    bool put_escaped(unsigned int cp)
    {
      switch(cp) 
      {
        case '<':  *this << "&lt;";   return true;
        case '>':  *this << "&gt;";   return true;
        case '&':  *this << "&amp;";  return true;
        case '"':  *this << "&quot;"; return true;
        case '\'': *this << "&apos;"; return true;
      }
      return false;
    }

    // Appends the UTF-8 encoding of `cp`: 1 to 4 bytes depending on its
    // magnitude. Values of 2^21 and above are dropped.
    void put_utf8(unsigned int cp)
    {
      if (cp < (1u << 7)) {
        T::push(static_cast<unsigned char>(cp));
      } else if (cp < (1u << 11)) {
        T::push(static_cast<unsigned char>((cp >> 6) | 0xc0));
        T::push(static_cast<unsigned char>((cp & 0x3f) | 0x80));
      } else if (cp < (1u << 16)) {
        T::push(static_cast<unsigned char>((cp >> 12) | 0xe0));
        T::push(static_cast<unsigned char>(((cp >> 6) & 0x3f) | 0x80));
        T::push(static_cast<unsigned char>((cp & 0x3f) | 0x80));
      } else if (cp < (1u << 21)) {
        // Lead byte of a 4-byte sequence is 11110xxx, i.e. 0xf0.
        T::push(static_cast<unsigned char>((cp >> 18) | 0xf0));
        T::push(static_cast<unsigned char>(((cp >> 12) & 0x3f) | 0x80));
        T::push(static_cast<unsigned char>(((cp >> 6) & 0x3f) | 0x80));
        T::push(static_cast<unsigned char>((cp & 0x3f) | 0x80));
      }
    }

  public:
    // Starts the stream with the UTF-8 byte order mark.
    ostream_t()
    { 
      static const unsigned char BOM[] = { 0xEF, 0xBB, 0xBF };
      T::push(BOM, sizeof(BOM));
    }    

    // Writes `str` byte-for-byte, with no escaping and no re-encoding.
    // Intended for 7-bit ASCII markup. A null pointer writes nothing.
    ostream_t& operator << (const char* str) 
    { 
      if (str)
        T::push(reinterpret_cast<const unsigned char*>(str), strlen(str));
      return *this; 
    }    

    // Writes `wstr` as UTF-8; use this for text values. When X is true
    // the five XML markup characters are written as entities instead.
    // A null pointer writes nothing.
    ostream_t& operator << (const wchar_t* wstr)
    {
      if (!wstr)
        return *this;
      for (const wchar_t *pc = wstr; *pc; ++pc)
      {
        unsigned int cp = static_cast<unsigned int>(*pc);
        // A UTF-16 surrogate pair (wide strings with 16-bit wchar_t)
        // stands for one code point above U+FFFF and must become a
        // single 4-byte sequence. pc[1] is safe to read: *pc is not the
        // terminator, so pc[1] is at worst the terminator itself.
        // An unpaired surrogate is left as is and encoded on its own.
        if (cp >= 0xD800 && cp <= 0xDBFF)
        {
          const unsigned int low = static_cast<unsigned int>(pc[1]);
          if (low >= 0xDC00 && low <= 0xDFFF)
          {
            cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
            ++pc;
          }
        }
        if (X && put_escaped(cp))
          continue;
        put_utf8(cp);
      }
      return *this;
    }
  };  
 // Raw ASCII/UNICODE -> UTF-8 converter: ostream_t over aux::byte_buffer
 // with X = false, i.e. wide strings are encoded but never XML-escaped.
  typedef ostream_t<aux::byte_buffer,false> ostream;
 // ASCII/UNICODE -> UTF-8 converter with XML support: same buffer with
 // X = true, i.e. '<', '>', '&', '"' and '\'' in wide strings are written
 // as entities.
  typedef ostream_t<aux::byte_buffer,true> oxstream;
}

We use this code in the HTMEngine SDK to create HTML dialogs and popup windows. We hope it will be useful in other places where you need to generate XML/HTML dynamically.

License

This article, along with any associated source code and files, is licensed under The BSD License

About the Author

c-smile
Founder Terra Informatica Software
Canada Canada
Member
Andrew Fedoniouk.
 
MS in Physics and Applied Mathematics.
Designing software applications and systems since 1991.
 
W3C HTML5 Working Group, Invited Expert.
 
Terra Informatica Software, Inc.
http://terrainformatica.com

Sign Up to vote   Poor Excellent
Add a reason or comment to your vote: x
Votes of 3 or less require a comment

Comments and Discussions

 
Hint: For improved responsiveness ensure Javascript is enabled and choose 'Normal' from the Layout dropdown and hit 'Update'.
You must Sign In to use this message board.
Search this forum  
    Spacing  Noise  Layout  Per page   
QuestionVice Versa?memberAndrea Ricchetti5 May '09 - 4:24 
AnswerRe: Vice Versa?memberc-smile5 May '09 - 6:23 
GeneralRe: Vice Versa?memberAndrea Ricchetti7 May '09 - 21:56 
Questionbug?membercrashnation4 Dec '05 - 17:28 
AnswerRe: bug?memberc-smile4 Dec '05 - 18:14 
Yes, it is a bug,
 
It will arise though only if wchar_t is 4 bytes long (e.g. under GCC).
On Windows, where wchar_t is 2 bytes long (wide strings in Windows are UTF16 encoded) this branch will never be executed.
 
Thus the whole function shall look like this:
 
// Writes a wide string as UTF-8. When X is true, the five XML markup
// characters are written as entities instead of being encoded.
ostream_t& operator << (const wchar_t* wstr)
    {
      const wchar_t *pc = wstr;
      for(unsigned int c = *pc; c ; c = *(++pc)) 
      {
        if(X)
          switch(c) 
          {
              // `continue` skips the encoder below and advances the loop.
              case '<': *this << "&lt;"; continue;
              case '>': *this << "&gt;"; continue;
              case '&': *this << "&amp;"; continue;
              case '"': *this << "&quot;"; continue;
              case '\'': *this << "&apos;"; continue;
          }
        if (c < (1 << 7)) {
         T::push (c);
        } else if (c < (1 << 11)) {
         T::push ((c >> 6) | 0xc0);
         T::push ((c & 0x3f) | 0x80);
        } else if (c < (1 << 16)) {
         T::push ((c >> 12) | 0xe0);
         T::push (((c >> 6) & 0x3f) | 0x80);
         T::push ((c & 0x3f) | 0x80);
        } else if (c < (1 << 21)) {
         // 4-byte sequence: the lead byte is 0xf0, not 0xe0.
         T::push ((c >> 18) | 0xf0);
         T::push (((c >> 12) & 0x3f) | 0x80);
         T::push (((c >> 6) & 0x3f) | 0x80);
         T::push ((c & 0x3f) | 0x80);
        }
      }
      return *this;
    }
 
Thanks for mentioning it.
GeneralHellomemberMahyar Es.9 Apr '05 - 7:36 
QuestionTyping error?memberAndreechtchev Eugeni18 Aug '04 - 13:20 
AnswerRe: Typing error?memberc-smile20 Aug '04 - 14:16 
GeneralUtf8 and XmlmemberSimon Steele16 Feb '04 - 11:29 
GeneralRe: Utf8 and Xmlmemberc-smile16 Feb '04 - 12:23 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Rant Rant    Admin Admin   

Permalink | Advertise | Privacy | Mobile
Web03 | 2.6.130516.1 | Last Updated 28 Feb 2004
Article Copyright 2004 by c-smile
Everything else Copyright © CodeProject, 1999-2013
Terms of Use
Layout: fixed | fluid