Click here to Skip to main content
15,881,803 members
Articles / Programming Languages / SQL

Generating Keys for a Custom Sort Order

Rate me:
Please Sign up or sign in to vote.
4.90/5 (14 votes)
7 Sep 2012CPOL10 min read 34.9K   154   15  
ORDER BY what-the-user-said
#include "StdAfx.h"
#include "SortOrder.h"
#include <math.h>


PHLib::CSortOrder::CSortOrder(char minChar, char maxChar) : m_minChar(minChar), m_maxChar(maxChar)
{
   _ASSERTE(maxChar > minChar);
   _ASSERTE(maxChar - minChar > 2);
}

// ----- IsValid ----------
/// checks if \c value is a valid Sort Order Key
bool PHLib::CSortOrder::IsValid( CStringA const & value ) const
{
   // must not be empty
   if (!value.GetLength())    // must not be empty
      return false;

   // characters must be within range
   for(int i=0; i<value.GetLength(); ++i)
      if (value[i] < m_minChar || value[i] > m_maxChar)
         return false;

   // must not end with MinChar (because there is no value between "x" and "xa")
   if (value[value.GetLength()-1] == m_minChar)
      return false;
   return true;
}

// ----- Before ----------
/// returns a sort order key that compares less than \c value
CStringA PHLib::CSortOrder::Before( CStringA const & value ) const
{
   // Find a value that is smaller than 'value':

   _ASSERTE(IsValid(value));
   CStringA result = value;

   unsigned at = value.GetLength()-1;
   char ch = value[at];

   // decrement last char, but don't end with MinChar
   // e.g. "abc" to "abb"
   if (ch > m_minChar+1)
   {
      --ch;
      result.SetAt(at, ch);
      return result;
   }

   // subtract with carry
   // e.g. "xbab" to "xaaz"
   while (at > 0)
   {
      result.SetAt(at, m_maxChar);
      --at;
      ch = value[at];
      if (ch > m_minChar)
      {
         --ch;
         result.SetAt(at, ch);
         return result;
      }
   }


   // if this would underflow, we expand the string instead, 
   // e.g. "aab" to "aaazz".
   // 
   // adding one z would be enough, but the string length would grow excessively
   // if we insert sequentially at the beginning. 
   // N inserts at the beginning would grow string length by ca. 
   // N / (MaxChar-MinChar). 
   // ExpandBy constant controls how many get appended making the string length grow by 
   // ExpandBy * N / (MaxChar-MinChar) ** ExpandBy
   // e.g. for MaxChar-MinChar == 25 ('a'..'z'), and N = 10000, this would be:
   // ExpandBy = 1: Grow By 400
   // ExpandBy = 2: Grow By 32
   // ExpandBy = 3: Grow By 3
   result = CStringA(m_minChar, value.GetLength());
   return Expand(result, m_maxChar);
}

// ----- After ----------
/// returns a sort order key that compares greater than \c value
CStringA PHLib::CSortOrder::After( CStringA const & value ) const
{
   _ASSERTE(IsValid(value));
   CStringA result = value;

   int at = value.GetLength()-1;
   char minCharToUse = m_minChar+1; // last char must be 'b', others can be 'a'

   while (at >= 0)
   {
      char ch = value.GetAt(at);
      if (ch < m_maxChar)
      {
         result.SetAt(at, ch+1);
         return result;
      }

      result.SetAt(at, minCharToUse);
      minCharToUse = 'a'; // next time..
      --at;
   }

   // we started with a string like "zzz", 
   // we need to transform this t "zzzab".
   // Why append "ab"? We can't append "aa" because we can't end in 'a'.
   // Why append 'a' twice? See respective comment in Before()
   result = CStringA(m_maxChar, value.GetLength());
   return Expand(result, m_minChar);
}

// ----- Between ----------
/// returns a sort order key that compares less than \c a and greater than \c b
CStringA PHLib::CSortOrder::Between( CStringA const & a, CStringA const & b ) const
{
   if (a.GetLength() == 0)
   {
      _ASSERTE(IsValid(b));
      return Before(b);
   }

   if (b.GetLength() == 0)
   {
      _ASSERTE(IsValid(a));
      return After(a);
   }

   _ASSERTE(IsValid(a) && IsValid(b));
   _ASSERTE(a < b);

   int minLen = min(a.GetLength(), b.GetLength());

   int at = 0;
   while (at<minLen && a[at] == b[at])
      ++at;

   CStringA result;
   if (at < minLen && (int)b[at] - a[at] >= 2)
   {
      result = a;
      char avg = ((int)a[at]+b[at]+1) / 2;
      result.SetAt(at, avg);
      result.Truncate(at+1);
      return result;
   }

   if (a.GetLength() > b.GetLength())
   {
      result = After(a);
      if (result < b)
         return result;
   }

   if (b.GetLength() > a.GetLength())
   {
      result = Before(b);
      if (result > a)
         return result;
   }

   result = a;
   while (result.GetLength() < b.GetLength())
   {
      char ch = b.GetAt(result.GetLength());
      if (ch > m_minChar+1)   // never taken?!
      {
         result += (char)(((int) m_minChar + ch + 1) / 2);
         return result;
      }

      result += m_minChar;
   }

   while (result.GetLength() < a.GetLength())
   {
      char ch = a.GetAt(result.GetLength());
      if (ch < m_maxChar-1) // never taken?!
      {
         result += (char)(((int) m_maxChar + ch + 1) / 2);
         return result;
      }
      result += m_maxChar;
   }

   char AvgChar = ((int) m_minChar + m_maxChar + 1) / 2;
   return Expand(result, AvgChar);
   return result;
}


CStringA PHLib::CSortOrder::Expand(CStringA const & value, char ch) const
{
   const int ExpandBy = 3;

   CStringA result = value;
   for(int i=0; i<ExpandBy-1; ++i)
      result += ch;

   // Make sure we don't end in MinChar:
   if (ch == m_minChar)
      ++ch;
   result += ch; 

   return result;
}

// ----- Initial ----------
/** returns the first sort key for a sequence of \c count items
    Consecutive sort keys can be generated by calling \ref After.
    \c count only needs to be an estimate
*/
CStringA PHLib::CSortOrder::Initial(int count) const
{
   if (count < 2)
      return CStringA((char)(m_minChar+1), 1);

   int len = (int)(log((double)count+1) / log((double)m_maxChar - m_minChar + 1)); // rounding down
   return CStringA(m_minChar, len) + m_minChar;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Klippel
Germany Germany
Peter is tired of being called "Mr. Chen", even so certain individuals insist on it. No, he's not chinese.

Peter has seen lots of boxes you youngsters wouldn't even accept as calculators. He is proud of having visited the insides of a 16 Bit Machine.

In his spare time he ponders new ways of turning groceries into biohazards, or tries to coax South American officials to add some stamps to his passport.

Beyond these trivialities Peter works for Klippel[^], a small german company that wants to make mankind happier by selling them novel loudspeaker measurement equipment.


Where are you from?[^]



Please, if you are using one of my articles for anything, just leave me a comment. Seeing that this stuff is actually useful to someone is what keeps me posting and updating them.
Should you happen to not like it, tell me, too

Comments and Discussions