Click here to Skip to main content
15,885,771 members
Articles / Desktop Programming / MFC

A Java Language IDE

Rate me:
Please Sign up or sign in to vote.
4.33/5 (26 votes)
13 May 2004CPOL3 min read 80.4K   3.4K   41  
This is a partially implemented IDE for the Java platform.
///////////////////////////////////////////////////////////////////////////
//  File:    regexp.cpp
//  Version: 1.1.0.4
//  Updated: 19-Jul-1998
//
//  Copyright:  Marko Macek
//  E-mail:     Marko.Macek@gmx.net or mark@hermes.si
//
//  Some handy stuff to deal with regular expressions
//
//  You are free to use or modify this code to the following restrictions:
//  - Acknowledge me somewhere in your about box, simple "Parts of code by.."
//  will be enough. If you can't (or don't want to), contact me personally.
//  - LEAVE THIS HEADER INTACT
////////////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "regexp.h"

//#define DEBUG

static int RegCount = 0;

#ifdef DEBUG
static void RxDump(int N, RxNode *n);
#endif

static  RxNode *NewNode(int aWhat) {
    RxNode *N = (RxNode *) malloc(sizeof(RxNode));

    if (N) {
        memset(N, 0, sizeof(RxNode));
        N->fWhat = (short)aWhat;
    }
    return N;
}

static RxNode *NewChar(TCHAR Ch) {
    RxNode *A = NewNode(RE_CHAR);

    if (A) {
        A->fChar = (LPTSTR) malloc(1);
        A->fLen = 1;
        A->fChar[0] = Ch;
    }
    return A;
}

static RxNode *NewEscape(LPCTSTR *const Regexp) {
    TCHAR Ch = **Regexp;
    ++*Regexp;
    switch (Ch) {
      case 0: return 0;
      case _T('a'): Ch = _T('\a'); break;
      case _T('b'): Ch = _T('\b'); break;
      case _T('f'): Ch = _T('\f'); break;
      case _T('n'): Ch = _T('\n'); break;
      case _T('r'): Ch = _T('\r'); break;
      case _T('t'): Ch = _T('\t'); break;
      case _T('v'): Ch = _T('\v'); break;
      case _T('e'): Ch = 27; break;
      case _T('s'): return NewNode(RE_WSPACE);
      case _T('S'): return NewNode(RE_NWSPACE);
      case _T('U'): return NewNode(RE_UPPER);
      case _T('L'): return NewNode(RE_LOWER);
      case _T('w'): return NewNode(RE_WORD);
      case _T('W'): return NewNode(RE_NWORD);
      case _T('d'): return NewNode(RE_DIGIT);
      case _T('D'): return NewNode(RE_NDIGIT);
      case _T('C'): return NewNode(RE_CASE);
      case _T('c'): return NewNode(RE_NCASE);
      case _T('N'):
        {
            unsigned int N = 0;
            unsigned int A = 0;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) return 0;
            (*Regexp)++;
            A = N * 100;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) return 0;
            (*Regexp)++;
            A = A + N * 10;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) return 0;
            (*Regexp)++;
            A = A + N;
            Ch = (TCHAR) A;
        }
        break;
    case _T('o'):
        {
            unsigned int N = 0;
            unsigned int A = 0;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 7) return 0;
            (*Regexp)++;
            A = N * 64;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 7) return 0;
            (*Regexp)++;
            A = A + N * 8;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 7) return 0;
            (*Regexp)++;
            A = A + N;
            Ch = (TCHAR) A;
        }
        break;
    case _T('x'):
        {
            unsigned int N = 0;
            unsigned int A = 0;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
            (*Regexp)++;
            A = N << 4;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
            (*Regexp)++;
            A = A + N;
            Ch = (TCHAR) A;
        }
        break;
    }
    return NewChar(Ch);
}


#define NNN 32        // 8 * 32 = 256 (match set)

#ifdef _UNICODE
#define SETOP(set,n) \
    do { \
      set[(TCHAR)(n) >> 3] |= (TCHAR)(1 << ((TCHAR)(n) & 7)); \
    } while (0)
#else // _UNICODE
#define SETOP(set,n) \
    do { \
      set[(unsigned char)(n) >> 3] |= (unsigned char)(1 << ((unsigned char)(n) & 7)); \
    } while (0)
#endif // _UNICODE

static RxNode *NewSet(LPCTSTR * const Regexp) {
#ifdef _UNICODE
    TCHAR set[NNN];
#else // _UNICODE
    unsigned char set[NNN];
#endif // _UNICODE
    int s = 0;
    int c = 0;
    unsigned int i, xx;
#ifdef _UNICODE
    TCHAR Ch, C1 = 0, C2 = 0;
#else // _UNICODE
    unsigned char Ch, C1 = 0, C2 = 0;
#endif // _UNICODE
    int doset = 0;

    memset(set, 0, sizeof(set));
    s = 1;
    if (**Regexp == _T('^')) {
        s = 0;
        ++*Regexp;
    }
    c = 0;

    while (**Regexp) {
        switch (Ch = *((*Regexp)++)) {
          case _T(']'):
            if (doset == 1) return 0;
            {
                RxNode *N = NewNode(s?RE_INSET:RE_NOTINSET);
                N->fChar = (LPTSTR) malloc(sizeof(set));
                N->fLen = sizeof(set);
                if (N->fChar == 0) return 0;
                memcpy(N->fChar, (LPTSTR) set, sizeof(set));
                return N;
            }
          case _T('\\'):
            switch (Ch = *((*Regexp)++)) {
              case 0: return 0;
              case _T('a'): Ch = _T('\a'); break;
              case _T('b'): Ch = _T('\b'); break;
              case _T('f'): Ch = _T('\f'); break;
              case _T('n'): Ch = _T('\n'); break;
              case _T('r'): Ch = _T('\r'); break;
              case _T('t'): Ch = _T('\t'); break;
              case _T('v'): Ch = _T('\v'); break;
              case _T('e'): Ch = 27; break;
              case _T('N'):
                  {
                      unsigned int N = 0;
                      unsigned int A = 0;
                      if (**Regexp == 0) return 0;
                      N = _totupper(**Regexp) - 48; if (N > 9) return 0;
                      (*Regexp)++;
                      A = N * 100;
                      if (**Regexp == 0) return 0;
                      N = _totupper(**Regexp) - 48; if (N > 9) return 0;
                      (*Regexp)++;
                      A = A + N * 10;
                      if (**Regexp == 0) return 0;
                      N = _totupper(**Regexp) - 48; if (N > 9) return 0;
                      (*Regexp)++;
                      A = A + N;
#ifdef _UNICODE
                      Ch = (TCHAR)A;
#else // _UNICODE
                      Ch = (unsigned char)A;
#endif // _UNICODE
                  }
                  break;
            case _T('o'):
                {
                    unsigned int N = 0;
                    unsigned int A = 0;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 7) return 0;
                    (*Regexp)++;
                    A = N * 64;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 7) return 0;
                    (*Regexp)++;
                    A = A + N * 8;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 7) return 0;
                    (*Regexp)++;
                    A = A + N;
#ifdef _UNICODE
                    Ch = (TCHAR)A;
#else // _UNICODE
                    Ch = (unsigned char)A;
#endif // _UNICODE
                }
                break;
            case _T('x'):
                {
                    unsigned int N = 0;
                    unsigned int A = 0;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    (*Regexp)++;
                    A = N << 4;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    (*Regexp)++;
                    A = A + N;
#ifdef _UNICODE
                    Ch = (TCHAR)A;
#else // _UNICODE
                    Ch = (unsigned char)A;
#endif // _UNICODE
                }
                break;
            case _T('s'):
                c += 4;
                SETOP(set, _T('\n'));
                SETOP(set, _T('\t'));
                SETOP(set, _T(' '));
                SETOP(set, _T('\r'));
                continue;
            case _T('S'):
                for (xx = 0; xx <= 255; xx++) {
                    if (xx != _T(' ') && xx != _T('\t') && xx != _T('\n') && xx != _T('\r')) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                continue;
            case _T('w'):
                for (xx = 0; xx <= 255; xx++) {
                    if (_istalnum(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('W'):
                for (xx = 0; xx <= 255; xx++) {
                    if (!isalnum(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('d'):
                for (xx = 0; xx <= 255; xx++) {
                    if (_istdigit(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('D'):
                for (xx = 0; xx <= 255; xx++) {
                    if (!_istdigit(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('U'):
                for (xx = _T('A'); xx <= _T('Z'); xx++) {
                    c++;
                    SETOP(set, xx);
                }
                continue;
            case _T('L'):
                for (xx = _T('a'); xx <= _T('z'); xx++) {
                    c++;
                    SETOP(set, xx);
                }
                continue;
            }
            break;
        }
        if (doset == 0 && ((**Regexp) == _T('-'))) {
            doset = 1;
            C1 = Ch;
            ++*Regexp;
            continue;
        } else if (doset == 1) {
            C2 = Ch;
            if (C2 < C1) return 0;
            for(i = C1; i <= C2; i++) SETOP(set, i);
            doset = 0;
            continue;
        }
        c++;
        SETOP(set, Ch);
    }
    return 0;
}

static int AddNode(RxNode **F, RxNode **N, RxNode *A) {
    if (A) {
        if (*F) {
            (*N)->fNext = A;
            A->fPrev = (*N);
            *N = A;
        } else {
            (*N) = (*F) = A;
            A->fPrev = A->fNext = 0;
        }
        return 1;
    }
    return 0;
}

static int CountWidth(RxNode *N) {
    int w = 0;

    while (N) {
        if (N->fWhat < 32) w += 0;
        else if (N->fWhat >= 32 && N->fWhat < 64)
            w += 1;
        N = N->fNext;
    }
    return w;
}

static int MakeSub(RxNode **F, RxNode **N, TCHAR What) {
    //_tprintf(_T("MakeSub: %c\n", What));
    if (*N) {
        RxNode *No;
        RxNode *New;
        RxNode *Jump, *Skip;
        RxNode *Last = (*N);

        if (Last->fWhat & RE_GROUP) {
            RxNode *P = Last->fPrev;
            int C = 1;

            while ((C > 0) && P) {
                //puts(_T("backtracking...-----"));
                //RxDump(0, P);
                if (P->fWhat & RE_GROUP) {
                    if (P->fWhat & RE_CLOSE) C++;
                    else C--;
                }
                Last = P;
                if (C == 0) break;
                P = P->fPrev;
            }
            //_tprintf(_T("P = %s, c = %d", P ? "ok":"null", C));
            if (C != 0) return 0;
        }
        assert(Last);
        if (What != _T('?') && What != _T('|'))
            if (CountWidth(Last) == 0) {
                //                puts(_T("FAILED count"));
                return 0;
            }
        switch (What) {
        case _T('?'):    /* BRANCH x NOTHING */
            New = NewNode(RE_BRANCH | RE_GREEDY | What);
            No = NewNode(RE_NOTHING);
            if (!New || !No) return 0;
            No->fPrev = *N;
            if (*N)
                (*N)->fNext = No;
            New->fNext = Last;
            New->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (New->fPrev) {
                New->fPrev->fNext = New;
            } else {
                *F = New;
            }
            New->fPtr = No;
            No->fPtr = New;
            *N = No;
            //puts(_T("BRANCH ?"));
            break;

        case _T('*'):
        case _T('@'):
            New = NewNode(RE_BRANCH | What | ((What == _T('*')) ? RE_GREEDY : 0));
            Jump = NewNode(RE_JUMP);
            No = NewNode(RE_NOTHING);

            if (!New || !No || !Jump) return 0;
            No->fPrev = Jump;
            Jump->fNext = No;
            Jump->fPrev = *N;
            if (*N)
                (*N)->fNext = Jump;
            New->fNext = Last;
            New->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (New->fPrev) {
                New->fPrev->fNext = New;
            } else {
                *F = New;
            }
            New->fPtr = No;
            No->fPtr = New;
            Jump->fPtr = New;
            *N = No;
            //puts(_T("BRANCH *"));
            break;

        case _T('#'):
        case _T('+'):
            New = NewNode(RE_BRANCH | What | ((What == _T('+')) ? RE_GREEDY : 0));
            Skip = NewNode(RE_JUMP);
            Jump = NewNode(RE_JUMP);
            No = NewNode(RE_NOTHING);

            if (!New || !No || !Jump) return 0;
            No->fPrev = Jump;
            Jump->fPrev = *N;
            Jump->fNext = No;

            Skip->fNext = New;
            New->fPrev = Skip;
            if (*N)
                (*N)->fNext = Jump;
            New->fNext = Last;
            Skip->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (Skip->fPrev) {
                Skip->fPrev->fNext = Skip;
            } else {
                *F = Skip;
            }
            New->fPtr = No;
            No->fPtr = New;
            Jump->fPtr = New;
            Skip->fPtr = Last;
            *N = No;
            //puts(_T("BRANCH +"));
            break;
        case _T('|'):
            New = NewNode(RE_BRANCH | RE_GREEDY | What);
            Jump = NewNode(RE_BREAK);
            No = NewNode(RE_NOTHING);

            if (!New || !No || !Jump) return 0;
            No->fPrev = Jump;
            Jump->fNext = No;
            Jump->fPrev = *N;
            if (*N)
                (*N)->fNext = Jump;
            New->fNext = Last;
            New->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (New->fPrev) {
                New->fPrev->fNext = New;
            } else {
                *F = New;
            }
            New->fPtr = No;
            No->fPtr = New;
            Jump->fPtr = New;
            *N = No;
            //puts(_T("BRANCH |"));
            break;
        }
        return 1;
    }
    return 0;
}

#define CHECK(n) do { if ((n) == 0) { return 0;} } while (0)

static RxNode *RxComp(LPCTSTR *Regexp) {
    RxNode *F = 0;
    RxNode *N = 0;
    int C;
    TCHAR Ch;

    while (**Regexp) {
        //        puts(*Regexp);
        switch (Ch = (*(*Regexp)++)) {
        case _T('?'):
        case _T('*'):
        case _T('+'):
        case _T('@'):
        case _T('#'):
        case _T('|'):
            CHECK(MakeSub(&F, &N, Ch));
            break;
        case _T('}'):
        case _T(')'):
            return F;
        case _T('{'):
            CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_OPEN)));
            CHECK(AddNode(&F, &N, RxComp(Regexp)));
            while (N->fNext) N = N->fNext;
            CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_CLOSE)));
            break;
        case _T('('):
            C = ++RegCount;
            CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_OPEN | RE_MEM | C)));
            CHECK(AddNode(&F, &N, RxComp(Regexp)));
            while (N->fNext) N = N->fNext;
            CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_CLOSE | RE_MEM | C)));
            break;
        case _T('\\'):CHECK(AddNode(&F, &N, NewEscape(Regexp)));     break;
        case _T('['): CHECK(AddNode(&F, &N, NewSet(Regexp)));        break;
        case _T('^'): CHECK(AddNode(&F, &N, NewNode(RE_ATBOL)));     break;
        case _T('$'): CHECK(AddNode(&F, &N, NewNode(RE_ATEOL)));     break;
        case _T('.'): CHECK(AddNode(&F, &N, NewNode(RE_ANY)));       break;
        case _T('<'): CHECK(AddNode(&F, &N, NewNode(RE_ATBOW)));     break;
        case _T('>'): CHECK(AddNode(&F, &N, NewNode(RE_ATEOW)));     break;
        default:
            --*Regexp;
            CHECK(AddNode(&F, &N, NewChar(**Regexp)));
            ++*Regexp;
            break;
        }
    }
    return F;
}

RxNode *RxOptimize(RxNode *rx) {
    return rx;
}

RxNode *RxCompile(LPCTSTR Regexp) {
    RxNode *n = 0, *x;
    if (Regexp == 0) return 0;
    RegCount = 0;
    n = RxComp(&Regexp);
    if (n == 0) return 0;
    n = RxOptimize(n);
    x = n;
    while (x->fNext) x = x->fNext;
    x->fNext = NewNode(RE_END);
    return n;
}

void RxFree(RxNode *n) {
    RxNode *p;

    while (n) {
        p = n;
        n = n->fNext;
        switch (p->fWhat) {
        case RE_INSET:
        case RE_NOTINSET:
        case RE_CHAR:
            free(p->fChar);
            break;
        default:
            break;
        }
        free(p);
    }
}

#define ChClass(x) (((((x) >= _T('A')) && ((x) <= _T('Z'))) || (((x) >= _T('a')) && ((x) <= _T('z'))) || (((x) >= _T('0')) && ((x) <= _T('9'))))?1:0)

static RxMatchRes *match;
static LPCTSTR bop;
static LPCTSTR eop;
static int flags = RX_CASE;
static LPCTSTR rex;

int RxMatch(RxNode *rx) {
    RxNode *n = rx;

    //_tprintf(_T(">>"));
    while (n) {
        //_tprintf(_T("%-50.50s\n", rex));
        //RxDump(1, n);
        switch (n->fWhat) {
        case RE_NOTHING:
            break;
        case RE_CASE:
            flags |= RX_CASE;
            break;
        case RE_NCASE:
            flags &= ~RX_CASE;
            break;
        case RE_ATBOL:
            if (rex != bop) return 0;
            break;
        case RE_ATEOL:
            if (rex != eop) return 0;
            break;
        case RE_ANY:
            if (rex == eop) return 0;
            rex++;
            break;
        case RE_WSPACE:
            if (rex == eop) return 0;
            if (*rex != _T(' ') && *rex != _T('\n') && *rex != _T('\r') && *rex != _T('\t')) return 0;
            rex++;
            break;
        case RE_NWSPACE:
            if (rex == eop) return 0;
            if (*rex == _T(' ') || *rex == _T('\n') || *rex == _T('\r') || *rex == _T('\t')) return 0;
            rex++;
            break;
        case RE_WORD:
            if (rex == eop) return 0;
            if (!_istalnum(*rex)) return 0;
            rex++;
            break;
        case RE_NWORD:
            if (rex == eop) return 0;
            if (_istalnum(*rex)) return 0;
            rex++;
            break;
        case RE_DIGIT:
            if (rex == eop) return 0;
            if (!_istdigit(*rex)) return 0;
            rex++;
            break;
        case RE_NDIGIT:
            if (rex == eop) return 0;
            if (_istdigit(*rex)) return 0;
            rex++;
            break;
        case RE_UPPER:
            if (rex == eop) return 0;
            if (!_istupper(*rex)) return 0;
            rex++;
            break;
        case RE_LOWER:
            if (rex == eop) return 0;
            if (!_istlower(*rex)) return 0;
            rex++;
            break;
        case RE_ATBOW:
            if (rex >= eop) return 0;
            if (rex > bop) {
                if ((ChClass(*rex) != 1) || (ChClass(*(rex-1)) != 0)) return 0;
            }
            break;
        case RE_ATEOW:
            if (rex <= bop) return 0;
            if (rex < eop) {
                if ((ChClass(*rex) != 0) || (ChClass(*(rex-1)) != 1)) return 0;
            }
            break;
        case RE_CHAR:
            if (rex == eop) return 0;
            if (flags & RX_CASE) {
                if (*n->fChar != *rex) return 0;
                if (memcmp(rex, n->fChar, n->fLen) != 0) return 0;
            } else {
                for (int i = 0; i < n->fLen; i++)
                    if (_totupper(rex[i]) != _totupper(n->fChar[i]))
                        return 0;
            }
            rex += n->fLen;
            break;
        case RE_INSET:
            if (rex == eop) return 0;
#ifdef _UNICODE
            if ((n->fChar[(TCHAR)(*rex) >> 3] & (1 << ((TCHAR)(*rex) & 7))) == 0) return 0;
#else // _UNICODE
            if ((n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) == 0) return 0;
#endif // _UNICODE
            rex++;
            break;
        case RE_NOTINSET:
            if (rex == eop) return 0;
#ifdef _UNICODE
            if (n->fChar[(TCHAR)(*rex) >> 3] & (1 << ((TCHAR)(*rex) & 7))) return 0;
#else // _UNICODE
            if (n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) return 0;
#endif // _UNICODE
            rex++;
            break;
        case RE_JUMP:
            n = n->fPtr;
            continue;
        case RE_END:
            return 1;
        case RE_BREAK:
            n = n->fNext;
            if (n->fNext == 0) break;
            n = n->fNext;
            if (n->fWhat & RE_BRANCH) {
                while ((n->fWhat & RE_BRANCH) && n->fPtr && ((n->fWhat & 0xFF) == _T('|')))
                    n = n->fPtr->fNext;
            }
            if (n->fWhat & RE_GROUP) {
                int C = 1;
                n = n->fNext;
                while ((C > 0) && n) {
                    if (n->fWhat & RE_GROUP) {
                        if (n->fWhat & RE_OPEN) C++;
                        else C--;
                    }
                    if (C == 0) break;
                    n = n->fNext;
                }
            }
            break;
        default:
            if (n->fWhat & RE_GROUP) {
                if (n->fWhat & RE_MEM) {
                    LPCTSTR save = rex;
                    int b = n->fWhat & 0xFF;
                    int fl = flags;

                    if (RxMatch(n->fNext) == 0) {
                        flags = fl;
                        if (n->fWhat & RE_OPEN)
                            match->Open[b] = -1;
                        else
                            match->Close[b] = -1;
                        return 0;
                    }

                    if (n->fWhat & RE_OPEN) {
                        //                        if (match->Open[b] == -1)
                        match->Open[b] = (int) (save - bop);
                    } else {
                        //                        if (match->Close[b] == -1)
                        match->Close[b] = (int) (save - bop);
                    }
                    return 1;
                }
            } else if (n->fWhat & RE_BRANCH) {
                LPCTSTR save = rex;
                int fl = flags;

                if ((n->fWhat & RE_GREEDY) == 0) {
                    if (RxMatch(n->fPtr) == 1) return 1;
                    flags = fl;
                    rex = save;
                } else {
                    if (RxMatch(n->fNext) == 1) return 1;
                    flags = fl;
                    rex = save;
                    n = n->fPtr;
                    continue;
                }
            }
            break;
        }
        n = n->fNext;
    }
    /* NOTREACHED */
    assert(1 == 0 /* internal regexp error */);
    return 0;
}

int RxTry(RxNode *rx, LPCTSTR s) {
    int fl = flags;
    rex = s;
    for (int i = 0; i < NSEXPS; i++)
        match->Open[i] = match->Close[i] = -1;
    if (RxMatch(rx)) {
        match->Open[0] = (int) (s - bop);
        match->Close[0] = (int) (rex - bop);
        return 1;
    }
    flags = fl;
    return 0;
}

int RxExec(RxNode *Regexp, LPCTSTR Data, int Len, LPCTSTR Start, RxMatchRes *Match, unsigned int RxOpt) {
    TCHAR Ch;
    if (Regexp == 0) return 0;

    match = Match;
    bop = Data;
    eop = Data + Len;

    flags = RxOpt;

    for (int i = 0; i < NSEXPS; i++) Match->Open[i] = Match->Close[i] = -1;

    switch (Regexp->fWhat) { // this should be more clever
    case RE_ATBOL:     // match is anchored
        return RxTry(Regexp, Start);
    case RE_CHAR:    // search for a character to match
        Ch = Regexp->fChar[0];
        if (Start == eop)
            break;
        if (flags & RX_CASE) {
            while (1) {
                while (Start < eop && *Start != Ch)
                    Start++;
                if (Start == eop)
                    break;
                if (RxTry(Regexp, Start))
                    return 1;
                if (++Start == eop)
                    break;
            }
        } else {
            Ch = _totupper(Ch);
            while (1) {
                while (Start < eop && (TCHAR) _totupper(*Start) != Ch)
                    Start++;
                if (Start == eop)
                    break;
                if (RxTry(Regexp, Start))
                    return 1;
                if (++Start == eop)
                    break;
            }
        }
        break;
    default:         // (slow)
        do {
            if (RxTry(Regexp, Start)) return 1;
        } while (Start++ < eop);
        break;
    }
    return 0;
}

#define FLAG_UP_CASE     1
#define FLAG_DOWN_CASE   2
#define FLAG_UP_NEXT     4
#define FLAG_DOWN_NEXT   8

static int add(int *len, LPTSTR *s, LPCTSTR a, int alen, int &flag) {
    int NewLen = *len + alen;
    int i;

    NewLen = NewLen * 2;

    if (alen == 0)
        return 0;

    if (*s) {
        *s = (LPTSTR) realloc(*s, NewLen);
        assert(*s);
        memcpy(*s + *len, a, alen);
    } else {
        *s = (LPTSTR) malloc(NewLen);
        assert(*s);
        memcpy(*s, a, alen);
        *len = 0;
    }
    if (flag & FLAG_UP_CASE) {
        LPTSTR p = *s + *len;

        for (i = 0; i < alen; i++) {
            *p = (TCHAR)_totupper(*p);
            p++;
        }
    } else if (flag & FLAG_DOWN_CASE) {
        LPTSTR p = *s + *len;

        for (i = 0; i < alen; i++) {
            *p = (TCHAR)_totlower(*p);
            p++;
        }
    }
    if (flag & FLAG_UP_NEXT) {
        LPTSTR p = *s + *len;

        *p = (TCHAR)_totupper(*p);
        flag &= ~FLAG_UP_NEXT;
    } else if (flag & FLAG_DOWN_NEXT) {
        LPTSTR p = *s + *len;

        *p = (TCHAR)_totlower(*p);
        flag &= ~FLAG_DOWN_NEXT;
    }
    *len += alen;
    return 0;
}

int RxReplace(LPCTSTR rep, LPCTSTR Src, int /*len*/, RxMatchRes match, LPTSTR *Dest, int *Dlen) {
    int dlen = 0;
    LPTSTR dest = 0;
    TCHAR Ch;
    int n;
    int flag = 0;

    *Dest = 0;
    *Dlen = 0;
    //    add(&dlen, &dest, Src, match.Open[0]);
    while (*rep) {
        switch (Ch = *rep++) {
            //        case _T('&'):
            //            add(&dlen, &dest, Src + match.Open[0], match.Close[0] - match.Open[0], flag);
            //            break;
        case _T('\\'):
            switch (Ch = *rep++) {
            case _T('0'):
            case _T('1'): case _T('2'): case _T('3'):
            case _T('4'): case _T('5'): case _T('6'):
            case _T('7'): case _T('8'): case _T('9'):
                n = Ch - 48;

                if (match.Open[n] != -1 && match.Close[n] != -1) {
                    add(&dlen, &dest, Src + match.Open[n], match.Close[n] - match.Open[n], flag);
                } else return -1;
                break;
            case 0:
                if (dest) free(dest);
                return -1; // error
            case _T('r'): Ch = _T('\r'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('n'): Ch = _T('\n'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('b'): Ch = _T('\b'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('a'): Ch = _T('\a'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('t'): Ch = _T('\t'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('U'): flag |= FLAG_UP_CASE; break;
            case _T('u'): flag |= FLAG_UP_NEXT; break;
            case _T('L'): flag |= FLAG_DOWN_CASE; break;
            case _T('l'): flag |= FLAG_DOWN_NEXT; break;
            case _T('E'):
            case _T('e'): flag &= ~(FLAG_UP_CASE | FLAG_DOWN_CASE); break;
            case _T('x'):
                {
                    int N = 0;
                    int A = 0;

                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    rep++;
                    A = N << 4;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    rep++;
                    A = A + N;
                    Ch = (TCHAR)A;
                }
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            case _T('d'):
                {
                    int N = 0;
                    int A = 0;

                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) return 0;
                    rep++;
                    A = N * 100;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) return 0;
                    rep++;
                    A = N * 10;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) return 0;
                    rep++;
                    A = A + N;
                    Ch = (TCHAR)A;
                }
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            case _T('o'):
                {
                    int N = 0;
                    int A = 0;

                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 7) return 0;
                    rep++;
                    A = N * 64;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 7) return 0;
                    rep++;
                    A = N * 8;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 7) return 0;
                    rep++;
                    A = A + N;
                    Ch = (TCHAR)A;
                }
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            default:
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            }
            break;
        default:
            add(&dlen, &dest, &Ch, 1, flag);
            break;
        }
    }
    //    add(&dlen, &dest, Src + match.Close[0], len - match.Close[0]);
    *Dlen = dlen;
    *Dest = dest;
    return 0;
}

#if 0

static void RxDump(int N, RxNode *n) {
    while (n) {
        for (int i = 0; i < N; i++) _tprintf(_T("    "));
        switch (n->fWhat) {
        case RE_NOTHING:   _tprintf(_T("NOTHING\n")); break;
        case RE_CHAR:      _tprintf(_T("CHAR '%.1s'\n"), n->fChar); break;
        case RE_ATBOL:     _tprintf(_T("^\n")); break;
        case RE_ATEOL:     _tprintf(_T("$\n")); break;
        case RE_ANY:       _tprintf(_T(".\n")); break;
        case RE_INSET:     _tprintf(_T("[\n")/*, n->fChar*/); break;
        case RE_NOTINSET:  _tprintf(_T("[^\n")/*, n->fChar*/); break;
        case RE_ATBOW:     _tprintf(_T("<\n")); break;
        case RE_ATEOW:     _tprintf(_T(">\n")); break;
        case RE_WSPACE:    _tprintf(_T("WSPACE\n")); break;
        case RE_NWSPACE:   _tprintf(_T("NWSPACE\n")); break;
        case RE_UPPER:     _tprintf(_T("UPPER\n")); break;
        case RE_LOWER:     _tprintf(_T("LOWER\n")); break;
        case RE_JUMP:      _tprintf(_T("JUMP\n")); break;
        case RE_BREAK:     _tprintf(_T("BREAK\n")); break;
        case RE_END:       _tprintf(_T("END\n")); break;
        default:
            if (n->fWhat & RE_GROUP) {
                if (n->fWhat & RE_MEM) {
                    if (n->fWhat & RE_OPEN)  _tprintf(_T("(  %d\n"), n->fWhat & 0xFF);
                    if (n->fWhat & RE_CLOSE) _tprintf(_T(")  %d\n"), n->fWhat & 0xFF);
                } else {
                    if (n->fWhat & RE_OPEN)  _tprintf(_T("{\n"));
                    if (n->fWhat & RE_CLOSE) _tprintf(_T("}\n"));
                }
            } else if (n->fWhat & RE_BRANCH) {
                if (n->fWhat & RE_GREEDY) {
                    _tprintf(_T("%c\n"), n->fWhat & 0xFF);
                } else {
                    _tprintf(_T("%c\n"), n->fWhat & 0xFF);
                }
            } else {
                _tprintf(_T("???????????????\n"));
            }
            break;
        }
        n = n->fNext;
    }
}

#define TEST(rc,rx,st) \
    _tcscpy(line,st); \
    assert((a = RxCompile(rx)) != 0); \
    puts(_T("\n--- " rx " -- " st " -- ")); \
    RxDump(0,a);\
    assert(rc == RxExec(a, line, _tcslen(line), line, &b)); \
    RxFree(a);

int main() {
    RxNode *a;
    RxMatchRes b;
    TCHAR line[1024];

    TEST(1, _T("a"), _T("a"));
    TEST(0, _T("b"), _T("a"));
    TEST(1, _T("aaaa"), _T("aaaa"));
    TEST(0, _T("bbbb"), _T("aaaa"));
    TEST(1, _T("."), _T("a"));
    TEST(0, _T("."), _T(""));
    TEST(1, _T("a.."), _T("axx"));
    TEST(0, _T("a.."), _T("b.."));
    TEST(1, _T("a?b"), _T("ab"));
    TEST(1, _T("a?b"), _T("xb"));
    TEST(0, _T("a?C"), _T("xb"));
    TEST(1, _T("{aa}?b"), _T("aab"));
    TEST(1, _T("{aa}?b"), _T("xab"));
    TEST(0, _T("{aa}?C"), _T("xxb"));
    TEST(1, _T("^aa"), _T("aa"));
    TEST(0, _T("^aa"), _T("baa"));
    TEST(1, _T("^aa$"),_T("aa"));
    TEST(0, _T("^aa$"), _T("baab"));
    TEST(1, _T("a*b"), _T("aaab"));
    TEST(0, _T("a*b"), _T("aaaa"));
    TEST(1, _T("{aa}*b"), _T("aaab"));
    TEST(0, _T("{aa}*b"), _T("aaaa"));
    TEST(1, _T("b+"), _T("bb"));
    TEST(1, _T("b+"), _T("b"));
    TEST(0, _T("b+"), _T("a"));
    TEST(1, _T("^b+$"), _T("b"));
    TEST(0, _T("^b+$"), _T("aba"));
    TEST(1, _T("a|b"), _T(" a"));
    TEST(1, _T("a|b"), _T(" b"));
    TEST(0, _T("a|b"), _T(" c"));
    TEST(1, _T("a|b|c|d|e"), _T(" a "));
    TEST(1, _T("a|b|c|d|e"), _T(" c "));
    TEST(1, _T("a|b|c|d|e"), _T(" e "));
    TEST(0, _T("a|b|c|d|e"), _T(" x "));
    TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" a "));
    TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" c "));
    TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" e "));
    TEST(0, _T("{a}|{b}|{c}|{d}|{e}"), _T(" x "));
    TEST(1, _T("^xx{alpha}|{beta}xx$"), _T("xxalphaxx"));
    TEST(1, _T("^xx{alpha}|{beta}xx$"), _T("xxbetaxx"));
    TEST(1, _T("[a-z]"), _T("aaa"));
    TEST(1, _T("^{Error}|{Warning}"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} (.+)"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} ([a-z.]#) ([0-9]#)"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} (.+) ([0-9]+): (.*)$"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} (.+) ([0-9]+): (.*)$"), _T("Error search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^([a-z]+ +)*\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^([a-z]+\\s+)+\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^([a-z]+\\s*)+\\("), _T("blabla bla bla bla("));
    TEST(1, _T("^([a-z]+\\s+)+\\("), _T("blabla bla   bla bla ("));
    TEST(1, _T("^([a-z]+\\s*)+\\("), _T("blabla   bla bla bla("));
    TEST(1, _T("^([a-z]# #)*\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^([a-z]+ @)@\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^[\\x20-\\xFF]+$"), _T("blabla"));
    TEST(1, _T("{a{a{a{a|a}|{a|a}a}a}a|a}"), _T("aaaaaaaaaaaaaaaaa"));

    while (1) {
        _tprintf(_T ("Regexp: ")); fflush(stdout); gets(line);
        if (!*line) break;
        a = RxCompile(line); RxDump(0, a);
        _tprintf(_T ("String: ")); fflush(stdout); gets(line);
        _tprintf(_T ("rc = %d\n"), RxExec(a, line, _tcslen(line), line, &b));
        for (int i = 0; i < NSEXPS; i++) {
            if (b.Open[i] != -1) {
                _tprintf(_T ("%d: %d %d\n"), i, b.Open[i], b.Close[i]);
            }
        }
        RxFree(a);
    }
    return 0;
}

#endif

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
United States United States
biography? I am not that old yet.

Comments and Discussions