# region Heading
/**************************************************************************************************************/
/* */
/* LibRpn.cs */
/* */
/* Implements an infix-to-Reverse Polish Notation transformer. */
/* */
/* This project was prompted by my need for a simple method of transforming an infix expression to RPN. */
/* That's all it was supposed to do; it wasn't supposed to validate or evaluate, but as you'll see, it */
/* does indeed do some simple validation. (But no evaluation, that would be handled by the next step.) */
/* */
/* I did not follow any well-accepted parsing method because of the validation, which can be turned on and */
/* off by the user. So I wound up with a simple, fairly brute-force, method which I feel is still rather */
/* elegant and object-oriented. */
/* */
/* I _did_ consider using a finite state machine, but it's been many years since I did such things in */
/* college and it seemed that because of the configurable nature of the validation, that it would require */
/* one state table for each of the possible configurations -- three booleans would be eight tables. */
/* */
/* It supports only the basic mathematical operations. By default parentheses don't need to match. */
/* Any values in scientific notation, e.g. 1.234E+001, contained in the provided string need to be within */
/* verbatim text fields (see below). */
/* */
/* The elements in the resultant string will be separated by (at least) SPACES to aid tokenizing by the */
/* next step. */
/* */
/* This is free code, use it as you require. It was a good learning exercise for me and I hope it will be */
/* for you too. If you modify it please use your own namespace. */
/* */
/* If you like it or have suggestions for improvements please let me know at: PIEBALDconsult@aol.com */
/* */
/* Modification history: */
/* 2006-01-12 Sir John E. Boucher Created */
/* */
/**************************************************************************************************************/
# endregion
namespace PIEBALD.Lib
{
/**
<summary>
Implements an infix-to-Reverse Polish Notation transformer.
Written in C#.
</summary>
<remarks>
Supports some minimal validation through options.
</remarks>
*/
public static partial class LibRpn
{
# region Floor
/**
<summary>
Static reference to attempt to ensure thread-safety for the class.
I don't know if this is needed or effective, but I figured, "why not?"
</summary>
*/
private static readonly object floor ;
/**
<summary>
Static constructor in an effort to reduce the "laziness" of the class.
</summary>
*/
static LibRpn()
{
floor = new object() ;
}
# endregion
# region InfixToRpnFormatException class
/**
<summary>
Implements a specialized FormatException for the InfixToRpn method.
</summary>
<remarks>
Tracks the position of the character that caused the exception.
</remarks>
*/
public class InfixToRpnFormatException : System.FormatException
{
/**
<summary>
Holds the position of the character that caused the exception.
Readonly.
</summary>
*/
public readonly int Position ;
/**
<summary>
Constructor for InfixToRpnFormatException class.
</summary>
<param name="Message">
What the problem was.
</param>
<param name="Position">
Position of the offending character.
</param>
*/
internal InfixToRpnFormatException
(
string Message
,
int Position
) : base ( Message )
{
this.Position = Position ;
}
/**
<summary>
Retrieves the value of the Message field.
</summary>
*/
public override string Message
{
get
{
return ( this.ToString() ) ;
}
}
/**
<summary>
Formats a string from the contents.
</summary>
<returns>
The Message and Position in a string.
</returns>
*/
public override string ToString()
{
return ( string.Format
(
"{0} at position {1}"
,
base.Message
,
this.Position
) ) ;
}
}
# endregion
# region ValidatorState enumeration
/**
<summary>
State of the (very simple) validator.
Private.
</summary>
*/
private enum ValidatorState
{
/**
<summary>
We had a second operator.
</summary>
*/
NoMoreOpsAllowed
,
/**
<summary>
We had a left-parenthesis.
</summary>
*/
OnlyOneOpAllowed
,
/**
<summary>
We had a first operator.
</summary>
*/
UnaryOpAllowed
,
/**
<summary>
We had a right-parenthesis.
</summary>
*/
BinaryOpAllowed
,
/**
<summary>
We had a term character.
</summary>
*/
TermBegun
,
/**
<summary>
We had a term-ending whitespace operator.
</summary>
*/
TermEnded
,
/**
<summary>
In verbatim text mode.
</summary>
*/
VerbatimTextBegun
} ;
# endregion
# region RpnOptions enumeration
/**
<summary>
Options for controlling certain features of the transformation.
</summary>
<remarks>
Has the Flags attribute, so values can be combined.
</remarks>
*/
[System.Flags]
public enum RpnOptions
{
/**
<summary>
No options selected.
</summary>
*/
None = 0
,
/**
<summary>
Retain the whitespace, control characters, and null characters from the source string.
</summary>
<remarks>
The default behaviour is to insert spaces where operators and parentheses had been in the
source string, but ignore the whitespace.
With this option selected the whitespace will be echoed to the resulting RPN string.
</remarks>
*/
RetainWhiteSpace = 1
,
/**
<summary>
Throw an InfixToRpnFormatException if a mismatched parenthesis is detected.
</summary>
<remarks>
The default behaviour is to ignore this because the resultant string won't contain them and
it's easy enough to behave as if the missing parenthesis were at the appropriate end.
With this option selected, a mismatched parenthesis will cause an InfixToRpnFormatException
with the position of the unmatched parenthesis to be thrown.
</remarks>
*/
FailOnMismatchedParenthesis = 2
,
/**
<summary>
Throw an InfixToRpnFormatException when various other situations occur.
</summary>
<remarks>
The default behaviour is to ignore these because the user may pass in an expression in which
these things are not errors.
With this option selected, the following situations will be checked and violations will cause
an InfixToRpnFormatException with the position of the first errant character to be thrown.
<list type="bullet">
<item>
<description>
Empty parentheses is not allowed --()
</description>
</item>
<item>
<description>
Back-to-back parentheses is not allowed -- )(
</description>
</item>
<item>
<description>
Two consecutive operators are only allowed if the second is a (unary) plus or minus
</description>
</item>
<item>
<description>
Plus and minus are the only allowed unary operators
</description>
</item>
<item>
<description>
Two consecutive terms are not allowed
</description>
</item>
</list>
</remarks>
*/
FailOnMalformedExpression = 4
,
/**
<summary>
What part of "All" don't you understand?
</summary>
*/
All = 1+2+4
} ;
# endregion
# region OperatorPrecedence enumeration
/**
<summary>
Precedence values for Operators.
Private.
</summary>
*/
private enum OperatorPrecedence
{
/**
<summary>
Binary addition and subtraction.
</summary>
*/
Low
,
/**
<summary>
Multiplication and division.
</summary>
*/
Medium
,
/**
<summary>
Exponentionation.
</summary>
*/
High
,
/**
<summary>
Unary plus and minus.
</summary>
*/
Highest
} ;
# endregion
# region Operator structure
/**
<summary>
Holder for an operator character and its precedence.
Private.
</summary>
*/
private struct Operator
{
/**
<summary>
Holds the operator's character value.
Readonly.
</summary>
*/
public readonly char Character ;
/**
<summary>
Holds the operator's precedence level.
Readonly.
</summary>
*/
public readonly OperatorPrecedence Precedence ;
/**
<summary>
Constructor for the Operator structure.
</summary>
<param name="Character">
Which operator it is.
</param>
<param name="Precedence">
What the precedence is.
</param>
*/
public Operator
(
char Character
,
OperatorPrecedence Precedence
)
{
this.Character = Character ;
this.Precedence = Precedence ;
}
/**
<summary>
Formats a string from the contents.
</summary>
<returns>
The operator character as a string.
</returns>
*/
public override string
ToString
(
)
{
return ( this.Character.ToString() ) ;
}
}
# endregion
# region OperatorStack class
/**
<summary>
Holder for a stack of operators.
Private.
</summary>
<remarks>
Each OperatorStack defines a parenthesis-level.
When a left-parenthesis is encountered a new OperatorStack should be created.
When a right-parenthesis is encountered the top OperatorStack should be emptied.
</remarks>
*/
private class OperatorStack : System.Collections.Stack
{
/**
<summary>
Holds the position of the left-parenthesis that prompted the new level.
Readonly.
</summary>
*/
public readonly int Position ;
/**
<summary>
Constructor for the OperatorStack class.
</summary>
*/
internal OperatorStack() : this ( 0 ) {}
/**
<summary>
Constructor for the OperatorStack class.
</summary>
<param name="Position">
The position of the left-parenthesis that prompted the new level.
</param>
*/
internal OperatorStack
(
int Position
)
{
this.Position = Position ;
}
/**
<summary>
Pushes an Operator onto the stack after popping off any higher or equal precedence operators.
</summary>
<param name="Op">
The Operator to push.
</param>
<exception cref="System.ArgumentException">
Will be thrown if a non-Operator is passed.
</exception>
<returns>
A string containing any higher or equal precedence operators popped from the stack.
</returns>
*/
internal new string Push
(
object Op
)
{
System.Text.StringBuilder result = new System.Text.StringBuilder ( this.Count ) ;
if ( Op is Operator )
{
/* Highest precedence means it's a unary operator so insert a zero */
if ( ((Operator) Op).Precedence == OperatorPrecedence.Highest )
{
result.Append ( "0" ) ;
}
/* Always replace the operator with a SPACE */
result.Append ( " " ) ;
while
(
( this.Count > 0 )
&&
( this.Peek().Precedence >= ((Operator) Op).Precedence )
)
{
result.Append ( this.Pop() ) ;
result.Append ( " " ) ;
}
base.Push ( Op ) ;
}
else
{
throw ( new System.ArgumentException
(
"OperatorStacks accept only Operators"
,
"Op"
) ) ;
}
return ( result.ToString() ) ;
}
/**
<summary>
Peeks at the top Operator on the stack.
</summary>
<returns>
A copy of the top Operator.
</returns>
*/
internal new Operator Peek()
{
return ( (Operator) base.Peek() ) ;
}
/**
<summary>
Pops the top Operator from the stack.
</summary>
<returns>
A copy of the top Operator.
</returns>
*/
internal new Operator Pop()
{
return ( (Operator) base.Pop() ) ;
}
/**
<summary>
Pops all the Operators from the stack.
</summary>
<returns>
A string containing the operators popped from the stack.
</returns>
*/
internal new string
Clear
(
)
{
System.Text.StringBuilder result = new System.Text.StringBuilder ( this.Count ) ;
while ( this.Count > 0 )
{
result.Append ( " " ) ;
result.Append ( this.Pop() ) ;
}
return ( result.ToString() ) ;
}
}
# endregion
# region OperatorStackStack class
/**
<summary>
Holder for a stack of stacks of operators.
Private.
</summary>
<remarks>
Defines nothing new, simply limits itself to holding OperatorStacks.
When a left-parenthesis is encountered a new OperatorStack should be pushed on.
When a right-parenthesis is encountered the top OperatorStack should be popped.
</remarks>
*/
private class OperatorStackStack : System.Collections.Stack
{
/**
<summary>
Constructor for the OperatorStackStack class.
</summary>
*/
internal OperatorStackStack(){}
/**
<summary>
Constructor for the OperatorStackStack class.
</summary>
<param name="Stack">
An initial stack to push.
</param>
*/
internal OperatorStackStack
(
OperatorStack Stack
) : this()
{
this.Push ( Stack ) ;
}
/**
<summary>
Pushes an OperatorStack onto the stack.
</summary>
<param name="Stack">
The OperatorStack to push.
</param>
<exception cref="System.ArgumentException">
Will be thrown if a non-OperatorStack is passed.
</exception>
*/
public override void Push
(
object Stack
)
{
if ( Stack is OperatorStack )
{
base.Push ( Stack ) ;
}
else
{
throw ( new System.ArgumentException
(
"OperatorStackStacks accept only OperatorStacks"
,
"Stack"
) ) ;
}
}
/**
<summary>
Peeks at the top OperatorStack on the stack.
</summary>
<returns>
A copy of the top OperatorStack.
</returns>
*/
public new OperatorStack Peek()
{
return ( (OperatorStack) base.Peek() ) ;
}
/**
<summary>
Pops the top OperatorStack from the stack.
</summary>
<returns>
A copy of the top OperatorStack.
</returns>
*/
public new OperatorStack Pop()
{
return ( (OperatorStack) base.Pop() ) ;
}
}
# endregion
# region InfixToRpn method
/**
<summary>
Transforms the provided string from infix to Reverse Polish Notation (as best it can).
The terms and operators in the resultant string will be separated by SPACE characters.
Supported operators are:
<list type="table">
<item>
<term>
+
</term>
<description>
Binary and unary addition, unary is treated as if it were (0+x)
</description>
</item>
<item>
<term>
-
</term>
<description>
Binary and unary subtraction, unary is treated as if it were (0-x)
</description>
</item>
<item>
<term>
*
</term>
<description>
Multiplication
</description>
</item>
<item>
<term>
/
</term>
<description>
Division
</description>
</item>
<item>
<term>
%
</term>
<description>
Modulus
</description>
</item>
<item>
<term>
^
</term>
<description>
Exponentiation
</description>
</item>
<item>
<term>
` '
</term>
<description>
Specifies a span of verbatim text, can be used to enclose terms in scientific notation
</description>
</item>
</list>
</summary>
<remarks>
The developer is not responsible for what you put in.
Strictly Garbage-In-Garbage-Out -- actually I can't even guarantee that!
</remarks>
<param name="Subject">
The string to transform.
</param>
<param name="Options">
Bitmapped RpnOptions to control the transformation.
</param>
<returns>
A string containing a Reverse Polish Notation expression.
</returns>
<example>
<code>
string rpn = PIEBALD.Lib.LibRpn.InfixToRpn
(
infix
,
PIEBALD.Lib.LibRpn.RpnOptions.FailOnMalformedExpression |
PIEBALD.Lib.LibRpn.RpnOptions.FailOnMismatchedParenthesis
)
</code>
</example>
<exception cref="InfixToRpnFormatException">
Will be thrown for certain eventualities when requested to do so through the Options parameter.
</exception>
*/
public static string
InfixToRpn
(
string Subject
,
RpnOptions Options
)
{
System.Text.StringBuilder result ;
/* Attempt to improve thread-safety */
lock ( floor )
{
# region Local variables
/* Prime the OperatorStackStack with an OperatorStack */
OperatorStackStack ops = new OperatorStackStack ( new OperatorStack() ) ;
/* Make helper-booleans to simplify testing for options */
bool retainwhitespace = ( Options & RpnOptions.RetainWhiteSpace )
== RpnOptions.RetainWhiteSpace ;
bool failonmismatchedparenthesis = ( Options & RpnOptions.FailOnMismatchedParenthesis )
== RpnOptions.FailOnMismatchedParenthesis ;
bool failonmalformedexpression = ( Options & RpnOptions.FailOnMalformedExpression )
== RpnOptions.FailOnMalformedExpression ;
/* Used to pass the position of a character to an InfixToRpnFormatException */
int index = 0 ;
/* Helps track whether or not the expression is malformed */
ValidatorState validatorstate = ValidatorState.OnlyOneOpAllowed ;
# endregion
result = new System.Text.StringBuilder ( Subject.Length * 2 ) ;
foreach ( char thischar in Subject )
{
/* Index will be one-based rather than zero-based. But this is really */
/* due to assuming a "virtual left parenthesis" at position zero. */
index++ ;
# region Handle Verbatim Text Characters
/* Seems like a clunky implementation, but what the hell it works */
if ( validatorstate == ValidatorState.VerbatimTextBegun )
{
switch ( thischar )
{
case '`' :
{
ops.Push ( new OperatorStack ( -index ) ) ;
break ;
}
case '\'' :
{
ops.Pop() ;
/* If all verbatim text has been terminated then exit verbatim text mode */
if ( ops.Peek().Position >= 0 )
{
validatorstate = ValidatorState.TermEnded ;
}
break ;
}
default :
{
result.Append ( thischar ) ;
break ;
}
}
continue ;
}
# endregion
# region Handle WhiteSpace
if ( ( thischar == char.MinValue ) ||
char.IsWhiteSpace ( thischar ) ||
char.IsControl ( thischar ) )
{
if ( retainwhitespace )
{
result.Append ( thischar ) ;
}
if ( validatorstate == ValidatorState.TermBegun )
{
validatorstate = ValidatorState.TermEnded ;
}
continue ;
}
# endregion
switch ( thischar )
{
# region Handle Left-Parenthesis
case '(' :
{
# region Error Handling
if ( failonmalformedexpression )
{
if ( ( validatorstate == ValidatorState.TermBegun ) ||
( validatorstate == ValidatorState.TermEnded ) ||
( validatorstate == ValidatorState.BinaryOpAllowed ) )
{
throw ( new InfixToRpnFormatException
(
"No operator preceding left-parenthesis"
,
index
) ) ;
}
}
# endregion
ops.Push ( new OperatorStack ( index ) ) ;
validatorstate = ValidatorState.OnlyOneOpAllowed ;
break ;
}
# endregion
# region Handle Right-Parenthesis
case ')' :
{
# region Error Handling
if ( failonmalformedexpression )
{
if ( ( validatorstate == ValidatorState.UnaryOpAllowed ) ||
( validatorstate == ValidatorState.NoMoreOpsAllowed ) )
{
throw ( new InfixToRpnFormatException
(
"Operator followed by right-parenthesis"
,
index
) ) ;
}
if ( validatorstate == ValidatorState.OnlyOneOpAllowed )
{
throw ( new InfixToRpnFormatException
(
"Empty parentheses"
,
index
) ) ;
}
}
# endregion
/* If we don't have a matching left-parenthesis */
if ( ops.Count == 1 )
{
# region Error Handling
if ( failonmismatchedparenthesis )
{
throw ( new InfixToRpnFormatException
(
"Mismatched right-parenthesis"
,
index
) ) ;
}
# endregion
result.Append ( ops.Peek().Clear() ) ;
}
else
{
result.Append ( ops.Pop().Clear() ) ;
}
validatorstate = ValidatorState.BinaryOpAllowed ;
break ;
}
# endregion
# region Handle Addition and Subtraction, both binary and unary
case '-' :
case '+' :
{
# region Error Handling
if ( failonmalformedexpression )
{
if ( validatorstate == ValidatorState.NoMoreOpsAllowed )
{
throw ( new InfixToRpnFormatException
(
"Too many operators in a row"
,
index
) ) ;
}
}
# endregion
if ( ( validatorstate == ValidatorState.UnaryOpAllowed ) ||
( validatorstate == ValidatorState.OnlyOneOpAllowed ) )
{
/* Unary */
result.Append ( ops.Peek().Push
(
new Operator ( thischar , OperatorPrecedence.Highest )
) ) ;
validatorstate = ValidatorState.NoMoreOpsAllowed ;
}
else
{
/* Binary */
result.Append ( ops.Peek().Push
(
new Operator ( thischar , OperatorPrecedence.Low )
) ) ;
validatorstate = ValidatorState.UnaryOpAllowed ;
}
break ;
}
# endregion
# region Handle Multiplication, Division, and Modulus
case '*' :
case '/' :
case '%' :
{
# region Error Handling
if ( failonmalformedexpression )
{
if ( ( validatorstate == ValidatorState.UnaryOpAllowed ) ||
( validatorstate == ValidatorState.NoMoreOpsAllowed ) )
{
throw ( new InfixToRpnFormatException
(
"Too many operators in a row"
,
index
) ) ;
}
}
# endregion
result.Append ( ops.Peek().Push
(
new Operator ( thischar , OperatorPrecedence.Medium )
) ) ;
validatorstate = ValidatorState.UnaryOpAllowed ;
break ;
}
# endregion
# region Handle Exponentiation
case '^' :
{
# region Error Handling
if ( failonmalformedexpression )
{
if ( ( validatorstate == ValidatorState.UnaryOpAllowed ) ||
( validatorstate == ValidatorState.NoMoreOpsAllowed ) )
{
throw ( new InfixToRpnFormatException
(
"Too many operators in a row"
,
index
) ) ;
}
}
# endregion
result.Append ( ops.Peek().Push
(
new Operator ( thischar , OperatorPrecedence.High )
) ) ;
validatorstate = ValidatorState.UnaryOpAllowed ;
break ;
}
# endregion
# region Handle Verbatim Text Beginning
case '`' :
{
# region Error Handling
if ( failonmalformedexpression )
{
if ( validatorstate == ValidatorState.BinaryOpAllowed )
{
throw ( new InfixToRpnFormatException
(
"No operator following right-parenthesis"
,
index
) ) ;
}
if ( validatorstate == ValidatorState.TermEnded )
{
throw ( new InfixToRpnFormatException
(
"Second consecutive term detected"
,
index
) ) ;
}
}
# endregion
/* Use an OperatorStack with a negative Position */
ops.Push ( new OperatorStack ( -index ) ) ;
validatorstate = ValidatorState.VerbatimTextBegun ;
break ;
}
# endregion
# region Handle Everything else
default :
{
# region Error Handling
if ( failonmalformedexpression )
{
if ( validatorstate == ValidatorState.BinaryOpAllowed )
{
throw ( new InfixToRpnFormatException
(
"No operator preceding term"
,
index
) ) ;
}
if ( validatorstate == ValidatorState.TermEnded )
{
throw ( new InfixToRpnFormatException
(
"Second consecutive term detected"
,
index
) ) ;
}
}
# endregion
result.Append ( thischar ) ;
validatorstate = ValidatorState.TermBegun ;
break ;
}
# endregion
}
}
# region Handle the end of the string
# region Error Handling
if ( failonmismatchedparenthesis )
{
if ( validatorstate == ValidatorState.VerbatimTextBegun )
{
throw ( new InfixToRpnFormatException
(
"No terminator found for verbatim text"
,
ops.Pop().Position * -1
) ) ;
}
if ( ( validatorstate == ValidatorState.UnaryOpAllowed ) ||
( validatorstate == ValidatorState.NoMoreOpsAllowed ) )
{
throw ( new InfixToRpnFormatException
(
"Dangling operator"
,
index
) ) ;
}
if ( ops.Count > 1 )
{
throw ( new InfixToRpnFormatException
(
"Mismatched left-parenthesis"
,
ops.Pop().Position
) ) ;
}
}
# endregion
while ( ops.Count > 0 )
{
result.Append ( ops.Pop().Clear() ) ;
}
# endregion
}
return ( result.ToString() ) ;
}
# endregion
}
}