Click here to Skip to main content
15,895,142 members
Articles / Programming Languages / C#

Scanf in C# using Regex

Rate me:
Please Sign up or sign in to vote.
4.94/5 (14 votes)
27 Sep 20044 min read 207.6K   1.4K   54  
An article on scanf functionality for C# implemented using Regex
///
/// Geoffrey Slinker
///
using System;
using System.Collections;
using System.Text.RegularExpressions;
using System.Runtime.Serialization;

namespace Scanning
{
	/// <summary>
	/// Summary description for Scanner.
	/// </summary>
	public class Scanner
	{
		protected readonly Hashtable typePatterns;
		public Scanner()
		{
			typePatterns = new Hashtable();

			typePatterns.Add("String",@"[\w\d\S]+");
			typePatterns.Add("Int16",  @"-[0-9]+|[0-9]+");
			typePatterns.Add("UInt16",  @"[0-9]+");
			typePatterns.Add("Int32",  @"-[0-9]+|[0-9]+");
			typePatterns.Add("UInt32",  @"[0-9]+");
			typePatterns.Add("Int64",   @"-[0-9]+|[0-9]+");
			typePatterns.Add("UInt64",   @"[0-9]+");
			typePatterns.Add("Single",   @"[-]|[.]|[-.]|[0-9][0-9]*[.]*[0-9]+");
			typePatterns.Add("Double",   @"[-]|[.]|[-.]|[0-9][0-9]*[.]*[0-9]+");
			typePatterns.Add("Boolean",   @"true|false");
			typePatterns.Add("Byte",  @"[0-9]{1,3}");
			typePatterns.Add("SByte",  @"-[0-9]{1,3}|[0-9]{1,3}");
			typePatterns.Add("Char",  @"[\w\S]{1}");
			typePatterns.Add("Decimal", @"[-]|[.]|[-.]|[0-9][0-9]*[.]*[0-9]+");
		}

		/// <summary>
		/// Scan memics scanf.
		/// A master regular expression pattern is created that will group each "word" in the text and using regex grouping
		/// extract the values for the field specifications.
		/// Example text: "Hello true 6.5"  fieldSpecification: "{String} {Boolean} {Double}"
		/// The fieldSpecification will result in the generation of a master Pattern:
		/// ([\w\d\S]+)\s+(true|false)\s+([-]|[.]|[-.]|[0-9][0-9]*[.]*[0-9]+)
		/// This masterPattern is ran against the text string and the groups are extracted.
		/// </summary>
		/// <param name="text"></param>
		/// <param name="fieldSpecification">A string that may contain simple field specifications of the form {Int16}, {String}, etc</param>
		/// <returns>object[] that contains values for each field</returns>
		public object[] Scan(string text, string fieldSpecification)
		{
			object[] targets = null;
			try
			{
				ArrayList targetMatchGroups = new ArrayList();
				ArrayList targetTypes = new ArrayList();

				string matchingPattern = "";
				Regex reggie = null;
				MatchCollection matches = null;

				//masterPattern is going to hold a "big" regex pattern that will be ran against the original text
				string masterPattern = fieldSpecification.Trim();
				matchingPattern =  @"(\S+)";
				masterPattern = Regex.Replace(masterPattern,matchingPattern,"($1)");		//insert grouping parens

				//store the group location of the format tags so that we can select the correct group values later.
				matchingPattern = @"(\([\w\d\S]+\))";
				reggie = new Regex(matchingPattern);
				matches = reggie.Matches(masterPattern);
				for(int i = 0; i < matches.Count; i++)
				{
					Match m = matches[i];
					string sVal = m.Groups[1].Captures[0].Value;

					//is this value a {n} value. We will determine this by checking for {
					if(sVal.IndexOf('{') >= 0)
					{
						targetMatchGroups.Add(i);
						string p = @"\(\{(\w*)\}\)";	//pull out the type
						sVal = Regex.Replace(sVal,p,"$1");
						targetTypes.Add(sVal);
					}
				}
			
				//Replace all of the types with the pattern that matches that type
				masterPattern = Regex.Replace(masterPattern,@"\{String\}",  (String)typePatterns["String"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Int16\}",  (String)typePatterns["Int16"]);
				masterPattern = Regex.Replace(masterPattern,@"\{UInt16\}",  (String)typePatterns["UInt16"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Int32\}",  (String)typePatterns["Int32"]);
				masterPattern = Regex.Replace(masterPattern,@"\{UInt32\}",  (String)typePatterns["UInt32"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Int64\}",  (String)typePatterns["Int64"]);
				masterPattern = Regex.Replace(masterPattern,@"\{UInt64\}",   (String)typePatterns["UInt64"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Single\}",   (String)typePatterns["Single"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Double\}",   (String)typePatterns["Double"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Boolean\}",   (String)typePatterns["Boolean"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Byte\}",  (String)typePatterns["Byte"]);
				masterPattern = Regex.Replace(masterPattern,@"\{SByte\}",  (String)typePatterns["SByte"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Char\}",  (String)typePatterns["Char"]);
				masterPattern = Regex.Replace(masterPattern,@"\{Decimal\}", (String)typePatterns["Decimal"]);
				
				masterPattern = Regex.Replace(masterPattern,@"\s+","\\s+");	//replace the white space with the pattern for white space

				//run our generated pattern against the original text.
				reggie = new Regex(masterPattern);
				matches = reggie.Matches(text);
				//PrintMatches(matches);

				//allocate the targets
				targets = new object[targetMatchGroups.Count];
				for(int x = 0; x < targetMatchGroups.Count; x++)
				{
					int i = (int)targetMatchGroups[x];	
					string tName = (string)targetTypes[x];
					if(i < matches[0].Groups.Count)
					{
						//add 1 to i because i is a result of serveral matches each resulting in one group.
						//this query is one match resulting in serveral groups.
						string sValue = matches[0].Groups[i+1].Captures[0].Value;
						targets[x] = ReturnValue(tName,sValue);
					}
				}
			}
			catch(Exception ex)
			{
				throw new ScanExeption("Scan exception",ex);
			}
			
			return targets;
		}//Scan

		/// Scan memics scanf.
		/// A master regular expression pattern is created that will group each "word" in the text and using regex grouping
		/// extract the values for the field specifications.
		/// Example text: "Hello true 6.5"  fieldSpecification: "{0} {1} {2}" and the target array has objects of these types: "String, ,Boolean, Double"
		/// The targets are scanned and each target type is extracted in order to build a master pattern based on these types
		/// The fieldSpecification and target types will result in the generation of a master Pattern:
		/// ([\w\d\S]+)\s+(true|false)\s+([-]|[.]|[-.]|[0-9][0-9]*[.]*[0-9]+)
		/// This masterPattern is ran against the text string and the groups are extracted and placed back into the targets
		/// <param name="text"></param>
		/// <param name="fieldSpecification"></param>
		/// <param name="targets"></param>
		public void Scan(string text, string fieldSpecification, params object[] targets)
		{
			try
			{
				ArrayList targetMatchGroups = new ArrayList();

				string matchingPattern = "";
				Regex reggie = null;
				MatchCollection matches = null;

				//masterPattern is going to hold a "big" regex pattern that will be ran against the original text
				string masterPattern = fieldSpecification.Trim();
				matchingPattern =  @"(\S+)";
				masterPattern = Regex.Replace(masterPattern,matchingPattern,"($1)");		//insert grouping parens

				//store the group location of the format tags so that we can select the correct group values later.
				matchingPattern = @"(\([\w\d\S]+\))";
				reggie = new Regex(matchingPattern);
				matches = reggie.Matches(masterPattern);
				for(int i = 0; i < matches.Count; i++)
				{
					Match m = matches[i];
					string sVal = m.Groups[1].Captures[0].Value;

					//is this value a {n} value. We will determine this by checking for {
					if(sVal.IndexOf('{') >= 0)
					{
						targetMatchGroups.Add(i);
					}
				}
			
				matchingPattern = @"(\{\S+\})";	//match each paramter tag of the format {n} where n is a digit
				reggie = new Regex(matchingPattern);
				matches = reggie.Matches(masterPattern);

				for(int i = 0; i < targets.Length && i < matches.Count; i++)
				{
					string groupID = String.Format("${0}",(i+1));
					string innerPattern = "";

					Type t = targets[i].GetType();
					innerPattern = ReturnPattern(t.Name);

					//replace the {n} with the type's pattern
					string groupPattern = "\\{" + i + "\\}";
					masterPattern = Regex.Replace(masterPattern,groupPattern,innerPattern);
				}

				masterPattern = Regex.Replace(masterPattern,@"\s+","\\s+");	//replace white space with the whitespace pattern

				//run our generated pattern against the original text.
				reggie = new Regex(masterPattern);
				matches = reggie.Matches(text);
				for(int x = 0; x < targetMatchGroups.Count; x++)
				{
					int i = (int)targetMatchGroups[x];
					if(i < matches[0].Groups.Count)
					{
						//add 1 to i because i is a result of serveral matches each resulting in one group.
						//this query is one match resulting in serveral groups.
						string sValue = matches[0].Groups[i+1].Captures[0].Value;
						Type t = targets[x].GetType();
						targets[x] = ReturnValue(t.Name,sValue);
					}
				}
			}
			catch(Exception ex)
			{
				throw new ScanExeption("Scan exception",ex);
			}
		}	//Scan

		/// <summary>
		/// Return the Value inside of an object that boxes the built in type or references the string
		/// </summary>
		/// <param name="typeName"></param>
		/// <param name="sValue"></param>
		/// <returns></returns>
		private object ReturnValue(string typeName, string sValue)
		{
			object o = null;
			switch(typeName)
			{
				case "String":
					o = sValue;
					break;
														
				case "Int16":
					o = Int16.Parse(sValue);
					break;

				case "UInt16":
					o = UInt16.Parse(sValue);
					break;

				case "Int32":
					o = Int32.Parse(sValue);
					break;

				case "UInt32":
					o = UInt32.Parse(sValue);
					break;

				case "Int64":
					o = Int64.Parse(sValue);
					break;

				case "UInt64":
					o = UInt64.Parse(sValue);
					break;

				case "Single":
					o = Single.Parse(sValue);
					break;

				case "Double":
					o = Double.Parse(sValue);
					break;

				case "Boolean":
					o = Boolean.Parse(sValue);
					break;

				case "Byte":
					o = Byte.Parse(sValue);
					break;

				case "SByte":
					o = SByte.Parse(sValue);
					break;

				case "Char":
					o = Char.Parse(sValue);
					break;

				case "Decimal":
					o = Decimal.Parse(sValue);
					break;
			}
			return o;
		}//ReturnValue

		/// <summary>
		/// Return a pattern for regular expressions that will match the built in type specified by name
		/// </summary>
		/// <param name="typeName"></param>
		/// <returns></returns>
		private string ReturnPattern(string typeName)
		{
			string innerPattern = "";
			switch(typeName)
			{
				case "Int16":
					innerPattern = (String)typePatterns["Int16"];
					break;

				case "UInt16":
					innerPattern = (String)typePatterns["UInt16"];
					break;

				case "Int32":
					innerPattern = (String)typePatterns["Int32"];
					break;

				case "UInt32":
					innerPattern = (String)typePatterns["UInt32"];
					break;

				case "Int64":
					innerPattern = (String)typePatterns["Int64"];
					break;

				case "UInt64":
					innerPattern = (String)typePatterns["UInt64"];
					break;

				case "Single":
					innerPattern = (String)typePatterns["Single"];
					break;

				case "Double":
					innerPattern = (String)typePatterns["Double"];
					break;

				case "Boolean":
					innerPattern = (String)typePatterns["Boolean"];
					break;

				case "Byte":
					innerPattern = (String)typePatterns["Byte"];
					break;

				case "SByte":
					innerPattern = (String)typePatterns["SByte"];
					break;

				case "Char":
					innerPattern = (String)typePatterns["Char"];
					break;

				case "Decimal":
					innerPattern = (String)typePatterns["Decimal"];
					break;

				case "String":
					innerPattern = (String)typePatterns["String"];
					break;
			}
			return innerPattern;
		}	//ReturnPattern

		static void PrintMatches(MatchCollection matches)
		{
			Console.WriteLine("===---===---===---===");
			int matchCount = 0;
			Console.WriteLine("Match Count = " + matches.Count);
			foreach(Match m in matches)
			{
				if(m == Match.Empty) Console.WriteLine("Empty match");
				Console.WriteLine("Match"+ (++matchCount));
				for (int i = 0; i < m.Groups.Count; i++) 
				{
					Group g = m.Groups[i];
					Console.WriteLine("Group"+i+"='" + g + "'");
					CaptureCollection cc = g.Captures;
					for (int j = 0; j < cc.Count; j++) 
					{
						Capture c = cc[j];
						System.Console.Write("Capture"+j+"='" + c + "', Position="+c.Index + "   <");
						for(int k = 0; k < c.ToString().Length; k++)
						{
							Console.Write(((Int32)(c.ToString()[k])));
						}
						Console.WriteLine(">");
					}
				}
			}
		}
	}

	/// <summary>
	/// Exceptions that are thrown by this namespace and the Scanner Class
	/// </summary>
	class ScanExeption : Exception
	{
		public ScanExeption() : base()
		{
		}

		public ScanExeption(string message) : base(message)
		{
		}

		public ScanExeption(string message, Exception inner) : base(message, inner)
		{
		}

		public ScanExeption(SerializationInfo info, StreamingContext context) : base(info, context)
		{
		}
	}
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Software Developer (Senior)
United States United States
Master Degree in C.S. .NET, Unix, Macintosh (OS X, 9, 8...), PC server side, and MFC. 17 years experience. Graphics, Distributed processing, Object Oriented Methods and Models.
Java, C#, C++. Webservices. XML. Real name is Geoffrey Slinker.

Comments and Discussions