This problem can be solved using regular expressions.
The expressions are a bit complex, but the following code is pretty straightforward.
To be able to understand and change the regular expressions in the future, I recommend studying the subject and downloading a tool that can be used to test the expressions.
I usually recommend this site for learning regex:
Regular-Expressions.info (https://www.regular-expressions.info/)
And I use this tool myself:
Regex Test
Now for the solution
I opted for creating two expressions.
The first expression will parse the whole string, pick out x and y but will treat the end of the string as one sub string.
[Rx][MAX COV=y]A: DEL(A,n(m,0)),LB(A,sb,lb),LOC(A,k,s,l).
^\[R(?<x>[0-9]+)\]\[MAX COV=(?<y>[0-9]+)\]A:\s(?<vars>[\S\s]+)$
The second expression will use the end part, DEL(A,n(m,0)),LB(A,sb,lb),LOC(A,k,s,l), as input and extract the different sub-strings:
((?=(?<name>DEL)\(A,n\((?<m>[0-9]+),0\)\))|(?=(?<name>LB)\(A,sb(?<sb>[0-9]+),lb(?<lb>[0-9]+)\))|(?=(?<name>LOC)\(A,k(?<k>[0-9]+),s(?<s>[0-9]+),l(?<l>[0-9]+)\)))(,|.)
The two expressions are using named groups to make it easy to extract the variables later in code.
To make it easier to handle the various variables, I created some structs
/// <summary>
/// Holds the single integer captured from a <c>DEL(A,n(m,0))</c> token.
/// </summary>
public struct DEL
{
    /// <summary>
    /// Creates a DEL value carrying the given number.
    /// </summary>
    /// <param name="m">Number to store in the <c>m</c> property.</param>
    // Chaining to this() zero-initialises the compiler-generated backing field first,
    // so assigning the auto-property below also compiles on pre-C# 6 compilers (CS0843).
    public DEL(int m)
        : this()
    {
        this.m = m;
    }

    /// <summary>The stored number; 0 for a default-constructed value.</summary>
    public int m { get; set; }
}
/// <summary>
/// Holds the two integers captured from an <c>LB(A,sb..,lb..)</c> token.
/// </summary>
public struct LB
{
    /// <summary>
    /// Creates an LB value. Note the parameter order: <paramref name="lb"/> comes first,
    /// although <c>sb</c> appears first in the input text.
    /// </summary>
    /// <param name="lb">Number to store in the <c>lb</c> property.</param>
    /// <param name="sb">Number to store in the <c>sb</c> property.</param>
    // Chaining to this() zero-initialises the compiler-generated backing fields first,
    // so assigning the auto-properties below also compiles on pre-C# 6 compilers (CS0843).
    public LB(int lb, int sb)
        : this()
    {
        this.lb = lb;
        this.sb = sb;
    }

    /// <summary>The stored lb number; 0 for a default-constructed value.</summary>
    public int lb { get; set; }

    /// <summary>The stored sb number; 0 for a default-constructed value.</summary>
    public int sb { get; set; }
}
/// <summary>
/// Holds the three integers captured from a <c>LOC(A,k..,s..,l..)</c> token.
/// </summary>
public struct LOC
{
    /// <summary>
    /// Creates a LOC value from its three numbers.
    /// </summary>
    /// <param name="k">Number to store in the <c>k</c> property.</param>
    /// <param name="s">Number to store in the <c>s</c> property.</param>
    /// <param name="l">Number to store in the <c>l</c> property.</param>
    // Chaining to this() zero-initialises the compiler-generated backing fields first,
    // so assigning the auto-properties below also compiles on pre-C# 6 compilers (CS0843).
    public LOC(int k, int s, int l)
        : this()
    {
        this.k = k;
        this.s = s;
        this.l = l;
    }

    /// <summary>The stored k number; 0 for a default-constructed value.</summary>
    public int k { get; set; }

    /// <summary>The stored s number; 0 for a default-constructed value.</summary>
    public int s { get; set; }

    /// <summary>The stored l number; 0 for a default-constructed value.</summary>
    public int l { get; set; }
}
/// <summary>
/// Result container for one parsed input line: the two header numbers,
/// one DEL value, and the lists of LB and LOC values.
/// </summary>
public class ExtractedVariables
{
    /// <summary>Header number x; starts at 0.</summary>
    public int x { get; set; }

    /// <summary>Header number y; starts at 0.</summary>
    public int y { get; set; }

    /// <summary>The DEL value; starts as a default-constructed DEL.</summary>
    public DEL del { get; set; }

    /// <summary>Collected LB values; starts as an empty list.</summary>
    public List<LB> lbList { get; set; }

    /// <summary>Collected LOC values; starts as an empty list.</summary>
    public List<LOC> locList { get; set; }

    /// <summary>
    /// Creates an empty result: zeroed numbers, a default DEL, and two empty lists.
    /// </summary>
    public ExtractedVariables()
    {
        this.x = 0;
        this.y = 0;
        this.del = default(DEL);
        this.lbList = new List<LB>();
        this.locList = new List<LOC>();
    }
}
I declared the regular expressions as static member variables:
// Matches one whole input line. Captures the number after "[R" as "x", the number after
// "[MAX COV=" as "y", and everything following "A:" plus one whitespace character as "vars".
// readonly: the shared instance is assigned once and must not be replaced afterwards.
private static readonly Regex stringExpression = new Regex("^\\[R(?<x>[0-9]+)\\]\\[MAX COV=(?<y>[0-9]+)\\]A:\\s(?<vars>[\\S\\s]+)$");
// Locates DEL/LB/LOC tokens inside the "vars" text. Each alternative is a zero-width lookahead,
// so a match starts on the first character of a token and the trailing (,|.) consumes only that
// one character; the token kind is read from the "name" group and its numbers from
// "m", "sb"/"lb", or "k"/"s"/"l".
private static readonly Regex variableExpression = new Regex("((?=(?<name>DEL)\\(A,n\\((?<m>[0-9]+),0\\)\\))|(?=(?<name>LB)\\(A,sb(?<sb>[0-9]+),lb(?<lb>[0-9]+)\\))|(?=(?<name>LOC)\\(A,k(?<k>[0-9]+),s(?<s>[0-9]+),l(?<l>[0-9]+)\\)))(,|.)");
Then a little method that handles one string at a time:
/// <summary>
/// Parses one input line into an <see cref="ExtractedVariables"/> instance.
/// </summary>
/// <param name="input">The line to parse.</param>
/// <returns>
/// The populated result, or a freshly constructed (empty) result when the line
/// does not match the overall line pattern.
/// </returns>
/// <exception cref="Exception">
/// Thrown when a token match reports a name other than DEL, LB or LOC.
/// </exception>
public ExtractedVariables ExctractVariables(string input)
{
    ExtractedVariables extracted = new ExtractedVariables();

    Match lineMatch = stringExpression.Match(input);
    if (!lineMatch.Success)
    {
        // Nothing recognised: hand back the untouched, empty result.
        return extracted;
    }

    GroupCollection lineGroups = lineMatch.Groups;
    extracted.x = int.Parse(lineGroups["x"].Value);
    extracted.y = int.Parse(lineGroups["y"].Value);

    // Everything after the header is scanned token by token.
    string tail = lineGroups["vars"].Value;
    MatchCollection tokens = variableExpression.Matches(tail);
    foreach (Match token in tokens)
    {
        GroupCollection parts = token.Groups;
        string name = parts["name"].Value;

        if (name == "DEL")
        {
            // A later DEL token overwrites an earlier one.
            int mValue = int.Parse(parts["m"].Value);
            extracted.del = new DEL(mValue);
        }
        else if (name == "LB")
        {
            // The LB constructor takes lb first, then sb.
            int lbValue = int.Parse(parts["lb"].Value);
            int sbValue = int.Parse(parts["sb"].Value);
            extracted.lbList.Add(new LB(lbValue, sbValue));
        }
        else if (name == "LOC")
        {
            int kValue = int.Parse(parts["k"].Value);
            int sValue = int.Parse(parts["s"].Value);
            int lValue = int.Parse(parts["l"].Value);
            extracted.locList.Add(new LOC(kValue, sValue, lValue));
        }
        else
        {
            throw new Exception(string.Format("Unknown sub string name '{0}'.", name));
        }
    }

    return extracted;
}
And finally, some test code:
// Three sample lines; each one is parsed and its extracted values are written to the debug output.
// NOTE(review): the last token of the third sample begins with a lowercase 'l' ("lOC") —
// presumably a typo; confirm whether it is meant to exercise the non-matching case.
string[] inputStr =
{
    "[R1][MAX COV=2]A: DEL(A,n(2,0)),LB(A,sb1,lb1),LOC(A,k1,s1,l1).",
    "[R5][MAX COV=4]A: LB(A,sb2,lb2),LOC(A,k2,s2,l2),DEL(A,n(3,0)),LB(A,sb3,lb3).",
    "[R12][MAX COV=2]A: DEL(A,n(1,0)),LB(A,sb3,lb3),LOC(A,k3,s3,l3),lOC(A,k3,s3,l3)."
};
for (int index = 0; index < inputStr.Length; index++)
{
    ExtractedVariables parsed = ExctractVariables(inputStr[index]);

    // Header values first, then the DEL value.
    Debug.WriteLine("x = {0}", parsed.x);
    Debug.WriteLine("y = {0}", parsed.y);
    Debug.WriteLine("\tm = {0}", parsed.del.m);

    // One output line per collected LB entry.
    foreach (LB pair in parsed.lbList)
    {
        Debug.WriteLine("\tlb = {0}\tsb = {1}", pair.lb, pair.sb);
    }

    // One output line per collected LOC entry.
    foreach (LOC entry in parsed.locList)
    {
        Debug.WriteLine("\tk = {0}\ts = {1}\tl = {2}", entry.k, entry.s, entry.l);
    }
}
[UPDATE]
To extract sb<x> and lb<x> as strings, just move lb and sb inside the respective group.
// Variant of the token pattern in which the "sb" and "lb" groups capture the literal prefix
// together with the digits (e.g. "sb1", "lb1") instead of the digits alone.
// Each alternative is a zero-width lookahead, so the trailing (,|.) consumes only the first
// character of the token; values are read from the named groups.
// readonly: the shared instance is assigned once and must not be replaced afterwards.
private static readonly Regex variableExpression = new Regex("((?=(?<name>DEL)\\(A,n\\((?<m>[0-9]+),0\\)\\))|(?=(?<name>LB)\\(A,(?<sb>sb[0-9]+),(?<lb>lb[0-9]+)\\))|(?=(?<name>LOC)\\(A,k(?<k>[0-9]+),s(?<s>[0-9]+),l(?<l>[0-9]+)\\)))(,|.)");
Then of course, you need to change the struct
/// <summary>
/// Holds the two text values captured from an <c>LB(A,sb..,lb..)</c> token.
/// </summary>
public struct LB
{
    /// <summary>
    /// Creates an LB value. Note the parameter order: <paramref name="lb"/> comes first,
    /// although <c>sb</c> appears first in the input text.
    /// </summary>
    /// <param name="lb">Text to store in the <c>lb</c> property.</param>
    /// <param name="sb">Text to store in the <c>sb</c> property.</param>
    // Chaining to this() initialises the compiler-generated backing fields first,
    // so assigning the auto-properties below also compiles on pre-C# 6 compilers (CS0843).
    public LB(string lb, string sb)
        : this()
    {
        this.lb = lb;
        this.sb = sb;
    }

    /// <summary>The stored lb text; null for a default-constructed value.</summary>
    public string lb { get; set; }

    /// <summary>The stored sb text; null for a default-constructed value.</summary>
    public string sb { get; set; }
}
and the LB case
case "LB":
    // The captured texts are passed through unparsed; the LB constructor takes lb first, then sb.
    string lbText = mSub.Groups["lb"].Value;
    string sbText = mSub.Groups["sb"].Value;
    result.lbList.Add(new LB(lbText, sbText));
    break;