Click here to Skip to main content
15,886,362 members
Please Sign up or sign in to vote.
1.00/5 (3 votes)
See more:
Here is the updated code, but it is still not working....
#include<stdio.h>
<pre>#include<stdio.h>
#include<conio.h>
#include<iostream>
#include<string.h>
#include<stdlib.h>

using namespace std;
/* Forward declaration: the scanner is defined after main(). */
void tokenization(void);
/* Handle of the input file; main() opens it on "input.txt" and closes it
   again once the text has been read. */
FILE *vl;
/* The complete text to tokenize. main() fills it character by character
   and terminates it with '\0'; tokenization() reads it. */
char inputstring[100];
/* Next free position in inputstring while main() is reading the file. */
int index=0;
/* Line number shown in the scanner's reports; counting starts at 1 and
   tokenization() advances it. */
int line = 1;
/* Characters regarded as token separators (32 is the ASCII code of a
   space). Note that '\0' appears part-way through the list and the array
   has no terminator of its own, so it cannot be handled as a C string. */
char sep[]={32,'(',')',',',';','{','}','[',']','+','-','=','/','\n','\0','^','*','&','|','#','!'};
/*
 * Program entry point.
 *
 * Reads "input.txt" into the global buffer inputstring, echoes the text,
 * runs tokenization() over it and then waits for a key press so the
 * console window stays open.
 *
 * Returns 0 on success and EXIT_FAILURE when the input file cannot be
 * opened.
 */
int main(void)
{
	vl=fopen("input.txt","r");
	if(vl==NULL)
	{
		/* Without this check getc() would be called on a null stream. */
		perror("input.txt");
		return EXIT_FAILURE;
	}

	/* Keep one byte free for the terminating '\0'. */
	const int capacity = static_cast<int>(sizeof(inputstring))-1;
	int rf = getc(vl);
	while(rf!=EOF && index<capacity)
	{
		inputstring[index++]=static_cast<char>(rf);
		rf=getc(vl);
	}
	inputstring[index]='\0';
	if(rf!=EOF)
	{
		/* The file holds more text than the buffer; say so instead of
		   writing past the end of inputstring. */
		fprintf(stderr,"Warning: input.txt is longer than %d characters; the rest is ignored\n",capacity);
	}
	fclose(vl);

	puts("Code:");
	puts(inputstring);
	printf("\n\n");
	tokenization();
	getche();
	return 0;
}
void tokenization()
{
	int i=0;
	char token[100];
	int j=0;
	bool check = false;
	do
	{
		if(inputstring[i]=='\n')
			line++;
		while ((inputstring[i]==32&&inputstring[i]!='\0')||inputstring[i]=='\n') 
			i++;
		switch (inputstring[i])
		{
			case '{' :
				printf("\nValue Part : {\tClass Part : c_br_open\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '}' :
				printf("\nValue Part : }\tClass Part : c_br_close\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '(' : 
				printf("\nValue Part : (\t\tClass Part : r_br_open\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case ')' : 
				printf("\nValue Part : )\tClass Part : r_br_close\t\tLine=%d",line);
				check=true;
				i++;
				break;
				
			case '[' : 
				printf("\nValue Part : [\tClass Part : sq_br_open\t\tLine=%d",line);
				check=true;
				i++;
				break;
				
			case ']' : 
				printf("\nValue Part : )\tClass Part : sq_br_close\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case ';' : 
				printf("\nValue Part : ;\tError at line %d",line);
				check=true;
				i++;
				break;
			case ':' : 
				printf("\nValue Part : :\tClass Part : Colon\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '@' : 
				printf("\nValue Part : @\tError at line %d",line);
				check=true;
				i++;
				break;
			case '#' : 
				printf("\nValue Part : #\tError at line %d",line);
				check=true;
				i++;
				break;
			case '.' : 
				printf("\nValue Part : .\tClass Part : Dot\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '^' :
				printf("\nValue Part : ^\tClass Part : MDPOperators\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '*'  :
				printf("\nValue Part : *\tClass Part : MDPOperators\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '<':
				i++;
				if (inputstring[i]=='=')
				{
					printf("\nValue Part : <=\tClass Part : RelOperators\t\tLine=%d",line);
					check=true;
					i++;
					break;
				}
				else
				{
					printf("\nValue Part :< \tClass Part :RelOperators\t\tLine=%d",line);
					check=true;
					break;
				}
			case'>':
				i++;
				if (inputstring[i]=='=')
				{
					printf("\nValue Part :>=\tClass Part : RelOperators\t\tLine=%d",line);
					check=true;
					i++;
					break;
				}
				else
				{
					printf("\nValue Part :>\tClass Part :RelOperators\t\tLine=%d",line);
					check=true;
					break;
				}
			case '!':
				i++;
				if (inputstring[i]=='=')
				{
					printf("\nValue Part : !=\tClass Part : RelOperators\t\tLine=%d",line);
					check=true;
					i++;
					break;
				}
				else
				{

					printf("\nValue Part :!,\tClass Part : Not\t\tLine=%d",line);
					check=true;
					break;
				}
			case ',' : 
				printf("\nValue Part : ,\tClass Part : Comma\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '+' :
				printf("\nValue Part :+\t\tClass Part : A_SOperators\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '-' : 
				printf("\nValue Part:-\t\tClass Part : A_SOperators\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '=' : 
				printf("\nValue Part : =\tClass Part : AsgOperator\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '|':
				i++;
				if (inputstring[i]=='|')
				{
					printf("\nValue Part : ||\tClass Part : RelOperators\t\tLine=%d",line);
					check=true;
					i++;
					break;
				}
				else
				{
					printf("\nError at line %d",line);
					break;
				}
			case '$' : 
				printf("\nValue Part : ,\tClass Part : OR\t\tLine=%d",line);
				check=true;
				i++;
				break;
			case '&' : 
				printf("\nValue Part : ,\tClass Part : And\tLine=%d",line);
				check=true;
				i++;
				break;
			case '/':
				i++;
				if (inputstring[i]=='>')
				{
					printf("\nValue Part : />\tClass Part : Terminator\tLine=%d",line);
					check=true;
					i++;
					break;
				}
				else
				{
					printf("\nValue Part :/\tClass Part : MDPOperators\tLine=%d",line);
					check=true;
					i++;
					line++;
					break;
				}
			case '%' :
				if(inputstring[i]=='%')
			{
				i++;
				do
				{
				  inputstring[i++];
				} while (inputstring[i]!='%');
				check =true;
				i++;
				}
			default:
				break;
		}
		if(check==false)
			{
			if (inputstring[i]=='\'')
			{
				i+2;
				if(inputstring[i] == '\'')
				{
					printf("\t\tClass part : Char constant\t ");
					i++;
					break;
				}
				else
				{
					printf("Error");
					i++;
					break;
				}
			}
			if(inputstring[i]=='"')
			{
				i++;
				do
				{
					token[j++]=inputstring[i++];
				} while (inputstring[i]!='"');
				token[j]='\0';
				check =true;
				i++;
				printf("\nValue Part : ");
				for(int k=0;k<strlen(token);k++) 
				  putchar(token[k]);
				printf("\t\tClass Part : STRING\t\tLine:%d",line);
			}
		}
		if(check==false)
		{
			/*while(inputstring[i]!=32||inputstring[i]!='('||inputstring[i]!=')'||inputstring[i]!=','||inputstring[i]!=';'||inputstring[i]!='{'
				||inputstring[i]!='}'||inputstring[i]!='['||inputstring[i]!=']'||inputstring[i]!='+'||inputstring[i]!='-'||inputstring[i]!='='||inputstring[i]!='/'
				||inputstring[i]!='\n'||inputstring[i]!='\0'||inputstring[i]!='^'||inputstring[i]!='*'||inputstring[i]!='&'||inputstring[i]!='|'
				||inputstring[i]!='#'||inputstring[i]!='!')*/
			for(int k=0;k<20;k++)
			{
				while(inputstring[i] != sep[k])
				{
					if(inputstring[i]!='\0')
					{
						token[j++]=inputstring[i++];
						//check=true;
						break;
						//k++;
					}
				else 
					break;
				}
				break;
			}
			
			token[j]='\0';
		switch(token[0])
			{
			case 'f':
				if(token[1]=='n'&&token[2]=='u'&&token[3]=='m')
					{
						printf("\nValue Part : fnum\tClass Part : Datatype\tLine: %d",line);
						check=true;
						break;
					}
				if(token[1]=='a'&&token[2]=='l'&&token[3]=='s'&&token[4]=='e')
					{
						printf("\nValue Part : false\tClass Part :TF\tLine: %d",line);
						check=true;
						break;
					}
				if(token[1]=='o'&&token[2]=='r')
					{
						printf("\nValue Part : for\tClass Part :For\tLine: %d",line);
						check=true;
						break;
					}
			case't':
				if(token[1]=='r'&&token[2]=='u'&&token[3]=='e')
					{
						printf("\nValue Part : true\tClass Part :TF\tLine: %d",line);
						check=true;
						break;
					}
			case'b':
				if(token[1]=='r'&&token[2]=='e'&&token[3]=='a'&&token[4]=='k')
					{
						printf("\nValue Part : break\tClass Part :break\tLine: %d",line);
						check=true;
						break;
					}
				if(token[1]=='o'&&token[2]=='o'&&token[3]=='l')
					{
						printf("\nValue Part : bool\tClass Part :bool\tLine: %d",line);
						check=true;
						break;
					}
			case'w':
				if(token[1]=='h'&&token[2]=='i'&&token[3]=='l'&&token[4]=='e')
					{
						printf("\nValue Part : while\tClass Part :while\tLine: %d",line);
						check=true;
						break;
					}
			case'v':
				if(token[1]=='o'&&token[2]=='i'&&token[3]=='d')
					{
						printf("\nValue Part : void\tClass Part :void\tLine: %d",line);
						check=true;
						break;
					}
			case'm':
				if(token[1]=='a'&&token[2]=='i'&&token[3]=='n')
					{
						printf("\nValue Part : main\tClass Part :main\tline %d",line);
						check=true;
						break;
					}
			case'e':
				if(token[1]=='l'&&token[2]=='s'&&token[3]=='e')
					{
						printf("\nValue Part : else\tClass Part :else\tline %d",line);
						check=true;
						break;
					}
			case'c':
				if(token[1]=='a'&&token[2]=='s'&&token[3]=='e')
					{
						printf("\nValue Part : case\tClass Part :case\tline %d",line);
						check=true;
						break;
					}
				if(token[1]=='o'&&token[2]=='n'&&token[3]=='t'&&token[4]=='i'&&token[5]=='n'&&token[6]=='u'&&token[7]=='e')
					{
						printf("\nValue Part : continue\tClass Part :continue\t line %d",line);
						check=true;
						break;
					}
			case 'l':
				if(token[1]=='n'&&token[2]=='u'&&token[3]=='m')
					{
						printf("\nValue Part : lnum\tClass Part : Datatype\tline %d",line);
						check=true;
						break;
					}
			case 'd':
				if(token[1]=='e'&&token[2]=='f'&&token[3]=='a'&&token[4]=='u'&&token[5]=='l'&&token[6]=='t')
					{
						printf("\nValue Part : default\tClass Part : Default\tline%d",line);
						check=true;
						break;
					}
			case 's':
				if(token[1]=='n'&&token[2]=='u'&&token[3]=='m')
					{
						printf("\nValue Part : snum\tClass Part : Datatype\tline %d",line);
						check=true;
						break;
					}
				if(token[1]=='w'&&token[2]=='i'&&token[3]=='t'&&token[4]=='c'&&token[5]=='h')
					{
						printf("\nValue Part : switch\tClass Part : Switch\tline %d",line);
						check=true;
						break;
					}
				if (token[1]=='t' && token[2]=='r')
				{
					if(token[3]=='i'&&token[4]=='n'&&token[5]=='g')
					{
						printf("\nValue Part : string\tClass Part : Datatype\tLine %d",line);
						check=true;
						break;
					}
					if(token[3]=='u'&&token[4]=='c'&&token[5]=='t')
					{
						printf("\nValue Part : struct\tClass Part : Struct\tLine %d",line);
						check=true;
						break;
					}

				}
				

			case 'i':
				if(token[1]=='f')
					{
						printf("\nValue Part : if\tClass Part : If\tLine%d",line);
						check=true;
						break;
					}
			case 'a':
				if(token[1]=='l'&&token[2]=='p'&&token[3]=='h'&&token[4]=='a')
					{
						printf("\nValue Part : alpha\tClass Part : Datatype\tLine %d",line);
						check=true;
						break;
					}
			case 'n':
				if(token[1]=='u'&&token[2]=='m')
					{
						printf("\ntValue Part:  : num\tClass Part : Datatype\tLine%d",line);
						check=true;
						break;
					}
			case 'r':
				if(token[1]=='e')
					{
						if(token[2]=='t'&&token[3]=='u'&&token[4]=='r'&&token[5]=='n')
						{
							printf("\ntValuePart:  : return\tClass Part : Return\tLine%d",line);
							check=true;
							break;
						}
						if(token[2]=='p'&&token[3]=='e'&&token[4]=='a'&&token[5]=='t')
						{
							printf("\ntValuePart:  : repeat\tClass Part : Repeat\t line : %d",line);
							check=true;
							break;
						}
					}

			case 'g':
				if(token[1]=='o'&&token[2]=='t'&&token[3]=='o')
					{
						printf("\nValue Part : goto\tClass Part : Goto\t line : %d",line);
						check=true;
						break;
					}



			default :
				break;
			}
		}
		if(check==false)
		{
			j=0;
			while(token[j]!='\0')
			{
				if(token[j]<58&&token[j]>47)
					j++;
				else break;
			}
			if(token[j]=='\0')
			{
				printf("\nValue Part : ");
				for(int k=0;k<strlen(token);k++) 
					putchar(token[k]);
				printf("\t\tClass Part : Num_const\t\tLine :%d ",line);
				
			}
			j=0;
			if((token[j]>64&&token[j]<91)||(token[j]>96&&token[j]<123)||token[j]=='_')
			{
				j++;
				while(token[j]!='\0')
					if((token[j]>64&&token[j]<91)||(token[j]>96&&token[j]<123)||token[j]=='_'||(token[j]<58&&token[j]>47))
						j++;
					else break;
			}
			if(token[j]=='\0') 
			{
				printf("\nValue Part : ");
				for(int k=0;k<strlen(token);k++) 
					putchar(token[k]);
				printf("\t\tClass Part : ID\t\tLine :%d ",line);
				
			}
		}
		check=false; j=0;
	}
	
	while(inputstring[i]!='\0');
	printf("\nValue Part : EOF\t\tClass Part : EOF");
}

The third-to-last while loop is not working properly: it separates the input character by character, not word by word.
Posted
Updated 29-Sep-14 8:33am
v4
Comments
CHill60 29-Sep-14 11:40am    
Have you tried debugging it?
Member 10099183 29-Sep-14 11:48am    
Yes but i didn't understand the problem
CHill60 29-Sep-14 13:37pm    
What do you mean by "still not working"?
Member 10099183 29-Sep-14 13:59pm    
The while loop is not working properly; it is not generating any output. As output, I have to generate a token set for the given input, as a lexical analyzer would.
CHill60 29-Sep-14 14:10pm    
Given that you have 8 while loops in that code, "the while loop is not working properly" doesn't really help to explain the problem.

Which one? I see two and it looks like the second one has an extraneous right-parenthesis.

I also wonder about (inputstring[i]==32&&inputstring[i]!='\0') in the first one.

Also, in the second one you appear to test for \n three times.


Are you saying it doesn't compile? Or it compiles but doesn't work as expected?

I don't see any reason to have included so much code.
 
Share this answer
 
Comments
Member 10099183 29-Sep-14 13:07pm    
It compiles but doesn't run correctly.
Member 10099183 29-Sep-14 13:20pm    
so what i should do??
jeron1 29-Sep-14 13:37pm    
First describe exactly what you expect the code to do.
Member 10099183 29-Sep-14 13:47pm    
I want to make a lexical analyzer which generates tokens for a given input.
jeron1 29-Sep-14 14:04pm    
I'm sorry I wasn't clear; I was referring to the while (........) conditional block, or its current incarnation, a for loop.

for(int k=0;k<20;k++)
{
while(inputstring[i] != sep[k])
{
if(inputstring[i]!='\0')
{
token[j++]=inputstring[i++];
k++;
}
else
break;
}
}
Let's clear this clutter by eliminating the inner while loop.

C++
if(inputstring[i]=='\n')
    line++;
while ((inputstring[i]==32&&inputstring[i]!='\0')||inputstring[i]=='\n')
    i++;
switch (inputstring[i])


Try this ...

C++
switch (inputstring[i])
{
    case 0:
        break;
    case '\n':
        line++;
        i++;
        break;
    case '\t':
    case ' ':
        i++;
        break;


Code should be easier to read. This will skip over tab and space.

Your for-loop to check the list of separators makes no sense because you're already looking for the separator characters in the switch statement.

Consider adding a state variable. The state variable will tell you which token you are currently scanning or accumulating. Include an open or undecided state for when you are between tokens.

Personally (and I hate to say this), but I'd scrap this character by character approach and use a left-match recursive descent approach to tokenize.

Parse XML at SAX Speed without DOM or SAX[^]

The above article includes code for doing left-match forward parsing allowing you to write code like ...

C++
if ( parseMatch(cursor, "while") )
    token = TOKEN_WHILE;
else if ( skipspace(cursor) )
    token = TOKEN_SPACE;
else if ( parseMatch(cursor, "{") )
    token = TOKEN_OPENBRACE


The article includes C++ code but the ideas easily translate to "C".
 
Share this answer
 
v2

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900