Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <stdarg.h>
- #include <string.h>
- #include <malloc.h>
/* I/O buffer size in bytes. */
#define SIZE 2046

/*
 * SAFEALLOC(var, Type): allocate one object of type Type and store its
 * address in var; on allocation failure report the problem through err().
 *
 * The expansion is a braced block so that the macro's internal `if` can
 * never capture an `else` that follows the call site, and so that the
 * macro remains a complete statement whether or not the caller writes a
 * trailing semicolon.
 */
#define SAFEALLOC(var,Type) { if(((var)=(Type*)malloc(sizeof(Type)))==NULL) err("not enough memory"); }
/*
 * Token codes produced by the lexer.
 * The numeric values are printed verbatim by printTokens(), so the order of
 * the enumerators must not change.
 */
enum {
    /* identifier and end-of-input marker */
    ID,
    END,

    /* constants */
    CT_INT,
    CT_REAL,
    CT_CHAR,
    CT_STRING,

    /* keywords */
    BREAK,
    CHAR,
    DOUBLE,
    ELSE,
    FOR,
    IF,
    INT,
    RETURN,
    STRUCT,
    VOID,
    WHILE,

    /* delimiters */
    COMMA,
    SEMICOLON,
    LPAR,
    RPAR,
    LBRACKET,
    RBRACKET,
    LACC,
    RACC,

    /* operators */
    ADD,
    SUB,
    MUL,
    DIV,
    DOT,
    AND,
    OR,
    NOT,
    ASSIGN,
    EQUAL,
    NOTEQ,
    LESS,
    LESSEQ,
    GREATER,
    GREATEREQ,

    /* codes for input that yields no token */
    SPACE,
    LINECOMMENT,
    COMMENT
};
/*
 * One lexical token; tokens are chained into a singly linked list.
 * Which union member is meaningful depends on `code`.
 */
typedef struct _Token
{
    int code;               /* one of the token-code enumerators */
    union
    {
        char *text;         /* ID and CT_STRING: heap-allocated characters */
        long i;             /* CT_INT and CT_CHAR: integer value */
        double r;           /* CT_REAL: floating-point value */
    };
    int line;               /* value of the line counter when the token was created */
    struct _Token *next;    /* following token, or NULL for the last one */
} Token;
- Token *tokens;
- Token *lastToken = NULL;
- const char *pCrtCh;
- int line;
/*
 * Print "error: " followed by the printf-style message and a newline on
 * stderr, then terminate the process with exit code -1. Never returns.
 */
void err(const char *fmt,...)
{
    va_list args;

    fputs("error: ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    putc('\n', stderr);
    exit(-1);
}
- void tkerr(const Token *tk,const char *fmt,...)
- {
- va_list va;
- va_start(va,fmt);
- fprintf(stderr,"error in line %d: ",tk->line);
- vfprintf(stderr,fmt,va);
- fputc('\n',stderr);
- va_end(va);
- exit(-1);
- }
- Token *addTk(int code)
- {
- Token *tk;
- SAFEALLOC(tk,Token)
- tk->code=code;
- tk->line=line;
- tk->next=NULL;
- if(lastToken)
- {
- lastToken->next=tk;
- }
- else
- {
- tokens=tk;
- }
- lastToken=tk;
- return tk;
- }
/*
 * Return a freshly allocated, NUL-terminated copy of the characters in the
 * half-open range [pStartCh, pCrtCh).
 *
 * Only the requested range is read, so the source does not have to be
 * NUL-terminated and the cost is proportional to the copy itself rather
 * than to the length of the remaining input.
 *
 * The caller owns the returned buffer and releases it with free().
 * If memory cannot be obtained, an error is printed on stderr and the
 * process exits with code -1.
 */
char* createString(const char *pStartCh,const char *pCrtCh)
{
    size_t len = 0;
    char *str;

    /* An inverted range is treated as empty instead of wrapping around. */
    if (pCrtCh > pStartCh)
        len = (size_t)(pCrtCh - pStartCh);

    str = malloc(len + 1);
    if (str == NULL)
    {
        fputs("error: not enough memory\n", stderr);
        exit(-1);
    }

    memcpy(str, pStartCh, len);
    str[len] = '\0';
    return str;
}
/*
 * Map the character that follows a backslash in an escape sequence to the
 * character the sequence denotes (e.g. 'n' -> '\n', '0' -> '\0').
 *
 * Every character the lexer accepts after a backslash is covered,
 * including 'v'. Any other character is returned unchanged, so the result
 * is always a defined value.
 */
char escCode(char ch)
{
    switch(ch)
    {
    case 'a':  return '\a';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    case 'v':  return '\v';
    case '\'': return '\'';
    case '\?': return '\?';
    case '\"': return '\"';
    case '\\': return '\\';
    case '0':  return '\0';
    default:   return ch;
    }
}
- void printTokens()
- {
- Token *current=tokens;
- while(current!=NULL)
- {
- printf("%i", current->code);
- switch(current->code)
- {
- case ID:
- case CT_STRING:
- printf(":%s",current->text);
- break;
- case CT_CHAR:
- printf(":%c",(int)current->i);
- break;
- case CT_INT:
- printf(":%li",current->i);
- break;
- case CT_REAL:
- printf(":%lf",current->r);
- break;
- }
- printf(" ");
- current=current->next;
- }
- printf("\n");
- }
- int getNextToken()
- {
- int state=0,nCh;
- char ch;
- const char *pStartCh;
- Token *tk;
- long int ct_int;
- double ct_real;
- int ct_char;
- char *ct_string;
- char *p;
- int noBacks=0;
- int i;
- while(1)
- {
- ch=*pCrtCh;
- //if(ch=='\0')printf("ch e null in state %i\n",state);
- //printf("ch=%c\n",ch);
- switch(state)
- {
- case 0:
- if(ch<='9'&&ch>='1')
- {
- pStartCh=pCrtCh;
- pCrtCh++;
- state=1;
- }
- else if(ch=='0')
- {
- pStartCh=pCrtCh;
- pCrtCh++;
- state=2;
- }
- else if(ch=='/')
- {
- pCrtCh++;
- state=14;
- }
- else if(ch==' '||ch=='\r'||ch=='\t')
- {
- pCrtCh++;
- //state=0;
- }
- else if(ch=='\n')
- {
- line++;
- pCrtCh++;
- //state=0;
- }
- else if(ch=='\'')
- {
- pStartCh=pCrtCh; // ?
- pCrtCh++;
- state=18;
- }
- else if(ch=='\"')
- {
- pStartCh=pCrtCh; // ?
- pCrtCh++;
- state=22;
- }
- else if(ch==',')
- {
- pCrtCh++;
- state=26;
- }
- else if(ch==';')
- {
- pCrtCh++;
- state=27;
- }
- else if(ch=='(')
- {
- pCrtCh++;
- state=28;
- }
- else if(ch==')')
- {
- pCrtCh++;
- state=29;
- }
- else if(ch=='[')
- {
- pCrtCh++;
- state=30;
- }else if(ch==']')
- {
- pCrtCh++;
- state=31;
- }else if(ch=='{')
- {
- pCrtCh++;
- state=32;
- }
- else if(ch=='}')
- {
- pCrtCh++;
- state=33;
- }
- else if(ch=='+')
- {
- pCrtCh++;
- state=34;
- }
- else if(ch=='-')
- {
- pCrtCh++;
- state=35;
- }
- else if(ch=='*')
- {
- pCrtCh++;
- state=36;
- }
- else if(ch=='.')
- {
- pCrtCh++;
- state=38;
- }
- else if(ch=='&')
- {
- pCrtCh++;
- state=39;
- }
- else if(ch=='|')
- {
- pCrtCh++;
- state=41;
- }
- else if(ch=='!')
- {
- pCrtCh++;
- state=43;
- }
- else if(ch=='=')
- {
- pCrtCh++;
- state=46;
- }
- else if(ch=='<')
- {
- pCrtCh++;
- state=49;
- }
- else if(ch=='>')
- {
- pCrtCh++;
- state=52;
- }
- else if(ch=='_' || (ch>='a'&&ch<='z') || (ch>='A'&&ch<='Z'))
- {
- pStartCh=pCrtCh;
- pCrtCh++;
- state=55;
- }
- else if(ch=='\0') // the end of the input string
- {
- printf("At END.\n");
- addTk(END);
- return END;
- }
- else
- { printf("invalid character ch=%c\n",ch);
- tkerr(addTk(END),"invalid character");}
- break;
- case 1:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- //state=1;
- }
- else if(ch=='.')
- {
- pCrtCh++;
- state=8;
- }
- else if(ch=='e'||ch=='E')
- {
- pCrtCh++;
- state=10;
- }
- else
- state=6;
- break;
- case 2:
- if(ch=='x'||ch=='X')
- {
- pCrtCh++;
- state=4;
- }
- else
- state=3;
- break;
- case 3:
- if(ch>='0'&&ch<='7')
- {
- pCrtCh++;
- //state=3;
- }
- else if(ch=='.')
- {
- pCrtCh++;
- state=8;
- }
- else if(ch=='e'||ch=='E')
- {
- pCrtCh++;
- state=10;
- }
- else
- state=6;
- break;
- case 4:
- if((ch>='0'&&ch<='9') || (ch>='a'&&ch<='f') || (ch>='A'&&ch<='F'))
- {
- pCrtCh++;
- state=5;
- }
- //Am nevoie de error?
- else
- tkerr(addTk(END),"not a valid int");
- break;
- case 5:
- if((ch>='0'&&ch<='9') || (ch>='a'&&ch<='f') || (ch>='A'&&ch<='F'))
- {
- pCrtCh++;
- //state=5;
- }
- else
- state=6;
- break;
- case 6: //CT_INT
- //nCh=pCrtCh-pStartCh;
- //Char *ct_int=createString(pStartCh,pCrtCh);
- ct_int=strtol(pStartCh,NULL,0);
- tk=addTk(CT_INT);
- tk->i=ct_int;
- return CT_INT;
- case 8:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- state=9;
- }
- // error?
- else
- tkerr(addTk(END),"after a point should be a digit");
- break;
- case 9:
- if(ch=='e'||ch=='E')
- {
- pCrtCh++;
- state=10;
- }
- else if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- //state=9;
- }
- else state=13;
- break;
- case 10:
- if(ch=='+'||ch=='-')
- {
- pCrtCh++;
- state=11;
- }
- else
- state=11;
- break;
- case 11:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- state=12;
- }
- // error?
- else
- //printf("at error ch=%c\n",ch);
- tkerr(addTk(END),"after + or - should come a digit");
- break;
- case 12:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- //state=12;
- }
- else
- state=13;
- break;
- case 13:
- ct_real=strtod(pStartCh,NULL);
- tk=addTk(CT_REAL);
- tk->r=ct_real;
- return CT_REAL;
- case 14:
- if(ch=='*')
- {
- pCrtCh++;
- state=15;
- }
- else if(ch=='/')
- {
- pCrtCh++;
- state=17;
- }
- else
- state=37;
- break;
- case 15:
- if(ch=='*')
- {
- pCrtCh++;
- state=16;
- }
- else
- {
- pCrtCh++;
- //state=15;
- }
- break;
- case 16:
- if(ch=='*')
- {
- pCrtCh++;
- //state=16;
- }
- else if(ch=='/')
- {
- pCrtCh++;
- state=0;
- }
- else
- state=15;
- break;
- case 17:
- if(ch=='\r'||ch=='\0')
- {
- pCrtCh++;
- state=0;
- }
- else if(ch=='\n')
- {
- line++;
- pCrtCh++;
- state=0;
- }
- else
- {
- pCrtCh++;
- //state=17;
- }
- break;
- case 18:
- if(ch=='\\')
- {
- //pStartCh=pCrtCh; // ?
- pCrtCh++;
- state=19;
- }
- else if(ch!='\'')
- {
- //pStartCh=pCrtCh; // ?
- ct_char=ch;
- pCrtCh++;
- state=20;
- }
- // ch=='\''
- // else error?
- else
- tkerr(addTk(END)," empty character constant");
- break;
- case 19:
- //if(ch=='\a'||ch=='\b'||ch=='\f'||ch=='\n'||ch=='\r'||ch=='\t'||ch=='\v'||ch=='\''||ch=='\"'||ch=='\?'||ch=='\\'||ch=='\0')
- if(strchr("abfnrtv'?\"\\0", ch))
- {
- ct_char=escCode(ch);
- pCrtCh++;
- state=20;
- }
- // error?
- else
- tkerr(addTk(END),"should come a character to be escaped");
- break;
- case 20:
- if(ch=='\'')
- {
- pCrtCh++;
- state=21;
- }
- // error?
- else
- //printf("not a qoute ch=%c",ch);
- tkerr(addTk(END),"not a quote");
- break;
- case 21:
- //nCh=pCrtCh-pStartCh;
- //ct_char=createString(pStartCh+1,pCrtCh-1);
- //tk=addTk(CT_CHAR);
- //tk->text=ct_char;
- //ct_char=(int)(*(pStartCh+1));
- //ct_char=strtol(pStartCh+1,NULL,0);
- tk=addTk(CT_CHAR);
- tk->i=ct_char;
- return CT_CHAR;
- case 22:
- if(ch=='\\')
- {
- //pStartCh=pCrtCh; // ?
- noBacks++;
- pCrtCh++;
- state=23;
- }
- else if(ch!='\"')
- {
- //pStartCh=pCrtCh; // ?
- pCrtCh++;
- state=24;
- }
- else if (ch=='\"')
- {
- pCrtCh++;
- state=25;
- }
- break;
- case 23:
- //if(ch=='\a'||ch=='\b'||ch=='\f'||ch=='\n'||ch=='\r'||ch=='\t'||ch=='\v'||ch=='\''||ch=='\"'||ch=='\?'||ch=='\\'||ch=='\0')
- if(strchr("abfnrtv'?\"\\0", ch))
- {
- pCrtCh++;
- state=24;
- }
- //else error?
- else
- tkerr(addTk(END),"should come a character to be escaped");
- break;
- case 24:
- if(ch=='\"')
- {
- pCrtCh++;
- state=25;
- }
- else
- state=22;
- break;
- case 25:
- ct_string=createString(pStartCh+1,pCrtCh-1);
- while((p=strchr(ct_string,'\\'))!=NULL)
- {
- //p=strchr(ct_string,'\\');
- //puts(p);
- memmove(p,p+1,strlen(p));
- *p=escCode(*p);
- }
- /*
- printf("No of \\ is %i\n",noBacks);
- printf("String before is %s\n",ct_string);
- for(i=0;i<noBacks;i++)
- {
- p=strchr(ct_string,'\\');
- strcpy(p,p+1);
- *p=escCode(*p);
- printf("Escaped %c\n",*p);
- printf("String at %i is %s\n",i,ct_string);
- }
- */
- tk=addTk(CT_STRING);
- tk->text=ct_string;
- return CT_STRING;
- case 26:
- addTk(COMMA);
- return COMMA;
- case 27:
- addTk(SEMICOLON);
- return SEMICOLON;
- case 28:
- addTk(LPAR);
- return LPAR;
- case 29:
- addTk(RPAR);
- return RPAR;
- case 30:
- addTk(LBRACKET);
- return LBRACKET;
- case 31:
- addTk(RBRACKET);
- return RBRACKET;
- case 32:
- addTk(LACC);
- return LACC;
- case 33:
- addTk(RACC);
- return RACC;
- case 34:
- addTk(ADD);
- return ADD;
- case 35:
- addTk(SUB);
- return SUB;
- case 36:
- addTk(MUL);
- return MUL;
- case 37:
- addTk(DIV);
- return DIV;
- case 38:
- addTk(DOT);
- return DOT;
- case 39:
- if(ch=='&')
- {
- pCrtCh++;
- state=40;
- }
- //else error?
- else
- tkerr(addTk(END),"should come a &");
- break;
- case 40:
- addTk(AND);
- return AND;
- case 41:
- if(ch=='|')
- {
- pCrtCh++;
- state=42;
- }
- //else error?
- else
- tkerr(addTk(END),"should come a |");
- break;
- case 42:
- addTk(OR);
- return OR;
- case 43:
- if(ch=='=')
- {
- pCrtCh++;
- state=45;
- }
- else
- state=44;
- break;
- case 44:
- addTk(NOT);
- return NOT;
- case 45:
- addTk(NOTEQ);
- return NOTEQ;
- case 46:
- if(ch=='=')
- {
- pCrtCh++;
- state=48;
- }
- else
- state=47;
- break;
- case 47:
- addTk(ASSIGN);
- return ASSIGN;
- case 48:
- addTk(EQUAL);
- return EQUAL;
- case 49:
- if(ch=='=')
- {
- pCrtCh++;
- state=51;
- }
- else
- state=50;
- break;
- case 50:
- addTk(LESS);
- return LESS;
- case 51:
- addTk(LESSEQ);
- return LESSEQ;
- case 52:
- if(ch=='=')
- {
- pCrtCh++;
- state=54;
- }
- else
- state=53;
- break;
- case 53:
- addTk(GREATER);
- return GREATER;
- case 54:
- addTk(GREATEREQ);
- return GREATEREQ;
- case 55:
- if(ch=='_' || (ch>='a'&&ch<='z') || (ch>='A'&&ch<='Z'))
- {
- pCrtCh++;
- //state=55;
- }
- else
- state=56;
- break;
- case 56:
- // the id length
- nCh=pCrtCh-pStartCh;
- // keywords tests
- if(nCh==5&&!memcmp(pStartCh,"break",5))
- tk=addTk(BREAK);
- else if(nCh==4&&!memcmp(pStartCh,"char",4))
- tk=addTk(CHAR);
- else if(nCh==6&&!memcmp(pStartCh,"double",6))
- tk=addTk(DOUBLE);
- else if(nCh==4&&!memcmp(pStartCh,"else",4))
- tk=addTk(ELSE);
- else if(nCh==3&&!memcmp(pStartCh,"for",3))
- tk=addTk(FOR);
- else if(nCh==2&&!memcmp(pStartCh,"if",2))
- tk=addTk(IF);
- else if(nCh==3&&!memcmp(pStartCh,"int",3))
- tk=addTk(INT);
- else if(nCh==6&&!memcmp(pStartCh,"return",6))
- tk=addTk(RETURN);
- else if(nCh==6&&!memcmp(pStartCh,"struct",6))
- tk=addTk(STRUCT);
- else if(nCh==4&&!memcmp(pStartCh,"void",4))
- tk=addTk(VOID);
- else if(nCh==5&&!memcmp(pStartCh,"while",5))
- tk=addTk(WHILE);
- // if no keyword, then it is an ID
- else
- {
- tk=addTk(ID);
- tk->text=createString(pStartCh,pCrtCh);
- }
- return tk->code;
- }
- //printf("state=%i\n",state);
- }
- }
- void getTokens()
- {
- do
- {
- getNextToken();
- }while(*pCrtCh);
- }
- /*
- int main()
- {
- FILE *file=fopen("code.c","r+");
- if(file==NULL)
- {
- printf("The file could not be opened.\n");
- exit(1);
- }
- char *buffer;
- char *input;
- int no;
- input=(char *)malloc(SIZE*sizeof(char));
- buffer=(char *)malloc(SIZE*sizeof(char));
- strcpy(input,"");
- while((no=fread(buffer,sizeof(char),SIZE,file))>0)
- {
- input=(char *)realloc(input, no*sizeof(char));
- strcat(input, buffer);
- }
- input=(char *)realloc(input, (strlen(input)+1)*sizeof(char));
- input[strlen(input)]='\0';
- pCrtCh=input;
- getTokens();
- printTokens();
- return 0;
- }
- */
- int main()
- {
- FILE *file=fopen("8.c","r+");
- if(file==NULL)
- {
- printf("The file could not be opened.\n");
- exit(1);
- }
- char *input;
- int size;
- fseek(file, 0, SEEK_END); // seek to end of file
- size = ftell(file); // get current file pointer
- fseek(file, 0, SEEK_SET); // seek back to beginning of file
- input=(char *)malloc((size+1)*sizeof(char));
- fread(input,sizeof(char),size,file);
- input[size]='\0';
- pCrtCh=input;
- getTokens();
- printTokens();
- fclose(file);
- return 0;
- }
Add Comment
Please, Sign In to add comment