Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <ctype.h>
#include <malloc.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* NOTE(review): SIZE is not referenced anywhere else in the visible code. */
#define SIZE 2046

/*
 * SAFEALLOC(var,Type): allocate one object of Type with malloc() and store
 * its address in var; if the allocation fails, report "not enough memory"
 * through err(), which terminates the program.
 *
 * The body is wrapped in do{...}while(0) so that the macro's internal `if`
 * can never pair with an `else` written after the macro at the call site.
 * The expansion deliberately keeps its own trailing ';' because the macro is
 * invoked without a semicolon (SAFEALLOC(tk,Token)).
 */
#define SAFEALLOC(var,Type) \
    do{ \
        if(((var)=(Type*)malloc(sizeof(Type)))==NULL) \
            err("not enough memory"); \
    }while(0);
/*
 * Token codes. The enumerators are numbered implicitly, starting at 0, in
 * the order they are listed; printTokens() prints these numeric values.
 */
enum
{
    /* identifier and end-of-input marker */
    ID,
    END,

    /* constants */
    CT_INT,
    CT_REAL,
    CT_CHAR,
    CT_STRING,

    /* keywords */
    BREAK,
    CHAR,
    DOUBLE,
    ELSE,
    FOR,
    IF,
    INT,
    RETURN,
    STRUCT,
    VOID,
    WHILE,

    /* delimiters */
    COMMA,
    SEMICOLON,
    LPAR,
    RPAR,
    LBRACKET,
    RBRACKET,
    LACC,
    RACC,

    /* operators */
    ADD,
    SUB,
    MUL,
    DIV,
    DOT,
    AND,
    OR,
    NOT,
    ASSIGN,
    EQUAL,
    NOTEQ,
    LESS,
    LESSEQ,
    GREATER,
    GREATEREQ,

    /* NOTE(review): no token with these codes is created in the visible code */
    SPACE,
    LINECOMMENT,
    COMMENT
};
/*
 * One lexical token. Tokens are chained into a singly linked list through
 * `next`. The anonymous union holds the token's value; which member is
 * meaningful depends on `code` (see the switch in printTokens()).
 */
typedef struct _Token
{
    int code;               /* token code */
    union
    {
        char *text;         /* read for ID and CT_STRING tokens */
        long int i;         /* read for CT_INT and CT_CHAR tokens */
        double r;           /* read for CT_REAL tokens */
    };
    int line;               /* value of the global line counter when the token was created */
    struct _Token *next;    /* following token, or NULL for the last one */
} Token;
- Token *tokens;
- Token *lastToken = NULL;
- const char *pCrtCh;
- int line;
/*
 * Print "error: " followed by the printf-style message and a newline on
 * stderr, then terminate the program with exit(-1). Never returns.
 *
 * fmt  printf-style format string
 * ...  arguments consumed by fmt
 */
void err(const char *fmt,...)
{
    va_list args;

    fputs("error: ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
    exit(-1);
}
- void tkerr(const Token *tk,const char *fmt,...)
- {
- va_list va;
- va_start(va,fmt);
- fprintf(stderr,"error in line %d: ",tk->line);
- vfprintf(stderr,fmt,va);
- fputc('\n',stderr);
- va_end(va);
- exit(-1);
- }
- Token *addTk(int code)
- {
- Token *tk;
- SAFEALLOC(tk,Token)
- tk->code=code;
- tk->line=line;
- tk->next=NULL;
- if(lastToken)
- {
- lastToken->next=tk;
- }
- else
- {
- tokens=tk;
- }
- lastToken=tk;
- return tk;
- }
/*
 * Return a newly allocated, NUL-terminated copy of the characters in
 * [pStartCh, pCrtCh).
 *
 * pStartCh  first character to copy
 * pCrtCh    one past the last character to copy (this parameter shadows the
 *           global of the same name inside this function)
 *
 * The caller owns the returned buffer and releases it with free(). If
 * pCrtCh does not lie after pStartCh an empty string is returned. On
 * allocation failure the function prints the same "error: not enough memory"
 * diagnostic used for other allocation failures in this file and exits
 * with status -1.
 */
char* createString(const char *pStartCh,const char *pCrtCh)
{
    size_t len = (pCrtCh > pStartCh) ? (size_t)(pCrtCh - pStartCh) : 0;
    char *str = malloc(len + 1);

    if (str == NULL) {
        fprintf(stderr, "error: not enough memory\n");
        exit(-1);
    }
    /* copy exactly len bytes; unlike "%s" this never scans past pCrtCh */
    memcpy(str, pStartCh, len);
    str[len] = '\0';
    return str;
}
/*
 * Translate the character that follows a backslash in an escape sequence
 * into the character the sequence denotes (e.g. 'n' -> '\n').
 *
 * ch  the character after the backslash
 *
 * Returns the decoded character. 'v' is handled because the lexer accepts it
 * in its escape set ("abfnrtv'?\"\\0"). Any character that is not a known
 * escape is returned unchanged, so the result is always well defined.
 */
char escCode(char ch)
{
    switch(ch)
    {
        case 'a':  return '\a';
        case 'b':  return '\b';
        case 'f':  return '\f';
        case 'n':  return '\n';
        case 'r':  return '\r';
        case 't':  return '\t';
        case 'v':  return '\v';
        case '\'': return '\'';
        case '?':  return '\?';
        case '"':  return '"';
        case '\\': return '\\';
        case '0':  return '\0';
        default:   return ch;
    }
}
- int getNextToken()
- {
- int state=0,nCh;
- char ch;
- const char *pStartCh;
- Token *tk;
- long int ct_int;
- double ct_real;
- int ct_char;
- char *ct_string;
- char *p;
- int noBacks=0;
- int i;
- while(1)
- {
- ch=*pCrtCh;
- printf("state: %d, character:%c\n",state,ch);
- switch(state)
- {
- case 0:
- if(ch==' '||ch=='\r'||ch=='\t')
- {
- pCrtCh++; // consume the characterand remains in state 0
- }
- else if(ch=='\n')//handled separately in order to update the current line
- {
- line++;
- pCrtCh++;
- }
- else if(ch>='1' || ch<='9')
- {
- pCrtCh++;
- state=1;
- }
- else if(ch=='0')
- {
- pCrtCh++;
- state=2;
- }
- if(ch=='/')
- {
- pCrtCh++;
- state=24;
- }
- else if(ch=='\'')
- {
- pStartCh=pCrtCh; // ?
- pCrtCh++;
- state=14;
- }
- else if(ch=='\"')
- {
- pStartCh=pCrtCh; // ?
- pCrtCh++;
- state=18;
- }
- else if(ch==',')
- {
- pCrtCh++;
- addTk(COMMA);
- return COMMA;
- }
- else if(ch==';')
- {
- pCrtCh++;
- addTk(SEMICOLON);
- return SEMICOLON;
- }
- else if(ch=='(')
- {
- pCrtCh++;
- addTk(LPAR);
- return LPAR;
- }
- else if(ch==')')
- {
- pCrtCh++;
- addTk(RPAR);
- return RPAR;
- }
- else if(ch=='[')
- {
- pCrtCh++;
- addTk(LBRACKET);
- return LBRACKET;
- }
- else if(ch==']')
- {
- pCrtCh++;
- addTk(RBRACKET);
- return RBRACKET;
- }
- else if(ch=='{')
- {
- pCrtCh++;
- addTk(LACC);
- return LACC;
- }
- else if(ch=='}')
- {
- pCrtCh++;
- addTk(RACC);
- return RACC;
- }
- else if(ch=='+')
- {
- pCrtCh++;
- addTk(ADD);
- return ADD;
- }
- else if(ch=='-')
- {
- pCrtCh++;
- addTk(SUB);
- return SUB;
- }
- else if(ch=='*')
- {
- pCrtCh++;
- addTk(MUL);
- return MUL;
- }
- else if(ch=='.')
- {
- pCrtCh++;
- addTk(DOT);
- return DOT;
- }
- else if(ch=='&')
- {
- pCrtCh++;
- state=43;
- }
- else if(ch=='|')
- {
- pCrtCh++;
- state=45;
- }
- else if(ch=='!')
- {
- pCrtCh++;
- state=47;
- }
- else if(ch=='=')
- {
- pCrtCh++;
- state=48;
- }
- else if(ch=='<')
- {
- pCrtCh++;
- state=51;
- }
- else if(ch=='>')
- {
- pCrtCh++;
- state=54;
- }
- else if(ch==0) //the end of the input string
- {
- addTk(END);
- return END;
- }
- else tkerr(addTk(END),"invalid character");
- break;
- case 1:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- //state=1;
- }
- else if(ch=='.')
- {
- pCrtCh++;
- state=8;
- }
- else if(ch=='e'||ch=='E')
- {
- pCrtCh++;
- state=10;
- }
- else
- state=6;
- break;
- case 2:
- if(ch=='x'||ch=='X')
- {
- pCrtCh++;
- state=4;
- }
- else
- state=3;
- break;
- case 3:
- if(ch>='0'&&ch<='7')
- {
- pCrtCh++;
- //state=3;
- }
- else if(ch=='.')
- {
- pCrtCh++;
- state=8;
- }
- else if(ch=='e'||ch=='E')
- {
- pCrtCh++;
- state=10;
- }
- else
- state=6;
- break;
- case 4:
- if((ch>='0'&&ch<='9') || (ch>='a'&&ch<='f') || (ch>='A'&&ch<='F'))
- {
- pCrtCh++;
- state=5;
- }
- else
- tkerr(addTk(END),"not a valid int");
- break;
- case 5:
- if((ch>='0'&&ch<='9') || (ch>='a'&&ch<='f') || (ch>='A'&&ch<='F'))
- {
- pCrtCh++;
- }
- else
- state=6;
- break;
- case 6: //CT_INT
- ct_int=strtol(pStartCh,NULL,0);
- tk=addTk(CT_INT);
- tk->i=ct_int;
- return CT_INT;
- case 8:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- state=9;
- }
- // error?
- else
- tkerr(addTk(END),"after a point should be a digit");
- break;
- case 9:
- if(ch=='e'||ch=='E')
- {
- pCrtCh++;
- state=10;
- }
- else if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- }
- else state=13;
- break;
- case 10:
- if(ch=='+'||ch=='-')
- {
- pCrtCh++;
- state=11;
- }
- else
- state=11;
- break;
- case 11:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- state=12;
- }
- else
- tkerr(addTk(END),"after + or - should come a digit");
- break;
- case 12:
- if(ch>='0'&&ch<='9')
- {
- pCrtCh++;
- }
- else
- state=13;
- break;
- case 13:
- ct_real=strtod(pStartCh,NULL);
- tk=addTk(CT_REAL);
- tk->r=ct_real;
- return CT_REAL;
- case 14:
- if(ch=='\\')
- {
- pCrtCh++;
- state=15;
- }
- else if(ch!='\'')
- {
- ct_char=ch;
- pCrtCh++;
- state=16;
- }
- else
- tkerr(addTk(END),"not a quote");
- else
- tkerr(addTk(END),"should come a character to be escaped");
- break;
- case 15:
- if(strchr("abfnrtv'?\"\\0", ch))
- {
- ct_char=escCode(ch);
- pCrtCh++;
- state=17;
- }
- else
- tkerr(addTk(END),"should come a character to be escaped");
- case 16:
- if(ch=='\'')
- {
- pCrtCh++;
- state=17;
- }
- else
- tkerr(addTk(END),"not a quote");
- break;
- case 17:
- tk=addTk(CT_CHAR);
- tk->i=ct_char;
- return CT_CHAR;
- case 18:
- if(ch=='\\')
- {
- pCrtCh++;
- state=19;
- }
- else if(ch!='"\\')
- {
- pCrtCh++;
- state=20;
- }
- else if (ch=='\"')
- {
- pCrtCh++;
- state=21;
- }
- case 19:
- if(strchr("abfnrtv'?\"\\0", ch))
- {
- pCrtCh++;
- state=20;
- }
- else
- tkerr(addTk(END),"should come a character to be escaped");
- case 20:
- if(ch=='\"')
- {
- pCrtCh++;
- state=21;
- }
- else
- state=18;
- break;
- case 21:
- ct_string=createString(pStartCh+1,pCrtCh-1);
- while((p=strchr(ct_string,'\\'))!=NULL)
- {
- //p=strchr(ct_string,'\\');
- //puts(p);
- memmove(p,p+1,strlen(p));
- *p=escCode(*p);
- }
- tk=addTk(CT_STRING);
- tk->text=ct_string;
- return CT_STRING;
- case 24:
- if(ch=='*')
- {
- pCrtCh++;
- state=27;
- }
- else if(ch=='/')
- {
- pCrtCh++;
- state=25;
- }
- else
- {
- addTk(DIV);
- return DIV;
- }
- break;
- case 25: //LINECOMMENT
- if(ch=='\r'||ch=='\0')
- {
- pCrtCh++;
- state=0;
- }
- else if(ch=='\n')
- {
- line++;
- pCrtCh++;
- state=0;
- }
- else
- {
- pCrtCh++;
- }
- case 27:
- if(ch=='*')
- {
- pCrtCh++;
- state=28;
- }
- else pCrtCh++;
- break;
- case 28:
- if(ch=='/')
- {
- pCrtCh++;
- state=0; //COMMENT
- }
- else if(ch=='*')
- {
- pCrtCh++;
- state=28;
- }
- else
- {
- pCrtCh++;
- state=27;
- }
- break;
- /*case 2:
- nCh=pCrtCh-pStartCh;//the id length
- //keywords tests
- if(nCh==5&&!memcmp(pStartCh,"break",5))tk=addTk(BREAK);
- else if(nCh==4&&!memcmp(pStartCh,"char",4))tk=addTk(CHAR);
- //...all keywords...
- else //if no keyword, then it is an ID
- {
- tk=addTk(ID);
- tk->text=createString(pStartCh,pCrtCh);
- }
- return tk->code;
- */
- case 43:
- if(ch=='&')
- {
- pCrtCh++;
- addTk(AND);
- }
- else
- tkerr(addTk(END),"should come a &");
- break;
- case 45:
- if(ch=='|')
- {
- pCrtCh++;
- addTk(OR);
- }
- else
- tkerr(addTk(END),"should come a |");
- break;
- case 45:
- if(ch=='=')
- {
- pCrtCh++;
- addTk(NOTEQ);
- return NOTEQ;
- }
- else{
- addTk(NOT);
- return NOT;
- }
- case 48:
- if(ch=='=')
- {
- pCrtCh++;
- addTk(EQUAL);
- return EQUAL;
- }
- else
- {
- addTk(ASSIGN);
- return ASSIGN;
- }
- case 51:
- if(ch=='=')
- {
- pCrtCh++;
- addTk(LESSEQ);
- return LESSEQ;
- }
- else
- {
- addTk(LESS);
- return LESS;
- }
- case 54:
- if(ch=='=')
- {
- pCrtCh++;
- addTk(GREATEREQ);
- return GREATEREQ;
- }
- else
- {
- addTk(GREATER);
- return GREATER;
- }
- }
- }
- }
- void printTokens()
- {
- Token *current=tokens;
- while(current!=NULL)
- {
- printf("%i", current->code);
- switch(current->code)
- {
- case ID:
- case CT_STRING:
- printf(":%s",current->text);
- break;
- case CT_CHAR:
- printf(":%c",(int)current->i);
- break;
- case CT_INT:
- printf(":%li",current->i);
- break;
- case CT_REAL:
- printf(":%lf",current->r);
- break;
- }
- printf(" ");
- current=current->next;
- }
- printf("\n");
- }
- void getTokens()
- {
- do
- {
- getNextToken();
- }while(*pCrtCh);
- }
- int main()
- {
- FILE *file=fopen("8.c","r+");
- if(file==NULL)
- {
- printf("The file could not be opened.\n");
- exit(1);
- }
- char *input;
- int size;
- fseek(file, 0, SEEK_END); // seek to end of file
- size = ftell(file); // get current file pointer
- fseek(file, 0, SEEK_SET); // seek back to beginning of file
- input=(char *)malloc((size+1)*sizeof(char));
- fread(input,sizeof(char),size,file);
- input[size]='\0';
- pCrtCh=input;
- getTokens();
- printTokens();
- fclose(file);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement