Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<stdio.h>
- #include<stdlib.h>
- #include<errno.h>
- #include<ctype.h>
- #include<stdarg.h>
- #include<string.h>
/*
 * SAFEALLOC(var, Type) - allocate storage for one object of type Type and
 * store its address in var. If malloc() returns NULL, err() is called with
 * "not enough memory".
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement: it can be followed by ';' and used as the body of an
 * if/else without capturing the caller's else branch.
 */
#define SAFEALLOC(var,Type) \
    do { \
        if (((var) = (Type *)malloc(sizeof(Type))) == NULL) \
            err("not enough memory"); \
    } while (0)
/*
 * Line number of the input position being scanned. It is incremented each
 * time the lexer consumes a '\n' and is copied into every token by addTk(),
 * so it starts at 1 to make reported line numbers 1-based.
 */
int line = 1;

/*
 * Cursor into the NUL-terminated input text: points at the next character
 * the lexer will examine. It must be set to the start of the input before
 * scanning begins.
 */
const char *pCrtCh = NULL;
/* Token codes. The numeric values follow declaration order, starting at 0. */
enum {
    ID,
    END,

    /* constants */
    CT_INT,
    CT_REAL,
    CT_CHAR,
    CT_STRING,

    /* keywords */
    BREAK,
    CHAR,
    DOUBLE,
    ELSE,
    FOR,
    IF,
    INT,
    RETURN,
    STRUCT,
    VOID,
    WHILE,

    /* delimiters */
    COMMA,
    SEMICOLON,
    LPAR,
    RPAR,
    LBRACKET,
    RBRACKET,
    LACC,
    RACC,

    /* operators */
    ADD,
    SUB,
    MUL,
    DIV,
    DOT,
    AND,
    OR,
    NOT,
    ASSIGN,
    EQUAL,
    NOTEQ,
    LESS,
    LESSEQ,
    GREATER,
    GREATEREQ,

    SPACE,
    LINECOMMENT,
    COMMENT
};
/*
 * One lexical token. Tokens form a singly linked list through `next`.
 * Which member of the anonymous union is meaningful depends on `code`.
 */
struct _Token {
    int code;               /* token code (name) */
    union {
        char *text;         /* ID, CT_STRING (dynamically allocated) */
        long int i;         /* CT_INT, CT_CHAR */
        double r;           /* CT_REAL */
    };
    int line;               /* input file line of the token */
    struct _Token *next;    /* next token in the list */
};

typedef struct _Token Token;
/*
 * Report a fatal error and terminate the program.
 *
 * Writes "error: ", then the printf-style message built from fmt and the
 * variadic arguments, then a newline, all to stderr, and calls exit(-1).
 * This function does not return.
 */
void err(const char *fmt,...)
{
    va_list args;

    fputs("error: ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
    exit(-1);
}
- Token *lastToken, *tokens;
- Token *addTk(int code)
- {
- Token *tk;
- SAFEALLOC(tk,Token);
- tk->code=code;
- tk->line=line;
- tk->next=NULL;
- if(lastToken){
- lastToken->next=tk;
- }else{
- tokens=tk;
- }
- lastToken=tk;
- return tk;
- }
- void tkerr (const Token *tk, const char *fmt,...)
- {
- va_list va;
- va_start(va,fmt);
- fprintf(stderr,"error in line %d:", tk->line);
- vfprintf(stderr,fmt,va);
- fputc('\n', stderr);
- va_end(va);
- exit(-1);
- }
/*
 * Build a newly allocated string holding s1 followed by s2.
 *
 * s1, s2: NUL-terminated strings; neither may be NULL.
 *
 * Returns the concatenation, which the caller owns and must free(), or NULL
 * if the allocation fails (previously a failed malloc() was passed straight
 * to strcpy()).
 */
char* createString(const char *s1, const char *s2)
{
    /* Measure each input once and reuse the lengths for the copies. */
    const size_t len1 = strlen(s1);
    const size_t len2 = strlen(s2);
    char *result = malloc(len1 + len2 + 1); /* +1 for the zero-terminator */

    if (result == NULL) {
        return NULL;
    }

    memcpy(result, s1, len1);
    memcpy(result + len1, s2, len2 + 1); /* also copies s2's terminator */
    return result;
}
- int getNextToken()
- {
- int state=0, nCh;
- char ch;
- const char *pStartCh;
- Token *tk;
- while(1)
- {
- ch=*pCrtCh;
- printf("state: %d, character:%c\n",state,ch);
- switch(state)
- {
- case 0:
- if(ch==' '||ch=='\r'||ch=='\t')
- {
- pCrtCh++; // consume the characterand remains in state 0
- }
- else if(ch=='\n')//handled separately in order to update the current line
- {
- line++;
- pCrtCh++;
- }
- else if(ch>='1' || ch<='9')
- {
- pCrtCh++;
- state=1;
- }
- else if(ch=='0')
- {
- pCrtCh++;
- state=2;
- }
- else if(ch==0) //the end of the input string
- {
- addTk(END);
- return END;
- }
- else tkerr(addTk(END),"invalid character");
- break;
- case 1:
- while(ch>='0'&&ch<='9') pCrtCh++;
- state=3;//CT INT
- break;
- case 2:
- pStart=pCrtCh;
- pCrtCh++;
- state=3;
- break;
- case 3:
- while(ch>='0'&&ch<='7') pCrtCh++;
- state=4;
- /*case 2:
- nCh=pCrtCh-pStartCh;//the id length
- //keywords tests
- if(nCh==5&&!memcmp(pStartCh,"break",5))tk=addTk(BREAK);
- else if(nCh==4&&!memcmp(pStartCh,"char",4))tk=addTk(CHAR);
- //...all keywords...
- else //if no keyword, then it is an ID
- {
- tk=addTk(ID);
- tk->text=createString(pStartCh,pCrtCh);
- }
- return tk->code;
- */
- case 48:
- if(ch=='=')
- {
- pCrtCh++;
- state=49;
- }
- else state = 50;
- break;
- case 49:
- addTk(ASSIGN);
- return ASSIGN;
- case 50:
- addTk(EQUAL);
- return EQUAL;
- }
- }
- }
- void printTokens()
- {
- Token *current=tokens;
- while(current!=NULL)
- {
- printf("%i", current->code);
- switch(current->code)
- {
- case ID:
- case CT_STRING:
- printf(":%s",current->text);
- break;
- case CT_CHAR:
- printf(":%c",(int)current->i);
- break;
- case CT_INT:
- printf(":%li",current->i);
- break;
- case CT_REAL:
- printf(":%lf",current->r);
- break;
- }
- printf(" ");
- current=current->next;
- }
- printf("\n");
- }
- void getTokens()
- {
- do
- {
- getNextToken();
- }while(*pCrtCh);
- }
- int main(void)
- {
- getTokens();
- printTokens();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement