Advertisement
NovaYoshi

syntactical analyzer

Oct 3rd, 2017
161
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 22.48 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include <ctype.h>
  5. #include <stdarg.h>
  6.  
  7. // ----- FLOAT CHECKER -----
  8. enum {
  9.   FStart,      // starting state
  10.   FSign,       // + or - at the state
  11.   FInteger,    // the integer amount
  12.   FPoint,      // the decimal point
  13.   FFraction,   // the fractional amount
  14.   FExpMark,    // the E before an exponent amount
  15.   FExpSign,    // + or - before the exponent amount
  16.   FExpInteger, // exponent amount
  17.   FError,      // error
  18. };
  19.  
  20. struct float_state {
  21.   int plus_minus, is_digit, decimal_point, e, valid;
  22. } float_state_machine[] = {
  23. //  +/-       0-9          .       E     valid
  24.   {FSign,    FInteger,    FError, FError,   0}, // Start
  25.   {FError,   FInteger,    FError, FError,   0}, // Sign
  26.   {FError,   FInteger,    FPoint, FExpMark, 2}, // Integer
  27.   {FError,   FFraction,   FError, FError,   0}, // Point
  28.   {FError,   FFraction,   FError, FExpMark, 1}, // Fraction
  29.   {FExpSign, FExpInteger, FError, FError,   0}, // ExpMark
  30.   {FError,   FExpInteger, FError, FError,   0}, // ExpSign
  31.   {FError,   FExpInteger, FError, FError,   1}, // ExpInteger
  32.   {FError,   FError,      FError, FError,   0}, // Dead
  33. };
  34.  
  35. // Returns 1 if float, 2 if integer, or 0 if neither
  36. int is_float(const char *string) {
  37.   int state = FStart; // state
  38.  
  39.   while(*string) {
  40.     char c = *(string++); // get a character
  41.     if(c == '+' || c == '-')
  42.       state = float_state_machine[state].plus_minus;
  43.     else if(isdigit(c))
  44.       state = float_state_machine[state].is_digit;
  45.     else if(c == '.')
  46.       state = float_state_machine[state].decimal_point;
  47.     else if(c == 'E')
  48.       state = float_state_machine[state].e;
  49.     else
  50.       state = FError;
  51.   }
  52.   return float_state_machine[state].valid;
  53. }
  54.  
  55. // Data structure for symbol table entries
  56. struct symbol_data {
  57.   const char *lexeme;
  58.   int token_category;
  59.   struct symbol_data *next;
  60. };
  61.  
  62. struct symbol_data *symbol_table = NULL;
  63.  
  64. // ----- TOKEN INFORMATION -----
  65. enum token_category {
  66.   t_identifier,    // an identifier
  67.   t_integer,       // any integer
  68.   t_real,          // any real number
  69.   t_string,        // a string literal
  70.   t_addsub,        // +, -
  71.   t_muldiv,        // *, /
  72.   t_logical,       // < > >= <= = <>
  73.   t_lparen,        // (
  74.   t_rparen,        // )
  75.   t_lcurly,        // {
  76.   t_rcurly,        // }
  77.   t_lsquare,       // [
  78.   t_rsquare,       // ]
  79.   t_assignment,    // :=
  80.   t_comma,         // ,
  81.   t_if,
  82.   t_then,
  83.   t_else,
  84.   t_do,
  85.   t_while,
  86.   t_return,
  87.   t_bool,
  88.   t_const,
  89.   t_int,
  90.   t_float,
  91.   t_long,
  92.   t_char,
  93.   t_void,
  94.   t_read,
  95.   t_end_statement, // ;
  96.   t_max_tokens,
  97.  
  98.   t_first_keyword = t_if,
  99.   t_last_keyword = t_read,
  100. };
  101.  
  102. // The token_strings list lays out the token category name (first element)
  103. // as well as the strings for each of the tokens in the category (all other elements).
  104. const char *token_strings[t_max_tokens][20] = {
  105.   {"identifier", NULL},       {"integer", NULL},
  106.   {"real", NULL},             {"string", NULL},
  107.   {"add/sub", "+", "-", NULL},{"mul/div", "*", "/", NULL},
  108.   {"logical", "<", ">", "<=", ">=", "=", "<>", NULL},
  109.   {"lparen", "(", NULL},      {"rparen", ")", NULL},
  110.   {"lcurly", "{", NULL},      {"rcurly", "}", NULL},
  111.   {"lsquare", "[", NULL},     {"rsquare", "]", NULL},
  112.   {"assignment", ":=", NULL}, {"comma", ",", NULL},
  113.   {"if", "if", NULL},         {"then", "then", NULL},     {"else", "else", NULL},
  114.   {"do", "do", NULL},         {"while", "while", NULL},
  115.   {"return", "return", NULL}, {"bool", "bool", NULL},
  116.   {"const", "const", NULL},   {"int", "int", NULL},
  117.   {"float", "float", NULL},   {"long", "long", NULL},
  118.   {"char", "char", NULL},     {"void", "void", NULL},
  119.   {"read", "read", NULL},     {"end_statement", ";", NULL},
  120. };
  121.  
  122. // Data structure for a token
  123. struct lexeme_token {
  124.   int token_category;         // which token category
  125.   int token_value;            // which token in the token category
  126.   struct symbol_data *symbol; // symbol table entry
  127.   struct lexeme_token *next;
  128. };
  129.  
  130. // Makes a string representation of a token
  131. const char *token_print(struct lexeme_token *token) {
  132.   static char buffer[50];
  133.   if(token->symbol)
  134.     sprintf(buffer, "(%s, %s)", token->symbol->lexeme, token_strings[token->token_category][0]);
  135.   else if(!strcmp(token_strings[token->token_category][token->token_value], token_strings[token->token_category][0]))
  136.     sprintf(buffer, "(%s)", token_strings[token->token_category][0]);
  137.   else
  138.     sprintf(buffer, "(%s, %s)", token_strings[token->token_category][token->token_value], token_strings[token->token_category][0]);
  139.   return buffer;
  140. }
  141.  
  142. // ----- STATE VARIABLES -----
  143.  
  144. char lexeme[100] = ""; // buffer to accumulate the lexeme in
  145. int lexeme_index = 0;  // index in the buffer
  146. struct lexeme_token *token_list = NULL;
  147. struct lexeme_token *token_current = NULL;
  148. FILE *program_file;
  149. int c; // current character
  150.  
  151. // Error, so exit the whole program
  152. void error(const char *format, ...) {
  153.   va_list argptr;
  154.   va_start(argptr, format);
  155.   printf("Error: ");
  156.   vprintf(format, argptr);
  157.   putchar('\n');
  158.   va_end(argptr);
  159.   exit(-1);
  160. }
  161.  
  162. // ----- SYMBOL TABLE -----
  163.  
  164. // Allocate a new symbol for the symbol table
  165. struct symbol_data *alloc_symbol(const char *lexeme, int token_category) {
  166.   struct symbol_data *new_symbol;
  167.   if(new_symbol == NULL)
  168.     error("Couldn't allocate symbol");
  169.   new_symbol = (struct symbol_data*)calloc(1, sizeof(struct symbol_data));
  170.   new_symbol->lexeme = strdup(lexeme);
  171.   new_symbol->token_category = token_category;
  172.   new_symbol->next = NULL;
  173.   return new_symbol;
  174. }
  175.  
  176. // Find a symbol in the symbol table and optionally auto-create it if it's not found
  177. struct symbol_data *find_symbol(const char *lexeme, int token_category, int auto_create) {
  178.   struct symbol_data *symbol;
  179.   struct symbol_data *new_symbol;
  180.  
  181.   // If the symbol table is empty, handle that
  182.   if(!symbol_table) {
  183.     if(auto_create) {
  184.       new_symbol = alloc_symbol(lexeme, token_category);
  185.       symbol_table = new_symbol;
  186.       return new_symbol;
  187.     } else
  188.       return NULL;
  189.   }
  190.  
  191.   // Find the symbol if it exists
  192.   struct symbol_data *previous = NULL;
  193.   for(symbol = symbol_table; symbol; symbol = symbol->next) {
  194.     previous = symbol;
  195.     if(!strcmp(symbol->lexeme, lexeme) && symbol->token_category == token_category)
  196.       return symbol;
  197.   }
  198.  
  199.   // Symbol does not exist, can I create it?
  200.   if(!auto_create)
  201.     return NULL;
  202.   // Create it, add it to the table
  203.   new_symbol = alloc_symbol(lexeme, token_category);
  204.   previous->next = new_symbol;
  205.   return new_symbol;
  206. }
  207.  
  208. // ----- LEXICAL ANALYZER -----
  209.  
  210. // Reads the next character from the file
  211. void get_char() {
  212.   c = fgetc(program_file);
  213. }
  214.  
  215. // Adds a character, and ends the string there
  216. void add_char() {
  217.   lexeme[lexeme_index++] = c;
  218.   if(lexeme_index >= sizeof(lexeme)) {
  219.     error("Lexeme is too long (%s)", lexeme);
  220.   }
  221.   lexeme[lexeme_index] = 0;
  222.   c = 0;
  223. }
  224.  
  225. // Clear the lexeme buffer
  226. void clear_lexeme() {
  227.   lexeme[0] = 0;
  228.   lexeme_index = 0;
  229. }
  230.  
  231. // Is the lexeme a keyword?
  232. int is_keyword() {
  233.   int i;
  234.   for(i = t_first_keyword; i <= t_last_keyword; i++) {
  235.     if(!strcmp(token_strings[i][0], lexeme)) {
  236.       return i;
  237.     }
  238.   }
  239.   return -1;
  240. }
  241.  
  242. // Adds a token to the list of tokens
  243. void add_token(int token_category, int symbol) {
  244.   // Allocate and set the token
  245.   struct lexeme_token *token = (struct lexeme_token*)calloc(1, sizeof(struct lexeme_token));
  246.   if(!token) {
  247.     error("Can't allocate token");
  248.   }
  249.  
  250.   if(symbol) {
  251.   // If it's a symbol, maintain the symbol table
  252.     token->symbol = find_symbol(lexeme, token_category, 1);
  253.   } else {
  254.     // If it's not a symbol, loop to find the correct token category subtype to use
  255.     for(int i=0; token_strings[token_category][i]; i++)
  256.       if(!strcmp(token_strings[token_category][i], lexeme)) {
  257.         token->token_value = i;
  258.         break;
  259.       }
  260.   }
  261.   token->token_category = token_category;
  262.   token->next = NULL;
  263.  
  264.   // Add it to the list
  265.   if(token_current) {
  266.     token_current->next = token;
  267.   }
  268.   token_current = token;
  269.  
  270.   // Start the list if it's not already started
  271.   if(!token_list) {
  272.     token_list = token;
  273.   }
  274.  
  275.   // Get ready for the next lexeme
  276.   clear_lexeme();
  277. }
  278.  
  279. // Adds a token consisting of one character
  280. void add_char_token(int token_category) {
  281.   add_char();
  282.   add_token(token_category, 0);
  283. }
  284.  
  285. // Run the lexical analyzer
  286. struct lexeme_token *lexical_analyzer(FILE *File) {
  287.   program_file = File;
  288.   token_list = NULL;
  289.   token_current = NULL;
  290.   clear_lexeme();
  291.  
  292.   // Lexical analyzer main loop
  293.   while(1) {
  294.     // Read a character if we don't have one
  295.     if(c == 0) {
  296.       get_char();
  297.     }
  298.     if(c == EOF) {   // Stop if the end was reached
  299.       break;
  300.     }
  301.  
  302.     // Recognize all the different types of tokens
  303.     if(isspace(c)) { // Ignore space characters
  304.       c = 0;
  305.     } else if(c == '-' || c =='+') {
  306.       add_char_token(t_addsub);
  307.     } else if(c == '*') {
  308.       add_char_token(t_muldiv);
  309.     } else if(c == '(') {
  310.       add_char_token(t_lparen);
  311.     } else if(c == ')') {
  312.       add_char_token(t_rparen);
  313.     } else if(c == '{') {
  314.       add_char_token(t_lcurly);
  315.     } else if(c == '}') {
  316.       add_char_token(t_rcurly);
  317.     } else if(c == '[') {
  318.       add_char_token(t_lsquare);
  319.     } else if(c == ']') {
  320.       add_char_token(t_rsquare);
  321.     } else if(c == ',') {
  322.       add_char_token(t_comma);
  323.     // Comparison operators
  324.     } else if(c == '=') {
  325.       add_char();
  326.       add_token(t_logical, 0);
  327.     } else if(c == '>') {
  328.       add_char();
  329.       get_char();
  330.       if(c == '=') { // >=
  331.         add_char();
  332.       }
  333.       add_token(t_logical, 0);
  334.     } else if(c == '<') {
  335.       add_char();
  336.       get_char();
  337.       if(c == '=' || c == '>') { // <= <>
  338.         add_char();
  339.       }
  340.       add_token(t_logical, 0);
  341.     // Statement separators
  342.     } else if(c == ';') {
  343.       add_char();
  344.       add_token(t_end_statement, 0);
  345.     // "strings"
  346.     } else if(c == '\"') {
  347.       add_char();
  348.       int old_char; // add_char clears c, so keep a copy here
  349.       do {
  350.         get_char();
  351.         old_char = c;
  352.         add_char();
  353.       } while(old_char != '\"' && old_char != EOF);
  354.       add_token(t_string, 1);
  355.     // := assignment statements
  356.     } else if(c == ':') {
  357.       add_char();
  358.       get_char();
  359.       if(c == '=') {
  360.         add_char();
  361.         add_token(t_assignment, 0);
  362.       } else {
  363.         error(": found, expected :=");
  364.       }
  365.     // identifiers or keywords
  366.     } else if(isalpha(c) || c == '_') {
  367.       while(isalnum(c) || c == '_') {
  368.         add_char();
  369.         get_char();
  370.       }
  371.       // Look up what keyword it is, if it's a keyword at all
  372.       int keyword_num = is_keyword();
  373.       if(keyword_num != -1) {
  374.         add_token(keyword_num, 0);
  375.       } else {
  376.         add_token(t_identifier, 1);
  377.       }
  378.     // Integers and floats
  379.     } else if(isdigit(c)) {
  380.       while(isdigit(c) || c == 'E' || c == '.') {
  381.         add_char();
  382.         get_char();
  383.       }
  384.       int is_a_float = is_float(lexeme);
  385.       if(is_a_float == 0)
  386.         error("Invalid number %s", lexeme);
  387.       if(is_a_float == 1) // 1 = float
  388.         add_token(t_real, 1);
  389.       if(is_a_float == 2) // 2 = integer
  390.         add_token(t_integer, 1);
  391.     // If a comment is found, skip to the end of the line
  392.     } else if(c == '/') {
  393.       get_char();
  394.       if(c == '/') {
  395.         do {
  396.           get_char();
  397.         } while(c != '\n' && c != EOF);
  398.       } else { // if only one / then it's division
  399.         clear_lexeme();
  400.         c = '/';
  401.         add_char_token(t_muldiv);
  402.       }
  403.     } else {
  404.       error("Unexpected character %c", c);
  405.     }
  406.  
  407.   }
  408.   return token_list;
  409. }
  410.  
  411. // ----- PARSE TREE FUNCTIONS -----
  412.  
  413. struct syntax_node {
  414.   struct lexeme_token *token;
  415.   struct syntax_node *child, *next;
  416. };
  417.  
  418. struct syntax_node *tree_head = NULL;
  419. struct syntax_node *tree_current = NULL;
  420.  
  421. // Make a new tree node
  422. struct syntax_node *tree_new() {
  423.   struct syntax_node *node = (struct syntax_node*)calloc(1, sizeof(struct syntax_node));
  424.   node->token = token_current;
  425.   return node;
  426. }
  427.  
  428. // Directly set one node's child
  429. void tree_add_child(struct syntax_node *parent, struct syntax_node *child) {
  430.   if(!parent->child) {
  431.     // Put it directly in the child pointer if possible
  432.     parent->child = child;
  433.   } else {
  434.     // If there's already a child, find the next free spot for it
  435.     struct syntax_node *temp = tree_current->child;
  436.     while(temp->next)
  437.       temp = temp->next;
  438.     temp->next = child;
  439.   }
  440. }
  441.  
  442. // Add a child to the current node and make that the new current node
  443. void tree_child() {
  444.   struct syntax_node *node = tree_new();
  445.   if(!tree_head) {
  446.     // no head? set this as the new head
  447.     tree_head = node;
  448.   } else if(!tree_current) {
  449.     // add another thing to global scope
  450.     tree_current = tree_head;
  451.     while(tree_current->next)
  452.       tree_current = tree_current->next;
  453.     tree_current->next = node;    
  454.   } else if(!tree_current->child) {
  455.     // there's no child yet, add one
  456.     tree_current->child = node;
  457.   } else {
  458.     // there's already a child, add a sibling
  459.     tree_current = tree_current->child;
  460.     while(tree_current->next)
  461.       tree_current = tree_current->next;
  462.     tree_current->next = node;
  463.   }
  464.   tree_current = node;
  465. }
  466.  
  467. // ----- SYNTACTICAL ANALYZER -----
  468.  
  469. // Moves onto the next token in teh program
  470. void next_token() {
  471.   if(!token_current) {
  472.     puts("Already at the end of the program");
  473.     exit(0);
  474.   }
  475.   token_current = token_current->next;
  476. }
  477.  
  478. enum {
  479.   NEEDED = 1, // mandatory token
  480.   OMIT   = 2, // don't include in syntax tree
  481.   TEST   = 4, // just test, don't actually accept the token
  482. };
  483.  
  484. // Accept a token from the program and move onto the next one
  485. int accept(int flags, ...) {
  486.   int passing = 0;
  487.   va_list argptr;
  488.   va_start(argptr, flags);
  489.  
  490.   // Loop through the list of acceptable token types
  491.   while(1) {
  492.     int value = va_arg(argptr,int);
  493.     if(value == -1) // -1 means end of list
  494.       break;
  495.     if(token_current->token_category == value)
  496.       passing = 1;
  497.   }
  498.   va_end(argptr);
  499.  
  500.   // If only testing, only return the passing variable
  501.   if(flags & TEST)
  502.     return passing;
  503.   // If it's needed, it's an error if it's not found
  504.   if(!passing && (flags & NEEDED))
  505.     error("Unexpected token, %s", token_strings[token_current->token_category][0]);
  506.   // If it's passing, accept it and get the next token
  507.   if(passing) {
  508.     if(!(flags & OMIT))
  509.       tree_child();
  510.     next_token();
  511.   }
  512.   return passing;
  513. }
  514.  
  515. void expression();
  516.  
  517. // Allow there to be an array index after the identifier
  518. void array_index() {
  519.   struct syntax_node *save = tree_current;
  520.   if(accept(0, t_lsquare, -1)) {
  521.     expression();
  522.     accept(NEEDED|OMIT, t_rsquare, -1);
  523.   }
  524.   tree_current = save;
  525. }
  526.  
  527. // Allow additional variables after this one, for as many commas there are
  528. void additional_variables() {
  529.   struct syntax_node *save = tree_current;
  530.   while(1) {
  531.     tree_current = save;
  532.     if(accept(OMIT, t_comma, -1)) {
  533.       accept(NEEDED, t_identifier, -1);
  534.       array_index();
  535.       continue;
  536.     }
  537.     accept(NEEDED|OMIT, t_end_statement, -1);
  538.     break;
  539.   }
  540.   tree_current = save;
  541. }
  542.  
  543. // Factor, can be any identifier, number, or string
  544. void factor() {
  545.   struct syntax_node *save = tree_current;
  546.   if(accept(0, t_identifier, -1)) {
  547.     array_index();
  548.     if(accept(0, t_lparen, -1)) { // function call
  549.       if(!accept(OMIT, t_rparen, -1)) {
  550.         do {
  551.           expression();
  552.         } while(accept(OMIT, t_comma, -1));
  553.         accept(NEEDED|OMIT, t_rparen, -1);
  554.       }
  555.     }
  556.   } else if(accept(0, t_integer, t_real, t_string, -1)) {
  557.  
  558.   } else if(accept(0, t_lparen, -1)) {
  559.     expression();
  560.     accept(NEEDED|OMIT, t_rparen, -1);
  561.   }
  562.   tree_current = save;
  563. }
  564.  
  565. // Allow multiplication
  566. void term() {
  567.   struct syntax_node *save = tree_current;
  568.  
  569.   // make a temporary node to attach the left hand side onto
  570.   struct syntax_node temp = {NULL, NULL, NULL};
  571.   tree_current = &temp;
  572.   factor();
  573.   tree_current = save;
  574.  
  575.   struct syntax_node *operator = NULL;
  576.   if(accept(0, t_muldiv, -1)) {
  577.     tree_add_child(tree_current, temp.child);
  578.     term();
  579.   } else {
  580.     tree_add_child(save, temp.child);
  581.   }
  582.   tree_current = save;
  583. }
  584.  
  585. // The most outer level, with addition
  586. void expression() {
  587.   struct syntax_node *save = tree_current;
  588.   accept(0, t_addsub, -1); // optional sign
  589.  
  590.   // make a temporary node to attach the left hand side onto
  591.   struct syntax_node temp = {NULL, NULL, NULL};
  592.   tree_current = &temp;
  593.   term();
  594.   tree_current = save;
  595.  
  596.   struct syntax_node *operator = NULL;
  597.   if(accept(0, t_addsub, -1)) {
  598.     tree_add_child(tree_current, temp.child);
  599.     expression();
  600.   } else {
  601.     tree_add_child(save, temp.child);
  602.   }
  603.   tree_current = save;
  604. }
  605.  
  606. // A comparison of some sort
  607. void condition() {
  608.   struct syntax_node *save = tree_current;
  609.  
  610.   // make a temporary node to attach the left hand side onto
  611.   struct syntax_node temp = {NULL, NULL, NULL};
  612.   tree_current = &temp;
  613.   expression();
  614.   tree_current = save;
  615.  
  616.   struct syntax_node *operator = NULL;
  617.   accept(NEEDED, t_logical, -1);
  618.   tree_add_child(tree_current, temp.child);
  619.   expression();
  620.   tree_current = save;
  621. }
  622.  
  623. // One statement of any kind
  624. void statement() {
  625.   struct syntax_node *save = tree_current;
  626.  
  627.   if(accept(0, t_const, -1)) {
  628.   // Constant definition
  629.      struct syntax_node *save_const = tree_current;
  630.      accept(NEEDED, t_bool, t_int, t_float, t_long, t_char, -1);
  631.      accept(NEEDED, t_identifier, -1);
  632.      accept(NEEDED|OMIT, t_assignment, -1);
  633.      expression();
  634.      accept(NEEDED|OMIT, t_end_statement, -1);
  635.   } else if(accept(0, t_bool, t_int, t_float, t_long, t_char, -1)) {
  636.   // Variable declarations inside the function
  637.      accept(NEEDED, t_identifier, -1);
  638.      array_index();
  639.      additional_variables();
  640.   } else if(accept(0, t_read, -1)) {
  641.   // Read one or more variables  
  642.      accept(NEEDED, t_identifier, -1);
  643.      additional_variables();
  644.   } else if(accept(0, t_identifier, -1)) {
  645.   // Assignment or function call
  646.      struct syntax_node *identifier = tree_current;
  647.  
  648.      // accept an array index if found
  649.      int left_is_array = 0;
  650.      struct syntax_node temp = {NULL, NULL, NULL};
  651.      if(accept(TEST, t_lsquare, -1)) {
  652.        tree_current = &temp;
  653.        left_is_array = 1;
  654.        array_index();
  655.        tree_current = identifier;
  656.      }
  657.      if(accept(0, t_assignment, -1)) { // assignment
  658.        struct syntax_node *assignment = tree_current;
  659.  
  660.        if(left_is_array)
  661.          assignment->child = temp.child;
  662.  
  663.        struct lexeme_token *identifier_token = identifier->token;
  664.        struct lexeme_token *assignment_token = assignment->token;
  665.  
  666.        // Swap the tokens
  667.        identifier->token = assignment_token;
  668.        assignment->token = identifier_token;
  669.  
  670.        tree_current = identifier;
  671.  
  672.        expression();
  673.      } else if(accept(0, t_lparen, -1)) { // function call
  674.        if(!accept(OMIT, t_rparen, -1)) {
  675.          // If there's arguments,
  676.          // read expressions until there's no more commas
  677.          do {
  678.            expression();
  679.          } while(accept(OMIT, t_comma, -1));
  680.          accept(NEEDED|OMIT, t_rparen, -1);
  681.        }
  682.        accept(NEEDED|OMIT, t_end_statement, -1);
  683.      }
  684.   } else if(accept(0, t_lcurly, -1)) {
  685.   // Multiple statements
  686.     while(!accept(OMIT, t_rcurly, -1))
  687.       statement();
  688.   } else if(accept(OMIT, t_end_statement, -1)) {
  689.   // Empty statement
  690.   } else if(accept(0, t_if, -1)) {
  691.     condition();
  692.     accept(NEEDED, t_then, -1);
  693.     statement();
  694.   } else if(accept(0, t_while, -1)) {
  695.     condition();
  696.     accept(NEEDED, t_do, -1);
  697.     statement();
  698.   } else if(accept(0, t_else, -1)) {
  699.     statement();
  700.   } else if(accept(0, t_return, -1)) {
  701.     expression();
  702.     accept(NEEDED|OMIT, t_end_statement, -1);
  703.   } else {
  704.     error("Bad token %s", token_print(token_current));
  705.   }
  706.  
  707.   tree_current = save;
  708. }
  709.  
  710. // Constant definition
  711. void constant() {
  712.   struct syntax_node *save = tree_current;
  713.   accept(NEEDED, t_bool, t_int, t_float, t_long, t_char, -1);
  714.   accept(NEEDED, t_identifier, -1);
  715.   accept(NEEDED|OMIT, t_assignment, -1);
  716.   expression();
  717.   accept(NEEDED|OMIT, t_end_statement, -1);
  718.   tree_current = save;
  719. }
  720.  
  721. // Definition of an identifier (variable or function)
  722. void identifier_definition() {
  723.   struct syntax_node *save = tree_current;
  724.  
  725.   // variable or function declaration
  726.   accept(NEEDED, t_identifier, -1); // name
  727.   struct syntax_node *function_save = tree_current;
  728.   if(accept(0, t_lparen, -1)) {
  729.   // function definition, start of variable list
  730.  
  731.     if(accept(0, t_void, -1)) {
  732.       // no parameters
  733.     } else {
  734.       struct syntax_node *parameter_save = tree_current;
  735.       do {
  736.         tree_current = parameter_save;
  737.         accept(NEEDED, t_bool, t_int, t_float, t_long, t_char, t_void, -1);
  738.         accept(NEEDED, t_identifier, -1);
  739.         tree_current = parameter_save;
  740.       } while(accept(OMIT, t_comma, -1)); // keep going if there's a comma
  741.     }
  742.     accept(NEEDED|OMIT, t_rparen, -1);
  743.  
  744.     tree_current = function_save;
  745.     statement();
  746.   } else {
  747.   // variable declarations
  748.     array_index();
  749.     additional_variables();
  750.   }
  751.  
  752.   tree_current = save;
  753. }
  754.  
  755. // Run the syntactical analyzer
  756. void syntactical_analyzer(struct lexeme_token *list) {
  757.   token_current = list;
  758.  
  759.   while(token_current) {
  760.     tree_current = NULL; // NULL because it's in the global space
  761.  
  762.     // Get a type name
  763.     if(accept(0, t_const, -1)) {
  764.       constant();
  765.     } else if(accept(NEEDED, t_bool, t_int, t_float, t_long, t_char, t_void, -1)) {
  766.       identifier_definition();
  767.     }
  768.   }
  769. }
  770.  
  771. // ----- MAIN PROGRAM -----
  772.  
  773. // Prints out a parse tree graphically
  774. void print_parse_tree(struct syntax_node *node, int level) {
  775.   while(node) {
  776.     for(int i=0; i<level; i++)
  777.       printf("   ");
  778.     printf(token_print(node->token));
  779.     putchar('\n');
  780.  
  781.     if(node->child)
  782.       print_parse_tree(node->child, level+1);
  783.     node = node->next;
  784.   }
  785. }
  786.  
  787. int main(int argc, char *argv[]) {
  788.   FILE *File = fopen("sample.txt", "rb");
  789.  
  790.   struct lexeme_token *list = lexical_analyzer(File);
  791.  
  792.   puts("Token list:");
  793.   for(token_current = token_list; token_current; token_current = token_current->next) {
  794.     puts(token_print(token_current));
  795.   }
  796.  
  797.   puts("\n\n\nSymbol table:");
  798.   for(struct symbol_data *symbol = symbol_table; symbol; symbol = symbol->next) {
  799.     printf("(%s, %s)\n", symbol->lexeme, token_strings[symbol->token_category][0]);
  800.   }
  801.  
  802.   syntactical_analyzer(list);
  803.  
  804.   puts("\n\n\nSyntax tree:");
  805.   print_parse_tree(tree_head, 0);
  806.  
  807.   fclose(File);
  808. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement