Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
def isWhiteSpace(s):
    """Return True if *s* is a string that starts with a whitespace character.

    Only the first character is examined (``re.match`` anchors at the start).
    ``None`` is reported as "not whitespace" rather than raising
    ``TypeError``, so the predicate is safe to call on an end-of-input marker.
    """
    return s is not None and re.match(r'\s', s) is not None
def isDigit(s):
    """Return True if *s* is a string that starts with an ASCII digit 0-9.

    ``None`` yields False instead of raising ``TypeError``. The lexer passes
    the result of ``advance()`` straight into this predicate, and that result
    is ``None`` once the input is exhausted.
    """
    return s is not None and re.match(r'[0-9]', s) is not None
def isQuote(s):
    """Return True if *s* is exactly the double-quote character ``"``.

    Any other value, including ``None``, compares unequal and yields False.
    """
    return s == '"'
def isPunc(s):
    """Return True if *s* is one of the punctuation characters ``: } { , [ ]``.

    The length check matters: a plain ``s in ':}{,[]'`` is a substring test,
    which would also accept the empty string and runs such as ``'}{'``.
    Non-string values (for example ``None``) yield False.
    """
    return isinstance(s, str) and len(s) == 1 and s in ':}{,[]'
def isKeyword(s):
    """Return True if *s* is one of the literal words true, false or null.

    The comparison is exact and case-sensitive.
    """
    return s in ('true', 'false', 'null')
def isAlpha(s):
    """Return True if *s* is a string that starts with an ASCII letter.

    ``None`` yields False instead of raising ``TypeError``. The lexer passes
    the result of ``advance()`` straight into this predicate, and that result
    is ``None`` once the input is exhausted.
    """
    return s is not None and re.match(r'[a-zA-Z]', s) is not None
class Lexer:
    """Character-by-character tokenizer for JSON text.

    Tokens are dicts of the form ``{"type": ..., "val": ...}``:

    * ``num`` - a run of digits with an optional ``.`` and fraction digits;
      ``val`` is the matched text.
    * ``str`` - ``val`` is the raw text between the double quotes. Backslash
      escapes are kept verbatim (not decoded), but an escaped quote no longer
      terminates the string.
    * ``kw``  - one of ``true`` / ``false`` / ``null``.
    * punctuation - ``type`` is the character itself and ``val`` is ``''``.

    NOTE: a leading ``-`` and exponent notation are not recognized as part of
    a number; they fall through to the "Unrecognized" errors.
    """

    def __init__(self, json):
        """Store the input text *json* and reset the scanning state."""
        self.json = json
        self.i = 0          # index of the character currently examined
        self.c = ''         # last character returned by peek()
        self.tokens = []    # tokens produced so far

    def end(self):
        """Return True once the cursor has moved past the last character."""
        return self.i >= len(self.json)

    def addToken(self, tp, val=''):
        """Append a token of type *tp* with value *val* to the token list."""
        self.tokens.append({"type": tp, "val": val})

    def peek(self):
        """Return the current character, or None at end of input.

        ``self.c`` is refreshed only when a character is available, so it
        keeps its previous value once the input is exhausted.
        """
        if self.end():
            return None
        self.c = self.json[self.i]
        return self.c

    def advance(self):
        """Move the cursor one character forward and return peek()."""
        self.i += 1
        return self.peek()

    def lex(self):
        """Tokenize the whole input and return the list of tokens.

        Raises:
            Exception: on an unknown character, an unknown bare word, or a
                string that is missing its closing quote.
        """
        while not self.end():
            ch = self.peek()
            if isWhiteSpace(ch):
                self.advance()
            elif isDigit(ch):
                self._lex_number()
            elif isQuote(ch):
                self._lex_string()
            elif isPunc(ch):
                self.addToken(ch)
                self.advance()
            elif isAlpha(ch):
                self._lex_word()
            else:
                self.error('Unrecognized token ' + ch)
        return self.tokens

    def error(self, message):
        """Raise an Exception carrying *message* with the lexer prefix."""
        raise Exception('[LEXER]:[ERROR] ' + message)

    def _take_while(self, predicate, out):
        """Consume following characters while *predicate* holds.

        Each accepted character is appended to *out*. Returns the first
        rejected character, or None when the input ran out. The explicit
        None check keeps end of input from reaching the predicates.
        """
        ch = self.advance()
        while ch is not None and predicate(ch):
            out.append(ch)
            ch = self.advance()
        return ch

    def _lex_number(self):
        """Scan digits with an optional fractional part; emit a num token."""
        parts = [self.c]
        stop = self._take_while(isDigit, parts)
        if stop == '.':
            parts.append(stop)
            self._take_while(isDigit, parts)
        self.addToken('num', ''.join(parts))

    def _lex_string(self):
        """Scan a double-quoted string and emit a str token."""
        parts = []
        ch = self.advance()  # step past the opening quote
        while ch is not None and not isQuote(ch):
            parts.append(ch)
            if ch == '\\':
                # Keep the escaped character with its backslash so that an
                # escaped quote does not end the string.
                ch = self.advance()
                if ch is None:
                    break
                parts.append(ch)
            ch = self.advance()
        if ch is None:
            # Without this check the scan would never stop: at end of input
            # advance() returns None, which is never a quote.
            self.error('Unterminated string')
        self.addToken('str', ''.join(parts))
        self.advance()  # step past the closing quote

    def _lex_word(self):
        """Scan a run of letters and emit a kw token if it is a keyword."""
        parts = [self.c]
        self._take_while(isAlpha, parts)
        word = ''.join(parts)
        if not isKeyword(word):
            self.error('Unrecognized keyword \'' + word + '\'. Expected true | false | null')
        self.addToken('kw', word)
Add Comment
Please, Sign In to add comment