Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// GSTokenParser — a simple tokenizer for GreyScript-like source files.
// Author: LINKI, Version: 0.1
// Usage: tokenizer [SourceCodePath]
if params.len < 1 then exit("Example: tokenizer [SourceCodePath]")
//
// GeneralClass
//
// GS is the root namespace map holding every component defined below.
GS = { }
- //
- // TokenType
- //
// Enum-like registry: maps NAME -> ordinal and ordinal -> NAME for
// every token type, so ordinals can be rendered back to readable names.
GS.TokenType = { }
// Register a new token type under the next free ordinal (both directions).
GS.TokenType.add = function(type)
	ordinal = self.index
	self[type] = ordinal     // e.g. "NUM" -> 0
	self[ordinal] = type     // e.g. 0 -> "NUM"
	self.index = ordinal + 1
end function
// One-shot initializer: registers every token type. Registration ORDER
// determines each type's ordinal, so the lists below must not be reordered.
GS.TokenType.ctor = function()
	self.index = 0
	names = []
	// Basic literals: numbers, strings, identifiers, true/false/null
	names = names + ["NUM", "STR", "WORD", "TRUE", "FALSE", "NULL"]
	// Statement keywords (REF is the @ reference operator)
	names = names + ["IF", "THEN", "ELSE", "WHILE", "FOR", "IN", "FUNC", "SELF", "END", "RET", "REF"]
	// Math operators: + - * / % ^
	names = names + ["ADD", "SUB", "MUL", "DIV", "PNT", "CRT"]
	// Logic and comparison: ! && || = < > != == <= >=
	names = names + ["NOT", "AND", "OR", "EQ", "LESS", "GRT", "NOTEQ", "EQEQ", "LESSEQ", "GRTEQ"]
	// Paired brackets: ( ) [ ] { }
	names = names + ["LPNT", "RPNT", "LBKT", "RBKT", "LBRC", "RBRC"]
	// Punctuation: ; : , . and the end-of-file sentinel
	names = names + ["SEMI", "COLON", "COMMA", "DOT", "EOF"]
	for name in names
		self.add(name)
	end for
	// Drop the bookkeeping members so the map holds only the enum entries.
	self.remove("index")
	self.remove("ctor")
end function
GS.TokenType.ctor()//Initialize
- //
- // Token
- //
//
// Token
//
// A single lexed token: a numeric "type" plus an optional literal "value".
GS.Token = { }
// Initialize this token. "value" is only stored when one was supplied,
// so valueless tokens (operators, keywords) stay as small as possible.
GS.Token.ctor = function(type, value = null)
	self.type = type
	if value != null then self.value = value
	self.remove("ctor") // one-shot initializer
end function
// Factory: build a token and detach it from the prototype (__isa removed),
// so the result is a plain data map with only "type" (and maybe "value").
GS.Token.create = function(type, value = null)
	token = new self
	token.ctor(type, value)
	token.remove("__isa")
	return token
end function
// True when the token carries a literal payload (NUM/STR/WORD tokens).
GS.Token.hasValue = function(token)
	if token.hasIndex("value") then return true
	return false
end function
// Render a token as "TYPENAME" or "TYPENAME:value", using the reverse
// ordinal->name mapping kept in GS.TokenType.
GS.Token.toString = function(token)
	name = GS.TokenType[token.type]
	if not self.hasValue(token) then return name
	return name + ":" + token.value
end function
- //
- // StatementsTable
- //
//
// StatementsTable
//
// Maps reserved words (as they appear in source) to token-type ordinals.
GS.StatementsTable = { }
GS.StatementsTable.ctor = function()
	tt = GS.TokenType
	self["if"] = tt.IF
	self["then"] = tt.THEN
	self["else"] = tt.ELSE
	self["while"] = tt.WHILE
	self["for"] = tt.FOR
	self["in"] = tt.IN
	self["function"] = tt.FUNC
	self["self"] = tt.SELF
	self["end"] = tt.END
	self["return"] = tt.RET
	self["not"] = tt.NOT
	self["and"] = tt.AND
	self["or"] = tt.OR
	self["true"] = tt.TRUE
	self["false"] = tt.FALSE
	self["null"] = tt.NULL
	self.remove("ctor") // one-shot initializer
end function
GS.StatementsTable.ctor()//Initialize
- //
- // SymbolsTable
- //
//
// SymbolsTable
//
// Maps operator/punctuation strings (one- and two-character) to token-type
// ordinals. The parser checks two-character entries before one-character ones.
GS.SymbolsTable = { }
GS.SymbolsTable.ctor = function()
	tt = GS.TokenType
	// Math
	self["+"] = tt.ADD
	self["-"] = tt.SUB
	self["*"] = tt.MUL
	self["/"] = tt.DIV
	self["%"] = tt.PNT
	self["^"] = tt.CRT
	self["@"] = tt.REF
	// Logic / comparison
	self["!"] = tt.NOT
	self["&&"] = tt.AND
	self["||"] = tt.OR
	self["="] = tt.EQ
	self["<"] = tt.LESS
	self[">"] = tt.GRT
	self["!="] = tt.NOTEQ
	self["=="] = tt.EQEQ
	self["<="] = tt.LESSEQ
	self[">="] = tt.GRTEQ
	// Brackets
	self["("] = tt.LPNT
	self[")"] = tt.RPNT
	self["["] = tt.LBKT
	self["]"] = tt.RBKT
	self["{"] = tt.LBRC
	self["}"] = tt.RBRC
	// Punctuation
	self[";"] = tt.SEMI
	self[":"] = tt.COLON
	self[","] = tt.COMMA
	self["."] = tt.DOT
	self.remove("ctor") // one-shot initializer
end function
GS.SymbolsTable.ctor()//Initialize
- //
- // TokenParser
- //
//
// TokenParser
//
GS.TokenParser = { }
// Character constants used throughout the scanner.
GS.TokenParser.NONE = ""          // empty string, used as an accumulator seed
GS.TokenParser.SPACE = " "
GS.TokenParser.USCORE = "_"       // allowed in identifiers
GS.TokenParser.DOT = "."          // decimal point inside number literals
GS.TokenParser.COMMENT = "//"     // line-comment opener
GS.TokenParser.QUOTE = """"       // doubled quote escape = one " character
GS.TokenParser.NLINE = char(10)   // line feed
GS.TokenParser.TAB = char(9)
GS.TokenParser.EOF = char(0)      // synthetic end-of-input sentinel from peek()
GS.TokenParser.nums = "1234567890"
GS.TokenParser.letters = "abcdefghijklmnopqrstuvwxyz"
// Tokenize the whole source string; returns a list of GS.Token maps.
// Dispatches on the current character (and the two-character lookahead
// for operators like == and //). Any character that starts no token —
// whitespace, newlines, CR, or unrecognized symbols — is skipped.
GS.TokenParser.parse = function(code)
	self.curr = 0
	self.code = code
	tokens = []
	while self.curr < self.code.len
		ch1 = self.peek(0)
		ch2 = self.peek(1)
		ch12 = ch1 + ch2
		if self.isNumber(ch1) then
			tokens.push(self.parseNumber())
		else if self.isLetter(ch1) or ch1 == self.USCORE then
			tokens.push(self.parseWord())
		else if ch1 == self.QUOTE then
			tokens.push(self.parseString())
		else if ch12 == self.COMMENT then
			self.parseCommentLine()
		else if GS.SymbolsTable.hasIndex(ch12) then
			// Two-character operators must win over their one-char prefixes.
			self.AddCurr(2)
			tokens.push(GS.Token.create(GS.SymbolsTable[ch12]))
		else if GS.SymbolsTable.hasIndex(ch1) then
			self.AddCurr()
			tokens.push(GS.Token.create(GS.SymbolsTable[ch1]))
		else
			// BUG FIX: was "else if isWhitespace(...) or ..." with no final
			// else; always advancing here guarantees the loop terminates even
			// on characters no branch recognizes (e.g. CR or stray symbols).
			self.AddCurr()
		end if
	end while
	return tokens
end function
- //
- // ParseFunctions
- //
// Scan an integer or decimal literal starting at the current position
// and return a NUM token whose value is the numeric result.
GS.TokenParser.parseNumber = function()
	buff = self.get()
	seenDot = false
	while true
		ch = self.peek()
		if not self.isNumber(ch) then
			if ch == self.DOT and not seenDot then
				seenDot = true // allow exactly one decimal point
			else
				break
			end if
		end if
		buff = buff + ch
		self.AddCurr()
	end while
	// BUG FIX: was buff.to_int, which cannot convert decimals like "9.9"
	// even though the scanner above deliberately accepts them; val handles
	// both integer and decimal strings.
	return GS.Token.create(GS.TokenType.NUM, buff.val)
end function
// Scan a quoted string literal. A doubled quote ("") inside the literal
// is the escape for one " character. The statement ORDER below matters:
// the closing-quote test must run before the escape test.
GS.TokenParser.parseString = function()
	self.AddCurr(1) // consume the opening "
	buff = self.NONE
	while true
		ch = self.get()
		chNext = self.peek()
		// A lone quote closes the string.
		if ch == self.QUOTE and chNext != self.QUOTE then break
		// A doubled quote: skip the second one, then fall through so the
		// quote character itself is appended to the buffer.
		if ch == self.QUOTE and chNext == self.QUOTE then self.AddCurr()
		// Strings may not span lines or run past end of input.
		if self.isNewLine(ch) or self.isEOF(ch) then exit("Expected - '""'")
		buff = buff + ch
	end while
	return GS.Token.create(GS.TokenType.STR, buff)
end function
// Scan an identifier or keyword: letters, digits and underscores.
// Reserved words (per GS.StatementsTable) become their dedicated token
// type; everything else is a WORD token carrying the identifier text.
GS.TokenParser.parseWord = function()
	buff = self.get()
	while true
		ch = self.peek()
		isWordChar = self.isLetterOrNumber(ch) or ch == self.USCORE
		if not isWordChar then break
		buff = buff + ch
		self.AddCurr()
	end while
	if GS.StatementsTable.hasIndex(buff) then return GS.Token.create(GS.StatementsTable[buff])
	return GS.Token.create(GS.TokenType.WORD, buff)
end function
// Consume a "//" line comment: skip the two slashes, then everything up
// to and including the next newline (or end of input). Produces no token.
GS.TokenParser.parseCommentLine = function()
	self.AddCurr(2)
	ch = self.get()
	while not self.isNewLine(ch) and not self.isEOF(ch)
		ch = self.get()
	end while
end function
- //
- // CharFunctions
- //
// True for space, tab, carriage return, or the empty string.
// BUG FIX: the original "ch == self.SPACE or self.TAB or not ch" only
// compared the first operand — the bare non-empty string self.TAB is
// always truthy, so EVERY character counted as whitespace. CR (char 13)
// is also accepted now so Windows line endings are treated as whitespace.
GS.TokenParser.isWhitespace = function(ch)
	return ch == self.SPACE or ch == self.TAB or ch == char(13) or not ch
end function
// True only for the synthetic end-of-input sentinel (char(0)) that
// peek() returns past the end of the source.
GS.TokenParser.isEOF = function(ch)
	if ch == self.EOF then return true
	return false
end function
// True for a line-feed character.
GS.TokenParser.isNewLine = function(ch)
	if ch == self.NLINE then return true
	return false
end function
// True for an identifier body character: letter or digit.
GS.TokenParser.isLetterOrNumber = function(ch)
	if self.isLetter(ch) then return true
	return self.isNumber(ch)
end function
// Case-insensitive a-z test.
GS.TokenParser.isLetter = function(ch)
	return self.letters.indexOf(ch.lower()) != null
end function
// Digit 0-9 test.
GS.TokenParser.isNumber = function(ch)
	return self.nums.indexOf(ch) != null
end function
- //
- // ParseUtils
- //
//
// ParseUtils
//
// Return the character at offset s from the cursor, then advance the
// cursor by ONE. NOTE(review): with s > 0 the cursor still moves only
// one step; every caller in this file uses s = 0, so this is harmless —
// confirm before calling with a nonzero offset.
GS.TokenParser.get = function(s = 0)
	result = self.peek(s)
	self.AddCurr()
	return result
end function
// Look at the character s positions ahead without consuming anything;
// returns the EOF sentinel when the position is past the end of source.
GS.TokenParser.peek = function(s = 0)
	pos = self.curr + s
	if self.isRange(pos) then return self.code[pos]
	return self.EOF
end function
// True while index i is inside the source string.
GS.TokenParser.isRange = function(i)
	return i < self.code.len
end function
// Advance the cursor by i characters (one by default).
GS.TokenParser.AddCurr = function(i = 1)
	self.curr = self.curr + i
end function
// --- Entry point: read the source file, tokenize, write <name>_tokens.txt ---
pc = get_shell().host_computer
inputFilePath = params[0]
inputFile = pc.File(inputFilePath)
// BUG FIX: File() returns null for a missing path; fail with a clear
// message instead of crashing on a null dereference below.
if not inputFile then exit("Input file not found - '" + inputFilePath + "'")
code = inputFile.content
if code == null then exit("Unable to read file content - '" + inputFilePath + "'")
print("Parsing...")
st = time()
tokens = GS.TokenParser.parse(code)
et = time()
print("EndParse")
print("CountTokens: " + tokens.len)
print("ElapsedTime: " + (et - st))
// The token dump is written next to the input file.
outFolder = parent_path(inputFile.path)
outName = inputFile.name + "_tokens.txt"
outPath = outFolder + "/" + outName
outFile = pc.File(outPath)
if not outFile then
	// Output file does not exist yet: create it, then re-open.
	pc.touch(outFolder, outName)
	outFile = pc.File(outPath)
end if
if not outFile then exit("Out file error :/")
content = ""
for token in tokens
	content = content + GS.Token.toString(token) + "\n"
end for
outFile.set_content(content)
print("Tokens saved to - '" + outFile.path + "'")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement