Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <string>
- #include <list>
- #include <cstring>
- #include <memory>
- #include <vector>
- #include <algorithm>
/// Builds a std::basic_string<charT> from a narrow character array such as a
/// string literal.
///
/// Each source `char` is converted to `charT` element by element; no encoding
/// conversion is performed. The last array element (the terminating '\0' of a
/// string literal) is not copied.
///
/// @tparam charT  Character type of the resulting string.
/// @tparam sz     Array length including the final element; deduced.
/// @param  str    Source character array.
/// @return The first `sz - 1` characters of `str` as a basic_string<charT>.
template<typename charT, std::uint32_t sz>
std::basic_string<charT> convert_to_basic_string(const char (& str)[sz]) {
    const char* const first = str;
    const char* const last = first + (sz - 1);  // stop before the final element
    std::basic_string<charT> converted(first, last);
    return converted;
}
/// Builds a std::basic_string<charT> from a narrow std::string.
///
/// Each `char` is converted to `charT` element by element; no encoding
/// conversion is performed.
///
/// @tparam charT  Character type of the resulting string.
/// @param  str    Source string.
/// @return A basic_string<charT> with the same length as `str`.
template<typename charT>
std::basic_string<charT> convert_to_basic_string(const std::string& str) {
    std::basic_string<charT> converted;
    converted.assign(str.begin(), str.end());
    return converted;
}
/// Polymorphic base class of all tokens.
///
/// Stores the token kind and the source range the token covers, and provides
/// a textual rendering that derived tokens extend with their own payload.
///
/// @tparam charT  Character type of the strings produced by to_string().
template<typename charT>
class IToken {
public:
    /// A pair of 32-bit counters locating one point in the input.
    typedef std::pair<std::uint32_t, std::uint32_t> Position;

    /// Kind of token. Deliberately an unscoped enum: derived classes refer to
    /// the enumerators through their own scope (e.g. `TokenError::ERROR`).
    enum TokenTag {
        NONE,
        ERROR,
        IDENT,
        DIRECTIVE,
        EOFT,
    };

    TokenTag _tag;                            ///< Kind of this token.
    std::pair<Position, Position> _position;  ///< (start, end) of the token.
    //AttrType _attribute;

    /// @param tag       Kind of the token.
    /// @param position  (start, end) positions of the token.
    IToken(const TokenTag tag, const std::pair<Position, Position>& position)
        : _tag(tag), _position(position) {}

    // Tokens are handled through base-class pointers, so destruction through
    // an IToken* must reach the derived destructor.
    virtual ~IToken() = default;

    // Declaring the destructor would otherwise suppress the implicit moves
    // and deprecate the implicit copies; keep the type fully copyable/movable.
    IToken(const IToken&) = default;
    IToken& operator=(const IToken&) = default;
    IToken(IToken&&) = default;
    IToken& operator=(IToken&&) = default;

    /// Renders the token as "<TAG> (a, b) - (c, d)", where (a, b) is the start
    /// position and (c, d) the end position.
    ///
    /// If `_tag` holds a value outside TokenTag the tag name is empty.
    virtual std::basic_string<charT> to_string() {
        std::string text;
        switch (_tag) {
            case NONE:      text = "NONE";      break;
            case ERROR:     text = "ERROR";     break;
            case IDENT:     text = "IDENT";     break;
            case DIRECTIVE: text = "DIRECTIVE"; break;
            case EOFT:      text = "EOF";       break;
        }

        const Position& start = _position.first;
        const Position& end = _position.second;
        text += " (";
        text += std::to_string(start.first);
        text += ", ";
        text += std::to_string(start.second);
        text += ") - (";
        text += std::to_string(end.first);
        text += ", ";
        text += std::to_string(end.second);
        text += ")";

        // Everything above is plain ASCII, so widening char by char is exact.
        return std::basic_string<charT>(text.begin(), text.end());
    }
};
- template<typename charT>
- class TokenError: public IToken<charT>{
- public:
- typedef std::pair<std::uint32_t, std::uint32_t> Position;
- std::basic_string<charT> error;
- TokenError(const std::pair<TokenError::Position, TokenError::Position>& position): IToken<charT>(TokenError::ERROR, position){}
- std::basic_string<charT> to_string() noexcept final{
- return IToken<charT>::to_string() + convert_to_basic_string<charT>(": ") + error;
- }
- };
- template<typename charT>
- class TokenIdent: public IToken<charT>{
- public:
- typedef std::pair<std::uint32_t, std::uint32_t> Position;
- int identifier_id;
- TokenIdent(const std::pair<TokenIdent::Position, TokenIdent::Position>& position): IToken<charT>(TokenIdent::IDENT, position){}
- std::basic_string<charT> to_string() noexcept final{
- return IToken<charT>::to_string() + convert_to_basic_string<charT>(": ") + convert_to_basic_string<charT>(std::to_string(identifier_id));
- }
- };
- template<typename charT>
- class TokenDirective: public IToken<charT>{
- public:
- typedef std::pair<std::uint32_t, std::uint32_t> Position;
- std::basic_string<charT> directive;
- TokenDirective(const std::pair<TokenDirective::Position, TokenDirective::Position>& position): IToken<charT>(TokenDirective::DIRECTIVE, position){}
- std::basic_string<charT> to_string() noexcept final{
- return IToken<charT>::to_string() + convert_to_basic_string<charT>(": ") + directive;
- }
- };
- template<typename charT>
- class TokenEOF: public IToken<charT>{
- public:
- typedef std::pair<std::uint32_t, std::uint32_t> Position;
- TokenEOF(const std::pair<TokenEOF::Position, TokenEOF::Position>& position): IToken<charT>(TokenEOF::EOFT, position){}
- };
- template<typename charT>
- class Lexer {
- #define EXPECT(x) {if(istream->peek() != x) return __error();}
- #define EXPECTF(func) {if(!func(istream->peek())) return __error();}
- typedef std::pair<std::uint32_t, std::uint32_t> Position;
- public:
- std::basic_istream<charT>* istream;
- std::list<charT> _data;
- Position _current_position;
- std::vector<std::basic_string<charT>> _identifiers_table;
- public:
- Lexer(std::basic_istream<charT>* input_stream) { istream = input_stream; }
- std::shared_ptr<IToken<charT>> nextToken() {
- charT current_char = istream->peek();
- while (current_char == '\n' || current_char == ' ' || current_char == '\t'){
- if (current_char == '\n'){
- _current_position.first++;
- _current_position.second = 0;
- }else if (current_char == ' '){
- _current_position.second++;
- }else if (current_char == '\t'){
- _current_position.second += 4;
- }
- _data.push_back(charT(istream->get()));
- current_char = istream->peek();
- }
- if (current_char == '$'){ return __parse_directive(); }
- else if (isalpha(current_char) && isupper(current_char)){ return __parse_identifier(); }
- else if (!istream->eof()){ return __error(); }
- return std::shared_ptr<TokenEOF<charT>>(new TokenEOF<charT>({_current_position, _current_position}));
- }
- private:
- std::shared_ptr<IToken<charT>> __parse_directive() {
- std::shared_ptr<TokenDirective<charT>> result(new TokenDirective<charT>({_current_position, Position()}));
- EXPECT('$')
- _data.push_back(charT(istream->get()));
- result->directive.push_back(_data.back());
- _current_position.second++;
- EXPECTF([this](const charT ch) { return !istream->eof(); })
- EXPECTF([](const charT ch) { return isalpha(ch) && isupper(ch); })
- while (isalpha(istream->peek()) && isupper(istream->peek())){
- _data.push_back(charT(istream->get()));
- result->directive.push_back(_data.back());
- _current_position.second++;
- }
- result->_position.second = _current_position;
- return result;
- }
- std::shared_ptr<IToken<charT>> __parse_identifier() {
- std::shared_ptr<TokenIdent<charT>> result(new TokenIdent<charT>({_current_position, Position()}));
- EXPECTF([](const charT ch) { return isalpha(ch) && isupper(ch); })
- _data.push_back(charT(istream->get()));
- _current_position.second++;
- std::basic_string<charT> ident;
- while (istream->peek() != ' ' && istream->peek() != '\n' && istream->peek() != '\t' && istream->peek() != '$' &&
- !istream->eof()){
- _data.push_back(charT(istream->get()));
- ident.push_back(_data.back());
- _current_position.second++;
- }
- if(auto it = std::find(_identifiers_table.begin(), _identifiers_table.end(), ident); it == _identifiers_table.end()){
- result->identifier_id = _identifiers_table.size();
- _identifiers_table.push_back(std::move(ident));
- }else{
- result->identifier_id = it - _identifiers_table.begin();
- }
- result->_position.second = _current_position;
- return result;
- }
- std::shared_ptr<TokenError<charT>> __error() {
- std::shared_ptr<TokenError<charT>> result(new TokenError<charT>({_current_position, _current_position}));
- while (istream->peek() != ' ' && istream->peek() != '\n' && istream->peek() != '\t' && !istream->eof()){
- _data.push_back(charT(istream->get()));
- result->error.push_back(_data.back());
- _current_position.second++;
- }
- //or skip 2 chars and continue
- //_data.push_back(charT(istream->get()));
- //_current_position.second++;
- //_data.push_back(charT(istream->get()));
- //_current_position.second++;
- result->_position.second = _current_position;
- return result;
- }
- };
- int main() {
- Lexer lexer(&std::wcin);
- for (auto token = lexer.nextToken(); token->_tag != IToken<wchar_t>::TokenTag::EOFT; token = lexer.nextToken())
- std::wcout << token->to_string() << '\n';
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement