Advertisement
EWTD

Untitled

Feb 27th, 2024
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.10 KB | None | 0 0
  #include <algorithm>
  #include <cctype>
  #include <cstdint>
  #include <cstring>
  #include <iostream>
  #include <list>
  #include <memory>
  #include <string>
  #include <utility>
  #include <vector>
  8.  
  /// Converts a narrow character array (typically a string literal) into a
  /// std::basic_string of the requested character type.
  ///
  /// @tparam charT  Character type of the returned string.
  /// @tparam sz     Extent of the array, counting its last element.
  /// @param  str    Source array; its last element (the terminating '\0' of a
  ///                literal) is dropped, every other element is converted to charT.
  /// @return A string of length sz - 1.
  template<typename charT, std::uint32_t sz>
  std::basic_string<charT> convert_to_basic_string(const char (& str)[sz]) {
      const char* const first = str;
      const char* const last = first + (sz - 1);  // stop before the final element
      std::basic_string<charT> converted;
      converted.assign(first, last);
      return converted;
  }
  13.  
  /// Converts a narrow std::string into a std::basic_string of the requested
  /// character type, one character at a time.
  ///
  /// @tparam charT  Character type of the returned string.
  /// @param  str    Source string; each char is converted to charT individually
  ///                (no multi-byte decoding is performed).
  /// @return A string with the same number of characters as str.
  template<typename charT>
  std::basic_string<charT> convert_to_basic_string(const std::string& str) {
      std::basic_string<charT> converted;
      converted.reserve(str.size());
      for (const char narrow : str) {
          converted.push_back(static_cast<charT>(narrow));
      }
      return converted;
  }
  18.  
  19. template<typename charT>
  20. class IToken {
  21. public:
  22. typedef std::pair<std::uint32_t, std::uint32_t> Position;
  23. enum TokenTag {
  24. NONE,
  25. ERROR,
  26. IDENT,
  27. DIRECTIVE,
  28. EOFT,
  29. };
  30. TokenTag _tag;
  31. std::pair<Position, Position> _position;
  32. //AttrType _attribute;
  33. IToken(const TokenTag tag, const std::pair<Position, Position>& position) {
  34. _tag = tag;
  35. _position = position;
  36. }
  37.  
  38. virtual std::basic_string<charT> to_string(){
  39. std::string tag_str;
  40. switch (_tag){
  41. case IToken<charT>::TokenTag::NONE:
  42. tag_str = "NONE";
  43. break;
  44. case IToken<charT>::TokenTag::ERROR:
  45. tag_str = "ERROR";
  46. break;
  47. case IToken<charT>::TokenTag::IDENT:
  48. tag_str = "IDENT";
  49. break;
  50. case IToken<charT>::TokenTag::DIRECTIVE:
  51. tag_str = "DIRECTIVE";
  52. break;
  53. case IToken<charT>::TokenTag::EOFT:
  54. tag_str = "EOF";
  55. break;
  56. };
  57. auto position = convert_to_basic_string<charT>(std::string("(" + std::to_string(_position.first.first) + ", "
  58. + std::to_string(_position.first.second) + ") - ("
  59. + std::to_string(_position.second.first) + ", "
  60. + std::to_string(_position.second.second) + ")"));
  61. return convert_to_basic_string<charT>(tag_str) + charT(' ') + position;
  62. }
  63. };
  64.  
  65. template<typename charT>
  66. class TokenError: public IToken<charT>{
  67. public:
  68. typedef std::pair<std::uint32_t, std::uint32_t> Position;
  69. std::basic_string<charT> error;
  70. TokenError(const std::pair<TokenError::Position, TokenError::Position>& position): IToken<charT>(TokenError::ERROR, position){}
  71. std::basic_string<charT> to_string() noexcept final{
  72. return IToken<charT>::to_string() + convert_to_basic_string<charT>(": ") + error;
  73. }
  74. };
  75.  
  76. template<typename charT>
  77. class TokenIdent: public IToken<charT>{
  78. public:
  79. typedef std::pair<std::uint32_t, std::uint32_t> Position;
  80. int identifier_id;
  81. TokenIdent(const std::pair<TokenIdent::Position, TokenIdent::Position>& position): IToken<charT>(TokenIdent::IDENT, position){}
  82. std::basic_string<charT> to_string() noexcept final{
  83. return IToken<charT>::to_string() + convert_to_basic_string<charT>(": ") + convert_to_basic_string<charT>(std::to_string(identifier_id));
  84. }
  85. };
  86. template<typename charT>
  87. class TokenDirective: public IToken<charT>{
  88. public:
  89. typedef std::pair<std::uint32_t, std::uint32_t> Position;
  90. std::basic_string<charT> directive;
  91. TokenDirective(const std::pair<TokenDirective::Position, TokenDirective::Position>& position): IToken<charT>(TokenDirective::DIRECTIVE, position){}
  92. std::basic_string<charT> to_string() noexcept final{
  93. return IToken<charT>::to_string() + convert_to_basic_string<charT>(": ") + directive;
  94. }
  95. };
  96. template<typename charT>
  97. class TokenEOF: public IToken<charT>{
  98. public:
  99. typedef std::pair<std::uint32_t, std::uint32_t> Position;
  100. TokenEOF(const std::pair<TokenEOF::Position, TokenEOF::Position>& position): IToken<charT>(TokenEOF::EOFT, position){}
  101. };
  102.  
  103. template<typename charT>
  104. class Lexer {
  105. #define EXPECT(x) {if(istream->peek() != x) return __error();}
  106. #define EXPECTF(func) {if(!func(istream->peek())) return __error();}
  107. typedef std::pair<std::uint32_t, std::uint32_t> Position;
  108. public:
  109. std::basic_istream<charT>* istream;
  110. std::list<charT> _data;
  111. Position _current_position;
  112. std::vector<std::basic_string<charT>> _identifiers_table;
  113. public:
  114. Lexer(std::basic_istream<charT>* input_stream) { istream = input_stream; }
  115.  
  116. std::shared_ptr<IToken<charT>> nextToken() {
  117. charT current_char = istream->peek();
  118. while (current_char == '\n' || current_char == ' ' || current_char == '\t'){
  119. if (current_char == '\n'){
  120. _current_position.first++;
  121. _current_position.second = 0;
  122. }else if (current_char == ' '){
  123. _current_position.second++;
  124. }else if (current_char == '\t'){
  125. _current_position.second += 4;
  126. }
  127. _data.push_back(charT(istream->get()));
  128. current_char = istream->peek();
  129. }
  130. if (current_char == '$'){ return __parse_directive(); }
  131. else if (isalpha(current_char) && isupper(current_char)){ return __parse_identifier(); }
  132. else if (!istream->eof()){ return __error(); }
  133. return std::shared_ptr<TokenEOF<charT>>(new TokenEOF<charT>({_current_position, _current_position}));
  134. }
  135.  
  136. private:
  137. std::shared_ptr<IToken<charT>> __parse_directive() {
  138. std::shared_ptr<TokenDirective<charT>> result(new TokenDirective<charT>({_current_position, Position()}));
  139. EXPECT('$')
  140. _data.push_back(charT(istream->get()));
  141. result->directive.push_back(_data.back());
  142. _current_position.second++;
  143. EXPECTF([this](const charT ch) { return !istream->eof(); })
  144. EXPECTF([](const charT ch) { return isalpha(ch) && isupper(ch); })
  145. while (isalpha(istream->peek()) && isupper(istream->peek())){
  146. _data.push_back(charT(istream->get()));
  147. result->directive.push_back(_data.back());
  148. _current_position.second++;
  149. }
  150. result->_position.second = _current_position;
  151. return result;
  152. }
  153.  
  154. std::shared_ptr<IToken<charT>> __parse_identifier() {
  155. std::shared_ptr<TokenIdent<charT>> result(new TokenIdent<charT>({_current_position, Position()}));
  156. EXPECTF([](const charT ch) { return isalpha(ch) && isupper(ch); })
  157. _data.push_back(charT(istream->get()));
  158. _current_position.second++;
  159. std::basic_string<charT> ident;
  160. while (istream->peek() != ' ' && istream->peek() != '\n' && istream->peek() != '\t' && istream->peek() != '$' &&
  161. !istream->eof()){
  162. _data.push_back(charT(istream->get()));
  163. ident.push_back(_data.back());
  164. _current_position.second++;
  165. }
  166. if(auto it = std::find(_identifiers_table.begin(), _identifiers_table.end(), ident); it == _identifiers_table.end()){
  167. result->identifier_id = _identifiers_table.size();
  168. _identifiers_table.push_back(std::move(ident));
  169. }else{
  170. result->identifier_id = it - _identifiers_table.begin();
  171. }
  172. result->_position.second = _current_position;
  173. return result;
  174. }
  175.  
  176. std::shared_ptr<TokenError<charT>> __error() {
  177. std::shared_ptr<TokenError<charT>> result(new TokenError<charT>({_current_position, _current_position}));
  178. while (istream->peek() != ' ' && istream->peek() != '\n' && istream->peek() != '\t' && !istream->eof()){
  179. _data.push_back(charT(istream->get()));
  180. result->error.push_back(_data.back());
  181. _current_position.second++;
  182. }
  183. //or skip 2 chars and continue
  184. //_data.push_back(charT(istream->get()));
  185. //_current_position.second++;
  186. //_data.push_back(charT(istream->get()));
  187. //_current_position.second++;
  188. result->_position.second = _current_position;
  189. return result;
  190. }
  191. };
  192. int main() {
  193. Lexer lexer(&std::wcin);
  194. for (auto token = lexer.nextToken(); token->_tag != IToken<wchar_t>::TokenTag::EOFT; token = lexer.nextToken())
  195. std::wcout << token->to_string() << '\n';
  196. return 0;
  197. }
  198.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement