anechka_ne_plach

part of parser

Nov 14th, 2021
195
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 9.75 KB | None | 0 0
  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>
  #include <istream>
  #include <memory>
  #include <optional>
  #include <random>
  #include <sstream>
  #include <stdexcept>
  #include <string>
  #include <typeinfo>
  #include <utility>
  #include <variant>
  #include <vector>

  #include "../../Library/Application Support/JetBrains/CLion2021.2/scratches/tokenizer.h"
  12.  
  /// Polymorphic root of the parsed-object hierarchy.
  ///
  /// Carries no data of its own: it only supplies a virtual destructor, so
  /// derived nodes can be destroyed through an Object pointer, and
  /// shared_from_this() via std::enable_shared_from_this.
  struct Object : std::enable_shared_from_this<Object> {
      virtual ~Object() = default;
  };
  17.  
  18. class Number : public Object {
  19.     int64_t value_;
  20. public:
  21.     Number(int value) : value_(value) {
  22.     }
  23.     int GetValue() const {
  24.         return value_;
  25.     }
  26. };
  27.  
  28. class Symbol : public Object {
  29.     std::string s_;
  30. public:
  31.     Symbol(const std::string& s) : s_(s) {
  32.     }
  33.     const std::string& GetName() const {
  34.         return s_;
  35.     }
  36. };
  37.  
  38. class Cell : public Object {
  39.     std::shared_ptr<Object> head_ = nullptr;
  40.     std::shared_ptr<Object> tail_ = nullptr;
  41. public:
  42.     Cell(auto h, auto t) {
  43.         head_ = h;
  44.         tail_ = t;
  45.     }
  46.     std::shared_ptr<Object> GetFirst() const {
  47.         return head_;
  48.     }
  49.     std::shared_ptr<Object> GetSecond() const {
  50.         return tail_;
  51.     }
  52. };
  53.  
  54. template <class T>
  55. std::shared_ptr<T> As(const std::shared_ptr<Object>& obj) {
  56.     return std::dynamic_pointer_cast<T>(obj);
  57. }
  58.  
  59. template <class T>
  60. bool Is(const std::shared_ptr<Object>& obj) {
  61.     if (obj == nullptr) {
  62.         std::cerr << "чекни мать" << std::endl;
  63.         return false;
  64.     }
  65.     return (typeid(T) == typeid(*obj));
  66. }
  67.  
  68. std::shared_ptr<Object> Read(Tokenizer* tokenizer);
  69.  
  70. std::shared_ptr<Cell> ReadList(Tokenizer* tokenizer) {
  71.     if (tokenizer->IsEnd()) {
  72.         throw SyntaxError("nothing more to parse");
  73.     }
  74.     auto t = tokenizer->GetToken();
  75.     // empty list
  76.     if (t.index() == 1 && (std::get<BracketToken>(t) == BracketToken::CLOSE)) {
  77.         tokenizer->Next();
  78.         return nullptr;
  79.     }
  80.     //not empty
  81.     auto first_elem = Read(tokenizer);
  82.     auto t1 = tokenizer->GetToken(); //check a dot
  83.     //pair
  84.     if (t1.index() == 4) {
  85.         tokenizer->Next(); //next after a dot
  86.         auto second_elem = Read(tokenizer);
  87.         auto br = tokenizer->GetToken();
  88.         if (br.index() != 1 || (std::get<BracketToken>(br) != BracketToken::CLOSE)) {
  89.             throw SyntaxError("invalid pair syntax");
  90.         } else {
  91.             tokenizer->Next(); //next after a bracket
  92.             return std::shared_ptr<Cell>(new Cell(first_elem, second_elem));
  93.         }
  94.     }
  95.     //list
  96.     while (!tokenizer->IsEnd()) {
  97.        
  98.     }
  99. }
  100.  
  101. std::shared_ptr<Object> Read(Tokenizer* tokenizer) {
  102.     if (tokenizer->IsEnd()) {
  103.         throw SyntaxError("nothing to parse");
  104.     }
  105.     auto t = tokenizer->GetToken();
  106.     if (t.index() == 0) {
  107.         tokenizer->Next();
  108.         return std::shared_ptr<Number>(new Number(std::get<ConstantToken>(t).value_));
  109.     } else if (t.index() == 1) {
  110.         switch(std::get<BracketToken>(t)) {
  111.             case BracketToken::CLOSE : throw SyntaxError("unexpected )");
  112.             case BracketToken::OPEN : {
  113.                 tokenizer->Next();
  114.                 return ReadList(tokenizer);
  115.             }
  116.         }
  117.     } else if (t.index() == 2) {
  118.         tokenizer->Next();
  119.         return std::shared_ptr<Symbol>(new Symbol(std::get<SymbolToken>(t).name_));
  120.     } else if (t.index() == 3) {
  121.         throw SyntaxError("unexpected '");
  122.     } else if (t.index() == 4) {
  123.         throw SyntaxError("unexpected .");
  124.     }
  125. }
  126.  
  127. void ShouldThrow() {
  128.     std::string input = "1@";
  129.     std::stringstream ss{input};
  130.     Tokenizer tokenizer{&ss};
  131.     tokenizer.Next();
  132. }
  133.  
  134. auto ReadFull(const std::string& str) {
  135.     std::stringstream ss{str};
  136.     Tokenizer tokenizer{&ss};
  137.  
  138.     auto obj = Read(&tokenizer);
  139.     assert(tokenizer.IsEnd());
  140.     return obj;
  141. }
  142.  
  /// Generates a random five-letter lowercase name.
  ///
  /// @param rng  random engine to draw from; it is advanced by the call.
  /// @return a string of 5 characters, each in the range 'a'..'z'.
  std::string RandomSymbol(std::default_random_engine* rng) {
      constexpr int kLength = 5;

      // std::uniform_int_distribution is only specified for short/int/long/
      // long long and their unsigned forms; instantiating it with `char` is
      // undefined and rejected by some standard libraries. Draw an int and
      // narrow it - the bounds guarantee the value fits in a char.
      std::uniform_int_distribution<int> letter('a', 'z');

      std::string s;
      s.reserve(kLength);
      for (int i = 0; i < kLength; ++i) {
          s.push_back(static_cast<char>(letter(*rng)));
      }
      return s;
  }
  151.  
  152. int main() {
  153.     {
  154.         std::stringstream ss{"445+)'."};
  155.         Tokenizer tokenizer{&ss};
  156.  
  157.         assert(!tokenizer.IsEnd());
  158.  
  159.         // If next line fails to compile, check that operator == is defined for every token type.
  160.         assert(tokenizer.GetToken() == Token{ConstantToken{445}});
  161.         tokenizer.Next();
  162.         assert(!tokenizer.IsEnd());
  163.         assert(tokenizer.GetToken() == Token{SymbolToken{"+"}});
  164.  
  165.         tokenizer.Next();
  166.         assert(!tokenizer.IsEnd());
  167.         assert(tokenizer.GetToken() == Token{BracketToken::CLOSE});
  168.  
  169.         tokenizer.Next();
  170.         assert(!tokenizer.IsEnd());
  171.         assert(tokenizer.GetToken() == Token{QuoteToken{}});
  172.  
  173.         tokenizer.Next();
  174.         assert(!tokenizer.IsEnd());
  175.         assert(tokenizer.GetToken() == Token{DotToken{}});
  176.  
  177.         tokenizer.Next();
  178.         assert(tokenizer.IsEnd());
  179.     }
  180.     std::cout << ".............................\n";
  181.     {
  182.         std::stringstream ss{"-2 - 2"};
  183.         Tokenizer tokenizer{&ss};
  184.  
  185.         assert(!tokenizer.IsEnd());
  186.         assert(tokenizer.GetToken() == Token{ConstantToken{-2}});
  187.  
  188.         tokenizer.Next();
  189.         assert(!tokenizer.IsEnd());
  190.         assert(tokenizer.GetToken() == Token{SymbolToken{"-"}});
  191.  
  192.         tokenizer.Next();
  193.         assert(!tokenizer.IsEnd());
  194.         assert(tokenizer.GetToken() == Token{ConstantToken{2}});
  195.     }
  196.     std::cout << ".............................\n";
  197.     {
  198.         std::stringstream ss{"foo bar zog-zog?"};
  199.         Tokenizer tokenizer{&ss};
  200.  
  201.         assert(!tokenizer.IsEnd());
  202.         assert(tokenizer.GetToken() == Token{SymbolToken{"foo"}});
  203.  
  204.         tokenizer.Next();
  205.         assert(!tokenizer.IsEnd());
  206.         assert(tokenizer.GetToken() == Token{SymbolToken{"bar"}});
  207.  
  208.         tokenizer.Next();
  209.         assert(!tokenizer.IsEnd());
  210.         assert(tokenizer.GetToken() == Token{SymbolToken{"zog-zog?"}});
  211.     }
  212.     std::cout << ".............................\n";
  213.     {
  214.         std::stringstream ss{"1234+4"};
  215.         Tokenizer tokenizer{&ss};
  216.  
  217.         assert(tokenizer.GetToken() == Token{ConstantToken{1234}});
  218.         assert(tokenizer.GetToken() == Token{ConstantToken{1234}});
  219.     }
  220.     std::cout << ".............................\n";
  221.     {
  222.         std::stringstream ss;
  223.         ss << "2 ";
  224.  
  225.         Tokenizer tokenizer{&ss};
  226.         assert(tokenizer.GetToken() == Token{ConstantToken{2}});
  227.  
  228.         ss << "* ";
  229.         tokenizer.Next();
  230.         assert(tokenizer.GetToken() == Token{SymbolToken{"*"}});
  231.  
  232.         ss << "2";
  233.         tokenizer.Next();
  234.         assert(tokenizer.GetToken() == Token{ConstantToken{2}});
  235.     }
  236.     std::cout << ".............................\n";
  237.     {
  238.         std::stringstream ss{"      "};
  239.         Tokenizer tokenizer{&ss};
  240.  
  241.         assert(tokenizer.IsEnd());
  242.     }
  243.     std::cout << ".............................\n";
  244.     {
  245.         std::stringstream ss{"  4 +  "};
  246.         Tokenizer tokenizer{&ss};
  247.  
  248.         assert(!tokenizer.IsEnd());
  249.         assert(tokenizer.GetToken() == Token{ConstantToken{4}});
  250.  
  251.         tokenizer.Next();
  252.         assert(!tokenizer.IsEnd());
  253.         assert(tokenizer.GetToken() == Token{SymbolToken{"+"}});
  254.  
  255.         tokenizer.Next();
  256.         assert(tokenizer.IsEnd());
  257.     }
  258.     std::cout << ".............................\n";
  259.     {
  260.         std::string input = R"EOF(
  261.                                   )EOF";
  262.         std::stringstream ss{input};
  263.         Tokenizer tokenizer{&ss};
  264.  
  265.         assert(tokenizer.IsEnd());
  266.     }
  267.     std::cout << ".............................\n";
  268.     {
  269.         std::string input = R"EOF(
  270.                            4 +
  271.                            )EOF";
  272.         std::stringstream ss{input};
  273.         Tokenizer tokenizer{&ss};
  274.  
  275.         assert(!tokenizer.IsEnd());
  276.         assert(tokenizer.GetToken() == Token{ConstantToken{4}});
  277.  
  278.         tokenizer.Next();
  279.         assert(!tokenizer.IsEnd());
  280.         assert(tokenizer.GetToken() == Token{SymbolToken{"+"}});
  281.  
  282.         tokenizer.Next();
  283.         assert(tokenizer.IsEnd());
  284.     }
  285.     std::cout << ".............................\n";
  286.     {
  287.         std::stringstream ss;
  288.         Tokenizer tokenizer{&ss};
  289.  
  290.         assert(tokenizer.IsEnd());
  291.     }
  292.     std::cout << ".............................\n";
  293.     try {
  294.         ShouldThrow();
  295.     } catch (SyntaxError& s) {
  296.         std::cout << "Throws!\n";
  297.     }
  298.     std::cout << ".............................\n";
  299.     {
  300.         auto node = ReadFull("5");
  301.         assert(Is<Number>(node));
  302.         assert(As<Number>(node)->GetValue() == 5);
  303.  
  304.         node = ReadFull("-5");
  305.         assert(Is<Number>(node));
  306.         assert(As<Number>(node)->GetValue() == -5);
  307.     }
  308.     std::cout << ".............................\n";
  309.     {
  310.         auto node = ReadFull("+");
  311.         assert(Is<Symbol>(node));
  312.         assert(As<Symbol>(node)->GetName() == "+");
  313.     }
  314.     std::cout << ".............................\n";
  315.     {
  316.         std::default_random_engine rng{42};
  317.         for (int i = 0; i < 10; ++i) {
  318.             auto name = RandomSymbol(&rng);
  319.             auto node = ReadFull(name);
  320.             assert(Is<Symbol>(node));
  321.             assert(As<Symbol>(node)->GetName() == name);
  322.         }
  323.     }
  324.     std::cout << ".............................\n";
  325.     {
  326.         auto null = ReadFull("()");
  327.         assert(!null);
  328.     }
  329.     std::cout << ".............................\n";
  330.     {
  331.         auto pair = ReadFull("(1 . 2)");
  332.         assert(Is<Cell>(pair));
  333.  
  334.         auto first = As<Cell>(pair)->GetFirst();
  335.         assert(Is<Number>(first));
  336.         assert(As<Number>(first)->GetValue() == 1);
  337.  
  338.         auto second = As<Cell>(pair)->GetSecond();
  339.         assert(Is<Number>(second));
  340.         assert(As<Number>(second)->GetValue() == 2);
  341.     }
  342.     std::cout << ".............................\n";
  343.    
  344.     return 0;
  345. }
  346.  
Add Comment
Please, Sign In to add comment