Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <vector>
- #include <string>
- #include <algorithm>
- #include <iostream>
- #include <iterator>
- using namespace std;
// Simple token type used to exercise the sentence-splitting helpers below.
struct TestToken {
    std::string data;                          // text of the token
    bool is_end_sentence_punctuation = false;  // set for tokens that terminate a sentence

    // Copy construction and both move operations are defaulted; copy assignment is
    // deliberately unavailable, so assigning from an lvalue token does not compile.
    TestToken(TestToken&&) = default;
    TestToken(const TestToken&) = default;
    TestToken& operator=(TestToken&&) = default;
    TestToken& operator=(const TestToken&) = delete;

    // Reports whether this token is marked as end-of-sentence punctuation.
    bool IsEndSentencePunctuation() const {
        return is_end_sentence_punctuation;
    }

    // Two tokens are equal when both the text and the punctuation flag match.
    bool operator==(const TestToken& rhs) const {
        if (is_end_sentence_punctuation != rhs.is_end_sentence_punctuation) {
            return false;
        }
        return data == rhs.data;
    }
};
- // Перегрузка оператора << для TestToken
- ostream& operator<<(ostream& os, const TestToken& token) {
- os << token.data;
- return os;
- }
// Finds where the first sentence in [tokens_begin, tokens_end) ends.
//
// The boundary is the first adjacent pair in which the left token is
// end-of-sentence punctuation and the right token is not; the returned iterator
// points at that right token (one past the last token of the sentence).
// When no such pair exists (empty range, no punctuation at all, or punctuation
// only at the very tail of the range) the whole range is one sentence and
// tokens_end is returned.
//
// Only forward-iterator operations are used, so the function works with
// std::forward_list / std::list iterators as well as random-access ones.
// The token type must provide `bool IsEndSentencePunctuation() const`.
template <typename TokenForwardIt>
TokenForwardIt FindSentenceEnd(TokenForwardIt tokens_begin, TokenForwardIt tokens_end) {
    const TokenForwardIt last_punctuation = std::adjacent_find(
        tokens_begin, tokens_end,
        [](const auto& left_token, const auto& right_token) {
            return left_token.IsEndSentencePunctuation()
                && !right_token.IsEndSentencePunctuation();
        });

    // adjacent_find yields tokens_end when there is no boundary; otherwise it points
    // at the final punctuation token, so the sentence ends one position further.
    return last_punctuation == tokens_end ? tokens_end : std::next(last_punctuation);
}
// A sentence is represented as a plain sequence of tokens.
template <typename Token>
using Sentence = std::vector<Token>;

// Splits `tokens` into consecutive sentences, using FindSentenceEnd to locate each
// boundary. The input is taken by value and its tokens are moved (not copied) into
// the resulting sentences. An empty input produces an empty result.
template <typename Token>
std::vector<Sentence<Token>> SplitIntoSentences(std::vector<Token> tokens) {
    std::vector<Sentence<Token>> sentences;
    const auto all_end = tokens.end();

    for (auto sentence_begin = tokens.begin(); sentence_begin != all_end;) {
        const auto sentence_end = FindSentenceEnd(sentence_begin, all_end);
        // Build the sentence in place from move iterators so tokens are moved out of the input.
        sentences.emplace_back(std::make_move_iterator(sentence_begin),
                               std::make_move_iterator(sentence_end));
        sentence_begin = sentence_end;
    }

    return sentences;
}
- int main() {
- vector<TestToken> tokens = {{"Split"s}, {"into"s}, {"sentences"s}, {"!"s, true}, {"Next"s}, {"sentence"s}, {"."s, true}};
- auto sentences = SplitIntoSentences(move(tokens));
- for (const auto& sentence : sentences) {
- for (const auto& token : sentence) {
- std::cout << token << " ";
- }
- std::cout << std::endl;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement