Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- License: MIT. Id est: Use it. Reuse it. Love it.
- # build options for overall usage
- CXXFLAGS='-std=c++11'
- LDFLAGS='-lboost_{system,locale}'
- # build options for Gentoo fellows
- CXXFLAGS='-std=c++11 -g0 -Ofast -flto=2 -flto-compression-level=0 -ffat-lto-objects -march=native -mtune=native'
- LDFLAGS='-fuse-linker-plugin -Wl,{-z\,{relro,now},-l{dl,pthread,icu{data,uc,i18n},boost_{system,thread,chrono,locale}}}'
- NAME=transliterate
- build() {
- eval rm $1 ${NAME}{,.o} 2>/dev/null
- eval g++ -o ${NAME}{.o,.cc} ${CXXFLAGS} -c
- eval g++ -o ${NAME}{,.o} ${CXXFLAGS} ${LDFLAGS}
- eval rm ${NAME}.o 2>/dev/null
- eval objcopy --strip-debug --strip-unneeded ${NAME} 2>/dev/null
- eval LC_ALL=C ls -lgG ${NAME} 2>/dev/null
- }
- */
- #define TEST_WORD_MIN_LEN 1000 // smallest number of random picks per generated word (inclusive start of the length loop in generate_test_data)
- #define TEST_WORD_MAX_LEN 1000 // largest number of random picks per generated word (inclusive end of the same loop)
- #define TEST_WORD_SEQ_LEN 10000 // how many random words generate_test_data creates for each word length
- #define TEST_SILENT // when defined, run_test_1 discards each transliteration result instead of printing it to std::cout
- #include <functional>
- #include <iostream>
- #include <string>
- #include <map>
- #include <unordered_map>
- #include <vector>
- #include <chrono>
- #include <thread>
- #include <random>
- //package "libboost-locale-dev"
- #include <boost/locale/boundary.hpp>
- #include <boost/locale/conversion.hpp>
- #include <boost/locale/generator.hpp>
/**
 * Base transliteration table: one lower-case Russian letter -> its Latin
 * spelling. initialize_engine() derives the upper- and lower-case runtime
 * rules from these pairs.
 *
 * std::map orders entries by key, so the listing order below is only for
 * readability.
 */
static std::map<std::string, std::string> rules_base = {
    {"а", "a"},  {"б", "b"},  {"в", "v"},   {"г", "g"},  {"д", "d"},
    {"е", "e"},  {"ё", "yo"}, {"ж", "zh"},  {"з", "z"},  {"и", "i"},
    {"й", "y"},  {"к", "k"},  {"л", "l"},   {"м", "m"},  {"н", "n"},
    {"о", "o"},  {"п", "p"},  {"р", "r"},   {"с", "s"},  {"т", "t"},
    {"у", "u"},  {"ф", "f"},  {"х", "h"},   {"ц", "ts"}, {"ч", "ch"},
    {"ш", "sh"}, {"щ", "sch"},
    {"ъ", "`"},  // hard sign
    {"ы", "y"},
    {"ь", "'"},  // soft sign
    {"э", "e"},  {"ю", "yu"}, {"я", "ya"},
};
- // -------------------
- void nop(void); // forward declaration; the empty body is defined after main() and is passed to measure<>::execution
- // -------------------
- std::string lowercase(const std::string & target) {
- return boost::locale::to_lower(
- boost::locale::normalize(target, boost::locale::norm_nfc)
- );
- }
- std::string uppercase(const std::string & target) {
- return boost::locale::to_upper(
- boost::locale::normalize(target, boost::locale::norm_nfc)
- );
- }
- std::string titlecase(const std::string & target) {
- return boost::locale::to_title(
- boost::locale::normalize(target, boost::locale::norm_nfc)
- );
- }
- // -------------------
- void initialize_locale(void) {
- boost::locale::generator gen;
- std::locale loc = gen("");
- std::locale::global(loc);
- std::cin.imbue(loc);
- std::cout.imbue(loc);
- std::cerr.imbue(loc);
- }
- static std::unordered_map<std::string, std::string> rules;
- static std::vector<std::string> letters;
- void initialize_engine(void) {
- rules.clear();
- letters.clear();
- for (const auto & rule : rules_base) {
- rules[uppercase(rule.first)] = titlecase(rule.second);
- rules[lowercase(rule.first)] = lowercase(rule.second);
- letters.push_back(rule.first);
- }
- letters.shrink_to_fit();
- }
- // -------------------
- std::string transliterate_1(const std::string & text) {
- std::string result;
- boost::locale::boundary::ssegment_index
- index( boost::locale::boundary::character, text.begin(), text.end() );
- boost::locale::boundary::ssegment_index::iterator
- current, end;
- for (current = index.begin(), end = index.end(); current != end; ++current) {
- auto rule = rules.find(*current);
- if (rule != rules.end()) {
- result.append(rule->second);
- } else {
- result.append(*current);
- }
- }
- return result;
- }
- /*
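- //TODO: transliterate_2 below is still a verbatim copy of transliterate_1; replace its body with the alternative implementation to benchmark against it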
- std::string transliterate_2(const std::string & text) {
- std::string result;
- boost::locale::boundary::ssegment_index
- index( boost::locale::boundary::character, text.begin(), text.end() );
- boost::locale::boundary::ssegment_index::iterator
- current, end;
- for (current = index.begin(), end = index.end(); current != end; ++current) {
- auto rule = rules.find(*current);
- if (rule != rules.end()) {
- result.append(rule->second);
- } else {
- result.append(*current);
- }
- }
- return result;
- }
- //*/
- // -------------------
/*
 source taken from:
 http://codereview.stackexchange.com/questions/48872/measuring-execution-time-in-c
 (changed here to use a monotonic clock)
*/
/**
 * Stopwatch helper that times a single call.
 *
 * @tparam TimeT  std::chrono duration type the result is expressed in
 *                (milliseconds by default).
 */
template<typename TimeT = std::chrono::milliseconds>
struct measure {
    /**
     * Calls `func(args...)` once and reports how long the call took.
     *
     * @param func  callable to time (taken by value)
     * @param args  arguments perfectly forwarded to `func`
     * @return      elapsed time as a tick count of `TimeT`
     */
    template<typename F, typename ...Args>
    static typename TimeT::rep execution(F func, Args&&... args) {
        // steady_clock never goes backwards. system_clock follows wall-clock
        // adjustments (NTP, manual changes), which could skew the measured
        // interval or even make it negative.
        const auto start = std::chrono::steady_clock::now();
        func(std::forward<Args>(args)...);
        const auto stop = std::chrono::steady_clock::now();
        return std::chrono::duration_cast<TimeT>(stop - start).count();
    }
};
- // -------------------
- static std::vector<std::string> words;
- void generate_test_data(void) {
- std::string tmp;
- words.clear();
- //lowercase letters
- tmp.clear();
- for (const auto & rule : rules_base) {
- tmp.append(lowercase(rule.first));
- }
- words.push_back(tmp);
- //uppercase letters
- tmp.clear();
- for (const auto & rule : rules_base) {
- tmp.append(uppercase(rule.first));
- }
- words.push_back(tmp);
- //generate some letter sequences
- std::minstd_rand gen;
- std::uniform_int_distribution<int> dist(0, letters.size() - 1);
- uint i, k, n;
- for (i = TEST_WORD_MIN_LEN; i < TEST_WORD_MAX_LEN + 1; ++i) {
- for (k = 0; k < TEST_WORD_SEQ_LEN; ++k) {
- tmp.clear();
- for (n = 0; n < i; ++n) {
- tmp.append(rules[letters[dist(gen)]]);
- }
- words.push_back(tmp);
- }
- }
- // std::cerr << "words.size = " << words.size() << std::endl;
- // std::cerr << "words.capacity = " << words.capacity() << std::endl;
- // std::cerr << "words.shrink_to_fit()" << std::endl;
- words.shrink_to_fit();
- // std::cerr << "words.capacity = " << words.capacity() << std::endl;
- }
- void run_test_1(void) {
- for (const auto & word : words) {
- #ifdef TEST_SILENT
- transliterate_1(word);
- #else
- std::cout << word << " -> " << transliterate_1(word) << std::endl;
- #endif
- }
- }
- /*
- void run_test_2(void) {
- for (const auto & word : words) {
- #ifdef TEST_SILENT
- transliterate_2(word);
- #else
- std::cout << word << " -> " << transliterate_2(word) << std::endl;
- #endif
- }
- }
- //*/
- void run_tests(void) {
- auto duration = measure<>::execution(nop);
- // std::cerr << "sleep 3 seconds before test #1" << std::endl;
- std::this_thread::sleep_for(std::chrono::seconds(3));
- std::cerr << "run test #1 ... ";
- duration = measure<>::execution(run_test_1);
- std::cerr << duration << " milliseconds" << std::endl;
- /*
- // std::cerr << "sleep 3 seconds before test #2" << std::endl;
- std::this_thread::sleep_for(std::chrono::seconds(3));
- std::cerr << "run test #2 ... ";
- duration = measure<>::execution(run_test_2);
- std::cerr << duration << " milliseconds" << std::endl;
- //*/
- }
- // -------------------
- int main(int argc, char* argv[]) {
- std::cerr
- << "test configuration:"
- << std::endl
- << " word length: [" << TEST_WORD_MIN_LEN << ".." << TEST_WORD_MAX_LEN << "]"
- << std::endl
- << " sample count: " << TEST_WORD_SEQ_LEN
- << std::endl;
- initialize_locale();
- initialize_engine();
- auto duration = measure<>::execution(nop);
- std::cerr << "generate data for tests ... ";
- duration = measure<>::execution(generate_test_data);
- std::cerr << duration << " milliseconds" << std::endl;
- // std::cerr << "run tests..." << std::endl;
- run_tests();
- return 0;
- }
/** Does nothing; serves as an empty callable for measure<>::execution. */
void nop() {}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement