Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// =========================================
// Parse a string for words, money or numbers
// and report the count of each character type.
// =========================================
- #include <iostream>
- #define COMMA_SEPARATE // when defined, parseExpression inserts ',' separators into its output buffers
- //#define REMOVE_CURRENCY // when defined, parseExpression does not copy the currency symbol into ourMoneys
- //#define BALTIC_CODEPAGE_775 // when defined, parseExpression also treats selected bytes >= 128 as letters (ranges chosen for code page 775)
- //#define CYRILLIC_CODE_PAGE_855 // same, with the byte ranges for code page 855; only consulted if the macro above is undefined
- //#define CYRILLIC_CODE_PAGE_866 // same, with the byte ranges for code page 866; only consulted if both macros above are undefined
// Result of parsing one input string.
//
// The three buffers receive the bytes routed to them by parseExpression and
// are read back as C strings; the counters are only ever incremented by it.
// Every member therefore has a default initialiser, so a plainly declared
// object (`parsed_word_t p;`) starts with empty, NUL-filled buffers and zeroed
// counters instead of indeterminate values.
struct parsed_word_t {
    unsigned char ourText[100] = {};    // letters and the blanks/commas that follow them
    unsigned char ourNums[100] = {};    // digits and dots that are not part of a money amount
    unsigned char ourMoneys[100] = {};  // currency symbols and the amounts that follow them
    int letters = 0;       // number of letter bytes seen
    int digits = 0;        // number of digit bytes seen
    int spaces = 0;        // number of space / tab bytes seen
    int controlChars = 0;  // number of control bytes seen
    int symbols = 0;       // number of bytes that fit no other category
};
// Classifies every byte of the NUL-terminated string `str` and bumps the
// matching counter:
//   letters      - 'A'..'Z' and 'a'..'z'
//   digits       - '0'..'9'
//   spaces       - space and horizontal tab
//   controlChars - any other byte below 32
//   symbols      - everything else (including all bytes >= 127)
// The counters are incremented, never reset, so the caller chooses the
// starting values.
void statOfStr(unsigned char* str, int* letters, int* digits, int* symbols, int* spaces, int* controlChars)
{
    for (const unsigned char* cursor = str; *cursor != '\0'; ++cursor)
    {
        const unsigned char ch = *cursor;

        const bool isUpper = ch >= 'A' && ch <= 'Z';
        const bool isLower = ch >= 'a' && ch <= 'z';
        const bool isDigit = ch >= '0' && ch <= '9';
        const bool isBlank = ch == ' ' || ch == '\t';

        if (isUpper || isLower)
        {
            ++*letters;
        }
        else if (isDigit)
        {
            ++*digits;
        }
        else if (isBlank)
        {
            ++*spaces;
        }
        else if (ch < ' ')
        {
            // Tab was already taken by the blank test above.
            ++*controlChars;
        }
        else
        {
            ++*symbols;
        }
    }
}
- // this expression will parse the incoming message counting charactures
- // and placing message, money or numbers into separate buffers
- // if you define COMMA_SEPARATE it will separate the input message strings and moneys by commas
- // e.g. bread $10 milk $1.12 sugar £1.54 shirt £25.60
- // bread , milk, sugar, shirt,
- // $10, $1.12, £1.54, £25.60
- // so you can send an update from the user to the bot to update a price map for example
- void parseExpression(unsigned char* str, parsed_word_t* parsedObj)
- {
- int cnt = 0;
- int cnt1 = 0;
- int cnt2 = 0;
- int lastItem = 0;
- while (*str)
- {
- if ((*str >= 65 and *str <= 90) or (*str >= 97 and *str <= 122)) /* normal letters */
- {
- #if defined(COMMA_SEPARATE)
- if (lastItem == 5)
- {
- parsedObj->ourMoneys[cnt1] = ',';
- ++cnt1;
- }
- #endif
- (parsedObj->letters)++;
- parsedObj->ourText[cnt] = *str;
- ++cnt;
- lastItem = 1;
- }
- #if defined(BALTIC_CODEPAGE_775)
- else if (((((((*str >= 128 and *str <= 149) or (*str >= 151 and *str <= 155)) or (*str == 157)) or (*str >= 160 and *str <= 165)) or (*str >= 198 and *str <= 199)) or (*str >= 207 and *str <= 216)) or (*str >= 224 and *str <= 238)) /* extended letters */
- {
- #if defined(COMMA_SEPARATE)
- if (lastItem == 5)
- {
- parsedObj->ourMoneys[cnt1] = ',';
- ++cnt1;
- }
- #endif
- (parsedObj->letters)++;
- parsedObj->ourText[cnt] = *str;
- ++cnt;
- lastItem = 1;
- }
- #elif defined(CYRILLIC_CODE_PAGE_855)
- else if ((((((((*str >= 128 and *str <= 173) or (*str >= 181 and *str <= 184)) or (*str >= 189 and *str <= 190)) or (*str >= 198 and *str <= 199)) or (*str >= 208 and *str <= 216)) or (*str >= 221 and *str <= 222)) or (*str >= 224 and *str <= 238)) or (*str >= 241 and *str <= 252)) /* extended letters code page 855 Cyrillic */
- {
- #if defined(COMMA_SEPARATE)
- if (lastItem == 5)
- {
- parsedObj->ourMoneys[cnt1] = ',';
- ++cnt1;
- }
- #endif
- (parsedObj->letters)++;
- parsedObj->ourText[cnt] = *str;
- ++cnt;
- lastItem = 1;
- }
- #elif defined(CYRILLIC_CODE_PAGE_866)
- else if ((*str >= 128 and *str <= 175) or (*str >= 224 and *str <= 247)) /* extended letters code page 855 Cyrillic */
- {
- #if defined(COMMA_SEPARATE)
- if (lastItem == 5)
- {
- parsedObj->ourMoneys[cnt1] = ',';
- ++cnt1;
- }
- #endif
- (parsedObj->letters)++;
- parsedObj->ourText[cnt] = *str;
- ++cnt;
- lastItem = 1;
- }
- #endif // end code page choice
- else if (*str >= 48 and *str <= 57) /* its a number */
- {
- (parsedObj->digits)++;
- if ((lastItem == 5) || (lastItem == 7))
- {
- parsedObj->ourMoneys[cnt1] = *str;
- ++cnt1;
- lastItem = 5;
- }
- else
- {
- parsedObj->ourNums[cnt2] = *str;
- ++cnt2;
- lastItem = 2;
- }
- }
- else if ((*str == 13) or (*str == 10)) /* CR or LF */
- {
- parsedObj->ourText[cnt] = ' ';
- ++cnt;
- lastItem = 9;
- }
- else if (*str == 46) /* full stop */
- {
- (parsedObj->digits)++;
- if (lastItem == 5)
- {
- parsedObj->ourMoneys[cnt1] = *str;
- ++cnt1;
- lastItem = 7;
- }
- else
- {
- parsedObj->ourNums[cnt2] = *str;
- ++cnt2;
- lastItem = 3;
- }
- }
- else if (*str == 32 or *str == 9) /* space or tab */
- {
- (parsedObj->spaces)++;
- switch (lastItem)
- {
- case 1:
- parsedObj->ourText[cnt] = *str;
- ++cnt;
- break;
- case 2:
- parsedObj->ourNums[cnt2] = *str;
- ++cnt2;
- break;
- case 3:
- #if defined(COMMA_SEPARATE)
- parsedObj->ourNums[cnt2] = '0';
- parsedObj->ourNums[cnt2+1] = ',';
- parsedObj->ourNums[cnt2+2] = *str;
- cnt2+=3;
- #else
- parsedObj->ourNums[cnt2] = '0';
- parsedObj->ourNums[cnt2+1] = *str;
- cnt2+=2;
- #endif
- break;
- case 5:
- #if defined(COMMA_SEPARATE)
- parsedObj->ourMoneys[cnt1] = ',';
- parsedObj->ourMoneys[cnt1+1] = *str;
- cnt1+=2;
- #else
- parsedObj->ourMoneys[cnt1] = *str;
- ++cnt1;
- #endif
- break;
- case 6:
- parsedObj->ourText[cnt] = *str;
- ++cnt;
- break;
- case 7:
- #if defined(COMMA_SEPARATE)
- parsedObj->ourMoneys[cnt1] = '0';
- parsedObj->ourMoneys[cnt1+1] = ',';
- parsedObj->ourMoneys[cnt1+2] = *str;
- cnt1+=3;
- #else
- parsedObj->ourMoneys[cnt1] = '0';
- parsedObj->ourMoneys[cnt1+1] = *str;
- cnt1+=2;
- #endif
- break;
- case 8:
- parsedObj->ourText[cnt] = *str;
- ++cnt;
- break;
- }
- lastItem = 4;
- }
- else if ((*str == 36) || ((*str == 163) || (*str == 219))) // preceded by a currency identifier of $ £ or euro
- {
- #if defined(COMMA_SEPARATE)
- parsedObj->ourText[cnt] = ',';
- ++cnt;
- #endif
- #if defined(REMOVE_CURRENCY)
- lastItem = 5;
- #else
- parsedObj->ourMoneys[cnt1] = *str;
- ++cnt1;
- lastItem = 5;
- #endif
- }
- else if (*str <= 31) /* control char */
- {
- #if defined(COMMA_SEPARATE)
- if (lastItem == 5)
- {
- parsedObj->ourMoneys[cnt1] = ',';
- ++cnt1;
- }
- #endif
- (parsedObj->controlChars)++;
- lastItem = 6;
- }
- else /* it was a symbol */
- {
- #if defined(COMMA_SEPARATE)
- if (lastItem == 5)
- {
- parsedObj->ourMoneys[cnt1] = ',';
- ++cnt1;
- }
- #endif
- (parsedObj->symbols)++;
- lastItem = 8;
- }
- str++;
- }
- }
- int main()
- {
- //unsigned char str[170]{ "Hello! How are you?\n4.5.0. $35.0 æble £12. this is the text --11.74 120 $11.76 -- $16000. £11.57 and & spaces!!! new word" };
- unsigned char str[170]{ "dog food : $35.0 bread : £12. random numbers grøn : 11.74 æble £1.11 120 records $11.76 -- new car , $16000. milk £11.57 and & spaces!!! čo to je" };
- //unsigned char str[170]{ "dog food : $3.10 bread : £12. grøn æble : 11.74 raw whole milk £1.11 leather jacket 120 records $11.76 -- new car , $16000. milk £11.57 and & spaces!!! čo to je" };
- int letters{ 0 };
- int digits{ 0 };
- int symbols{ 0 };
- int spaces{ 0 };
- int control{ 0 };
- parsed_word_t pWord;
- std::cout << str << "\n\n";
- statOfStr(str, &letters, &digits, &symbols, &spaces, &control);
- std::cout << "In this string there are:\n" << letters << " letters\n" << symbols << " symbols\n" << digits << " digits\n" << control << " control chars.\n" << spaces << " spaces\n";
- parseExpression(str, &pWord);
- std::string printitout;
- std::string printitmoneys;
- std::string printitnumbers;
- char * pout = (char*) &pWord.ourText[0];
- printitout.append(pout);
- pout = (char*) &pWord.ourMoneys[0];
- printitmoneys.append(pout);
- printf("%s",pWord.ourNums);
- pout = (char*) &pWord.ourNums[0];
- printitnumbers.append(pout);
- std::cout << printitout << "\nmoney " << printitmoneys << "\nnums " << printitnumbers << std::endl;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement