Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public static class LanguageAutoDetect
- {
/// <summary>
/// Counts how many tokens in <paramref name="text"/> match one of the given <paramref name="words"/>.
/// </summary>
/// <remarks>
/// A token starts at a letter and runs up to the next boundary character (punctuation, space,
/// CR/LF, brackets, quotes) or the end of the text. Characters that are neither letters nor in the
/// boundary set (digits, tabs, ...) do not split tokens and stay part of the token they occur in.
/// Tokens of a single character are never counted. A token matches when it is contained in
/// <paramref name="words"/> either exactly or after invariant lower-casing.
/// </remarks>
/// <param name="text">Text to scan. Null or empty text yields 0.</param>
/// <param name="words">Words to look for. Null or empty yields 0.</param>
/// <returns>The number of matching tokens found in <paramref name="text"/>.</returns>
private static int GetCount(string text, params string[] words)
{
    if (string.IsNullOrEmpty(text) || words == null || words.Length == 0)
    {
        return 0;
    }

    var commonWordSet = new HashSet<string>(words);
    var boundaryCharsSet = new HashSet<char>
    {
        '.', ' ', '}', ']', ')', '(', '[', '{', '-', '|', '?', '!', '&', '\r', '\n', ',', ':', '>', '<', '\'', '"'
    };

    int len = text.Length;
    int tokenStart = 0;
    int count = 0;

    // Iterate one position past the last character so that the end of the text acts as an
    // implicit boundary; otherwise a trailing word without punctuation would never be evaluated.
    for (int pos = 0; pos <= len; pos++)
    {
        bool atEnd = pos == len;
        if (!atEnd && !boundaryCharsSet.Contains(text[pos]))
        {
            // Inside a token. If the recorded start is not on a letter (e.g. leading digits),
            // move it to the first letter encountered. tokenStart <= pos < len holds here.
            if (char.IsLetter(text[pos]) && !char.IsLetter(text[tokenStart]))
            {
                tokenStart = pos;
            }

            continue;
        }

        // Boundary (or end of text): evaluate the token [tokenStart, pos).
        // The length check comes first; it also guarantees tokenStart is a valid index.
        int tokenLength = pos - tokenStart;
        if (tokenLength > 1 && char.IsLetter(text[tokenStart]))
        {
            string word = text.Substring(tokenStart, tokenLength);
            if (commonWordSet.Contains(word) || commonWordSet.Contains(word.ToLowerInvariant()))
            {
                count++;
            }
        }

        tokenStart = pos + 1;
    }

    return count;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement