Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System.Text.RegularExpressions;
/// <summary>
/// Helpers that break raw text into words.
/// </summary>
class TextParser
{
    // Built once and reused across calls. 'ё' and 'Ё' (U+0451 / U+0401) lie
    // outside the contiguous а-я / А-Я code point ranges, so they must be
    // listed explicitly or words containing them get cut apart.
    private static readonly Regex RussianWordRegex = new Regex(@"[а-яА-ЯёЁ]+");

    // Characters that GetWords treats as word boundaries.
    // NOTE(review): the original literal listed ',', '-' and ' ' more than once;
    // the repeats may have been look-alike characters (dashes, non-breaking
    // space) flattened by a copy/paste - confirm against the real file.
    // NOTE(review): (char)56 is the digit '8'; confirm splitting on it is intended.
    private static readonly char[] WordSeparators =
    {
        ',', '!', '.', '\"', '\'', '-', ':', '\n', '\r', ' ', (char)56
    };

    /// <summary>
    /// Extracts every maximal run of Cyrillic letters (а-я, А-Я, ё, Ё) from the text.
    /// </summary>
    /// <param name="text">Text to scan.</param>
    /// <returns>The matched runs, in order of appearance; empty when nothing matches.</returns>
    public static string[] GetRussianWords(string text)
    {
        return RussianWordRegex
            .Matches(text)
            .Cast<Match>()
            .Select(match => match.Value)
            .ToArray();
    }

    /// <summary>
    /// Splits the text on a fixed set of punctuation and whitespace characters,
    /// discarding empty entries.
    /// </summary>
    /// <param name="text">Text to split.</param>
    /// <returns>The non-empty fragments between separators, in order of appearance.</returns>
    public static string[] GetWords(string text)
    {
        return text.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);
    }
}
/// <summary>
/// Word-frequency helpers.
/// </summary>
class TextUtils
{
    /// <summary>
    /// Adds one to the counter of every word in <paramref name="words"/> inside
    /// <paramref name="collection"/>, creating the entry with value 1 when the
    /// word is not present yet.
    /// </summary>
    /// <param name="collection">
    /// Dictionary that receives the counts. It is modified in place; entries
    /// already present are kept and incremented.
    /// </param>
    /// <param name="words">Words to count; repeated words are counted each time.</param>
    /// <returns>
    /// The number of keys in <paramref name="collection"/> after counting
    /// (this includes any keys that were there before the call).
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="collection"/> or <paramref name="words"/> is null.
    /// </exception>
    public static int GetUnicueWordsCount(IDictionary<string, int> collection, IEnumerable<string> words)
    {
        if (collection == null)
        {
            throw new ArgumentNullException(nameof(collection));
        }

        if (words == null)
        {
            throw new ArgumentNullException(nameof(words));
        }

        foreach (var word in words)
        {
            // On a miss TryGetValue leaves the counter at default(int) == 0,
            // so a single code path covers both new and known words.
            int occurrences;
            collection.TryGetValue(word, out occurrences);
            collection[word] = occurrences + 1;
        }

        // The former "ten most frequent words" query was removed: its result
        // was never used, yet it sorted the entire dictionary on every call.
        return collection.Count;
    }
}
/// <summary>
/// Console entry point: times how long word counting takes with several
/// <see cref="IDictionary{TKey, TValue}"/> implementations.
/// </summary>
class Program
{
    /// <summary>
    /// Reads the text file, splits it into words with TextParser.GetWords and,
    /// for each dictionary implementation, prints the collection and the
    /// elapsed time in whole milliseconds.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        IDictionary<string, int>[] collections = new IDictionary<string, int>[]
        {
            new SortedList<string, int>(),
            new SortedDictionary<string, int>(),
            new Dictionary<string, int>(),
        };

        var text = File.ReadAllText("Tolstoy_Lev_Voyna_i_mir_1-2.txt");
        var words = TextParser.GetWords(text);

        foreach (var collection in collections)
        {
            // Stopwatch measures elapsed time directly instead of subtracting
            // two wall-clock readings.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // Fill the collection under test. Previously a fresh Dictionary was
            // passed here, so every iteration measured the same type.
            TextUtils.GetUnicueWordsCount(collection, words);

            stopwatch.Stop();

            // ElapsedMilliseconds is the total elapsed time; TimeSpan.Milliseconds
            // is only the 0-999 component and drops whole seconds.
            Console.WriteLine($"Collection: {collection} | Time: {stopwatch.ElapsedMilliseconds}");
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement