Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.*;
- import java.util.*;
- import java.util.stream.Collectors;
- public class App {
- public static void main(String[] args) throws Exception {
- File directoryPath = new File("in/");
- String[] files = directoryPath.list();
- assert files != null;
- if (files.length < 10) {
- throw new NotEnoughFilesException("Not enough files in the directory");
- }
- for (var fileName : files) {
- File file = new File("in/" + fileName);
- if (file.length() < 150 * 1024) {
- throw new FileTooSmallException("File " + fileName + " is too small");
- }
- }
- LinkedHashMap<String, Integer> dict = new LinkedHashMap<>();
- for (var fileName : files) {
- File file = new File("in/" + fileName);
- BufferedReader reader = new BufferedReader(new FileReader(file));
- String st;
- StringBuilder contents = new StringBuilder();
- while ((st = reader.readLine()) != null) {
- contents.append(st).append("\n");
- }
- String[] words = contents.toString().split("\\s");
- for (var word : words) {
- if (word.length() > 0) {
- String fmt = word.replaceAll("[^\\w\\p{sc=Cyrillic}]+", "").toLowerCase();
- int count;
- if (dict.containsKey(fmt)) {
- count = dict.get(fmt) + 1;
- } else {
- count = 1;
- }
- dict.put(fmt, count);
- }
- }
- }
- LinkedHashMap<String, Integer> sortedDict = dict.entrySet().stream().sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new));
- int wordCount = sortedDict.values().stream().mapToInt(i -> i).sum();
- int termCount = sortedDict.size();
- File file = new File("dict.txt");
- BufferedWriter writer = new BufferedWriter(new FileWriter(file));
- for (var entry : sortedDict.entrySet()) {
- writer.write(entry.getKey() + " -- " + entry.getValue() + "\n");
- }
- writer.flush();
- long size = file.length();
- System.out.printf("Dictionary size: %.2f KB\n", (double) size / 1024.0);
- System.out.println("Word count: " + wordCount);
- System.out.println("Term count: " + termCount);
- writer.close();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement