Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.nio.charset.StandardCharsets;
- import java.util.*;
/**
 * Guesses the language of a text by letting character n-grams vote.
 *
 * <p>For every learned language the detector keeps a table that maps each
 * n-gram (a substring of exactly {@code n} chars) to the number of times it
 * occurred in that language's training text. To classify a text, every n-gram
 * of the text casts one vote for the language(s) in which it occurred most
 * often.
 */
public class LanguageDetector {
    /** Per-language n-gram frequency tables, keyed by language name. */
    HashMap<String, HashMap<String, Integer>> languageTables;
    /** Learned language names, in the order they were first passed to {@link #learnLanguage}. */
    LinkedList<String> languages;
    /** Length, in chars, of the n-grams that are counted. */
    int n;
    /** Initial capacity requested for each language's frequency table. */
    int N;

    /**
     * Creates a detector that has not learned any language yet.
     *
     * @param n length of the n-grams, in chars; must be at least 1
     * @param N initial capacity of each per-language table; must not be negative
     * @throws IllegalArgumentException if {@code n < 1} or {@code N < 0}
     */
    public LanguageDetector(int n, int N) {
        // Fail fast: a non-positive n would otherwise only surface later, as
        // empty n-grams (n == 0) or a StringIndexOutOfBoundsException (n < 0).
        if (n < 1) {
            throw new IllegalArgumentException("n must be at least 1, got " + n);
        }
        if (N < 0) {
            throw new IllegalArgumentException("N must not be negative, got " + N);
        }
        this.n = n;
        this.N = N;
        languageTables = new HashMap<>();
        languages = new LinkedList<>();
    }

    /**
     * Adds the n-grams of {@code text} to the frequency table of {@code language},
     * registering the language on first use. Calling this repeatedly for the same
     * language accumulates counts.
     *
     * @param language name of the language the text is written in
     * @param text     training text; texts shorter than {@code n} contribute nothing
     * @throws NullPointerException if {@code text} is null
     */
    public void learnLanguage(String language, String text) {
        Objects.requireNonNull(text, "text");
        HashMap<String, Integer> table = languageTables.get(language);
        if (table == null) {
            // N is only a capacity hint; the default load factor keeps lookups fast.
            table = new HashMap<>(N);
            languageTables.put(language, table);
            languages.add(language);
        }
        for (int i = 0; i <= text.length() - n; i++) {
            table.merge(text.substring(i, i + n), 1, Integer::sum);
        }
    }

    /**
     * Returns how often {@code ngram} occurred in the training text of {@code language}.
     *
     * @param ngram    the n-gram to look up
     * @param language name of the language whose table is consulted
     * @return the recorded count, or 0 if the language is unknown or never saw the n-gram
     */
    public int getCount(String ngram, String language) {
        HashMap<String, Integer> table = languageTables.get(language);
        return table == null ? 0 : table.getOrDefault(ngram, 0);
    }

    /**
     * Lets every n-gram of {@code text} vote for the language(s) that saw it most often.
     *
     * <p>Languages tied for the highest count each receive one vote for that n-gram.
     * An n-gram that no learned language has seen casts no vote at all, so a language
     * never gains votes without evidence.
     *
     * @param text the text to classify
     * @return a new map from language name to number of votes; languages that received
     *         no vote are absent, and the map is empty if nothing voted
     * @throws NullPointerException if {@code text} is null
     */
    public HashMap<String, Integer> apply(String text) {
        Objects.requireNonNull(text, "text");
        HashMap<String, Integer> votes = new HashMap<>();
        List<String> leaders = new ArrayList<>();
        for (int i = 0; i <= text.length() - n; i++) {
            String ngram = text.substring(i, i + n);
            leaders.clear();
            int maxCount = 0;
            for (String language : languages) {
                int count = getCount(ngram, language);
                if (count == 0) {
                    // Never seen by this language: it must not win or tie at zero.
                    continue;
                }
                if (count > maxCount) {
                    leaders.clear();
                    maxCount = count;
                }
                if (count == maxCount) {
                    leaders.add(language);
                }
            }
            for (String language : leaders) {
                votes.merge(language, 1, Integer::sum);
            }
        }
        return votes;
    }

    /**
     * Demo: trains on three short sample sentences, classifies a test sentence and
     * prints the vote table followed by the language(s) with the most votes.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        LanguageDetector detector = new LanguageDetector(3, 101);
        detector.learnLanguage("english", "This is an example of English text");
        detector.learnLanguage("german", "Dies ist ein Beispiel für deutschen Text");
        detector.learnLanguage("croatian", "Ovo je primjer hrvatskog teksta");
        String testText = "This is a test text written in English, German, and Croatian";
        HashMap<String, Integer> result = detector.apply(testText);

        // Sort the languages by number of votes, highest first.
        List<Map.Entry<String, Integer>> sortedEntries = new ArrayList<>(result.entrySet());
        sortedEntries.sort(Map.Entry.comparingByValue(Comparator.reverseOrder()));

        // Collect every language tied for the top vote count. The list is sorted, so
        // the scan can stop at the first lower value; an empty result yields no winner.
        List<String> preferredLanguages = new ArrayList<>();
        if (!sortedEntries.isEmpty()) {
            int maxVotes = sortedEntries.get(0).getValue();
            for (Map.Entry<String, Integer> entry : sortedEntries) {
                if (entry.getValue() != maxVotes) {
                    break;
                }
                preferredLanguages.add(entry.getKey());
            }
        }
        // Report tied winners in lexicographic order.
        preferredLanguages.sort(Comparator.naturalOrder());

        System.out.println("HashMap returned:");
        for (Map.Entry<String, Integer> entry : sortedEntries) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
        System.out.println("Preferred language(s): " + preferredLanguages);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement