Advertisement
CR7CR7

LanguageDetector

Jun 25th, 2023
936
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.88 KB | None | 0 0
  1. import java.nio.charset.StandardCharsets;
  2. import java.util.*;
  3.  
  /**
   * Guesses the language of a text by comparing its character n-grams against
   * per-language n-gram frequency tables built from training text.
   *
   * <p>Training ({@link #learnLanguage}) counts every n-gram of the given text
   * for the given language. Detection ({@link #apply}) slides an n-character
   * window over the input and, for each window, gives one vote to every language
   * whose table holds the highest count for that n-gram.
   */
  public class LanguageDetector {
      /** N-gram counts per language: language name -> (n-gram -> occurrences). */
      HashMap<String, HashMap<String, Integer>> languageTables;
      /** Known language names in the order they were first learned. */
      LinkedList<String> languages;
      /** Length of the n-grams, in {@code char}s. */
      int n;
      /** Initial capacity used for each per-language n-gram table. */
      int N;

      /**
       * Creates a detector with no languages learned yet.
       *
       * @param n length of the n-grams; must be at least 1
       * @param N initial capacity of each per-language n-gram table; must not be negative
       * @throws IllegalArgumentException if {@code n < 1} or {@code N < 0}
       */
      public LanguageDetector(int n, int N) {
          if (n < 1) {
              throw new IllegalArgumentException("n-gram length must be at least 1, got " + n);
          }
          if (N < 0) {
              throw new IllegalArgumentException("table capacity must not be negative, got " + N);
          }
          this.n = n;
          this.N = N;
          languageTables = new HashMap<>();
          languages = new LinkedList<>();
      }

      /**
       * Adds the n-grams of {@code text} to the frequency table of {@code language},
       * registering the language first if it has not been seen before. Calling this
       * repeatedly for the same language accumulates counts.
       *
       * @param language name of the language being trained
       * @param text training text; texts shorter than {@code n} contribute nothing
       * @throws NullPointerException if {@code language} or {@code text} is null
       */
      public void learnLanguage(String language, String text) {
          Objects.requireNonNull(language, "language");
          Objects.requireNonNull(text, "text");

          HashMap<String, Integer> table = languageTables.get(language);
          if (table == null) {
              // Default load factor: the table grows as it fills instead of
              // degrading into long bucket chains.
              table = new HashMap<>(N);
              languageTables.put(language, table);
              languages.add(language);
          }

          for (int i = 0; i <= text.length() - n; i++) {
              table.merge(text.substring(i, i + n), 1, Integer::sum);
          }
      }

      /**
       * Returns how often {@code ngram} occurred in the training text of {@code language}.
       *
       * @param ngram the n-gram to look up
       * @param language name of the language
       * @return the recorded count, or 0 if the language is unknown or the n-gram was never seen
       */
      public int getCount(String ngram, String language) {
          HashMap<String, Integer> table = languageTables.get(language);
          return table == null ? 0 : table.getOrDefault(ngram, 0);
      }

      /**
       * Lets every n-gram of {@code text} vote for the language(s) in which it is most frequent.
       *
       * <p>For each n-gram, every language sharing the highest count receives one vote.
       * NOTE: an n-gram that no learned language has seen ties all languages at count 0,
       * so each learned language receives a vote for it; this shifts all totals equally
       * and does not change the ranking.
       *
       * @param text the text to classify
       * @return a new map from language name to number of votes; empty if {@code text} is
       *         shorter than {@code n} or no language has been learned
       * @throws NullPointerException if {@code text} is null
       */
      public HashMap<String, Integer> apply(String text) {
          Objects.requireNonNull(text, "text");

          HashMap<String, Integer> votes = new HashMap<>();
          // Reused across n-grams: the languages currently tied for the highest count.
          List<String> leaders = new ArrayList<>();

          for (int i = 0; i <= text.length() - n; i++) {
              String ngram = text.substring(i, i + n);
              leaders.clear();
              int maxCount = 0;
              for (String language : languages) {
                  int count = getCount(ngram, language);
                  if (count > maxCount) {
                      // Strictly better: previous leaders lose their claim.
                      leaders.clear();
                      leaders.add(language);
                      maxCount = count;
                  } else if (count == maxCount) {
                      leaders.add(language);
                  }
              }
              for (String language : leaders) {
                  votes.merge(language, 1, Integer::sum);
              }
          }
          return votes;
      }

      /**
       * Small demonstration: trains three languages on one sentence each, classifies a
       * test sentence, and prints the vote table followed by the top-voted language(s).
       *
       * @param args ignored
       */
      public static void main(String[] args) {
          LanguageDetector detector = new LanguageDetector(3, 101);
          detector.learnLanguage("english", "This is an example of English text");
          detector.learnLanguage("german", "Dies ist ein Beispiel für deutschen Text");
          detector.learnLanguage("croatian", "Ovo je primjer hrvatskog teksta");

          String testText = "This is a test text written in English, German, and Croatian";
          HashMap<String, Integer> result = detector.apply(testText);

          // Sort the languages based on the number of votes in descending order
          List<Map.Entry<String, Integer>> sortedEntries = new ArrayList<>(result.entrySet());
          sortedEntries.sort(Map.Entry.comparingByValue(Comparator.reverseOrder()));

          // Collect the languages sharing the maximum number of votes. The list is
          // sorted descending, so they form a prefix; an empty result yields no
          // preferred language instead of failing on get(0).
          List<String> preferredLanguages = new ArrayList<>();
          if (!sortedEntries.isEmpty()) {
              int maxVotes = sortedEntries.get(0).getValue();
              for (Map.Entry<String, Integer> entry : sortedEntries) {
                  if (entry.getValue() != maxVotes) {
                      break;
                  }
                  preferredLanguages.add(entry.getKey());
              }
          }

          // Sort the preferred languages lexicographically
          preferredLanguages.sort(Comparator.naturalOrder());

          System.out.println("HashMap returned:");
          for (Map.Entry<String, Integer> entry : sortedEntries) {
              System.out.println(entry.getKey() + ":" + entry.getValue());
          }

          System.out.println("Preferred language(s): " + preferredLanguages);
      }
  }
  100.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement