Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.lang.reflect.Array;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
/**
 * Guesses the language of a text by comparing its character n-grams with n-gram counts
 * learned from sample texts.
 *
 * <p>Every learned language owns one fixed-capacity {@link HashMap} that maps an n-gram to
 * the number of times it occurred in the training texts of that language. {@link #apply}
 * lets every n-gram of an unknown text vote for the language(s) in which it is most frequent.
 */
public class LanguageDetector {
    /** Per-language n-gram tables; slot {@code i} belongs to {@code languages.get(i)}. */
    HashMap<Integer>[] languageTables;
    /** Names of the learned languages, in the order in which they were first seen. */
    LinkedList<String> languages;
    /** Length of the n-grams that are counted. */
    int n;
    /** Number of slots of every per-language hash table. */
    int N;

    /**
     * Creates a detector without any learned language.
     *
     * @param n length of the n-grams
     * @param N number of slots of each per-language hash table
     * @throws IllegalArgumentException if {@code n} is negative or {@code N} is smaller than 1
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    public LanguageDetector(int n, int N) {
        if (n < 0) {
            throw new IllegalArgumentException("n must not be negative: " + n);
        }
        if (N < 1) {
            throw new IllegalArgumentException("N must be at least 1: " + N);
        }
        this.n = n;
        this.N = N;
        // Initial length only; tableFor() grows the array when more languages are learned.
        languageTables = new HashMap[N];
        languages = new LinkedList<>();
    }

    /**
     * Adds all n-grams of {@code text} to the counts of {@code language}. The language is
     * registered on first use, even when {@code text} is shorter than one n-gram.
     *
     * <p>The per-language table has a fixed capacity: a new n-gram for which no free slot is
     * found is not recorded, while n-grams that are already stored keep being counted.
     *
     * @param language name of the language the text is written in
     * @param text     training text
     */
    public void learnLanguage(String language, String text) {
        HashMap<Integer> table = tableFor(language);
        for (int i = 0; i <= text.length() - n; i++) {
            String ngram = text.substring(i, i + n);
            Integer seen = table.get(ngram);
            table.add(ngram, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Returns how often {@code ngram} was seen in the training texts of {@code language}.
     *
     * @param ngram    n-gram to look up
     * @param language language name
     * @return the stored count, or 0 if the language or the n-gram is unknown
     */
    public int getCount(String ngram, String language) {
        int slot = languages.indexOf(language);
        if (slot < 0) {
            return 0;
        }
        Integer stored = languageTables[slot].get(ngram);
        return stored == null ? 0 : stored;
    }

    /**
     * Lets every n-gram of {@code text} vote for the language(s) with the highest count for
     * that n-gram. On a tie every tied language receives one vote; this includes n-grams
     * that no language has seen (all counts 0), for which every language receives a vote.
     *
     * @param text text whose language should be guessed
     * @return map from language name to number of votes; languages that never received a
     *         vote have no entry
     */
    public HashMap<Integer> apply(String text) {
        HashMap<Integer> votes = new HashMap<>(voteTableSize(), 31);
        List<String> leaders = new ArrayList<>();
        for (int i = 0; i <= text.length() - n; i++) {
            String ngram = text.substring(i, i + n);
            if (ngram.isEmpty()) {
                continue;
            }
            leaders.clear();
            int maxCount = 0;
            int slot = 0;
            // languages and languageTables are index-aligned, so walk both in lock step
            // instead of searching the language list again for every lookup.
            for (String language : languages) {
                Integer stored = languageTables[slot++].get(ngram);
                int count = stored == null ? 0 : stored;
                if (count > maxCount) {
                    leaders.clear();
                    maxCount = count;
                }
                if (count == maxCount) {
                    leaders.add(language);
                }
            }
            for (String language : leaders) {
                Integer soFar = votes.get(language);
                votes.add(language, soFar == null ? 1 : soFar + 1);
            }
        }
        return votes;
    }

    /** Returns the n-gram table of {@code language}, registering the language if necessary. */
    private HashMap<Integer> tableFor(String language) {
        int slot = languages.indexOf(language);
        if (slot < 0) {
            slot = languages.size();
            if (slot == languageTables.length) {
                languageTables = Arrays.copyOf(languageTables, slot * 2);
            }
            languageTables[slot] = new HashMap<>(N, 31);
            languages.add(language);
        }
        return languageTables[slot];
    }

    /**
     * Returns the slot count for the vote table: 101 as long as that is enough, otherwise a
     * prime larger than twice the number of languages, so that the table stays less than
     * half full and quadratic probing always finds a free slot.
     */
    private int voteTableSize() {
        int candidate = 2 * languages.size() + 1;
        while (!isPrime(candidate)) {
            candidate++;
        }
        return Math.max(101, candidate);
    }

    /** Trial-division primality test. */
    private static boolean isPrime(int value) {
        if (value < 2) {
            return false;
        }
        for (int d = 2; (long) d * d <= value; d++) {
            if (value % d == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Fixed-capacity hash table with {@code String} keys, a polynomial hash function and
     * quadratic probing. The table never grows. Removed entries are replaced by a tombstone
     * marker so that probe chains passing through them stay intact.
     *
     * @param <T> type of the stored values
     */
    public static class HashMap<T> {
        /** A key/value pair stored in one slot of the table. */
        public class Entry {
            String key;
            T value;

            Entry(String key, T value) {
                this.key = key;
                this.value = value;
            }
        }

        /** Slots of the table: {@code null} = never used, {@code deleted} = removed entry. */
        Entry[] table;
        /** Base of the polynomial hash function. */
        int basis;
        /** Tombstone that marks a slot whose entry was removed. */
        private final Entry deleted = new Entry(null, null);

        /**
         * Creates an empty table.
         *
         * @param N     number of slots
         * @param basis base of the polynomial hash function
         * @throws IllegalArgumentException if {@code N} is smaller than 1
         */
        @SuppressWarnings("unchecked")
        public HashMap(int N, int basis) {
            if (N < 1) {
                throw new IllegalArgumentException("N must be at least 1: " + N);
            }
            table = (Entry[]) Array.newInstance(Entry.class, N);
            this.basis = basis;
        }

        /**
         * Removes the entry of {@code key}.
         *
         * @return {@code true} if an entry was removed, {@code false} if the key was absent
         */
        public boolean remove(String key) {
            int index = find(key);
            if (index < 0) {
                return false;
            }
            // A tombstone instead of null: keys that were probed past this slot stay reachable.
            table[index] = deleted;
            return true;
        }

        /** Removes the entries of all given keys; absent keys are ignored. */
        public void removeAll(String[] keySet) {
            for (String key : keySet) {
                remove(key);
            }
        }

        /** Removes all entries and tombstones. */
        public void clear() {
            Arrays.fill(table, null);
        }

        /** Returns the keys of all stored entries in slot order. */
        public String[] keySet() {
            List<String> keys = new ArrayList<>();
            for (Entry entry : table) {
                if (entry != null && entry != deleted) {
                    keys.add(entry.key);
                }
            }
            return keys.toArray(new String[0]);
        }

        /** Returns the fraction of slots that hold an entry (tombstones are not counted). */
        public double fillRatio() {
            int count = 0;
            for (Entry entry : table) {
                if (entry != null && entry != deleted) {
                    count++;
                }
            }
            return (double) count / table.length;
        }

        /**
         * Computes the home slot of {@code s} as a polynomial hash over its US-ASCII bytes,
         * reduced modulo the table size after every step. Characters that US-ASCII cannot
         * encode are replaced by the charset's replacement byte before hashing, so such
         * keys collide more often; lookups stay correct because keys are compared as strings.
         *
         * @return a slot index in {@code [0, table.length)}
         */
        public int hashCode(String s) {
            long hash = 0;
            for (byte b : s.getBytes(StandardCharsets.US_ASCII)) {
                // long arithmetic and floorMod keep the index non-negative even for huge
                // tables or a negative basis.
                hash = Math.floorMod(hash * basis + b, (long) table.length);
            }
            return (int) hash;
        }

        /**
         * Returns the value stored for {@code key}.
         *
         * @return the value, or {@code null} if the key is absent
         */
        public T get(String key) {
            int index = find(key);
            return index < 0 ? null : table[index].value;
        }

        /**
         * Stores {@code value} under {@code key}, replacing an existing value.
         *
         * @return {@code true} if the value was stored, {@code false} if the key is new and
         *         the probe sequence contains no free slot
         */
        public boolean add(String key, T value) {
            int index = hashCode(key);
            int free = -1;
            for (int i = 0; i < table.length; i++) {
                Entry entry = table[index];
                if (entry == null) {
                    // End of the probe chain: the key cannot be stored further along.
                    if (free < 0) {
                        free = index;
                    }
                    break;
                }
                if (entry == deleted) {
                    // Reusable, but the key may still be stored further along the chain.
                    if (free < 0) {
                        free = index;
                    }
                } else if (entry.key.equals(key)) {
                    entry.value = value;
                    return true;
                }
                index = next(index, i);
            }
            if (free < 0) {
                return false;
            }
            table[free] = new Entry(key, value);
            return true;
        }

        /** Same as {@link #add(String, Object)}. */
        public boolean put(String key, T value) {
            return add(key, value);
        }

        /** Returns the slot that holds {@code key}, or -1 if the key is absent. */
        private int find(String key) {
            int index = hashCode(key);
            for (int i = 0; i < table.length; i++) {
                Entry entry = table[index];
                if (entry == null) {
                    return -1;
                }
                if (entry != deleted && entry.key.equals(key)) {
                    return index;
                }
                index = next(index, i);
            }
            return -1;
        }

        /** Quadratic probing: after {@code i + 1} steps the offset from the home slot is (i + 1)^2. */
        private int next(int index, int i) {
            return (int) ((index + 2L * i + 1) % table.length);
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement