jules0707

Wordnet.java

Jan 7th, 2021 (edited)
224
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 6.67 KB | None | 0 0
  1. /*
  2. Correctness:  36/36 tests passed
  3. Memory:       4/4 tests passed
  4. Timing:       26/27 tests passed
  5. Aggregate score: 99.26%
  6.  
  7.  
  8. ********************************************************************************
  9. *  TIMING (substituting reference SAP)
  10. ********************************************************************************
  11.  
  12. Timing WordNet
  13. *-----------------------------------------------------------
  14. Running 11 total tests.
  15.  
  16. Test 2: count number of SAP operations when constructing a WordNet object
  17.         and calling distance() and sap() three times each
  18.   * calls to constructor = 1
  19.   * calls to length()    = 6
  20.   * calls to ancestor()  = 12
  21.     - minimum required   = 3
  22.     - maximum allowed    = 6
  23. ==> FAILED
  24. */
  25.  
  26. import edu.princeton.cs.algs4.Bag;
  27. import edu.princeton.cs.algs4.Digraph;
  28. import edu.princeton.cs.algs4.In;
  29. import edu.princeton.cs.algs4.ST;
  30. import edu.princeton.cs.algs4.StdOut;
  31. import edu.princeton.cs.algs4.Topological;
  32.  
  33. import java.util.ArrayList;
  34. import java.util.Arrays;
  35. import java.util.Collections;
  36. import java.util.List;
  37.  
  38. public class WordNet {
  39.  
  40.     private final ST<Integer, Bag<String>> st1; // Dictionary ids to all the words of a synset noun
  41.     private final ST<String, Bag<Integer>> st2; // Dictionary nouns to all its hypernyms
  42.     private final SAP sap;
  43.  
  44.     // constructor takes the name of the two input files
  45.     public WordNet(String synsets, String hypernyms) {
  46.         // read and save the synsets file
  47.         In in1 = new In(synsets);
  48.         String[] linesSynsets = in1.readAllLines();
  49.         // number of synsets
  50.         int n = linesSynsets.length;
  51.         st2 = new ST<>();
  52.         st1 = new ST<>();
  53.  
  54.         String[] synsetsLineArray;
  55.         for (String line : linesSynsets) { // for each read line
  56.             synsetsLineArray = line.split(",");
  57.             for (String word : synsetsLineArray[1].split(" ")) { // for each noun  in1 a given synset
  58.                 int id = Integer.parseInt(synsetsLineArray[0]);
  59.  
  60.                 // build first symbol table
  61.                 if (!st1.contains(id)) {
  62.                     st1.put(id, new Bag<>());
  63.                 }
  64.                 Bag<String> bag1 = st1.get(id);
  65.                 bag1.add(word);
  66.  
  67.                 //build second symbol table
  68.                 if (!st2.contains(word)) {
  69.                     st2.put(word, new Bag<>());
  70.                 }
  71.                 Bag<Integer> bag2 = st2.get(word);
  72.                 bag2.add(id);
  73.             }
  74.         }
  75.  
  76.         // build of Digraph
  77.         Digraph wnDigraph = new Digraph(n);
  78.  
  79.         // read and save the hypernyms file
  80.         List<String> hypernymLine; // hypernymsLineArray list as we do not know how many there are
  81.         In in2 = new In(hypernyms);
  82.         String[] lines2 = in2.readAllLines();
  83.         int roots = 0; // the number of roots of the graph
  84.  
  85.         for (String line : lines2) {
  86.             hypernymLine = Arrays.asList(line.split(","));
  87.             for (int h = 1; h < hypernymLine.size(); h++) {
  88.                 int v = Integer.parseInt(hypernymLine.get(0));
  89.                 int w = Integer.parseInt(hypernymLine.get(h));
  90.                 // build the digraph
  91.                 wnDigraph.addEdge(v, w);
  92.             }
  93.         }
  94.         // rooted?
  95.         for (int v : st1) {
  96.             if (wnDigraph.outdegree(v) == 0) {
  97.                 roots += 1;
  98.             }
  99.         }
  100.  
  101.         Topological topological = new Topological(wnDigraph);
  102.         // is this graph a DAG?
  103.         if (!topological.hasOrder() || roots > 1) {
  104.             throw new IllegalArgumentException();
  105.         }
  106.         sap = new SAP(wnDigraph);
  107.     }
  108.  
  109.     // returns all WordNet nouns
  110.     public Iterable<String> nouns() {
  111.         ArrayList<String> nouns = new ArrayList<>();
  112.         for (String noun : st2) {
  113.             nouns.add(noun);
  114.         }
  115.         return nouns;
  116.     }
  117.  
  118.     // is the word a WordNet noun?
  119.     public boolean isNoun(String word) {
  120.         try {
  121.             return null != st2.get(word);
  122.         } catch (NullPointerException e) {
  123.             throw new IllegalArgumentException();
  124.         }
  125.     }
  126.  
  127.     // distance between nounA and nounB (defined below)
  128.     public int distance(String nounA, String nounB) {
  129.         return findDistanceAncestor(nounA, nounB)[0];
  130.     }
  131.  
  132.     // a synset (second field of synsets.txt) that is the common ancestor of nounA and nounB
  133.     // in a shortest ancestral path (defined below)
  134.     public String sap(String nounA, String nounB) {
  135.         int idAncestor = findDistanceAncestor(nounA, nounB)[1];
  136.         String res = " ";
  137.         ArrayList<String> resArray = new ArrayList<>();
  138.         for (String s : st1.get(idAncestor)) {
  139.             resArray.add(s);
  140.         }
  141.         Collections.reverse(resArray);
  142.         StringBuilder sb = new StringBuilder();
  143.         for (String s : resArray) {
  144.             sb.append(s);
  145.             sb.append(" ");
  146.         }
  147.         res = sb.toString();
  148.         return res;
  149.     }
  150.  
  151.  
  152.     private int[] findDistanceAncestor(String nounA, String nounB) {
  153.         ArrayList<Integer> idsNounA = new ArrayList<>();
  154.         ArrayList<Integer> idsNounB = new ArrayList<>();
  155.         Bag<Integer> idsNounABag;
  156.         Bag<Integer> idsNounBBag;
  157.  
  158.         int[] res = new int[2];
  159.         res[0] = -1;
  160.         res[1] = -1;
  161.  
  162.         if (isNoun(nounA) && isNoun(nounB)) {
  163.             idsNounABag = st2.get(nounA);
  164.             idsNounBBag = st2.get(nounB);
  165.  
  166.             for (Integer id : idsNounABag) {
  167.                 idsNounA.add(id);
  168.             }
  169.  
  170.             for (Integer id : idsNounBBag) {
  171.                 idsNounB.add(id);
  172.             }
  173.  
  174.             res[0] = sap.length(idsNounA, idsNounB);
  175.             res[1] = sap.ancestor(idsNounA, idsNounB);
  176.  
  177.             return res;
  178.  
  179.         } else throw new IllegalArgumentException();
  180.     }
  181.  
  182.     // do unit testing of this class
  183.     public static void main(String[] args) {
  184.  
  185.         // read the files
  186.         WordNet wordNet = new WordNet(args[0], args[1]);
  187.  
  188.         //StdOut.println(" All the wordNet nouns are :  " + wordNet.nouns());
  189.         String nounA = "n"; // Black_Plague, black_marlin
  190.         String nounB = "b";
  191.         //String nounC = "0,'hood,(slang)";
  192.  
  193.         //StdOut.println("noun : " + nounA + " is " + wordNet.isNoun(nounA));
  194. //        StdOut.println("noun : " + nounB + " is " + wordNet.isNoun(nounB));
  195.         //StdOut.print("WordNet contains " + word1 + " is " + wordNet.isNoun(word1));
  196.         //StdOut.print("distance between " + nounA + " and " + nounB + " is " + wordNet.distance(nounA, nounB) + "\n");
  197.         StdOut.print("a shortest common ancestor between :" + nounA + " and : " + nounB + " is  " + wordNet.sap(nounA, nounB) + "\n");
  198.     }
  199. }
  200.  
Add Comment
Please, Sign In to add comment