Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/sh
- # Pipeline: tokenize -> drop empty lines -> external lexicon lookup -> tag.
- # Every command-line argument is handed to the tokenizer unchanged; the
- # output of the last stage (the tagger) goes to this script's stdout.
- # Set these paths appropriately
- BIN=/mnt/minerva1/nlp/software/TreeTagger/bin
- CMD=/mnt/minerva1/nlp/software/TreeTagger/cmd
- LIB=/mnt/minerva1/nlp/software/TreeTagger/lib
- # Tagger flags, stored as one string and split into words where it is used.
- OPTIONS="-token -lemma -sgml -pt-with-lemma"
- TOKENIZER=${CMD}/tokenize.pl
- TAGGER=${BIN}/tree-tagger
- ABBR_LIST=${LIB}/english-abbreviations
- PARFILE=${LIB}/english.par
- LEXFILE=${LIB}/english-lexicon.txt
- # "$@" keeps each argument as a single word, so file names that contain
- # spaces or glob characters reach the tokenizer intact (an unquoted $*
- # would re-split and glob-expand them).
- "$TOKENIZER" -e -a "$ABBR_LIST" "$@" |
- # remove empty lines
- grep -v '^$' |
- # external lexicon lookup
- perl "$CMD/lookup.perl" "$LEXFILE" |
- # tagging
- # $OPTIONS is deliberately unquoted: it has to split into separate flags.
- # shellcheck disable=SC2086
- "$TAGGER" $OPTIONS "$PARFILE"
- #!/bin/sh
- # Pipeline: tokenize -> external lexicon lookup -> tag -> tag filter.
- # Every command-line argument is handed to the tokenizer unchanged; the
- # output of the last stage (the filter) goes to this script's stdout.
- #
- # External lexicon lookup is enabled in the pipeline below. To turn it
- # off, comment out the "lookup.perl" stage, i.e. put a hash symbol in
- # front of that line.
- # The external lexicon must be stored in a file named
- # "german-lexicon.txt" in the subdirectory "lib".
- # See the Perl script "lookup.perl" for more information
- # on the format of this file.
- # Set these paths appropriately
- BIN=/mnt/minerva1/nlp/software/TreeTagger/bin
- CMD=/mnt/minerva1/nlp/software/TreeTagger/cmd
- LIB=/mnt/minerva1/nlp/software/TreeTagger/lib
- # Tagger flags, stored as one string and split into words where it is used.
- OPTIONS="-token -lemma -sgml -pt-with-lemma"
- TOKENIZER=${CMD}/tokenize.pl
- TAGGER=${BIN}/tree-tagger
- ABBR_LIST=${LIB}/german-abbreviations
- PARFILE=${LIB}/german.par
- LEXFILE=${LIB}/german-lexicon.txt
- FILTER=${CMD}/filter-german-tags
- # "$@" keeps each argument as a single word, so file names that contain
- # spaces or glob characters reach the tokenizer intact (an unquoted $*
- # would re-split and glob-expand them).
- "$TOKENIZER" -a "$ABBR_LIST" "$@" |
- # external lexicon lookup
- perl "$CMD/lookup.perl" "$LEXFILE" |
- # tagging
- # $OPTIONS is deliberately unquoted: it has to split into separate flags.
- # shellcheck disable=SC2086
- "$TAGGER" $OPTIONS "$PARFILE" |
- # error correction
- "$FILTER"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement