Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/sh
#
# Tokenize English text and tag it with TreeTagger.
#
# Usage: <this script> [ARG ...]
#   Every command-line argument is handed to the tokenizer unchanged.
#   The pipeline is: tokenizer -> drop empty lines -> lexicon lookup -> tagger.
#   The output of the final (tagger) stage is written to stdout.

# Set these paths appropriately
BIN=/mnt/minerva1/nlp/software/TreeTagger/bin
CMD=/mnt/minerva1/nlp/software/TreeTagger/cmd
LIB=/mnt/minerva1/nlp/software/TreeTagger/lib

# Flags for the tagger. Kept as one string and expanded UNQUOTED below so
# that the shell splits it into one argument per flag.
OPTIONS="-token -lemma -sgml -pt-with-lemma"

TOKENIZER=${CMD}/tokenize.pl
TAGGER=${BIN}/tree-tagger
ABBR_LIST=${LIB}/english-abbreviations
PARFILE=${LIB}/english.par
LEXFILE=${LIB}/english-lexicon.txt

# "$@" (rather than $*) forwards each argument as a single word, so names
# containing spaces or glob characters reach the tokenizer intact.
"$TOKENIZER" -e -a "$ABBR_LIST" "$@" |
# remove empty lines
grep -v '^$' |
# external lexicon lookup
perl "$CMD/lookup.perl" "$LEXFILE" |
# tagging
# shellcheck disable=SC2086 # word splitting of $OPTIONS is intentional
"$TAGGER" $OPTIONS "$PARFILE"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement