#!/bin/bash
# Get the starting URL from the first argument
url="$1"
# Create a unique file name for this search
searched="$(date +%s).lst"
touch "$searched"
echo "Saving to $searched"
function crawl(){
    local url="$1"
    # Skip this URL if it has already been crawled
    if ! grep -qxF "$url" "$searched"; then
        # Add the current URL to the searched list
        echo "$url" >> "$searched"
        echo "Crawling ${url}..."
        # Dump the page as text, split out every http(s) URL,
        # decode percent-encoded "://" sequences, dedupe,
        # then recurse into each discovered URL
        lynx -dump "$url" |\
        sed 's/http/\nhttp/g' |\
        grep -e "^http:" -e "^https:" |\
        sed 's/%3A%2F%2F/:\/\//g' |\
        sort -u | while read -r line
        do
            crawl "$line"
        done
    fi
}
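# Note: because a pipeline feeds the while loop above, the loop body
# runs in a subshell, so shell variables set inside it would not
# persist past "done"; deduplication still works across the recursion
# because visited URLs are tracked in the $searched file on disk.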
# Start the crawl from the user-supplied URL
crawl "$url"
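# Example usage (a sketch; "crawler.sh" and the URL below are
# hypothetical placeholders, not names from the original paste):
#   chmod +x crawler.sh
#   ./crawler.sh "https://example.com"
# Each run creates a timestamped .lst file recording every URL visited
# and recurses into every http(s) link that lynx finds on each page.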