Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package main
- import (
- "fmt"
- "sync"
- )
// Fetcher retrieves a page and reports the links found on it.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

// Seen is a mutex-guarded set of URLs that have been claimed for crawling.
// The zero value is ready to use: the map is created on first write.
type Seen struct {
	mu   sync.Mutex
	seen map[string]bool
}

// isSeen reports whether url has been recorded in the set.
func (s *Seen) isSeen(url string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	// Indexing a missing key (or a nil map) yields false.
	return s.seen[url]
}

// markSeen records url in the set. Marking the same URL again is harmless.
func (s *Seen) markSeen(url string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.seen == nil {
		s.seen = make(map[string]bool)
	}
	s.seen[url] = true
}

// claim records url and reports whether this call was the first to record it.
// The check and the write happen under a single lock acquisition, so when
// several goroutines race on the same URL exactly one of them gets true.
func (s *Seen) claim(url string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.seen[url] {
		return false
	}
	if s.seen == nil {
		s.seen = make(map[string]bool)
	}
	s.seen[url] = true
	return true
}

// ss is the set of URLs shared by every Crawl goroutine.
var ss = Seen{seen: make(map[string]bool)}

// Crawl uses fetcher to recursively crawl pages starting with url, to a
// maximum of depth. Linked pages are fetched in parallel, one goroutine per
// newly discovered URL, and a URL that has already been claimed is skipped.
//
// The caller must call wg.Add(1) before invoking Crawl; Crawl calls wg.Done
// when it returns and adds to wg for every goroutine it starts, so wg.Wait
// returns only after the whole crawl has finished.
func Crawl(url string, depth int, fetcher Fetcher, wg *sync.WaitGroup) {
	defer wg.Done()
	if depth <= 0 {
		return
	}
	// Record the page being fetched. Child URLs are already claimed by their
	// parent, but the entry-point URL is not, and without this a link back to
	// it would cause it to be fetched a second time.
	ss.markSeen(url)
	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)
	for _, u := range urls {
		// claim is an atomic test-and-set; a separate isSeen/markSeen pair
		// would let two goroutines both decide to crawl the same URL.
		if !ss.claim(u) {
			continue
		}
		wg.Add(1)
		go Crawl(u, depth-1, fetcher, wg)
	}
}
- func main() {
- var wg sync.WaitGroup
- defer wg.Wait()
- wg.Add(1)
- go Crawl("https://golang.org/", 4, fetcher, &wg)
- }
// fakeResult is the canned response for one URL: the page body and the
// links that page contains.
type fakeResult struct {
	body string
	urls []string
}

// fakeFetcher is a Fetcher backed by an in-memory table of canned results,
// keyed by URL.
type fakeFetcher map[string]*fakeResult

// Fetch looks url up in the table. It returns the stored body and links when
// the URL is present, and a "not found" error otherwise.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	page, known := f[url]
	if !known {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return page.body, page.urls, nil
}
- // fetcher is a populated fakeFetcher.
- var fetcher = fakeFetcher{
- "https://golang.org/": &fakeResult{
- "The Go Programming Language",
- []string{
- "https://golang.org/pkg/",
- "https://golang.org/cmd/",
- },
- },
- "https://golang.org/pkg/": &fakeResult{
- "Packages",
- []string{
- "https://golang.org/",
- "https://golang.org/cmd/",
- "https://golang.org/pkg/fmt/",
- "https://golang.org/pkg/os/",
- },
- },
- "https://golang.org/pkg/fmt/": &fakeResult{
- "Package fmt",
- []string{
- "https://golang.org/",
- "https://golang.org/pkg/",
- },
- },
- "https://golang.org/pkg/os/": &fakeResult{
- "Package os",
- []string{
- "https://golang.org/",
- "https://golang.org/pkg/",
- },
- },
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement