Advertisement
sam65536

goQueryTest.go

Sep 8th, 2020
539
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Go 1.66 KB | None | 0 0
  1. // Parses HTML in Go using goquery and collects the heading and cell text from every table in the document. Demonstrates nested Find statements.
  2.  
  3. package main
  4.  
  5. import (
  6.     "fmt"
  7.     "log"
  8.     "strings"
  9.  
  10.     "github.com/PuerkitoBio/goquery"
  11. )
  12.  
  13. func goGet() {
  14.     var headings, row []string
  15.     var rows [][]string
  16.  
  17.     data := `<html><body>
  18.     <table>
  19.         <tr><th>Heading 1</th><th>Heading two</th></tr>
  20.         <tr><td>Data 11</td><td>Data 12</td></tr>
  21.         <tr><td>Data 21</td><td>Data 22</td></tr>
  22.         <tr><td>Data 31</td><td>Data 32</td></tr>
  23.         <tr><td>Data 41</td><td>Data 42</td></tr>
  24.     </table>
  25.     <p>Stuff in here</p>
  26.     <table>
  27.         <tr><th>Heading 21</th><th>Heading 2two</th></tr>
  28.         <tr><td>Data 211</td><td>Data 212</td></tr>
  29.         <tr><td>Data 221</td><td>Data 222</td></tr>
  30.         <tr><td>Data 231</td><td><span></span><span><a href="">Data 232</a></span></td></tr>
  31.         <tr><td>Data 241</td><td>Data 242</td></tr>
  32.     </table>
  33.     </body>
  34.     </html>
  35.     `
  36.     doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
  37.     if err != nil {
  38.         fmt.Println("No url found")
  39.         log.Fatal(err)
  40.     }
  41.  
  42.     // Find each table
  43.     doc.Find("table").Each(func(index int, tablehtml *goquery.Selection) {
  44.         tablehtml.Find("tr").Each(func(indextr int, rowhtml *goquery.Selection) {
  45.             rowhtml.Find("th").Each(func(indexth int, tableheading *goquery.Selection) {
  46.                 headings = append(headings, tableheading.Text())
  47.             })
  48.             rowhtml.Find("td").Each(func(indexth int, tablecell *goquery.Selection) {
  49.                 row = append(row, tablecell.Text())
  50.             })
  51.             rows = append(rows, row)
  52.             row = nil
  53.         })
  54.     })
  55.     fmt.Println("####### headings = ", len(headings), headings)
  56.     fmt.Println("####### rows = ", len(rows), rows)
  57. }
  58.  
  59. func main() {
  60.     goGet()
  61. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement