Advertisement
dimkiriaoks

books to scrape

Nov 15th, 2021
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.78 KB | None | 0 0
  <?php
  /**
   * Scrapes the book listing at https://books.toscrape.com/ and exposes the
   * result to the HTML template that follows as two parallel lists:
   *
   *   $books['title'][$i] - title of the i-th book card
   *   $books['cover'][$i] - cover image path exactly as it appears in the
   *                         card's <img src> (the template prefixes it with $url)
   *
   * Guzzle throws on transport failures and, by default, on 4xx/5xx responses;
   * those exceptions are deliberately left to propagate.
   */
  require 'vendor/autoload.php';

  $url = "https://books.toscrape.com/";

  // Bound the request so a slow or unreachable host cannot hang the page.
  $httpClient = new \GuzzleHttp\Client(['timeout' => 10]);
  $response = $httpClient->get($url);
  $htmlString = (string) $response->getBody();

  $extractedTitles = [];
  $extractedCovers = [];

  // DOMDocument::loadHTML() does not accept an empty document, so an empty
  // body simply yields an empty book list.
  if (trim($htmlString) !== '') {
      // Collect libxml parse warnings internally instead of emitting them,
      // then discard them and restore the previous setting once parsing is done.
      $previousLibxmlSetting = libxml_use_internal_errors(true);
      $doc = new DOMDocument();
      $doc->loadHTML($htmlString);
      libxml_clear_errors();
      libxml_use_internal_errors($previousLibxmlSetting);

      $xpath = new DOMXPath($doc);

      // Walk the listing one <article> at a time so every title stays paired
      // with the cover of the same card, even if a card lacks a thumbnail.
      $articles = $xpath->query('//ol[@class="row"]//li//article');
      foreach ($articles as $article) {
          $link = $xpath->query('.//h3/a', $article)->item(0);
          if (!$link instanceof DOMElement) {
              continue;
          }

          // NOTE(review): the anchor text on the listing page appears to be a
          // shortened title; prefer the "title" attribute when it is present
          // and fall back to the visible text otherwise.
          $title = trim($link->getAttribute('title'));
          if ($title === '') {
              $title = trim($link->textContent);
          }

          $image = $xpath->query(".//img[@class='thumbnail']", $article)->item(0);

          $extractedTitles[] = $title;
          $extractedCovers[] = $image instanceof DOMElement ? $image->getAttribute('src') : '';
      }
  }

  $books = [
      'title' => $extractedTitles,
      'cover' => $extractedCovers,
  ];
  ?>

  <?php
  /*
   * Template: renders the scraped books as a grid of Bootstrap cards.
   *
   * Reads two variables set earlier in this file:
   *   $url   - base URL that each cover path is appended to
   *   $books - ['title' => list of titles, 'cover' => list of cover paths],
   *            matched by index
   *
   * Both lists come from a third-party page, so every value is HTML-escaped
   * before it is written into the document.
   */
  ?>
  <!DOCTYPE html>
  <html lang="en">
  <head>
      <meta charset="UTF-8">
      <meta http-equiv="X-UA-Compatible" content="IE=edge">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
      <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.6.1/dist/css/bootstrap.min.css" integrity="sha384-zCbKRCUGaJDkqS1kPbPd7TveP5iyJE0EjAuZQTgFLD2ylzuqKfdKlfG/eSrtxUkn" crossorigin="anonymous">
      <title>books scrape</title>
  </head>
  <body>
      <div class="container">

          <div class="text-center">
              <h1 style="text-decoration: underline;">Scraped Books</h1>
          </div>
          <div class="row">
              <?php foreach ($books['title'] as $i => $bookTitle): ?>
                  <?php
                  // A title without a matching cover entry still renders; it
                  // just falls back to an empty path instead of raising an
                  // undefined-offset notice.
                  $coverPath = isset($books['cover'][$i]) ? $books['cover'][$i] : '';
                  $safeCoverUrl = htmlspecialchars($url . $coverPath, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
                  $safeTitle = htmlspecialchars($bookTitle, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
                  ?>
                  <div class="col-xl-2 col-lg-2 col-12 mb-5">
                      <div class="card">
                          <img src="<?= $safeCoverUrl ?>" height="200" alt="">
                          <div class="card-body" style="max-height: 100px; height: 100px;  overflow: auto;">
                              <span><?= $safeTitle ?></span>
                          </div>
                      </div>
                  </div>
              <?php endforeach; ?>

          </div>
      </div>

          <script src="https://cdn.jsdelivr.net/npm/jquery@3.5.1/dist/jquery.slim.min.js" integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj" crossorigin="anonymous"></script>
          <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js" integrity="sha384-9/reFTGAW83EW2RDu2S0VKaIzap3H66lZH81PoYlFhbGU+6BZp6G7niu735Sk7lN" crossorigin="anonymous"></script>
          <script src="https://cdn.jsdelivr.net/npm/bootstrap@4.6.1/dist/js/bootstrap.min.js" integrity="sha384-VHvPCCyXqtD5DqJeNxl2dtTyhF78xXNXdkwX1CZeRusQfRKp+tA7hAShOK/B/fQ2" crossorigin="anonymous"></script>
  </body>
  </html>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement