Advertisement
krot

whoistory crawler

Jul 8th, 2020
1,942
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.44 KB | None | 0 0
  <?php
  /**
   * whoistory crawler.
   *
   * Requests the daily archive page of whoistory.com for every day from $from
   * (inclusive) to $to (exclusive), collects the text of the domain links found
   * on each page and writes them, one per line, to domains.txt. One progress
   * line is printed per requested day.
   */
  ##http://proxy-base.com/208173-post6.html
  require "vendor/autoload.php";

  use Amp\Http\Client\HttpClientBuilder;
  use Amp\Http\Client\Request;
  use Amp\Loop;
  use Symfony\Component\DomCrawler\Crawler;

  $from = new DateTime('2006-06-01');
  $to = new DateTime('2020-05-02');


  // $from is an object, so the coroutine advances the very same instance even
  // though it is captured by value; no reference capture is needed.
  Loop::run(function () use ($from, $to) {
      $client = HttpClientBuilder::buildDefault();

      // Resolve the open() promise once, up front. A failure to open the file
      // surfaces as an exception thrown out of Loop::run() before any request
      // is made. Mode "w" truncates an existing domains.txt.
      $file = yield \Amp\File\open("domains.txt", "w");

      try {
          while ($from < $to) {
              // format() already yields the leading slash, so the host part
              // must not end with one (avoids "whoistory.com//2006/06/01/").
              $uri = "https://whoistory.com" . $from->format('/Y/m/d/');
              $response = yield $client->request(new Request($uri, 'GET'));
              $status = $response->getStatus();

              if ($status === 200) {
                  $crawler = new Crawler((string) yield $response->getBody()->buffer());
                  $links = $crawler->filter('div.left > a');

                  // Keep the text of every link whose href is at least two
                  // characters long and which is not the "backlink" anchor;
                  // rejected links are mapped to null and filtered out below.
                  $candidates = $links->each(static function (Crawler $link) {
                      $href = (string) $link->attr('href');
                      if (strlen($href) < 2 || $link->attr('class') === 'backlink') {
                          return null;
                      }

                      return $link->text();
                  });
                  $domains = array_filter($candidates, static function ($name) {
                      return $name !== null;
                  });

                  // One awaited write per day: the data is handed to the file
                  // driver before the next request starts, and a write error
                  // stops the crawl instead of being lost.
                  if ($domains !== []) {
                      yield $file->write(implode("\n", $domains) . "\n");
                  }

                  echo $uri . ' : ' . $links->count() . PHP_EOL;
              } else {
                  // Report the real status code rather than assuming 404.
                  echo $uri . ' ' . $status . PHP_EOL;
              }

              $from->modify('+1 day');
          }
      } finally {
          // Close the handle on normal completion and on failure alike.
          yield $file->close();
      }
  });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement