Advertisement
metalx1000

Puppeteer page scraper

Jan 13th, 2024 (edited)
2,170
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. mkdir scraper
  2. cd scraper
  3. npm i puppeteer --save
  4.  
  5. ######SCRIPT (save as scrape.js)######
  const puppeteer = require('puppeteer');

  // Page to scrape, taken from the first command-line argument after the script name.
  const url = process.argv[2];
  if (!url) {
      // Throw a real Error (not a bare string) so the failure carries a stack trace.
      throw new Error("Please provide URL as a first argument");
  }
  11.  
  /**
   * Open the module-level `url` in a browser started via `puppeteer.launch`
   * and collect every `<a>` element found on the page.
   *
   * @returns {Promise<Array<{url: (string|null), text: string}>>} One entry per
   *   anchor: `url` is the raw `href` attribute (null when the attribute is
   *   absent) and `text` is the anchor's `innerText`.
   *   The promise rejects with whatever error launch, navigation or evaluation
   *   raised.
   */
  async function run() {
      const browser = await puppeteer.launch({headless: 'new'});
      try {
          const page = await browser.newPage();
          await page.goto(url);
          // The callback is executed inside the page, so it may only use page
          // globals such as `document` and must return serializable data.
          return await page.evaluate(() => {
              return Array.from(document.querySelectorAll('a'), (item) => ({
                  url: item.getAttribute('href'),
                  text: item.innerText,
              }));
          });
      } finally {
          // Always shut the browser down, even when navigation or evaluation
          // fails; otherwise the browser process is left running.
          await browser.close();
      }
  }
  // Entry point: print the scraped links. On failure, log the error and mark
  // the process as failed so shell callers see a non-zero exit status.
  run()
      .then((links) => {
          console.log(links);
      })
      .catch((err) => {
          console.error(err);
          process.exitCode = 1;
      });
  37.  
  38. ########RUN#########
  39. node scrape.js "https://filmsbykris.com"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement