Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
const fs = require("fs");
/** Directory, relative to the working directory, that holds downloaded product images. */
const IMAGES_DIR = "results/ru.mouser.com/images";

/** Value passed as `max_size` (maximum document size, in bytes) for requests: 10 MiB. */
const MAX_DOCUMENT_SIZE = 10 * 1024 * 1024;

/** Image URL that is treated as "no picture": it is reported as `no-image.gif` and never queued for download. */
const NO_IMAGE_URL = "https://ru.mouser.com/images/no-image.gif";

/**
 * Returns the last `/`-separated segment of a URL, used as the local image file name.
 *
 * @param {string} url - Image URL.
 * @returns {string} Everything after the final `/` (the whole string when there is none).
 */
function imageFileName(url) {
  return url.split("/").pop();
}

/**
 * Scraper for ru.mouser.com product pages.
 *
 * Level-0 queries are product pages: the SKU, the `<main>` markup and the
 * product image URL are extracted, and the image URL is queued as a follow-up
 * query. Queries of any other level are treated as image URLs and are saved
 * into {@link IMAGES_DIR}.
 *
 * NOTE(review): `this.request`, `this.logger` and `this.query` are not defined
 * in this class; they are assumed to be injected by the scraping framework
 * that loads it — confirm against the host's parser API.
 */
class Parser {
  constructor() {
    this.defaultConf = {
      version: "0.1.26",
      results: {
        flat: [
          ["sku", "Artikul"],
          ["data_txt", "Full_data"],
          ["img_txt", "Image"],
          ["path", "Path"],
        ],
      },
      results_format:
        "$query\\n <path>$path</path>\\n <img>$img_txt</img>\\n <art>$sku</art>\\n $data_txt\\n",
      parsecodes: {
        200: 1,
      },
      // Maximum document size.
      max_size: MAX_DOCUMENT_SIZE,
      // Enable HTTP/2.
      http2: true,
      proxyretries: 1000,
    };
  }

  /**
   * Ensures the image output directory exists.
   *
   * `recursive: true` creates the missing parent directories as well and is a
   * no-op when the directory is already present, so the call is safe on a
   * fresh checkout and on every later run.
   */
  init() {
    fs.mkdirSync(IMAGES_DIR, { recursive: true });
  }

  /**
   * Entry point: dispatches a query by its level.
   *
   * @param {{lvl: number, query: string}} set - Current query descriptor.
   * @param {object} results - Results object to populate.
   * @returns {Generator<*, object, *>} Generator that resolves to the populated results object.
   */
  *parse(set, results) {
    if (set.lvl === 0) {
      return yield* this.parseDATA(set, results);
    }
    return yield* this.download(set, results);
  }

  /**
   * Fetches a product page and extracts SKU, main markup and image data.
   *
   * Sets `results1.success` to the response status. On success it may also set
   * `sku`, `data_txt`, `img_txt` and `path`, and queues the image URL via
   * `this.query.add` unless it is the "no image" placeholder.
   *
   * @param {{query: string}} set - Query descriptor; `query` is the page URL.
   * @param {object} results1 - Results object to populate.
   * @returns {Generator<*, object, *>} Generator that resolves to `results1`.
   */
  *parseDATA(set, results1) {
    this.logger.put(`Start scraping query: ${set.query}`);

    const response = yield this.request(
      "GET",
      set.query,
      {},
      {
        // Each header is its own entry: a header value must not contain line
        // breaks, so the three headers cannot be glued together with "\n".
        // NOTE(review): the host may also manage accept-encoding itself — confirm.
        headers: {
          "user-agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
          accept:
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
          "accept-encoding": "gzip, deflate, br",
          "accept-language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6",
        },
        check_content: [" | Mouser Российская Федерация</title>"],
        decode: "auto-html",
      }
    );

    results1.success = response.success;
    if (!response.success) {
      return results1;
    }

    const page = response.data;

    const skuMatch = page.match(/"sku": "(.+?)"/i);
    if (skuMatch) {
      // Strip the characters \ / | & + $ from the SKU.
      results1.sku = skuMatch[1].replace(/[\\/|&+$]/g, "");
    }

    const mainMatch = page.match(/<main role="main">(.*?)<\/main>/is);
    if (mainMatch) {
      results1.data_txt = mainMatch[1];
    }

    const imageMatch = page.match(/"image": "(.*?)"/is);
    if (imageMatch) {
      const imageUrl = imageMatch[1];
      if (imageUrl === NO_IMAGE_URL) {
        results1.img_txt = "no-image.gif";
      } else {
        results1.img_txt = imageUrl;
        // Queue the image URL as a follow-up query; parse() routes queries
        // whose level is not 0 to download().
        this.query.add(imageUrl);
        results1.path = `/${IMAGES_DIR}/${imageFileName(imageUrl)}`;
      }
    }

    return results1;
  }

  /**
   * Downloads an image and stores it under {@link IMAGES_DIR}.
   *
   * Marks the result with `SKIP = 1`, records the response status in
   * `success`, and returns the results object so that `parse()` always
   * resolves to it.
   *
   * @param {{query: string}} set - Query descriptor; `query` is the image URL.
   * @param {object} results2 - Results object to populate.
   * @returns {Generator<*, object, *>} Generator that resolves to `results2`.
   */
  *download(set, results2) {
    this.logger.put(`img_txt ${set.query}`);

    const resp = yield this.request(
      "GET",
      set.query,
      {},
      {
        save_to_file: `./${IMAGES_DIR}/${imageFileName(set.query)}`,
        max_size: MAX_DOCUMENT_SIZE,
        do_gzip: 0,
      }
    );

    if (resp.success) {
      this.logger.put(`========Pics in save `);
    } else {
      this.logger.put(`Failed to download image: ${set.query}`);
    }

    results2.SKIP = 1;
    results2.success = resp.success;
    return results2;
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement