Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- const fs = require('fs');
- //https://ru.mouser.com/ProductDetail/Microchip-Technology/AVR128DA28T-I-SS/?qs=sGAEpiMZZMs0L%252B%252BydDbPCiYS%252Bs7zdMhfpTlyrC1fdwASDCTmYdXhoQ==
/**
 * Scraper for ru.mouser.com product pages.
 *
 * Level-0 queries are product page URLs: the page is fetched and the SKU,
 * the main HTML block and the product image URL are extracted. The image URL
 * is then queued as a follow-up query, which is downloaded to disk by
 * `download()`.
 *
 * NOTE(review): `this.request`, `this.logger` and `this.query` are not defined
 * in this class; they are presumably injected by the host scraping framework
 * (looks like an A-Parser JS parser) - confirm against the runtime.
 */
class Parser {
    constructor() {
        this.defaultConf = {
            version: '0.1.16',
            results: {
                flat: [
                    ['sku', 'Artikul'],
                    ['data_txt', 'Full_data'],
                    ['img_txt', 'Image'],
                    ['path', 'Path'],
                ]
            },
            results_format: '$query\\n <path>$path</path>\\n <img>$img_txt</img>\\n <art>$sku</art>\\n $data_txt\\n',
            // Earlier format, kept for reference:
            // "<url>$query</url>\n<code>$code</code>\n<image>$imageim</image>\n$maindata"
            resultsFileName: "results/ru.mouser.com/dataaa2222.txt",
            parsecodes: {
                200: 1,
            },
            // Maximum document size.
            max_size: 120000 * 1024,
            // Enable the HTTP/2.0 protocol.
            http2: true,
            proxyretries: 1000
        };
    }

    /**
     * Makes sure the image output directory exists.
     *
     * `recursive: true` creates missing parent directories as well and does
     * not throw when the directory is already present, so no prior
     * `existsSync` check is required.
     */
    init() {
        fs.mkdirSync('results/ru.mouser.com/images', { recursive: true });
    }

    /**
     * Entry point: level-0 queries are product pages, anything deeper is an
     * image URL queued by `parseDATA`.
     *
     * @param {object} set - query descriptor; `set.query` is the URL, `set.lvl` the depth.
     * @param {object} results - result object to fill in.
     * @returns {object} the same `results` object.
     */
    *parse(set, results) {
        // Loose equality kept on purpose: the type of `set.lvl` is supplied by
        // the host and is not visible from this file.
        const handler = set.lvl == 0
            ? this.parseDATA(set, results)
            : this.download(set, results);
        return yield* handler;
    }

    /**
     * Fetches a product page and extracts `sku`, `data_txt` and `img_txt`.
     * A found image URL is also queued via `this.query.add`.
     *
     * @param {object} set - query descriptor; `set.query` is the product page URL.
     * @param {object} results - result object to fill in.
     * @returns {object} `results`; on a failed request only `results.success`
     *     is set (to the falsy `response.success`).
     */
    *parseDATA(set, results) {
        this.logger.put("Start scraping query: " + set.query);

        const response = yield this.request('GET', set.query, {}, {
            // Each header is its own entry: a header value must not contain
            // line breaks, so the former single "accept" value with embedded
            // "\naccept-encoding: ..." lines was malformed.
            headers: {
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36',
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                'accept-encoding': 'gzip, deflate, br',
                'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6'
            },
            check_content: [' \| Mouser Российская Федерация</title>'],
            decode: 'auto-html',
        });

        if (!response.success) {
            results.success = response.success;
            return results;
        }

        const page = response.data;

        const skuMatch = page.match(/"sku": "(.+?)"/i);
        if (skuMatch) {
            // Strip \ | / & + $ from the SKU.
            results.sku = skuMatch[1].replace(/[\\|\/&+$]/g, '');
        }

        const mainMatch = page.match(/<main role="main">(.*?)<\/main>/is);
        if (mainMatch) {
            results.data_txt = mainMatch[1];
        }

        const imageMatch = page.match(/"image": "(.*?)"/is);
        if (imageMatch) {
            results.img_txt = imageMatch[1];
            // Queue the image URL as a follow-up query.
            this.query.add(imageMatch[1]);
        }

        return results;
    }

    /**
     * Downloads the image at `set.query` into the images directory.
     * The site's "no image" placeholder is not downloaded; `results.SKIP` is
     * set to 1 instead.
     *
     * @param {object} set - query descriptor; `set.query` is the image URL.
     * @param {object} results - result object to fill in.
     * @returns {object} `results` with `success` set from the request and, on
     *     success, `path` pointing at the saved file.
     */
    *download(set, results) {
        this.logger.put(`img_txt ${set.query}`);

        if (set.query === "https://ru.mouser.com/images/no-image.gif") {
            results.SKIP = 1;
            return results;
        }

        // Last path segment of the URL, without any "?query" or "#fragment"
        // part, which must not end up in a file name.
        const fileName = set.query.split(/[?#]/)[0].split('/').pop();

        // img_txt is expected to hold the picture URL.
        const resp = yield this.request('GET', set.query, {}, {
            save_to_file: `./results/ru.mouser.com/images/${fileName}`,
            max_size: 0,
            do_gzip: 0
        });

        results.success = resp.success;
        if (resp.success) {
            this.logger.put(`фотку получили`);
            results.path = `/results/ru.mouser.com/images/${fileName}`;
        } else {
            this.logger.put('Download failed');
        }
        return results;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement