Advertisement
Kreol2013

Untitled

Mar 11th, 2021
24
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.74 KB | None | 0 0
  1. const fs = require('fs');
  2. //https://ru.mouser.com/ProductDetail/Microchip-Technology/AVR128DA28T-I-SS/?qs=sGAEpiMZZMs0L%252B%252BydDbPCiYS%252Bs7zdMhfpTlyrC1fdwASDCTmYdXhoQ==
  /**
   * Scraper for ru.mouser.com product pages.
   *
   * Level-0 queries are treated as product-page URLs: the page is fetched and
   * the SKU, the contents of the <main> element and the product image URL are
   * extracted. The image URL is then queued as a follow-up query, which is
   * handled by download() and saved under results/ru.mouser.com/images.
   *
   * NOTE(review): this.request, this.logger and this.query are not defined in
   * this class; they are presumably injected by the host scraping framework
   * that loads it. Confirm against the framework's API documentation.
   */
  class Parser {
    /** Declares the default configuration read by the host framework. */
    constructor() {
      this.defaultConf = {
        version: '0.1.16',
        results: {
          // [result key, display name] pairs for the flat result fields.
          flat: [
            ['sku', 'Artikul'],
            ['data_txt', 'Full_data'],
            ['img_txt', 'Image'],
            ['path', 'Path'],
          ]
        },
        // The "\\n" sequences are intentional: the string carries a literal
        // backslash + "n" (presumably expanded by the host's result templating).
        results_format: '$query\\n <path>$path</path>\\n <img>$img_txt</img>\\n <art>$sku</art>\\n $data_txt\\n',
        // Earlier format, kept for reference:
        //"<url>$query</url>\n<code>$code</code>\n<image>$imageim</image>\n$maindata"
        resultsFileName: "results/ru.mouser.com/dataaa2222.txt",
        parsecodes: {
          200: 1,
        },
        // Maximum document size.
        max_size: 120000 * 1024,
        // Enable HTTP/2 for requests.
        http2: true,
        proxyretries: 1000
      };
    }

    /**
     * Makes sure the image output directory exists.
     *
     * Uses a recursive mkdir so that missing parent directories
     * (results/, results/ru.mouser.com/) are created as well and an already
     * existing directory is not an error.
     */
    init() {
      fs.mkdirSync('results/ru.mouser.com/images', { recursive: true });
    }

    /**
     * Entry point: dispatches on the query level.
     *
     * @param {object} set - Current query descriptor; reads set.lvl and set.query.
     * @param {object} results - Result object to fill in.
     * @returns {object} The same results object.
     */
    *parse(set, results) {
      // Loose comparison kept from the original in case lvl arrives as "0".
      return yield* set.lvl == 0 ? this.parseDATA(set, results) : this.download(set, results);
    }

    /**
     * Fetches a product page and extracts sku, data_txt and img_txt.
     *
     * When an image URL is found it is also queued via this.query.add so it
     * can be processed by download() on a later pass.
     *
     * @param {object} set - Query descriptor; set.query is the page URL.
     * @param {object} results - Result object to fill in.
     * @returns {object} results, with results.success mirroring the response.
     */
    *parseDATA(set, results) {
      this.logger.put(`Start scraping query: ${set.query}`);

      const response = yield this.request('GET', set.query, {}, {
        headers: {
          'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36',
          // Each header gets its own entry: a header value must not contain
          // line breaks (the original packed all three into 'accept').
          'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
          'accept-encoding': 'gzip, deflate, br',
          'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6',
        },
        check_content: [' | Mouser Российская Федерация</title>'],
        decode: 'auto-html',
      });

      if (response.success) {
        const skuMatch = response.data.match(/"sku": "(.+?)"/i);
        if (skuMatch) {
          // Strip \ / | & + $ from the SKU.
          results.sku = skuMatch[1].replace(/[\\/|&+$]/g, '');
        }

        const mainMatch = response.data.match(/<main role="main">(.*?)<\/main>/is);
        if (mainMatch) {
          results.data_txt = mainMatch[1];
        }

        const imageMatch = response.data.match(/"image": "(.*?)"/is);
        if (imageMatch) {
          results.img_txt = imageMatch[1];
          this.query.add(imageMatch[1]);
        }
      }

      // Record the outcome on both paths, the same way download() does.
      results.success = response.success;

      return results;
    }

    /**
     * Downloads the image at set.query into results/ru.mouser.com/images.
     *
     * The site's "no image" placeholder is not downloaded; results.SKIP is
     * set to 1 instead.
     *
     * @param {object} set - Query descriptor; set.query is the image URL.
     * @param {object} results - Result object to fill in.
     * @returns {object} results, with path set when the download succeeded.
     */
    *download(set, results) {
      this.logger.put(`img_txt ${set.query}`);

      if (set.query === "https://ru.mouser.com/images/no-image.gif") {
        results.SKIP = 1;
        return results;
      }

      // The query is expected to be the image URL; its last path segment
      // becomes the file name.
      const fileName = set.query.split('/').pop();

      const resp = yield this.request('GET', set.query, {}, {
        save_to_file: `./results/ru.mouser.com/images/${fileName}`,
        max_size: 0,
        do_gzip: 0
      });

      results.success = resp.success;
      if (resp.success) {
        this.logger.put(`фотку получили`);
        results.path = `/results/ru.mouser.com/images/${fileName}`;
      } else {
        this.logger.put('Download failed');
      }

      return results;
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement