Advertisement
Kreol2013

Untitled

Mar 12th, 2021
25
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.00 KB | None | 0 0
  1. const fs = require("fs");
  /**
   * Scraper for ru.mouser.com product pages.
   *
   * Level-0 queries are product page URLs: the page is fetched and the SKU,
   * the contents of the `<main role="main">` element and the product image URL
   * are extracted. Every real image URL is re-queued and later handled by
   * `download`, which stores the picture under `results/ru.mouser.com/images`.
   *
   * The host is expected to provide `this.request`, `this.logger` and
   * `this.query` and to drive the generator methods (this looks like the
   * A-Parser JavaScript parser API — TODO confirm against the host docs).
   */
  class Parser {
    /** Directory, relative to the working directory, where images are stored. */
    static get IMAGE_DIR() {
      return "results/ru.mouser.com/images";
    }

    /**
     * Returns the last "/"-separated segment of a URL, used as the file name.
     * Kept in one place so the path reported by `parseDATA` and the file
     * written by `download` cannot drift apart.
     *
     * @param {string} url - Image URL.
     * @returns {string} Text after the final "/" (the whole string if none).
     */
    static imageFileName(url) {
      return url.split("/").pop();
    }

    constructor() {
      this.defaultConf = {
        version: "0.1.26",
        results: {
          flat: [
            ["sku", "Artikul"],
            ["data_txt", "Full_data"],
            ["img_txt", "Image"],
            ["path", "Path"],
          ],
        },
        results_format:
          "$query\\n <path>$path</path>\\n <img>$img_txt</img>\\n <art>$sku</art>\\n $data_txt\\n",
        parsecodes: {
          200: 1,
        },
        // Maximum document size (10 MiB).
        max_size: 10 * 1024 * 1024,
        // Enable HTTP 2.0.
        http2: true,
        proxyretries: 1000,
      };
    }

    /**
     * Makes sure the image output directory exists.
     *
     * `recursive: true` also creates the missing parent directories (a plain
     * `mkdirSync` throws ENOENT when `results/ru.mouser.com` is absent) and is
     * a no-op when the directory is already there.
     */
    init() {
      fs.mkdirSync(Parser.IMAGE_DIR, { recursive: true });
    }

    /**
     * Entry point: dispatches on the query level.
     *
     * @param {{lvl: number, query: string}} set - Current query descriptor.
     * @param {object} results - Result object to fill in.
     * @returns {object} The filled-in result object.
     */
    *parse(set, results) {
      // Loose equality is kept on purpose so a level that arrives as the
      // string "0" is still routed to the page scraper.
      if (set.lvl == 0) {
        return yield* this.parseDATA(set, results);
      }
      return yield* this.download(set, results);
    }

    /**
     * Fetches a product page and extracts SKU, main content and image URL.
     *
     * @param {{query: string}} set - `set.query` is the product page URL.
     * @param {object} results1 - Receives `sku`, `data_txt`, `img_txt`, `path`
     *   (each only when found) and always `success`.
     * @returns {object} `results1`.
     */
    *parseDATA(set, results1) {
      this.logger.put(`Start scraping query: ${set.query}`);

      const response = yield this.request(
        "GET",
        set.query,
        {},
        {
          // Each header is its own entry: line breaks are not valid inside a
          // header field value, so the three headers must not be packed into
          // a single "accept" string.
          headers: {
            "user-agent":
              "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
            accept:
              "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            // NOTE(review): assumes the host HTTP client can decode brotli
            // ("br") responses — confirm, or drop "br" from this list.
            "accept-encoding": "gzip, deflate, br",
            "accept-language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6",
          },
          check_content: [" | Mouser Российская Федерация</title>"],
          decode: "auto-html",
        }
      );

      if (response.success) {
        const page = response.data;

        const skuMatch = page.match(/"sku": "(.+?)"/i);
        if (skuMatch) {
          // Strip the characters \ | / & + $ from the SKU.
          results1.sku = skuMatch[1].replace(/[\\|\/&+$]/g, "");
        }

        const mainMatch = page.match(/<main role="main">(.*?)<\/main>/is);
        if (mainMatch) {
          results1.data_txt = mainMatch[1];
        }

        const imageMatch = page.match(/"image": "(.*?)"/is);
        if (imageMatch) {
          const imageUrl = imageMatch[1];
          if (imageUrl !== "https://ru.mouser.com/images/no-image.gif") {
            results1.img_txt = imageUrl;
            // Re-queue the image URL as a new query (presumably handled by
            // `download` at the next level — confirm against the host API).
            this.query.add(imageUrl);
            results1.path = `/${Parser.IMAGE_DIR}/${Parser.imageFileName(imageUrl)}`;
          } else {
            // Placeholder picture: report it by name, do not download it.
            results1.img_txt = "no-image.gif";
          }
        }
      }

      results1.success = response.success;
      return results1;
    }

    /**
     * Downloads one image into the image directory.
     *
     * @param {{query: string}} set - `set.query` is the image URL.
     * @param {object} results2 - Receives `success` and `SKIP = 1`
     *   (presumably tells the host to omit this query from the output —
     *   confirm against the host API).
     * @returns {object} `results2`, so `parse` returns a result for image
     *   queries just like it does for page queries.
     */
    *download(set, results2) {
      this.logger.put(`img_txt ${set.query}`);

      const resp = yield this.request(
        "GET",
        set.query,
        {},
        {
          save_to_file: `./${Parser.IMAGE_DIR}/${Parser.imageFileName(set.query)}`,
          max_size: 10 * 1024 * 1024,
          do_gzip: 0,
        }
      );

      // Only claim the picture was saved when the request actually succeeded.
      if (resp.success) {
        this.logger.put(`========Pics in save `);
      } else {
        this.logger.put(`Failed to download image: ${set.query}`);
      }

      results2.success = resp.success;
      results2.SKIP = 1;
      return results2;
    }
  }
  99.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement