Advertisement
Kreol2013

Untitled

Feb 7th, 2021
17
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.64 KB | None | 0 0
  /**
   * Scraper for a shop page: extracts shop metadata from the page HTML and then
   * issues a follow-up POST to the site's lazy-render endpoint.
   *
   * `this.logger` and `this.request` are not defined in this class; they are
   * expected to be supplied by the host runtime (presumably A-Parser — confirm).
   * Both `parse` and `getPost` are generators: every `yield this.request(...)`
   * hands a request to the host and resumes with the response object.
   */
  class Parser {
    constructor() {
      this.defaultConf = {
        version: '0.1.19',
        results: {
          flat: [
            ['title', 'HTML title'],
            ['shopName', 'Название магазина'],
            ['UrlDecoded', 'Урл магазина '],
            ['phone', 'Номер телефона'],
            ['cpaRegnum', 'ОГРН'],
            ['Address', 'Адрес'],
            ['year', 'сколько лет'],
            ['Reviews', 'Отзывы'],
            ['starmag', 'Рейтинг'],
            ['tabPanelIds', 'Ids'],
            ['id_mag', 'id_mag'],
            ['sk', 'sk'],
            ['cspNonce', 'cspNonce'],
          ],
        },
        results_format: '$query|$shopName|$UrlDecoded|$phone|$cpaRegnum|$Address|$year|$Reviews|$starmag|$sk|$cspNonce\\n',
        parsecodes: {
          200: 1,
        },
        max_size: 200 * 1024,
      };
    }

    /**
     * Runs every field pattern against the page source.
     *
     * @param {string} html - Raw page source.
     * @returns {Object<string, string>} Captured value (group 1) for each
     *   pattern that matched; fields whose pattern did not match are absent.
     */
    extractFields(html) {
      // Result key -> pattern. Order matches the order in which the fields
      // are written into `results`.
      const patterns = [
        ['title', /<title>(.*?)<\/title>/i],
        ['shopName', /"shopName":"(.*?)"/i],
        ['UrlDecoded', /"datasourceUrlDecoded":"(.*?)"/i],
        ['phone', /"phones":\{"raw":"(.*?)"/i],
        ['cpaRegnum', /"cpaRegnum":"(.*?)"/i],
        ['Address', /"juridicalAddress":"(.*?)"/i],
        ['year', /<div class="_2lRN4IzLYH" data-tid="4c1f6dba">(.*?)</i],
        ['Reviews', /"textReviewsCount":(.*?),/i],
        ['starmag', /<div class="QCiKPDByR1">(.*?)</i],
        // The leading "/" of the href is deliberately left out of the capture.
        ['id_mag', /<a class="_19whcS2l-o _1DpwW9o1wj" href="\/(.*?)"/i],
        ['tabPanelIds', /"tabPanelIds":\["(.*?)"\]/i],
        // Sample from the page: "sk":"s2c73c4bdd67d1c26e73f66da79383fbe"
        ['sk', /"sk":"(.*?)"/i],
        ['cspNonce', /"cspNonce":"(.*?)"/i],
      ];

      const found = {};
      for (const [field, pattern] of patterns) {
        const match = html.match(pattern);
        if (match) {
          found[field] = match[1];
        }
      }
      return found;
    }

    /**
     * Fetches `set.query`, fills `results` with the extracted fields and, when
     * the page exposed the ids the lazy-render call needs, performs that call.
     *
     * @param {{query: string}} set - Task descriptor; `query` is the page URL.
     * @param {Object} results - Result object to populate (mutated in place).
     * @returns {Object} The same `results` object, with `success` set to the
     *   outcome of the page request.
     */
    *parse(set, results) {
      this.logger.put("Start scraping query: " + set.query);

      const response = yield this.request('GET', set.query, {}, {
        check_content: ['<\/html>'],
        decode: 'auto-html',
      });

      if (response.success) {
        const fields = this.extractFields(response.data);
        Object.assign(results, fields);

        // The POST body is built from these two values; without them the
        // request would be meaningless, so it is skipped.
        if (fields.id_mag && fields.tabPanelIds) {
          const postResponse = yield* this.getPost(
            fields.id_mag,
            fields.tabPanelIds,
            fields.shopName,
            fields.sk,
            fields.cspNonce,
            set.query
          );
          if (!(postResponse && postResponse.success)) {
            this.logger.put("POST request failed for query: " + set.query);
          }
        } else {
          this.logger.put("Skipping POST request: id_mag/tabPanelIds not found for query: " + set.query);
        }
      }

      results.success = response.success;

      return results;
    }

    /**
     * Sends the lazy-render POST for a shop page and logs the response body.
     *
     * Each of the first five arguments may be either the captured string or a
     * whole `String.prototype.match` result (capture group 1 is used then).
     *
     * @param {string|Array} id_mag_1 - Shop path captured from the page link.
     * @param {string|Array} tabPanelIds_1 - Widget id captured from the page.
     * @param {string|Array} shopName_1 - Shop name (not used in the request).
     * @param {string|Array} sk_1 - Value sent in the `sk` request header.
     * @param {string|Array} cspNonce_1 - Value sent as `cspNonce` in the body.
     * @param {string} [query_1] - Originating query, used only for logging.
     * @returns {Object} The response object produced by `this.request`.
     */
    *getPost(id_mag_1, tabPanelIds_1, shopName_1, sk_1, cspNonce_1, query_1) {
      const unwrap = (value) => {
        if (value === null || value === undefined) {
          return undefined;
        }
        return Array.isArray(value) ? value[1] : value;
      };

      const shopPath = unwrap(id_mag_1);
      const widgetId = unwrap(tabPanelIds_1);
      const sk = unwrap(sk_1);
      const cspNonce = unwrap(cspNonce_1);

      // NOTE(review): the id_mag pattern strips the leading "/" and an earlier
      // draft of this request used a path like "/shop--m-video/211", so the
      // slash is restored here — confirm against the live endpoint.
      let path;
      if (shopPath) {
        path = shopPath.charAt(0) === '/' ? shopPath : '/' + shopPath;
      }

      this.logger.put("Start POST scraping query: " + (query_1 || path));

      const headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept-Language": "en-US,en;q=0.5",
        "Content-Type": "application/json",
        // NOTE(review): hard-coded session cookie — presumably must correspond
        // to the scraped `sk`; verify.
        "Cookie": "skid=3784353731612612262;",
      };
      if (sk) {
        headers["sk"] = sk;
      }

      // NOTE(review): the widget id is sent as a string everywhere; the
      // original template left mboWidgetId/tabsWidgetId unquoted, so the
      // endpoint may expect a number — confirm.
      const postBody = {
        "widgets": [{
          "lazyId": widgetId,
          "widgetName": "@MarketNode/HeaderTabsLayout",
          "options": {
            "mboWidgetId": widgetId,
            "props": { "isRoot": false, "tabsWidgetId": widgetId },
            "nodes": [],
            "position": "undefined-0-0",
          },
          "timerId": widgetId + "-TabsLayout",
        }],
        "cspNonce": cspNonce,
        "path": path,
      };

      const response_post = yield this.request('POST', 'https://market.yandex.ru/api/render-lazy', {}, {
        body: JSON.stringify(postBody),
        decode: 'utf8',
        headers: headers,
      });

      if (response_post.success) {
        this.logger.put("response.data: " + response_post.data);
      }

      return response_post;
    }
  }
  119.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement