Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Scraper for a shop page: downloads the page given in the query, pulls a
 * fixed set of fields out of the HTML / embedded JSON with regular
 * expressions, then issues a follow-up POST to the market.yandex.ru
 * "render-lazy" endpoint using tokens found on the page.
 *
 * `this.request` and `this.logger` are not defined in this class; they are
 * expected to be supplied by the host runtime.
 * NOTE(review): the overall shape (defaultConf, generator `parse`) looks like
 * an A-Parser JS parser — confirm against the runtime this is deployed in.
 */
class Parser {
    /** Declares the parser configuration: result fields, output format, limits. */
    constructor() {
        this.defaultConf = {
            version: '0.1.19',
            results: {
                // [result key, human-readable label] pairs.
                flat: [
                    ['title', 'HTML title'],
                    ['shopName','Название магазина'],
                    ['UrlDecoded','Урл магазина '],
                    ['phone','Номер телефона'],
                    ['cpaRegnum','ОГРН'],
                    ['Address','Адрес'],
                    ['year','сколько лет'],
                    ['Reviews','Отзывы'],
                    ['starmag','Рейтинг'],
                    ['tabPanelIds','Ids'],
                    ['id_mag','id_mag'],
                    ['sk','sk'],
                    ['cspNonce','cspNonce']
                ]
            },
            results_format: '$query|$shopName|$UrlDecoded|$phone|$cpaRegnum|$Address|$year|$Reviews|$starmag|$sk|$cspNonce\\n',
            parsecodes: {
                200: 1,
            },
            max_size: 200 * 1024,
        };
    }

    /**
     * Fetches `set.query`, fills `results` with every field whose pattern
     * matches, and — when all required page tokens were found — delegates to
     * `getPost` for the follow-up POST request.
     *
     * @param {{query: string}} set - task descriptor; only `query` (the URL) is read.
     * @param {Object} results - result object to fill; mutated in place.
     * @returns {Object} the same `results` object, with `success` copied from
     *   the GET response. Always returned, regardless of the POST outcome.
     */
    *parse(set, results) {
        this.logger.put("Start scraping query: " + set.query);
        const response = yield this.request('GET', set.query, {}, {
            check_content: ['<\/html>'],
            decode: 'auto-html',
        });

        if (response.success) {
            // [result key, pattern]; capture group 1 is the stored value.
            // Example tokens on the page: "sk":"s2c73c4bdd67d1c26e73f66da79383fbe"
            const patterns = [
                ['title', /<title>(.*?)<\/title>/i],
                ['shopName', /\"shopName\":\"(.*?)\"/i],
                ['UrlDecoded', /\"datasourceUrlDecoded\":\"(.*?)\"/i],
                ['phone', /\"phones\":\{\"raw\":\"(.*?)\"/i],
                ['cpaRegnum', /\"cpaRegnum\":\"(.*?)\"/i],
                ['Address', /\"juridicalAddress\":\"(.*?)\"/i],
                ['year', /<div class=\"_2lRN4IzLYH\" data-tid=\"4c1f6dba\">(.*?)</i],
                ['Reviews', /\"textReviewsCount\":(.*?),/i],
                ['starmag', /<div class=\"QCiKPDByR1\">(.*?)</i],
                ['id_mag', /<a class=\"_19whcS2l-o _1DpwW9o1wj\" href=\"\/(.*?)\"/i],
                ['tabPanelIds', /\"tabPanelIds\":\[\"(.*?)\"\]/i],
                ['sk', /\"sk\":\"(.*?)\"/i],
                ['cspNonce', /\"cspNonce\":\"(.*?)\"/i],
            ];

            // Track extracted values separately from `results`, so values the
            // caller may have pre-populated in `results` are never mistaken
            // for something found on this page.
            const found = {};
            for (const [field, pattern] of patterns) {
                const match = response.data.match(pattern);
                if (match) {
                    found[field] = match[1];
                    results[field] = match[1];
                }
            }

            // The POST is meaningless without these tokens; pass the captured
            // strings (not the raw match arrays) to getPost.
            const missing = ['id_mag', 'tabPanelIds', 'sk', 'cspNonce'].filter((key) => !found[key]);
            if (missing.length === 0) {
                yield* this.getPost(found.id_mag, found.tabPanelIds, found.shopName, found.sk, found.cspNonce);
            } else {
                this.logger.put("Skipping POST request, not found on page: " + missing.join(', '));
            }
        }

        results.success = response.success;
        return results;
    }

    /**
     * Sends the "render-lazy" POST request for the header tabs widget and logs
     * the response body on success.
     *
     * @param {string} id_mag_1 - shop path captured from the page link; sent as `path`.
     * @param {string} tabPanelIds_1 - widget id; used for lazyId, mboWidgetId,
     *   tabsWidgetId and as the prefix of timerId.
     * @param {string} shopName_1 - shop name; used only for logging.
     * @param {string} sk_1 - `sk` token from the page; sent as the `sk` header.
     * @param {string} cspNonce_1 - `cspNonce` token from the page; sent in the body.
     * @returns {Object} the response object produced by `this.request`.
     */
    *getPost(id_mag_1, tabPanelIds_1, shopName_1, sk_1, cspNonce_1) {
        this.logger.put("Start POST scraping for shop: " + shopName_1);

        const headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
            "Accept-Language": "en-US,en;q=0.5",
            "sk": sk_1,
            "Content-Type": "application/json",
            "Cookie": "skid=3784353731612612262;",
        };

        // NOTE(review): the original template left mboWidgetId/tabsWidgetId
        // unquoted — confirm whether the endpoint expects a number there.
        // NOTE(review): id_mag is captured without its leading "/" while an
        // earlier hard-coded sample used "/shop--m-video/211" — confirm which
        // form the endpoint expects for `path`.
        const postBody = {
            widgets: [{
                lazyId: tabPanelIds_1,
                widgetName: "@MarketNode/HeaderTabsLayout",
                options: {
                    mboWidgetId: tabPanelIds_1,
                    props: {
                        isRoot: false,
                        tabsWidgetId: tabPanelIds_1,
                    },
                    nodes: [],
                    position: "undefined-0-0",
                },
                timerId: tabPanelIds_1 + "-TabsLayout",
            }],
            cspNonce: cspNonce_1,
            path: id_mag_1,
        };

        const response_post = yield this.request('POST', 'https://market.yandex.ru/api/render-lazy', {}, {
            body: JSON.stringify(postBody),
            decode: 'utf8',
            headers: headers,
        });

        if (response_post.success) {
            this.logger.put("response.data: " + response_post.data);
        }
        return response_post;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement