Advertisement
YasserKhalil2019

John

Nov 27th, 2020
170
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.77 KB | None | 0 0
  1. from requests_html import HTMLSession
  2. from bs4 import BeautifulSoup
  3. import pandas as pd
  4. import argparse
  5.  
  6. #Comment out these 3 lines and change the searchterm variable, if you do not wish to use argparse version
  7. my_parser = argparse.ArgumentParser(description='Return BF Amazon Deals')
  8. my_parser.add_argument('searchterm', metavar='searchterm', type=str, help='The item to be searched for. Use + for spaces')
  9. args = my_parser.parse_args()
  10.  
  11. searchterm = args.searchterm
  12. s = HTMLSession()
  13. dealslist = []
  14.  
  15. url = f'https://www.amazon.co.uk/s?k={searchterm}&i=black-friday'
  16.  
  17. def getdata(url):
  18. r = s.get(url)
  19. r.html.render(sleep=20)
  20. soup = BeautifulSoup(r.html.html, 'html.parser')
  21. return soup
  22.  
  23. def getdeals(soup):
  24. products = soup.find_all('div', {'data-component-type': 's-search-result'})
  25. for item in products:
  26. title = item.find('a', {'class': 'a-link-normal a-text-normal'}).text.strip()
  27. short_title = item.find('a', {'class': 'a-link-normal a-text-normal'}).text.strip()[:25]
  28. link = item.find('a', {'class': 'a-link-normal a-text-normal'})['href']
  29.  
  30. spanlist = item.find_all('span', {'class': 'a-offscreen'})
  31. saleprice, oldprice = 0, 0
  32. if not spanlist:
  33. print(title, "No Price")
  34. else:
  35. try:
  36. saleprice = float(spanlist[0].text.replace('£','').replace(',','').strip())
  37. oldprice = float(spanlist[1].text.replace('£','').replace(',','').strip())
  38. except:
  39. oldprice = float(spanlist[0].text.replace('£','').replace(',','').strip())
  40.  
  41. try:
  42. reviews = float(item.find('span', {'class': 'a-size-base'}).text.strip())
  43. except:
  44. reviews = 0
  45.  
  46. saleitem = {
  47. 'title': title,
  48. 'short_title': short_title,
  49. 'link': link,
  50. 'saleprice': saleprice,
  51. 'oldprice': oldprice,
  52. 'reviews': reviews
  53. }
  54. dealslist.append(saleitem)
  55. return
  56.  
  57. def getnextpage(soup):
  58. pages = soup.find('ul', {'class': 'a-pagination'})
  59. try:
  60. if not pages.find('li', {'class': 'a-disabled a-last'}):
  61. url = 'https://www.amazon.co.uk' + str(pages.find('li', {'class': 'a-last'}).find('a')['href'])
  62. return url
  63. else:
  64. return
  65. except:
  66. return
  67.  
  68. while True:
  69. soup = getdata(url)
  70. getdeals(soup)
  71. url = getnextpage(soup)
  72. if not url:
  73. break
  74. else:
  75. print(url)
  76. print(len(dealslist))
  77.  
  78. df = pd.DataFrame(dealslist)
  79. df['percentoff'] = 100 - ((df.saleprice / df.oldprice) * 100)
  80. df = df.sort_values(by=['percentoff'], ascending=False)
  81. df.to_csv(searchterm + '-bfdeals.csv', index=False)
  82. print('Complete')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement