Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import argparse
from urllib.parse import quote_plus

import pandas as pd
from bs4 import BeautifulSoup
from requests_html import HTMLSession
# To hard-code the search term instead of using argparse, comment out the
# three parser lines below and assign `searchterm` directly.
my_parser = argparse.ArgumentParser(description='Return BF Amazon Deals')
my_parser.add_argument('searchterm', metavar='searchterm', type=str, help='The item to be searched for. Use + for spaces')
args = my_parser.parse_args()
searchterm = args.searchterm

# Session shared by every page fetch, and the list that collects scraped deals.
s = HTMLSession()
dealslist = []

# Escape the term before placing it in the query string so characters such as
# '&', '#' or a literal space cannot corrupt the URL. '+' is kept as-is so the
# documented "use + for spaces" convention keeps working.
search_query = quote_plus(searchterm, safe='+')
url = f'https://www.amazon.co.uk/s?k={search_query}&i=black-friday'
def getdata(url):
    """Fetch *url* with the shared session and return the rendered page as soup.

    The response's HTML is rendered through ``requests_html`` (called with
    ``sleep=20``) and the resulting markup is parsed by BeautifulSoup using
    the built-in ``html.parser``.
    """
    response = s.get(url)
    page = response.html
    page.render(sleep=20)
    return BeautifulSoup(page.html, 'html.parser')
def _parse_price(text):
    """Convert a price string such as '£1,299.00' to a float, or None if unparsable."""
    cleaned = text.replace('£', '').replace(',', '').strip()
    try:
        return float(cleaned)
    except ValueError:
        return None


def getdeals(soup, deals=None):
    """Extract every search result in *soup* and append it to the deals list.

    Args:
        soup: Parsed results page; must support ``find_all``.
        deals: List to append to. Defaults to the module-level ``dealslist``.

    Each appended dict has the keys ``title``, ``short_title`` (first 25
    characters of the title), ``link``, ``saleprice``, ``oldprice`` and
    ``reviews``. Results without a title link are skipped. When no usable
    price is found both prices stay 0 and a "No Price" line is printed.

    Returns:
        None; results are delivered by mutating the list.
    """
    target = dealslist if deals is None else deals
    title_attrs = {'class': 'a-link-normal a-text-normal'}

    for item in soup.find_all('div', {'data-component-type': 's-search-result'}):
        # Look the title anchor up once; without it there is nothing to record.
        anchor = item.find('a', title_attrs)
        if anchor is None:
            continue
        title = anchor.text.strip()

        # Only the first two off-screen price spans are used: the first is
        # taken as the sale price, the second (when present) as the old price.
        price_spans = item.find_all('span', {'class': 'a-offscreen'})
        prices = [_parse_price(span.text) for span in price_spans[:2]]

        saleprice, oldprice = 0, 0
        if not prices or prices[0] is None:
            print(title, "No Price")
        else:
            saleprice = prices[0]
            # With a single (or unparsable second) price the item is treated
            # as not discounted: old price equals sale price.
            has_old_price = len(prices) > 1 and prices[1] is not None
            oldprice = prices[1] if has_old_price else saleprice

        reviews = 0
        review_span = item.find('span', {'class': 'a-size-base'})
        if review_span is not None:
            try:
                # Drop thousands separators so counts like "1,234" parse.
                reviews = float(review_span.text.strip().replace(',', ''))
            except ValueError:
                reviews = 0

        target.append({
            'title': title,
            'short_title': title[:25],
            'link': anchor.get('href', ''),
            'saleprice': saleprice,
            'oldprice': oldprice,
            'reviews': reviews,
        })
    return
def getnextpage(soup):
    """Return the absolute URL of the next results page, or None if there is none.

    Args:
        soup: Parsed results page; must support ``find``.

    Returns:
        ``'https://www.amazon.co.uk'`` joined with the ``href`` of the link
        inside the ``a-last`` pagination item, or None when the pagination
        list is missing, the "last" item is disabled, or no link is present.
    """
    pages = soup.find('ul', {'class': 'a-pagination'})
    if pages is None:
        return None

    # A disabled "last" item marks the final page of results.
    if pages.find('li', {'class': 'a-disabled a-last'}) is not None:
        return None

    next_item = pages.find('li', {'class': 'a-last'})
    next_link = next_item.find('a') if next_item is not None else None
    if next_link is None:
        return None

    href = next_link.get('href')
    if not href:
        return None
    return 'https://www.amazon.co.uk' + str(href)
# Walk the result pages, collecting deals, until getnextpage() reports that
# there is no further page. Progress (next URL and running total) is printed
# after each page that has a successor.
while True:
    soup = getdata(url)
    getdeals(soup)
    url = getnextpage(soup)
    if not url:
        break
    print(url)
    print(len(dealslist))

if dealslist:
    df = pd.DataFrame(dealslist)
    # Rows whose prices were left at 0 (no price found) are expected to yield
    # NaN here rather than a number, since the division is 0 / 0.
    df['percentoff'] = 100 - ((df.saleprice / df.oldprice) * 100)
    df = df.sort_values(by=['percentoff'], ascending=False)
    df.to_csv(searchterm + '-bfdeals.csv', index=False)
    print('Complete')
else:
    # An empty DataFrame has no 'saleprice'/'oldprice' columns, so the
    # percentage calculation above would raise; report and write nothing.
    print('No deals found')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement