Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import logging
import re
from pathlib import Path
from typing import Union
from urllib.parse import quote, urljoin

from requests_html import HTMLSession, requests
class JitasheDownloader:
    """Search jitashe.org for a keyword and save the score images of the first hit.

    Images are written to ``<out_dir>/<title>/<title>-<n>.jpg``, where
    ``<title>`` is the score page's heading made safe for use as a file name.
    """

    BASE_URL = 'https://www.jitashe.org'
    # Seconds allowed for each HTTP request, so a stalled connection raises
    # (and is retried by ``download``) instead of hanging the whole run.
    REQUEST_TIMEOUT = 30
    # Characters that are path separators or rejected by common file systems.
    _UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self):
        self.session = HTMLSession()

    @staticmethod
    def _safe_name(name, fallback='untitled') -> str:
        """Return *name* made safe for use as a single path component.

        Unsafe characters become ``_`` and surrounding spaces/dots are
        removed (which also neutralises ``..``). *fallback* is returned when
        nothing usable remains.
        """
        cleaned = JitasheDownloader._UNSAFE_NAME_CHARS.sub('_', name).strip(' .')
        return cleaned or fallback

    def _search_url(self, keyword) -> str:
        """Build the search URL for *keyword*, percent-encoded as one path segment."""
        # safe='' so that '/', '?' and '#' inside the keyword cannot alter
        # the path or start a query string / fragment.
        return f'{self.BASE_URL}/search/tab/{quote(str(keyword), safe="")}'

    def _search(self, keyword) -> Union[str, None]:
        """Return the absolute URL of the first search result, or None if there is none."""
        response = self.session.get(
            self._search_url(keyword), timeout=self.REQUEST_TIMEOUT)
        first_hit = response.html.find(
            '#threadlist > div > div.text > a', first=True)
        if first_hit is None:
            return None
        href = first_hit.element.attrib.get('href')
        if not href:
            return None
        # urljoin copes with absolute, root-relative and relative hrefs alike.
        return urljoin(self.BASE_URL + '/', href)

    def _download_score(self, score_url, keyword, out_dir) -> None:
        """Download every score image found on the page at *score_url*."""
        page = self.session.get(score_url, timeout=self.REQUEST_TIMEOUT)
        heading = page.html.find('h1.gb-title', first=True)
        # Fall back to the keyword when the page has no (or an empty) heading.
        title = (heading.text if heading is not None else '') or str(keyword)
        safe_title = self._safe_name(
            title, fallback=self._safe_name(str(keyword)))

        # The part of each src after '!' is dropped; the remainder is
        # resolved against the page URL so relative sources still work.
        image_urls = [
            urljoin(score_url, src.split('!')[0])
            for src in page.html.xpath('//ignore_js_op/picture/img/@src')
            if src
        ]
        if not image_urls:
            logging.info('无图片谱: %s', keyword)
            return

        target_dir = Path(out_dir) / safe_title
        target_dir.mkdir(parents=True, exist_ok=True)
        # Zero-pad the index so the files sort in page order.
        index_width = len(str(len(image_urls)))
        for index, url in enumerate(image_urls, 1):
            target = target_dir / f'{safe_title}-{index:0{index_width}}.jpg'
            logging.debug('下载中 - %s', target.name)
            image = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
            # Do not store an HTTP error page as an image; HTTPError is a
            # RequestException, so ``download`` will retry.
            image.raise_for_status()
            target.write_bytes(image.content)
        logging.info('完成下载:%s', title)

    def _download(self, keyword, out_dir) -> None:
        """Search for *keyword* and download the first result's score images.

        Raises:
            requests.RequestException: on network failures, timeouts, or an
                HTTP error status while fetching an image.
        """
        score_url = self._search(keyword)
        if score_url:
            self._download_score(score_url, keyword, out_dir)
        else:
            logging.info('搜索词「%s」下无对应结果', keyword)
        logging.debug('')

    def download(self, keyword, out_dir='./output', max_retry=3) -> None:
        """Download the score for *keyword*, making up to *max_retry* attempts.

        Only ``requests.RequestException`` triggers another attempt; any
        other exception propagates. A final error is logged once every
        attempt has failed.
        """
        for attempt in range(1, max_retry + 1):
            try:
                self._download(keyword, out_dir=out_dir)
            except requests.RequestException:
                # Announce a retry only when another attempt actually follows.
                if attempt < max_retry:
                    logging.error('重试第%s次下载 - %s', attempt, keyword)
            else:
                return
        logging.error('%s次尝试下载失败 - %s', max_retry, keyword)
def main() -> None:
    """Configure logging and download the scores for a fixed list of song titles."""
    logging.basicConfig(
        format='[%(asctime)s] [%(levelname)-5s] %(message)s',
        level=logging.INFO,
        # %H:%M:%S is the portable spelling of %T, which is not among the
        # strftime directives Python documents as available everywhere.
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    # Only let CRITICAL records through from urllib3's connection pool logger.
    logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)

    downloader = JitasheDownloader()
    song_list = [
        '同桌的你', '十年', '蓝莲花', '千千阙歌', '大海',
        '月亮代表我的心', '童话', '爱情转移', '外婆的澎湖湾',
        '笨小孩', '上海滩', '红日', '我们的爱', '雨一直下',
        '江南', '孤勇者', '孤勇者1', '孤勇者2',
    ]
    for song in song_list:
        downloader.download(song)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement