Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- import requests
- from math import ceil
- from plexapi.server import PlexServer
- from lxml.html import parse
- from urllib.request import urlopen
- import time
- import sqlite3
- from joblib import Parallel, delayed
# Global settings
PLEX_URL = 'http://127.0.0.1:32400'
PLEX_TOKEN = 'XXXXXX'

### Existing movie library details ###
MOVIE_LIBRARY_NAME = 'Peliculas'

# Maps each IMDb list identifier to the name of the Plex collection that
# should receive its movies. Keys containing 'www.' are treated as full URLs
# by the scraper; any other key is used as an IMDb list id.
PARES_IMDB_COLECCIONES = {'ls076156304': 'European Film Award'}

# Absolute path to the Plex library database.
# Raw string: the backslashes of a Windows path must not be read as escape
# sequences ('\P', '\D' and '\c' are invalid escapes in a normal literal).
DATABASE_PATH = r'J:\Plex Media Server\Plug-in Support\Databases\com.plexapp.plugins.library.db'
class ParserIMDB():
    """Scrape movie ids, years and titles from IMDb pages.

    Two modes are selected from ``imdb_list``:

    * If it contains ``'www.'`` it is treated as a complete URL and that
      single page is scraped ("alternative" mode).
    * Otherwise it is treated as an IMDb list id; the paginated list pages
      are discovered and scraped in parallel with joblib.
    """

    def __init__(self, imdb_list):
        """Store the list identifier and decide which scraping mode applies."""
        self.imdb_list = imdb_list
        self.scrape_alternativo = 'www.' in self.imdb_list
        # Page URLs to scrape (filled by ExtraerURLsScraping).
        self.lista_urls = []
        # lista_urls split into chunks (filled by SplitList).
        self.lista_paralelizar = []

    @staticmethod
    def _fetch_tree(url):
        """Download ``url`` and return its parsed lxml tree, closing the response."""
        with urlopen(url) as response:
            return parse(response)

    def ExtraerURLsScraping(self):
        """Build the URL of every page of the IMDb list into ``self.lista_urls``.

        The first page is downloaded to read the total number of titles; the
        page count is derived from it assuming 100 titles per page. The list
        is rebuilt on every call, so calling this twice does not duplicate
        pages.
        """
        lista_imdb = self.imdb_list
        IMDB_LIST_URL = ('http://www.imdb.com/list/{}/?view=compact&sort=listorian:asc&defaults=1&'
                         'lists={}&mode=detail&page=').format(lista_imdb, lista_imdb)
        tree = self._fetch_tree(IMDB_LIST_URL + '1')
        # The counter text starts with the number of titles, possibly with
        # thousands separators (e.g. "1,234 titles").
        total_text = tree.xpath("//div[@class='desc lister-total-num-results']/text()")[0]
        total_titles = int(total_text.strip().split(' ')[0].replace(",", ""))
        # There is always at least the first page, even for an empty list.
        total_pages = max(1, int(ceil(total_titles / 100.0)))
        self.lista_urls = [IMDB_LIST_URL + str(page)
                           for page in range(1, total_pages + 1)]

    def SplitList(self, max_elements_per_list):
        """Split ``self.lista_urls`` into chunks stored in ``self.lista_paralelizar``.

        Every chunk holds ``max_elements_per_list`` URLs except possibly the
        last one, which holds the remainder.

        Raises:
            ValueError: if ``max_elements_per_list`` is smaller than 1.
        """
        if max_elements_per_list < 1:
            raise ValueError("max_elements_per_list must be at least 1")
        urls = self.lista_urls
        self.lista_paralelizar = [urls[start:start + max_elements_per_list]
                                  for start in range(0, len(urls), max_elements_per_list)]

    def ParseList(self, url, alternativo):
        """Scrape one page and return ``(ids, years, titles)`` as three lists.

        Args:
            url: the page URL. In the non-alternative mode a one-element list
                (as produced by ``SplitList(1)``) is also accepted, in which
                case its first element is used.
            alternativo: when true, the page is read through its
                ``td.titleColumn`` cells; otherwise through its
                ``div.lister-item-content`` headers.
        """
        if alternativo:
            base = "//td[@class='titleColumn']"
            years_xpath = base + "//span[@class='secondaryInfo']/text()"
        else:
            if not isinstance(url, str):
                # RunParser hands over one-element chunks of URLs.
                url = url[0]
            base = "//div[@class='lister-item-content']//h3[@class='lister-item-header']"
            years_xpath = base + "//span[@class='lister-item-year text-muted unbold']/text()"

        tree = self._fetch_tree(url)
        list_picture_titles = tree.xpath(base + "//a/text()")
        list_picture_years = tree.xpath(years_xpath)
        # The id is the third component of each link's href when split on '/'.
        list_picture_ids = [href.split('/')[2] for href in tree.xpath(base + "//a//@href")]
        return((list_picture_ids, list_picture_years, list_picture_titles))

    def flatten(self, lista_in):
        """Return a single list with the items of every sub-list, in order."""
        return([x for y in lista_in for x in y])

    def RunParser(self):
        """Scrape the configured list and return ``(ids, years, titles)``.

        In alternative mode the single URL is scraped directly. Otherwise
        every page of the list is scraped as a separate joblib task
        (``n_jobs=-1``) and the per-page results are concatenated in page
        order.
        """
        if self.scrape_alternativo:
            return self.ParseList(self.imdb_list, self.scrape_alternativo)

        self.ExtraerURLsScraping()
        # One URL per chunk: each page becomes its own parallel task.
        self.SplitList(1)
        result = Parallel(n_jobs=-1)(
            delayed(self.ParseList)(url, self.scrape_alternativo)
            for url in self.lista_paralelizar
        )
        list_picture_ids = self.flatten([page[0] for page in result])
        list_picture_years = self.flatten([page[1] for page in result])
        list_picture_titles = self.flatten([page[2] for page in result])
        return((list_picture_ids, list_picture_years, list_picture_titles))
def ExtraerIMDBGuid(sql_database_path, library_section_id):
    """Return the (guid, id) pair of every item in one library section.

    Args:
        sql_database_path: path of the SQLite database to query.
        library_section_id: numeric id of the library section (an int or a
            numeric string).

    Returns:
        A list of ``(guid, id)`` tuples read from the ``metadata_items``
        table; empty when the section has no items.
    """
    # Connect to the database the caller asked for, not the module global.
    con = sqlite3.connect(sql_database_path)
    try:
        cursor = con.cursor()
        # Bound parameter instead of string concatenation.
        cursor.execute(
            'SELECT guid, id FROM metadata_items WHERE library_section_id = ?',
            (int(library_section_id),),
        )
        return cursor.fetchall()
    finally:
        # Always release the database handle, even if the query fails.
        con.close()
def add_collection(library_key, rating_key):
    """Tag one library item with the collection named IMDB_COLLECTION_NAME.

    Sends a PUT request to the Plex server at PLEX_URL, authenticated with
    PLEX_TOKEN. The collection name is read from the module-level global
    IMDB_COLLECTION_NAME, which must be set before calling.

    Args:
        library_key: key of the library section that holds the item.
        rating_key: id of the item to tag.

    Returns:
        The ``requests`` response object, so callers can inspect the outcome.
        A non-OK status is reported on stdout instead of being dropped.
    """
    headers = {"X-Plex-Token": PLEX_TOKEN}
    params = {
        "type": 1,
        "id": rating_key,
        "collection[0].tag.tag": IMDB_COLLECTION_NAME,
        "collection.locked": 1,
    }
    url = "{base_url}/library/sections/{library}/all".format(base_url=PLEX_URL, library=library_key)
    response = requests.put(url, headers=headers, params=params)
    if not response.ok:
        # Keep going (best effort), but make the failure visible.
        print("Could not add item {item} to collection '{name}' (HTTP {status})".format(
            item=rating_key, name=IMDB_COLLECTION_NAME, status=response.status_code))
    return response
def RemoveMovieFromCollection():
    """Remove every movie tagged with IMDB_COLLECTION_NAME from that collection.

    The Plex database at DATABASE_PATH is queried for the items of the
    MOVIE_LIBRARY_NAME library whose ``tags_collection`` field (a
    '|'-separated list) contains the collection name. Each matching movie is
    then untagged through the Plex API.

    Returns:
        ``([], 0)`` when the Plex server cannot be reached, otherwise
        ``None``. The asymmetry is kept from the original interface.
    """
    try:
        plex = PlexServer(PLEX_URL, PLEX_TOKEN)
    except Exception:
        print("No Plex server found at: {base_url}".format(base_url=PLEX_URL))
        print("Exiting script.")
        return([], 0)
    try:
        movie_library = plex.library.section(MOVIE_LIBRARY_NAME)
        movie_library_key = movie_library.key
        all_movies = movie_library.all()
    except Exception:
        print("The '{library}' library does not exist in Plex.".format(library=MOVIE_LIBRARY_NAME))
        print("Exiting script.")
        return

    collection_name = IMDB_COLLECTION_NAME
    con = sqlite3.connect(DATABASE_PATH)
    try:
        cursor = con.cursor()
        cursor.execute(
            "SELECT id, tags_collection FROM metadata_items "
            "WHERE library_section_id = ? AND tags_collection != ''",
            (int(movie_library_key),),
        )
        rows = cursor.fetchall()
    finally:
        # Always release the database handle, even if the query fails.
        con.close()

    # Ids (as strings) of the items whose tag list contains the collection.
    movies_id = {str(item_id) for item_id, tags in rows
                 if collection_name in tags.split('|')}

    # Match Plex objects to database rows through their rating key rather
    # than by parsing the object's string representation.
    movies_in_collection = [movie for movie in all_movies
                            if str(movie.ratingKey) in movies_id]

    print('Eliminando peliculas de la coleccion')
    for each_movie in movies_in_collection:
        each_movie.removeCollection(collection_name)
def run_imdb_list(lista_imdb):
    """Add the library movies found in an IMDb list to the current collection.

    The IMDb list is scraped with ParserIMDB, the MOVIE_LIBRARY_NAME library
    is read from the Plex database, and every library movie whose IMDb id is
    in the list is tagged through add_collection().

    Args:
        lista_imdb: IMDb list id or full URL, as accepted by ParserIMDB.

    Returns:
        A tuple ``(missing_imdb_list, total)``. ``missing_imdb_list`` holds
        ``(index, (imdb_id, title, year))`` for every list entry that was not
        matched in the library, and ``total`` is the number of ids scraped.
        ``([], 0)`` is returned when the Plex server or the library is not
        available, so callers can always unpack the result.
    """
    try:
        plex = PlexServer(PLEX_URL, PLEX_TOKEN)
    except Exception:
        print("No Plex server found at: {base_url}".format(base_url=PLEX_URL))
        print("Exiting script.")
        return([], 0)

    # Get the IMDB list
    print("Retrieving the IMDB list...")
    print(time.strftime("%d/%m/%Y %H:%M:%S"))
    lista = ParserIMDB(imdb_list=lista_imdb)
    list_picture_ids, list_picture_years, list_picture_titles = lista.RunParser()
    if list_picture_ids is None:
        print('La lista de IMDB ya ha sido escaneada previamente')
        return([], 0)

    # Get list of movies from the Plex server
    print("Retrieving a list of movies from the '{library}' library in Plex...".format(library=MOVIE_LIBRARY_NAME))
    print(time.strftime("%d/%m/%Y %H:%M:%S"))
    try:
        movie_library = plex.library.section(MOVIE_LIBRARY_NAME)
        movie_library_key = movie_library.key
    except Exception:
        print("The '{library}' library does not exist in Plex.".format(library=MOVIE_LIBRARY_NAME))
        print("Exiting script.")
        # Same shape as every other exit: the caller unpacks two values.
        return([], 0)

    # Create a dictionary of {imdb_id: database id of the movie}
    print("Creating a Dictionary")
    print(time.strftime("%d/%m/%Y %H:%M:%S"))
    wanted_ids = set(list_picture_ids)
    imdb_map = {}
    movies_sql = ExtraerIMDBGuid(sql_database_path=DATABASE_PATH,
                                 library_section_id=movie_library_key)
    # For every movie in the library, keep those whose guid carries an IMDb
    # id that appears in the scraped list.
    for guid, key in movies_sql:
        if 'imdb://' in guid:
            imdb_id = guid.split('imdb://')[1].split('?')[0]
            if imdb_id in wanted_ids:
                imdb_map[imdb_id] = key

    print("Setting the collection for the '{}' library...".format(MOVIE_LIBRARY_NAME))
    print(time.strftime("%d/%m/%Y %H:%M:%S"))
    # Positions (within the IMDb list) of the entries found in the library.
    in_library_idx = set()
    for i, imdb_id in enumerate(list_picture_ids):
        # pop() so an id repeated in the IMDb list is only tagged once.
        movie = imdb_map.pop(imdb_id, None)
        if movie:
            add_collection(movie_library_key, movie)
            in_library_idx.add(i)

    # Every IMDb list entry that is not in the local library.
    missing_imdb_list = [
        (idx, imdb)
        for idx, imdb in enumerate(zip(list_picture_ids, list_picture_titles, list_picture_years))
        if idx not in in_library_idx
    ]
    return missing_imdb_list, len(list_picture_ids)
- #
- #
- #
if __name__ == "__main__":
    # For every configured IMDb list: sync its Plex collection, print a report
    # and write the same report to "<collection name>.txt".
    separador = "==================================================================="
    for cada_lista, nombre_coleccion in PARES_IMDB_COLECCIONES.items():
        print(cada_lista)
        # Collection to update. Module-level global: add_collection() and
        # RemoveMovieFromCollection() read it.
        IMDB_COLLECTION_NAME = nombre_coleccion
        if 'www.' in cada_lista:
            print('Se eliminaran las peliculas de la coleccion')
            RemoveMovieFromCollection()
        print(separador)
        print()
        print(IMDB_COLLECTION_NAME)
        print()
        print(separador)
        print(time.strftime("%d/%m/%Y %H:%M:%S"))
        print(separador + "\n")

        missing_imdb_list, list_count = run_imdb_list(lista_imdb=cada_lista)
        if list_count == 0:
            continue

        in_library_count = list_count - len(missing_imdb_list)
        # Format the missing-movie lines once; console and file share them.
        missing_lines = [
            "{idx}\t{imdb_id}\t{title} {year}".format(idx=idx + 1, imdb_id=imdb_id, title=title, year=year)
            for idx, (imdb_id, title, year) in missing_imdb_list
        ]

        print("\n" + separador + "\n")
        print("Number of IMDB movies in the library: {count}".format(count=in_library_count))
        print("Number of missing IMDB movies: {count}".format(count=len(missing_imdb_list)))
        print("\nList of missing movies:\n")
        for line in missing_lines:
            print(line)
        print("\n" + separador)
        print(" Done! ")
        print(separador + "\n")

        archivo_salida = IMDB_COLLECTION_NAME + '.txt'
        # The context manager closes the file even if a write fails; UTF-8 so
        # titles with non-ASCII characters can always be written.
        with open(archivo_salida, 'w', encoding='utf-8') as f:
            f.write(separador + "\n")
            f.write(time.strftime("%d/%m/%Y %H:%M:%S"))
            # Keep the timestamp and the collection name on separate lines.
            f.write("\n")
            f.write(IMDB_COLLECTION_NAME)
            f.write("\n" + separador + "\n")
            f.write("\nNumber of IMDB movies in the library: {count}".format(count=in_library_count))
            f.write("\nNumber of missing IMDB movies: {count}\n".format(count=len(missing_imdb_list)))
            f.write("\nList of missing movies:")
            for line in missing_lines:
                f.write("\n" + line)
            f.write("\n" + separador)
            f.write("\n Done! ")
            f.write("\n" + separador)
    #input("Press Enter to finish...")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement