Advertisement
jalarab

IMDB_list_to_COLLECTION

May 21st, 2019
894
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 13.11 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import requests
  4. from math import ceil
  5. from plexapi.server import PlexServer
  6. from lxml.html import parse
  7. from urllib.request import urlopen
  8. import time
  9. import sqlite3
  10. from joblib import Parallel, delayed
  11. #Variables globales
  12. PLEX_URL = 'http://127.0.0.1:32400'
  13. PLEX_TOKEN = 'XXXXXX'
  14. ### Existing movie library details ###
  15. MOVIE_LIBRARY_NAME = 'Peliculas'
  16. #Diccionarios con listas de IMDB : nombre de coleccion
  17. PARES_IMDB_COLECCIONES ={'ls076156304' : 'European Film Award'}
  18. # la ruta absoluta hacia la ubicacion donde se encuentra la base de datos
  19. # de plex
  20. DATABASE_PATH = 'J:\Plex Media Server\Plug-in Support\Databases\com.plexapp.plugins.library.db'
  21.  
  22.  
  23. class ParserIMDB():
  24.     def __init__(self, imdb_list):
  25.         self.imdb_list = imdb_list
  26.         if 'www.' in self.imdb_list:
  27.             self.scrape_alternativo = True
  28.         else:
  29.             self.scrape_alternativo = False
  30.         self.lista_urls = []
  31.         self.lista_paralelizar = []
  32.    
  33.     def ExtraerURLsScraping(self):
  34.         lista_imdb = self.imdb_list
  35.         lista_urls = self.lista_urls
  36.         IMDB_LIST_URL = 'http://www.imdb.com/list/{}/?view=compact&sort=listorian:asc&defaults=1&' 'lists={}&mode=detail&page='.format(lista_imdb, lista_imdb)
  37.         i = 1
  38.         remaining_pages = 0
  39.         url = IMDB_LIST_URL + str(i)
  40.        
  41.         if i == 1:
  42.             tree = parse(urlopen(url))
  43.             lista_urls.append(url)
  44.         total_titles = int(tree.xpath("//div[@class='desc lister-total-num-results']/text()")[0].strip().split(' ')[0].replace(",", ""))
  45.  
  46.         if (total_titles > 100):
  47.             total_titles = total_titles - 100
  48.             remaining_pages = int(ceil(total_titles / 100.0))
  49.             i = i + 1
  50.            
  51.         while(remaining_pages > 0):
  52.             url = IMDB_LIST_URL + str(i)
  53.             lista_urls.append(url)
  54.             remaining_pages = remaining_pages - 1
  55.             i = i + 1
  56.            
  57.         self.lista_urls = lista_urls
  58.    
  59.     def SplitList(self, max_elements_per_list):
  60.         input_list = self.lista_urls
  61.         output_list = []
  62.         partial_list = []
  63.         for each_element in input_list:
  64.             partial_list.append(each_element)
  65.             if len(partial_list) == max_elements_per_list:
  66.                 output_list.append(partial_list)
  67.                 partial_list = []
  68.         if (len(input_list) % max_elements_per_list) > 0:
  69.             output_list.append(partial_list)
  70.         self.lista_paralelizar = output_list
  71.        
  72.     def ParseList(self, url, alternativo):
  73.         if alternativo:
  74.             tree = parse(urlopen(url))
  75.             list_picture_titles = tree.xpath("//td[@class='titleColumn']//a/text()")
  76.             list_picture_years = tree.xpath("//td[@class='titleColumn']//span[@class='secondaryInfo']/text()")
  77.             list_picture_ids = [a.split('/')[2] for a in tree.xpath("//td[@class='titleColumn']//a//@href")]  
  78.             return((list_picture_ids, list_picture_years, list_picture_titles))
  79.            
  80.         url = url[0]
  81.         tree = parse(urlopen(url))
  82.         list_picture_titles = tree.xpath("//div[@class='lister-item-content']//h3[@class='lister-item-header']//a/text()")
  83.         list_picture_years = tree.xpath("//div[@class='lister-item-content']//h3[@class='lister-item-header']//span[@class='lister-item-year text-muted unbold']/text()")
  84.         list_picture_ids = [a.split('/')[2] for a in tree.xpath("//div[@class='lister-item-content']//h3[@class='lister-item-header']//a//@href")]  
  85.         return((list_picture_ids, list_picture_years, list_picture_titles))
  86.        
  87.        
  88.     def flatten(self, lista_in):
  89.         return([x for y in lista_in for x in y])
  90.    
  91.     def RunParser(self):
  92.         if self.scrape_alternativo:
  93.            list_picture_ids, list_picture_years, list_picture_titles = self.ParseList(self.imdb_list, self.scrape_alternativo)
  94.            return((list_picture_ids, list_picture_years, list_picture_titles))
  95.         else:
  96.             self.ExtraerURLsScraping()
  97.             self.SplitList(1)
  98.             result = Parallel(n_jobs= -1)(delayed(self.ParseList)(url, self.scrape_alternativo) for url in self.lista_paralelizar)
  99.            
  100.             list_picture_ids = []
  101.             list_picture_years = []
  102.             list_picture_titles = []
  103.            
  104.             for each_list in result:
  105.                 list_picture_ids.append(each_list[0])
  106.                 list_picture_years.append(each_list[1])
  107.                 list_picture_titles.append(each_list[2])
  108.    
  109.             list_picture_ids = self.flatten(list_picture_ids)
  110.             list_picture_years = self.flatten(list_picture_years)
  111.             list_picture_titles = self.flatten(list_picture_titles)
  112.             return((list_picture_ids, list_picture_years, list_picture_titles))
  113.  
  114. def ExtraerIMDBGuid(sql_database_path, library_section_id):
  115.     ''' La funcion se conecta a la base de datos (sql_database_path) y extrae
  116.    el guid de cada elemento en la biblioteca determinada por el parametro
  117.    library_section_id (numeric)
  118.    Finalmente devuelve una lista con cada guid'''
  119.     con = sqlite3.connect(DATABASE_PATH)
  120.     cursorObj = con.cursor()
  121.     cursorObj.execute('SELECT guid, id FROM metadata_items WHERE library_section_id = ' + str(library_section_id))
  122.     resultado = cursorObj.fetchall()
  123.     return(resultado)
  124.    
  125.    
  126. def add_collection(library_key, rating_key):
  127.     headers = {"X-Plex-Token": PLEX_TOKEN}
  128.     params = {"type": 1,
  129.               "id": rating_key,
  130.               "collection[0].tag.tag": IMDB_COLLECTION_NAME,
  131.               "collection.locked": 1
  132.               }
  133.     url = "{base_url}/library/sections/{library}/all".format(base_url=PLEX_URL, library=library_key)
  134.     r = requests.put(url, headers=headers, params=params)
  135.     r
  136.  
  137. def RemoveMovieFromCollection():
  138.     ''' La funcion se conecta a la base de datos (sql_database_path) y extrae
  139.    el guid de cada elemento en la biblioteca determinada por el parametro
  140.    library_section_id (numeric)
  141.    Finalmente devuelve una lista con cada guid'''
  142.     try:
  143.         plex = PlexServer(PLEX_URL, PLEX_TOKEN)
  144.     except:
  145.         print("No Plex server found at: {base_url}".format(base_url=PLEX_URL))
  146.         print("Exiting script.")
  147.         return([], 0)
  148.     try:
  149.         movie_library = plex.library.section(MOVIE_LIBRARY_NAME)
  150.         movie_library_key = movie_library.key
  151.         all_movies = movie_library.all()
  152.     except:
  153.         print("The '{library}' library does not exist in Plex.".format(library=MOVIE_LIBRARY_NAME))
  154.         print("Exiting script.")
  155.         return
  156.    
  157.  
  158.     collection_name = IMDB_COLLECTION_NAME
  159.     con = sqlite3.connect(DATABASE_PATH)
  160.     cursorObj = con.cursor()
  161.     cursorObj.execute("SELECT id, tags_collection \
  162.                      FROM metadata_items WHERE library_section_id = "+ \
  163.                       str(movie_library_key) + " AND tags_collection != ''")
  164.     resultado = cursorObj.fetchall()
  165.     movies_id = []
  166.     for each_result in resultado:
  167.         id_db = str(each_result[0])
  168.         tag = each_result[1]
  169.         list_of_tags = tag.split('|')
  170.         for each_tag in list_of_tags:
  171.             if each_tag == collection_name:
  172.                 movies_id.append(id_db)
  173.    
  174.     movies_in_collection = []
  175.     for each_movie in all_movies:
  176.         movie_id = str(each_movie).split(':')[1]
  177.         if movie_id in movies_id:
  178.             movies_in_collection.append(each_movie)
  179.     print('Eliminando peliculas de la coleccion')
  180.     for each_movie in movies_in_collection:
  181.         each_movie.removeCollection(collection_name)
  182.        
  183. def run_imdb_list(lista_imdb):
  184.     try:
  185.         plex = PlexServer(PLEX_URL, PLEX_TOKEN)
  186.     except:
  187.         print("No Plex server found at: {base_url}".format(base_url=PLEX_URL))
  188.         print("Exiting script.")
  189.         return([], 0)
  190.  
  191.     # Get the IMDB  list
  192.     print("Retrieving the IMDB list...")
  193.     print(time.strftime("%d/%m/%Y %H:%M:%S"))
  194. #    list_picture_ids, list_picture_years, list_picture_titles = ParseIMDBLists(IMDB_LISTS)
  195.     lista = ParserIMDB(imdb_list= lista_imdb)
  196.     list_picture_ids, list_picture_years, list_picture_titles = lista.RunParser()
  197.     if list_picture_ids == None:
  198.         print('La lista de IMDB ya ha sido escaneada previamente')
  199.         return([], 0)
  200.     # Get list of movies from the Plex server
  201.     print("Retrieving a list of movies from the '{library}' library in Plex...".format(library=MOVIE_LIBRARY_NAME))
  202.     print(time.strftime("%d/%m/%Y %H:%M:%S"))
  203.     try:
  204.         movie_library = plex.library.section(MOVIE_LIBRARY_NAME)
  205.         movie_library_key = movie_library.key
  206.     except:
  207.         print("The '{library}' library does not exist in Plex.".format(library=MOVIE_LIBRARY_NAME))
  208.         print("Exiting script.")
  209.         return
  210.     # Create a dictionary of {imdb_id: movie}
  211.     print("Creating a Dictionary")
  212.     print(time.strftime("%d/%m/%Y %H:%M:%S"))
  213.     imdb_map = {}
  214.     movies_sql = ExtraerIMDBGuid(sql_database_path= DATABASE_PATH,
  215.                                  library_section_id= movie_library_key)
  216. #Para cada pelicula de la biblioteca
  217.     for movie in movies_sql:
  218.         guid = movie[0]
  219.         key = movie[1]
  220.         if 'imdb://' in guid:
  221.             imdb_id = guid.split('imdb://')[1].split('?')[0]
  222.             if imdb_id in list_picture_ids:
  223.                     imdb_map[imdb_id] = key
  224.              
  225.     print("Setting the collection for the '{}' library...".format(MOVIE_LIBRARY_NAME))
  226.     print(time.strftime("%d/%m/%Y %H:%M:%S"))
  227.     in_library_idx = []
  228. #    Para cada pelicula extraida de las listas de imdb, si la id de la pelicula esta
  229. #    en la lista local entonces la pelicula se guarda a la variable movie y esta variable
  230. #    se agrega con la funcion add_collection y a su vez su indice (i) se guarda en
  231. #    in_library_idx
  232.     for i, imdb_id in enumerate(list_picture_ids):
  233.         movie = imdb_map.pop(imdb_id, None)
  234.         if movie:
  235.             add_collection(movie_library_key, movie)
  236.             in_library_idx.append(i)
  237.     # Get list of missing movies
  238.     #Esta es la lista de todas las peliculas de la lista de IMDB que no esta en la biblioteca
  239. #    local
  240.     missing_imdb_list = [(idx, imdb) for idx, imdb in enumerate(zip(list_picture_ids, list_picture_titles, list_picture_years)) if idx not in in_library_idx]
  241.     return missing_imdb_list, len(list_picture_ids)
  242. #
  243. #
  244. #
  245. if __name__ == "__main__":
  246.     for cada_lista in PARES_IMDB_COLECCIONES.keys():
  247.         #Coleccion de la biblioteca a actualizar
  248.         print(cada_lista)
  249.         IMDB_COLLECTION_NAME = PARES_IMDB_COLECCIONES[cada_lista]
  250.         if 'www.' in cada_lista:
  251.             print('Se eliminaran las peliculas de la coleccion')
  252.             RemoveMovieFromCollection()
  253.         print("===================================================================")
  254.         print()        
  255.         print(IMDB_COLLECTION_NAME)
  256.         print()
  257.         print("===================================================================")
  258.         print(time.strftime("%d/%m/%Y %H:%M:%S"))
  259.         print("===================================================================\n")
  260.         missing_imdb_list, list_count = run_imdb_list(lista_imdb= cada_lista)
  261.         if(list_count == 0):
  262.             continue
  263.         print("\n===================================================================\n")
  264.         print("Number of IMDB movies in the library: {count}".format(count=list_count - len(missing_imdb_list)))
  265.         print("Number of missing IMDB movies: {count}".format(count=len(missing_imdb_list)))
  266.         print("\nList of missing movies:\n")
  267.         for idx, (imdb_id, title, year) in missing_imdb_list:
  268.             print("{idx}\t{imdb_id}\t{title} {year}".format(idx=idx + 1, imdb_id=imdb_id, title=title, year=year))
  269.         print("\n===================================================================")
  270.         print("                               Done!                               ")
  271.         print("===================================================================\n")
  272.         archivo_salida = IMDB_COLLECTION_NAME + '.txt'
  273.         f = open(archivo_salida,'w')
  274.         f.write("===================================================================\n")
  275.         f.write(time.strftime("%d/%m/%Y  %H:%M:%S") )
  276.         f.write(IMDB_COLLECTION_NAME)
  277.         f.write("\n===================================================================\n")
  278.         f.write("\nNumber of IMDB movies in the library: {count}".format(count=list_count - len(missing_imdb_list)))
  279.         f.write("\nNumber of missing IMDB movies: {count}\n".format(count=len(missing_imdb_list)))
  280.         f.write("\nList of missing movies:")
  281.         for idx, (imdb_id, title, year) in missing_imdb_list:
  282.             f.write("\n{idx}\t{imdb_id}\t{title} {year}".format(idx=idx + 1, imdb_id=imdb_id, title=title, year=year))
  283.         f.write("\n===================================================================")
  284.         f.write("\n                               Done!                               ")
  285.         f.write("\n===================================================================")       
  286.         f.close()                                          
  287.     #input("Press Enter to finish...")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement