Advertisement
jules0707

degrees of separation

Mar 17th, 2023 (edited)
25
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.76 KB | None | 0 0
  1. import csv
  2. import sys
  3.  
  4. from util import Node, StackFrontier, QueueFrontier
  5.  
  6. # Maps names to a set of corresponding person_ids
  7. names = {}
  8.  
  9. # Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
  10. people = {}
  11.  
  12. # Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
  13. movies = {}
  14.  
  15. explored = set() # the explored set of movies for a search run
  16. frontier = QueueFrontier() # a queue of nodes used to discover path in a Breath First Search way
  17.  
  18. def load_data(directory):
  19.     """
  20.    Load data from CSV files into memory.
  21.    """
  22.     # Load people
  23.     with open(f"{directory}/people.csv", encoding="utf-8") as f:
  24.         reader = csv.DictReader(f)
  25.         for row in reader:
  26.             people[row["id"]] = {
  27.                 "name": row["name"],
  28.                 "birth": row["birth"],
  29.                 "movies": set()
  30.             }
  31.             if row["name"].lower() not in names:
  32.                 names[row["name"].lower()] = {row["id"]}
  33.             else:
  34.                 names[row["name"].lower()].add(row["id"])
  35.  
  36.     # Load movies
  37.     with open(f"{directory}/movies.csv", encoding="utf-8") as f:
  38.         reader = csv.DictReader(f)
  39.         for row in reader:
  40.             movies[row["id"]] = {
  41.                 "title": row["title"],
  42.                 "year": row["year"],
  43.                 "stars": set()
  44.             }
  45.  
  46.     # Load stars
  47.     with open(f"{directory}/stars.csv", encoding="utf-8") as f:
  48.         reader = csv.DictReader(f)
  49.         for row in reader:
  50.             try:
  51.                 people[row["person_id"]]["movies"].add(row["movie_id"])
  52.                 movies[row["movie_id"]]["stars"].add(row["person_id"])
  53.             except KeyError:
  54.                 pass
  55.  
  56. def main():
  57.     if len(sys.argv) > 2:
  58.         sys.exit("Usage: python degrees.py [directory]")
  59.     directory = sys.argv[1] if len(sys.argv) == 2 else "large"
  60.  
  61.     # Load data from files into memory
  62.     print("Loading data...")
  63.     load_data(directory)
  64.     print("Data loaded.")
  65.  
  66.     source = person_id_for_name(input("Name: "))
  67.     if source is None:
  68.         sys.exit("Person not found.")
  69.     target = person_id_for_name(input("Name: "))
  70.     if target is None:
  71.         sys.exit("Person not found.")
  72.    
  73.     path = shortest_path(source, target)
  74.  
  75.     if path is None:
  76.         print("Not connected.")
  77.     else:
  78.         degrees = len(path)
  79.         print(f"{degrees} degrees of separation.")
  80.         path = [(None, source)] + path
  81.         for i in range(degrees):
  82.             person1 = people[path[i][1]]["name"]
  83.             person2 = people[path[i + 1][1]]["name"]
  84.             movie = movies[path[i + 1][0]]["title"]
  85.             print(f"{i + 1}: {person1} and {person2} starred in {movie}")
  86.    
  87.  
  88. def shortest_path(source, target):
  89.     """
  90.    Returns the shortest list of (movie_id, person_id) pairs
  91.    that connect the source to the target.
  92.  
  93.    If no possible path, returns None.
  94.    """
  95.     start = Node(state=source, parent=None, action=None)
  96.     frontier.add(start) # we add the starting node to the queue frontier
  97.  
  98.     while True:
  99.        
  100.         if frontier.empty():
  101.             raise Exception("no solution")
  102.          
  103.         node = frontier.remove() # pop node from queue
  104.  
  105.         if node.state == target: # found it!
  106.            return built_path(node)
  107.            
  108.         neighbors = [] # a container of all the neighbors of that node
  109.         neighbors = neighbors_for_person(node.state)
  110.  
  111.         for neighbor in neighbors:
  112.             if neighbor[0] not in explored: # this movie path is unexplored
  113.                 if neighbor[1] == node.state: continue # do not backtrack, check next neighbor
  114.                 if neighbor[1] == target: # found it!
  115.                     child = Node(state=neighbor[1], parent=node, action=neighbor[0])
  116.                     return built_path(child)
  117.                 if not frontier.contains_state(neighbor[1]): # keep searching
  118.                     child = Node(state=neighbor[1], parent=node, action=neighbor[0])
  119.                     frontier.add(child)
  120.         explored.add(neighbor[0])
  121.  
  122.  
  123. # we back track to follow the nodes that lead to the target and reverse
  124. def built_path(nd):
  125.     path = []
  126.     while nd.parent is not None:
  127.         path.append((nd.action,nd.state))
  128.         nd = nd.parent
  129.     path.reverse()
  130.     return path
  131.  
  132.  
  133. def person_id_for_name(name):
  134.     """
  135.    Returns the IMDB id for a person's name,
  136.    resolving ambiguities as needed.
  137.    """
  138.     person_ids = list(names.get(name.lower(), set()))
  139.     if len(person_ids) == 0:
  140.         return None
  141.     elif len(person_ids) > 1:
  142.         print(f"Which '{name}'?")
  143.         for person_id in person_ids:
  144.             person = people[person_id]
  145.             name = person["name"]
  146.             birth = person["birth"]
  147.             print(f"ID: {person_id}, Name: {name}, Birth: {birth}")
  148.         try:
  149.             person_id = input("Intended Person ID: ")
  150.             if person_id in person_ids:
  151.                 return person_id
  152.         except ValueError:
  153.             pass
  154.         return None
  155.     else:
  156.         return person_ids[0]
  157.  
  158.  
  159. def neighbors_for_person(person_id):
  160.     """
  161.    Returns (movie_id, person_id) pairs for people
  162.    who starred with a given person.
  163.    """
  164.     source_actor = person_id
  165.     movie_ids = people[person_id]["movies"]
  166.     neighbors = set()
  167.     for movie_id in movie_ids:
  168.         if movie_id not in explored:  
  169.            for actor in movies[movie_id]["stars"]: # do not add neighbor if already in the frontier because it will take longer +1 extra movie
  170.             if actor != source_actor and not frontier.contains_state(actor): # do not add source to the neighbors' list extra cost
  171.                 neighbors.add((movie_id, actor))
  172.     return neighbors
  173.  
  174.  
  175.  
  176. if __name__ == "__main__":
  177.     main()
  178.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement