Cluster near duplicate Union Find

"""
We have a near duplicate pipeline at Pinterest,
which produces a mapping from every image to a list of up to k near-duplicate images, such as:
near_dups = {
"A": ["B", "I", "K"],
"B": ["A", "D"],
"C": ["E"],
"D": [],
"E": [],
"F": [],
"G": ["K"],
"I": [],
"K": [],
}

Given a mapping such as this one, form neardup clusters: collections of almost identical images.
In the example above, we would form the following groups: (A, B, D, I, G, K), (C, E), and (F).**
"""

class UnionFind:
    """
    Union-Find (Disjoint Set Union) class to group elements into clusters.

    Time Complexity: O(α(n)), where α is the Inverse Ackermann function, very slow-growing.
    Space Complexity: O(n), where n is the number of elements.
    """

    def __init__(self):
        self.parent = {}
        self.rank = {}

    def find(self, item):
        """Find the root representative of the item."""
        if self.parent[item] != item:
            self.parent[item] = self.find(self.parent[item])  # Path compression
        return self.parent[item]

    def union(self, item1, item2):
        """Union the sets that contain item1 and item2."""
        root1 = self.find(item1)
        root2 = self.find(item2)
        if root1 != root2:
            # Union by rank
            if self.rank[root1] > self.rank[root2]:
                self.parent[root2] = root1
            elif self.rank[root1] < self.rank[root2]:
                self.parent[root1] = root2
            else:
                self.parent[root2] = root1
                self.rank[root1] += 1

    def add(self, item):
        """Add an item to the union-find structure."""
        if item not in self.parent:
            self.parent[item] = item
            self.rank[item] = 0

def form_near_dup_clusters(near_dups):
    """
    Form near-duplicate clusters from the given mapping.

    Time Complexity: O(len(near_dups) * average_size(near_dups.values()))
    Space Complexity: O(len(near_dups))
    """
    uf = UnionFind()

    # Add all images to the Union-Find structure
    for image in near_dups.keys():
        uf.add(image)

    # Union images that are near duplicates
    for image, duplicates in near_dups.items():
        for dup in duplicates:
            uf.union(image, dup)

    # Collect clusters
    clusters = {}
    for image in near_dups.keys():
        root = uf.find(image)
        if root not in clusters:
            clusters[root] = []
        clusters[root].append(image)

    return list(clusters.values())

near_dups = {
    "A": ["B", "I", "K"],
    "B": ["A", "D"],
    "C": ["E"],
    "D": [],
    "E": [],
    "F": [],
    "G": ["K"],
    "I": [],
    "K": [],
}

clusters = form_near_dup_clusters(near_dups)
print(clusters)  # Example output: [['A', 'B', 'D', 'I', 'G', 'K'], ['C', 'E'], ['F']]