from pyspark import SparkContext
from math import isqrt

def generate_initial_sieve(limit):
    """Sieve of Eratosthenes: return every prime up to and including `limit`."""
    sieve = [True] * (limit + 1)
    primes = []
    for p in range(2, len(sieve)):
        if sieve[p]:
            primes.append(p)
            for i in range(p * p, len(sieve), p):
                sieve[i] = False
    return primes
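
# Quick local check:
# generate_initial_sieve(30) -> [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]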
def generate_primes_in_range(start, end, broadcast_sieve):
    """Return the primes in [start, end) by trial division against the
    broadcast primes. Only correct while the broadcast list contains
    every prime up to isqrt(end - 1)."""
    primes = broadcast_sieve.value
    return [num for num in range(start, end)
            if all(num % p != 0 for p in primes if p * p <= num)]
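
# Local sanity check, using a tiny stand-in for a Spark broadcast
# (hypothetical stub; it only needs to expose `.value`). Divisors up to
# isqrt(29) = 5 suffice for the range below:
#
#     class FakeBroadcast:
#         def __init__(self, value):
#             self.value = value
#
#     generate_primes_in_range(10, 30, FakeBroadcast([2, 3, 5]))
#     # -> [11, 13, 17, 19, 23, 29]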
def iterative_prime_generation(max_num, manageable_limit):
    sc = SparkContext.getOrCreate()

    # Sieve the first segment locally. Primes up to manageable_limit are
    # enough to certify every candidate up to manageable_limit**2, so each
    # round can safely square the frontier.
    all_primes = generate_initial_sieve(manageable_limit)

    current_limit = manageable_limit
    while current_limit < max_num:
        next_limit = min(current_limit**2, max_num)

        # Re-broadcast after every round so the executors always hold
        # every prime up to isqrt(next_limit).
        broadcast_sieve = sc.broadcast(all_primes)

        # Distribute the candidate range; the one-element range yields [n]
        # for a prime and [] otherwise, so the result doubles as the
        # filter predicate.
        range_rdd = sc.parallelize(range(current_limit + 1, next_limit + 1),
                                   numSlices=1000)
        primes_rdd = range_rdd.filter(
            lambda n: generate_primes_in_range(n, n + 1, broadcast_sieve))
        current_primes = primes_rdd.collect()

        all_primes.extend(current_primes)
        broadcast_sieve.unpersist()
        current_limit = next_limit

    return all_primes
# Example usage. Caution: there are roughly 3.2 trillion primes below
# 10**14, so collecting them all to the driver is not practical at this
# scale; the values are illustrative.
max_num = 100 * 10**12    # 100 trillion
manageable_limit = 10**7  # 10 million
primes = iterative_prime_generation(max_num, manageable_limit)
print(f"Generated {len(primes)} primes up to {max_num}.")
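
# A minimal sketch of a more scalable variant, assuming an `out_dir` path
# the cluster can write to (hypothetical). Instead of collecting every
# prime to the driver, each segment is written out with saveAsTextFile,
# and only the primes still usable as trial divisors (those <=
# isqrt(max_num)) are pulled back into memory for later rounds.
def iterative_prime_generation_to_storage(max_num, manageable_limit, out_dir):
    sc = SparkContext.getOrCreate()
    sqrt_max = isqrt(max_num)
    known_primes = generate_initial_sieve(manageable_limit)
    current_limit = manageable_limit
    segment = 0
    while current_limit < max_num:
        next_limit = min(current_limit**2, max_num)
        broadcast_sieve = sc.broadcast(known_primes)
        range_rdd = sc.parallelize(range(current_limit + 1, next_limit + 1),
                                   numSlices=1000)
        primes_rdd = range_rdd.filter(
            lambda n: generate_primes_in_range(n, n + 1, broadcast_sieve))
        primes_rdd.cache()  # reused twice below; avoid recomputing
        # Persist the whole segment to storage instead of the driver.
        primes_rdd.saveAsTextFile(f"{out_dir}/primes_segment_{segment}")
        # Only primes <= isqrt(max_num) can ever divide a later candidate.
        known_primes.extend(
            primes_rdd.filter(lambda p: p <= sqrt_max).collect())
        primes_rdd.unpersist()
        broadcast_sieve.unpersist()
        current_limit = next_limit
        segment += 1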