aristotle029

Final_version_of_sieve

Mar 28th, 2024 (edited)

from pyspark import SparkContext
from math import isqrt, log10

# Generate a sieve of all primes up to the square root of the limit.
# This part runs sequentially on the driver.
def generate_sieve_upto_sqrt(limit):
    sieve_to_return = []
    sieve_bound = isqrt(limit) + 1
    sieve = [True] * sieve_bound
    sieve[0], sieve[1] = False, False  # 0 and 1 are not primes

    # Standard sieve of Eratosthenes: cross off the multiples of each prime,
    # starting from its square.
    for num in range(2, isqrt(sieve_bound) + 1):
        if sieve[num]:
            for multiple in range(num * num, sieve_bound, num):
                sieve[multiple] = False

    # The indices that survived are the primes.
    for num, is_prime in enumerate(sieve):
        if is_prime:
            sieve_to_return.append(num)

    return sieve_to_return

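# Example: the sieve for limit=100 covers 0..isqrt(100), so it returns the
# primes up to 10.
#   >>> generate_sieve_upto_sqrt(100)
#   [2, 3, 5, 7]
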
# Use the small sieve to filter primes within a segment (segment_start,
# segment_end), inclusive on both ends. This runs on the executors.
def apply_sieve_on_segment(segment, small_sieve):
    segment_start, segment_end = segment
    if segment_start < 2:
        segment_start = 2  # 0 and 1 are never prime

    segment_sieve = [True] * (segment_end - segment_start + 1)
    primes_in_segment = []

    for prime in small_sieve:
        # Cross off from either prime**2 or the first multiple of `prime`
        # at or after segment_start, whichever is larger. Starting at
        # prime**2 keeps the prime itself alive if it lies in this segment.
        first_multiple = max(prime * prime, ((segment_start + prime - 1) // prime) * prime)
        for multiple in range(first_multiple, segment_end + 1, prime):
            segment_sieve[multiple - segment_start] = False

    # Numbers whose sieve entry is still True are prime.
    for num in range(segment_start, segment_end + 1):
        if segment_sieve[num - segment_start]:
            primes_in_segment.append(num)

    return primes_in_segment

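# Example: sieving the segment 10..30 with the primes up to isqrt(30).
#   >>> apply_sieve_on_segment((10, 30), [2, 3, 5])
#   [11, 13, 17, 19, 23, 29]
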
if __name__ == "__main__":
    sc = SparkContext.getOrCreate()
    max_num = 10**7  # 10 million

    """
    Generate a small sieve to be used at each node for efficient filtering.
    The advantage of such a sieve lies in the reduced number of comparisons:
    for a large limit like 1 trillion (10**12), the small sieve only needs to
    cover numbers up to 10**6, which contains just 78,498 primes. Each segment
    is crossed off against those primes alone, so the total number of
    comparisons per segment stays small. Distribute this small sieve along
    with each segment and perform the comparisons there.
    """
    small_sieve = generate_sieve_upto_sqrt(max_num)

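    # Spark ships `small_sieve` to the executors by serializing it into the
    # closure of the flatMap lambda below. A possible optimization for much
    # larger limits (a sketch, not used here): a broadcast variable sends the
    # sieve to each worker once instead of once per task.
    #   small_sieve_bc = sc.broadcast(small_sieve)
    #   # ... then read small_sieve_bc.value inside apply_sieve_on_segment
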
    num_slices = int(2 ** log10(max_num))
    range_step = max_num // num_slices

    # Create an RDD of ranges (segments) to distribute. A range is denoted by
    # a tuple (start_num, end_num), inclusive on both ends, so consecutive
    # segments must not share an endpoint or boundary primes are emitted twice.
    ranges = [(i, min(i + range_step - 1, max_num)) for i in range(2, max_num + 1, range_step)]
    ranges_rdd = sc.parallelize(ranges, numSlices=num_slices)

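    # Worked example for max_num = 10**7: log10(10**7) = 7.0, so
    # num_slices = int(2**7.0) = 128 and range_step = 10**7 // 128 = 78125.
    # The segments are (2, 78126), (78127, 156251), ..., (9921877, 10**7).
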
    # Apply the sieve on each segment in parallel
    primes_rdd = ranges_rdd.flatMap(lambda segment: apply_sieve_on_segment(segment, small_sieve))

    # primes = primes_rdd.collect()  # avoid collect() at large limits: it pulls every prime to the driver

    # Save primes to a text file. Note that saveAsTextFile fails if the
    # "primes" directory already exists, so remove it between runs.
    primes_rdd.map(str).saveAsTextFile("primes")
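
# Typical invocation (a sketch; the script filename is hypothetical):
#   spark-submit --master "local[*]" final_sieve.py
# Spark writes the output under primes/ as one part-* file per partition.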