Advertisement
Bisqwit

Mandelbrot SIMD calculation snippets

May 27th, 2017
434
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 8.50 KB | None | 0 0
  1. /**
  2.  * Mandelbrot fractal portable SIMD calculation core
  3.  * Copyright (c) 2017 Joel Yliluoma (http://iki.fi/bisqwit/)
  4.  *
  5.  * T is the underlying (real) numeric type, with which the calculations
  6.  * are peformed. It is ideally float or double.
  7.  *
  8.  * N is the number of mandelbrot coordinates to calculate in parallel (vector size).
  9.  * Ideally N should be a small integer multiple of the largest native vector size for T.
  10.  * Some options:
  11.  *                MMX SSE SSE2 AVX AVX512 Altivec Neon
  12.  *   ------------ --- --- ---- --- ------ ------- ----
  13.  *   float16      *   *   *    *   *      *       8
  14.  *   float        2   4   4    8   16     4       4
  15.  *   double       1   1   2    4   8      1       2
  16.  *   long double  1   1   1    1   1      1       1
  17.  *
  18.  * MaxIter is the maximum number of iterations to perform.
  19.  *
  20.  * If do_smooth is enabled, points that are outside the set are given
  21.  * a smooth (floating point) iteration count rather than a discrete
  22.  * (integral) iteration count. This incurs a slight performance penalty.
  23.  * If you use this, you should also set iter_mul to e.g. 4096.
  24.  *
  25.  * If do_periodicity_checking is enabled, light-weight periodicity checking
  26.  * is enabled to improve the performance significantly for points that are
  27.  * inside the set. It has a slight performance penalty for points that are outside the set.
  28.  *
  29.  * If do_logarithmic_iteration_counts is enabled, a log() function is applied
  30.  * to the returned iteration counts. You should also set iter_mul to e.g. 4096.
  31.  *
  32.  * The returned iteration counts (float) are multiplied by iter_mul before
  33.  * they are converted into integers and returned (fixed-point).
  34.  */
  35. #include <array>
  36. #include <cmath>
  37. template<typename T, unsigned N,
  38.          bool do_smooth = true                        /* S */,
  39.          bool do_periodicity_checking = true          /* P */,
  40.          bool do_logarithmic_iteration_counts = false /* L */,
  41.          unsigned iter_mul = 1                        /* I */>
  42. struct Calculator
  43. {
  44.     static constexpr T color_scale           = 8;
  45.     static constexpr T escape_radius_squared = do_smooth ? 6*6 : 4;
  46.  
  47.     typedef std::array<T,   N> dvec;
  48.     typedef std::array<int, N> bvec;
  49.  
  50.     dvec x,y;           // The "c", complex coordinate we are iterating
  51.     dvec r,i;           // The "z", temporary complex number
  52.     dvec dist;          // Last recorded squared distance before iteration was stopped
  53.     //                  // If do_smooth is false, it continues being updated
  54.     //                  // even then, for a moderate improvement in performance.
  55.     dvec perr,peri;     // A recorded sample of "z" used in periodicity checking
  56.     bvec escaped;       // Flag used to stop iterating (indicates whether an escape happened)
  57.     bvec iter;          // The iteration count
  58.     bvec maxiter;       // Maximum iteration count
  59.  
  60.     /* Start(): Initialize calculation for the specified number of maximum iterations, at given coordinates */
  61.     Calculator(unsigned MaxIter, const T* cr, const T* ci)
  62.     {
  63.         #pragma omp simd
  64.         for(unsigned n=0; n<N; ++n)
  65.         {
  66.             x[n]       = cr[n];
  67.             y[n]       = ci[n];
  68.             r[n]       = cr[n];
  69.             i[n]       = ci[n];
  70.             maxiter[n] = MaxIter;
  71.             iter[n]    = MaxIter;
  72.             perr[n]    = 0;
  73.             peri[n]    = 0;
  74.             dist[n]    = 0;
  75.             escaped[n] = 0;
  76.         }
  77.     }
  78.  
  79.     /* End(): Returns true if all N coordinates have escaped or MaxIter iterations has been reached */
  80.     bool End() const
  81.     {
  82.         typename bvec::value_type n_escaped = 0;
  83.         #pragma omp simd
  84.         for(unsigned n=0; n<N; ++n) n_escaped += escaped[n];
  85.         return n_escaped >= typename bvec::value_type(N);
  86.     }
  87.  
  88.     /* Round(): Performs one iteration for all N coordinates */
  89.     void Round()
  90.     {
  91.         dvec r2, i2, ri;
  92.         /* Note: GCC generates slightly better performing code, when this code is
  93.          * in a single loop than when the statements are each in separate loops.
  94.          * However, if one of the statements fails to be vectorized, the entire loop
  95.          * will be de-vectorized. If that happens (inspect the code with gcc -S!),
  96.          * try splitting the loop into separate consecutive loops.
  97.          */
  98.         #pragma omp simd
  99.         for(unsigned n=0; n<N; ++n) { r2[n] = (r[n] * r[n]);
  100.                                       i2[n] = (i[n] * i[n]);
  101.                                       if(!do_smooth || !escaped[n]) { dist[n] = r2[n] + i2[n]; }
  102.                                       escaped[n] = escaped[n] || !iter[n] || (dist[n] >= escape_radius_squared);
  103.                                       iter[n] -= !escaped[n];
  104.                                       ri[n] = (r[n] * i[n]);
  105.                                       i[n] = y[n] + (ri[n] * 2);
  106.                                       r[n] = x[n] + (r2[n] - i2[n]); }
  107.  
  108.         /* Periodicity checking produces a significant speedup for coordinates
  109.          * that are inside the set (especially if they go into a cycle early on),
  110.          * but at a cost of a slight performance penalty for coordinates that are not.
  111.          */
  112.         if(do_periodicity_checking)
  113.         {
  114.             bvec moment;
  115.             /* When the number of remaining iterations is a power of two,
  116.              * save the current complex number (r & i).
  117.              * Otherwise, compare it to the saved number. If they match, we are inside the set,
  118.              * and can set the iteration count to max immediately.
  119.              */
  120.             #pragma omp simd
  121.             for(unsigned n=0; n<N; ++n) { int tmp = iter[n]; moment[n] = tmp & (tmp-1); }
  122.             #pragma omp simd
  123.             for(unsigned n=0; n<N; ++n) { if(!moment[n])                              { perr[n] = r[n]; peri[n] = i[n]; }
  124.                                           else if(r[n] == perr[n] && i[n] == peri[n]) { iter[n] = 0; } }
  125.         }
  126.     }
  127.  
  128.     /* Get(): Get the translation iteration counts for all N coordinates according
  129.      *        to the current progress (call after End()==true, but can be called before) */
  130.     std::array<int,N> Get() const
  131.     {
  132.         std::array<int,N> out;
  133.         if(!do_logarithmic_iteration_counts && iter_mul == 1)
  134.         {
  135.             #pragma omp simd
  136.             for(unsigned n=0; n<N; ++n) out[n] = maxiter[n] - iter[n];
  137.             return out;
  138.         }
  139.         dvec iter_f;
  140.         #pragma omp simd
  141.         for(unsigned n=0; n<N; ++n) iter_f[n] = maxiter[n] - iter[n];
  142.         #pragma omp simd
  143.         for(unsigned n=0; n<N; ++n)
  144.             if(do_smooth && iter[n] != 0)
  145.                 iter_f[n] += 1 - std::log2(std::log2(dist[n]) / T(2));
  146.         #pragma omp simd
  147.         for(unsigned n=0; n<N; ++n) iter_f[n] = (do_logarithmic_iteration_counts ? std::log(iter_f[n]) : iter_f[n]) * color_scale;
  148.         #pragma omp simd
  149.         for(unsigned n=0; n<N; ++n) out[n] = iter_f[n] * iter_mul;
  150.         return out;
  151.     }
  152. };
  153.  
  154. /* Utility example: Calculate for N coordinates, return iteration counts as array */
  155. template<unsigned N, bool S,bool P=true,bool L=true,unsigned I, typename T>
  156. std::array<int,N> Iterate(unsigned MaxIter, const T* x, const T* y)
  157. {
  158.     Calculator<T,N,  S,P,L,I> calc(MaxIter, x,y);
  159.     while(!calc.End()) { calc.Round(); }
  160.     return calc.Get();
  161. }
  162.  
  163. /* Utility example: Calculate for N coordinates passed as std::array, return iteration counts as array */
  164. template<bool S,bool P=true,bool L=true,unsigned I, typename T,std::size_t N>
  165. std::array<int,N> Iterate(unsigned MaxIter, const std::array<T,N>& x, const std::array<T,N>& y)
  166. {
  167.     return Iterate<N, S,P,L,I>(MaxIter, &x[0], &y[0]);
  168. }
  169.  
  170. /* Utility example: Calculate for N coordinates where N is not a compile-time constant */
  171. template<bool S,bool P=true,bool L=true,unsigned I, typename T>
  172. void Iterate(unsigned MaxIter, const T* x, const T* y, int* out, unsigned N)
  173. {
  174.     while(N >= 16)
  175.     {
  176.         auto tmp = Iterate<16, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n<16; ++n) out[n] = tmp[n];
  177.         x += 16; y += 16; out += 16; N -= 16;
  178.     }
  179.     if(N == 8)
  180.     {
  181.         auto tmp = Iterate< 8, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 8; ++n) out[n] = tmp[n];
  182.         return;
  183.     }
  184.     while(N >= 4)
  185.     {
  186.         auto tmp = Iterate< 4, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 4; ++n) out[n] = tmp[n];
  187.         x += 4; y += 4; out += 4; N -= 4;
  188.     }
  189.     while(N >= 1)
  190.     {
  191.         auto tmp = Iterate< 1, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 1; ++n) out[n] = tmp[n];
  192.         x += 1; y += 1; out += 1; N -= 1;
  193.     }
  194. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement