Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Mandelbrot fractal portable SIMD calculation core
- * Copyright (c) 2017 Joel Yliluoma (http://iki.fi/bisqwit/)
- *
- * T is the underlying (real) numeric type, with which the calculations
- * are peformed. It is ideally float or double.
- *
- * N is the number of mandelbrot coordinates to calculate in parallel (vector size).
- * Ideally N should be a small integer multiple of the largest native vector size for T.
- * Some options:
- * MMX SSE SSE2 AVX AVX512 Altivec Neon
- * ------------ --- --- ---- --- ------ ------- ----
- * float16 * * * * * * 8
- * float 2 4 4 8 16 4 4
- * double 1 1 2 4 8 1 2
- * long double 1 1 1 1 1 1 1
- *
- * MaxIter is the maximum number of iterations to perform.
- *
- * If do_smooth is enabled, points that are outside the set are given
- * a smooth (floating point) iteration count rather than a discrete
- * (integral) iteration count. This incurs a slight performance penalty.
- * If you use this, you should also set iter_mul to e.g. 4096.
- *
- * If do_periodicity_checking is enabled, light-weight periodicity checking
- * is enabled to improve the performance significantly for points that are
- * inside the set. It has a slight performance penalty for points that are outside the set.
- *
- * If do_logarithmic_iteration_counts is enabled, a log() function is applied
- * to the returned iteration counts. You should also set iter_mul to e.g. 4096.
- *
- * The returned iteration counts (float) are multiplied by iter_mul before
- * they are converted into integers and returned (fixed-point).
- */
- #include <array>
- #include <cmath>
- template<typename T, unsigned N,
- bool do_smooth = true /* S */,
- bool do_periodicity_checking = true /* P */,
- bool do_logarithmic_iteration_counts = false /* L */,
- unsigned iter_mul = 1 /* I */>
- struct Calculator
- {
- static constexpr T color_scale = 8;
- static constexpr T escape_radius_squared = do_smooth ? 6*6 : 4;
- typedef std::array<T, N> dvec;
- typedef std::array<int, N> bvec;
- dvec x,y; // The "c", complex coordinate we are iterating
- dvec r,i; // The "z", temporary complex number
- dvec dist; // Last recorded squared distance before iteration was stopped
- // // If do_smooth is false, it continues being updated
- // // even then, for a moderate improvement in performance.
- dvec perr,peri; // A recorded sample of "z" used in periodicity checking
- bvec escaped; // Flag used to stop iterating (indicates whether an escape happened)
- bvec iter; // The iteration count
- bvec maxiter; // Maximum iteration count
- /* Start(): Initialize calculation for the specified number of maximum iterations, at given coordinates */
- Calculator(unsigned MaxIter, const T* cr, const T* ci)
- {
- #pragma omp simd
- for(unsigned n=0; n<N; ++n)
- {
- x[n] = cr[n];
- y[n] = ci[n];
- r[n] = cr[n];
- i[n] = ci[n];
- maxiter[n] = MaxIter;
- iter[n] = MaxIter;
- perr[n] = 0;
- peri[n] = 0;
- dist[n] = 0;
- escaped[n] = 0;
- }
- }
- /* End(): Returns true if all N coordinates have escaped or MaxIter iterations has been reached */
- bool End() const
- {
- typename bvec::value_type n_escaped = 0;
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) n_escaped += escaped[n];
- return n_escaped >= typename bvec::value_type(N);
- }
- /* Round(): Performs one iteration for all N coordinates */
- void Round()
- {
- dvec r2, i2, ri;
- /* Note: GCC generates slightly better performing code, when this code is
- * in a single loop than when the statements are each in separate loops.
- * However, if one of the statements fails to be vectorized, the entire loop
- * will be de-vectorized. If that happens (inspect the code with gcc -S!),
- * try splitting the loop into separate consecutive loops.
- */
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) { r2[n] = (r[n] * r[n]);
- i2[n] = (i[n] * i[n]);
- if(!do_smooth || !escaped[n]) { dist[n] = r2[n] + i2[n]; }
- escaped[n] = escaped[n] || !iter[n] || (dist[n] >= escape_radius_squared);
- iter[n] -= !escaped[n];
- ri[n] = (r[n] * i[n]);
- i[n] = y[n] + (ri[n] * 2);
- r[n] = x[n] + (r2[n] - i2[n]); }
- /* Periodicity checking produces a significant speedup for coordinates
- * that are inside the set (especially if they go into a cycle early on),
- * but at a cost of a slight performance penalty for coordinates that are not.
- */
- if(do_periodicity_checking)
- {
- bvec moment;
- /* When the number of remaining iterations is a power of two,
- * save the current complex number (r & i).
- * Otherwise, compare it to the saved number. If they match, we are inside the set,
- * and can set the iteration count to max immediately.
- */
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) { int tmp = iter[n]; moment[n] = tmp & (tmp-1); }
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) { if(!moment[n]) { perr[n] = r[n]; peri[n] = i[n]; }
- else if(r[n] == perr[n] && i[n] == peri[n]) { iter[n] = 0; } }
- }
- }
- /* Get(): Get the translation iteration counts for all N coordinates according
- * to the current progress (call after End()==true, but can be called before) */
- std::array<int,N> Get() const
- {
- std::array<int,N> out;
- if(!do_logarithmic_iteration_counts && iter_mul == 1)
- {
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) out[n] = maxiter[n] - iter[n];
- return out;
- }
- dvec iter_f;
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) iter_f[n] = maxiter[n] - iter[n];
- #pragma omp simd
- for(unsigned n=0; n<N; ++n)
- if(do_smooth && iter[n] != 0)
- iter_f[n] += 1 - std::log2(std::log2(dist[n]) / T(2));
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) iter_f[n] = (do_logarithmic_iteration_counts ? std::log(iter_f[n]) : iter_f[n]) * color_scale;
- #pragma omp simd
- for(unsigned n=0; n<N; ++n) out[n] = iter_f[n] * iter_mul;
- return out;
- }
- };
- /* Utility example: Calculate for N coordinates, return iteration counts as array */
- template<unsigned N, bool S,bool P=true,bool L=true,unsigned I, typename T>
- std::array<int,N> Iterate(unsigned MaxIter, const T* x, const T* y)
- {
- Calculator<T,N, S,P,L,I> calc(MaxIter, x,y);
- while(!calc.End()) { calc.Round(); }
- return calc.Get();
- }
- /* Utility example: Calculate for N coordinates passed as std::array, return iteration counts as array */
- template<bool S,bool P=true,bool L=true,unsigned I, typename T,std::size_t N>
- std::array<int,N> Iterate(unsigned MaxIter, const std::array<T,N>& x, const std::array<T,N>& y)
- {
- return Iterate<N, S,P,L,I>(MaxIter, &x[0], &y[0]);
- }
- /* Utility example: Calculate for N coordinates where N is not a compile-time constant */
- template<bool S,bool P=true,bool L=true,unsigned I, typename T>
- void Iterate(unsigned MaxIter, const T* x, const T* y, int* out, unsigned N)
- {
- while(N >= 16)
- {
- auto tmp = Iterate<16, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n<16; ++n) out[n] = tmp[n];
- x += 16; y += 16; out += 16; N -= 16;
- }
- if(N == 8)
- {
- auto tmp = Iterate< 8, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 8; ++n) out[n] = tmp[n];
- return;
- }
- while(N >= 4)
- {
- auto tmp = Iterate< 4, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 4; ++n) out[n] = tmp[n];
- x += 4; y += 4; out += 4; N -= 4;
- }
- while(N >= 1)
- {
- auto tmp = Iterate< 1, S,P,L,I>(MaxIter, x,y); for(unsigned n=0; n< 1; ++n) out[n] = tmp[n];
- x += 1; y += 1; out += 1; N -= 1;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement