dan-masek

Hacking BGR to YUV 422... take 2 (code)

Apr 23rd, 2018
1,107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 14.51 KB | None | 0 0
  1. #include <opencv2/opencv.hpp>
  2.  
  3. #include <chrono>
  4. #include <cstdint>
  5. // ============================================================================
  6. namespace hal {
  7. // ============================================================================
  8. using namespace cv;
  9. // ----------------------------------------------------------------------------
  10. const int ITUR_BT_601_CY = 1220542;
  11. const int ITUR_BT_601_CUB = 2116026;
  12. const int ITUR_BT_601_CUG = -409993;
  13. const int ITUR_BT_601_CVG = -852492;
  14. const int ITUR_BT_601_CVR = 1673527;
  15. const int ITUR_BT_601_SHIFT = 20;
  16.  
  17. // Coefficients for RGB to YUV420p conversion
  18. const int ITUR_BT_601_CRY = 269484;
  19. const int ITUR_BT_601_CGY = 528482;
  20. const int ITUR_BT_601_CBY = 102760;
  21. const int ITUR_BT_601_CRU = -155188;
  22. const int ITUR_BT_601_CGU = -305135;
  23. const int ITUR_BT_601_CBU = 460324;
  24. const int ITUR_BT_601_CGV = -385875;
  25. const int ITUR_BT_601_CBV = -74448;
  26. // ----------------------------------------------------------------------------
  27. template<int bIdx, int uIdx, int yIdx, int cn>
  28. struct RGB888toYUV422Invoker : ParallelLoopBody
  29. {
  30.     uchar * dst_data;
  31.     size_t dst_step;
  32.     const uchar * src_data;
  33.     size_t src_step;
  34.     int width;
  35.  
  36.     RGB888toYUV422Invoker(uchar * _dst_data, size_t _dst_step,
  37.         const uchar * _src_data, size_t _src_step,
  38.         int _width)
  39.         : dst_data(_dst_data), dst_step(_dst_step), src_data(_src_data), src_step(_src_step), width(_width)
  40.     {
  41.     }
  42.  
  43.     void operator()(const Range& range) const
  44.     {
  45.         const int uidx = 1 - yIdx + uIdx * 2;
  46.         const int vidx = (2 + uidx) % 4;
  47.  
  48.         for (int j = range.start; j < range.end; j++) {
  49.             const uchar* row = src_data + src_step * j;
  50.             const uchar* brow = row + bIdx;
  51.             const uchar* grow = row + 1;
  52.             const uchar* rrow = row + (2 - bIdx);
  53.  
  54.             uchar* yuv_row = dst_data + dst_step * j;
  55.  
  56.             for (int i = 0, k = 0; i < (width * cn); i += (2 * cn), k += 4) {
  57.                 int b0(brow[i]), g0(grow[i]), r0(rrow[i]);
  58.                 int b1(brow[cn + i]), g1(grow[cn + i]), r1(rrow[cn + i]);
  59.  
  60.                 const int shifted16 = (16 << ITUR_BT_601_SHIFT);
  61.                 const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));
  62.                 int y0 = ITUR_BT_601_CRY * r0 + ITUR_BT_601_CGY * g0 + ITUR_BT_601_CBY * b0 + halfShift + shifted16;
  63.                 int y1 = ITUR_BT_601_CRY * r1 + ITUR_BT_601_CGY * g1 + ITUR_BT_601_CBY * b1 + halfShift + shifted16;
  64.                 yuv_row[k + yIdx] = saturate_cast<uchar>(y0 >> ITUR_BT_601_SHIFT);
  65.                 yuv_row[k + yIdx + 2] = saturate_cast<uchar>(y1 >> ITUR_BT_601_SHIFT);
  66.  
  67.  
  68.                 const int shifted128 = (128 << ITUR_BT_601_SHIFT);
  69.                 int u0 = ITUR_BT_601_CRU * r0 + ITUR_BT_601_CGU * g0 + ITUR_BT_601_CBU * b0 + halfShift + shifted128;
  70.                 int v0 = ITUR_BT_601_CBU * r0 + ITUR_BT_601_CGV * g0 + ITUR_BT_601_CBV * b0 + halfShift + shifted128;
  71.                 yuv_row[k + uidx] = saturate_cast<uchar>(u0 >> ITUR_BT_601_SHIFT);
  72.                 yuv_row[k + vidx] = saturate_cast<uchar>(v0 >> ITUR_BT_601_SHIFT);
  73.             }
  74.         }
  75.     }
  76. };
  77. // ----------------------------------------------------------------------------
  78. #define MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION (320*240)
  79. // ----------------------------------------------------------------------------
  80. template<int bIdx, int uIdx, int yIdx, int cn>
  81. inline void cvtRGBtoYUV422(uchar * dst_data, size_t dst_step, const uchar * src_data, size_t src_step,
  82.     int width, int height)
  83. {
  84.     RGB888toYUV422Invoker<bIdx, uIdx, yIdx, cn> converter(dst_data, dst_step, src_data, src_step, width);
  85.     if (width * height >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
  86.         parallel_for_(Range(0, height), converter);
  87.     else
  88.         converter(Range(0, height));
  89. }
  90. // ----------------------------------------------------------------------------
  91. void cvtBGRtoOnePlaneYUV(const uchar * src_data, size_t src_step,
  92.     uchar * dst_data, size_t dst_step,
  93.     int width, int height,
  94.     int scn, bool swapBlue, int uIdx, int ycn)
  95. {
  96.     // CV_INSTRUMENT_REGION();
  97.  
  98.     // TODO: add hal replacement method
  99.     int blueIdx = swapBlue ? 2 : 0;
  100.     switch (scn * 1000 + blueIdx * 100 + uIdx * 10 + ycn) {
  101.     case 3000: cvtRGBtoYUV422<0, 0, 0, 3>(dst_data, dst_step, src_data, src_step, width, height); break;
  102.     case 3001: cvtRGBtoYUV422<0, 0, 1, 3>(dst_data, dst_step, src_data, src_step, width, height); break;
  103.     case 3010: cvtRGBtoYUV422<0, 1, 0, 3>(dst_data, dst_step, src_data, src_step, width, height); break;
  104.     case 3200: cvtRGBtoYUV422<2, 0, 0, 3>(dst_data, dst_step, src_data, src_step, width, height); break;
  105.     case 3201: cvtRGBtoYUV422<2, 0, 1, 3>(dst_data, dst_step, src_data, src_step, width, height); break;
  106.     case 3210: cvtRGBtoYUV422<2, 1, 0, 3>(dst_data, dst_step, src_data, src_step, width, height); break;
  107.     case 4000: cvtRGBtoYUV422<0, 0, 0, 4>(dst_data, dst_step, src_data, src_step, width, height); break;
  108.     case 4001: cvtRGBtoYUV422<0, 0, 1, 4>(dst_data, dst_step, src_data, src_step, width, height); break;
  109.     case 4010: cvtRGBtoYUV422<0, 1, 0, 4>(dst_data, dst_step, src_data, src_step, width, height); break;
  110.     case 4200: cvtRGBtoYUV422<2, 0, 0, 4>(dst_data, dst_step, src_data, src_step, width, height); break;
  111.     case 4201: cvtRGBtoYUV422<2, 0, 1, 4>(dst_data, dst_step, src_data, src_step, width, height); break;
  112.     case 4210: cvtRGBtoYUV422<2, 1, 0, 4>(dst_data, dst_step, src_data, src_step, width, height); break;
  113.     default: CV_Error(CV_StsBadFlag, "Unknown/unsupported color conversion code"); break;
  114.     };
  115. }
  116. // ============================================================================
  117. } // namespace hal
  118. // ============================================================================
  119. enum
  120. {
  121.     //! YUV 4:2:2 family to RGB
  122.     COLOR_RGB2YUV_UYVY = 127,
  123.     COLOR_BGR2YUV_UYVY = 128,
  124.     //COLOR_RGB2YUV_VYUY = 129,
  125.     //COLOR_BGR2YUV_VYUY = 130,
  126.     COLOR_RGB2YUV_Y422 = COLOR_RGB2YUV_UYVY,
  127.     COLOR_BGR2YUV_Y422 = COLOR_BGR2YUV_UYVY,
  128.     COLOR_RGB2YUV_UYNV = COLOR_RGB2YUV_UYVY,
  129.     COLOR_BGR2YUV_UYNV = COLOR_BGR2YUV_UYVY,
  130.  
  131.     COLOR_RGBA2YUV_UYVY = 131,
  132.     COLOR_BGRA2YUV_UYVY = 132,
  133.     //COLOR_RGBA2YUV_VYUY = 133,
  134.     //COLOR_BGRA2YUV_VYUY = 134,
  135.     COLOR_RGBA2YUV_Y422 = COLOR_RGBA2YUV_UYVY,
  136.     COLOR_BGRA2YUV_Y422 = COLOR_BGRA2YUV_UYVY,
  137.     COLOR_RGBA2YUV_UYNV = COLOR_RGBA2YUV_UYVY,
  138.     COLOR_BGRA2YUV_UYNV = COLOR_BGRA2YUV_UYVY,
  139.  
  140.     COLOR_RGB2YUV_YUY2 = 135,
  141.     COLOR_BGR2YUV_YUY2 = 136,
  142.     COLOR_RGB2YUV_YVYU = 137,
  143.     COLOR_BGR2YUV_YVYU = 138,
  144.     COLOR_RGB2YUV_YUYV = COLOR_RGB2YUV_YUY2,
  145.     COLOR_BGR2YUV_YUYV = COLOR_BGR2YUV_YUY2,
  146.     COLOR_RGB2YUV_YUNV = COLOR_RGB2YUV_YUY2,
  147.     COLOR_BGR2YUV_YUNV = COLOR_BGR2YUV_YUY2,
  148.  
  149.     COLOR_RGBA2YUV_YUY2 = 139,
  150.     COLOR_BGRA2YUV_YUY2 = 140,
  151.     COLOR_RGBA2YUV_YVYU = 141,
  152.     COLOR_BGRA2YUV_YVYU = 142,
  153.     COLOR_RGBA2YUV_YUYV = COLOR_RGBA2YUV_YUY2,
  154.     COLOR_BGRA2YUV_YUYV = COLOR_BGRA2YUV_YUY2,
  155.     COLOR_RGBA2YUV_YUNV = COLOR_RGBA2YUV_YUY2,
  156.     COLOR_BGRA2YUV_YUNV = COLOR_BGRA2YUV_YUY2
  157.  
  158. };
  159. // ============================================================================
  160. namespace {
  161. // ============================================================================
  162. inline bool swapBlue(int code)
  163. {
  164.     switch (code) {
  165.     case COLOR_BGR2YUV_UYVY:
  166.     case COLOR_BGRA2YUV_UYVY:
  167.     case COLOR_BGR2YUV_YUY2:
  168.     case COLOR_BGR2YUV_YVYU:
  169.     case COLOR_BGRA2YUV_YUY2:
  170.     case COLOR_BGRA2YUV_YVYU:
  171.         return false;
  172.     default:
  173.         return true;
  174.     }
  175. }
  176. // ============================================================================
  177. } // anonymous namespace
  178. // ============================================================================
  179. void my_cvtColor(cv::InputArray _src, cv::OutputArray _dst, int code, int dcn = 0)
  180. {
  181.     int stype = _src.type();
  182.     int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), uidx, ycn;
  183.  
  184.     cv::Mat src, dst;
  185.     if (_src.getObj() == _dst.getObj()) // inplace processing (#6653)
  186.         _src.copyTo(src);
  187.     else
  188.         src = _src.getMat();
  189.     cv::Size sz = src.size();
  190.     CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F);
  191.  
  192.     switch (code) {
  193.     case COLOR_RGB2YUV_UYVY: case COLOR_BGR2YUV_UYVY: case COLOR_RGBA2YUV_UYVY: case COLOR_BGRA2YUV_UYVY:
  194.     case COLOR_RGB2YUV_YUY2: case COLOR_BGR2YUV_YUY2: case COLOR_RGB2YUV_YVYU: case COLOR_BGR2YUV_YVYU:
  195.     case COLOR_RGBA2YUV_YUY2: case COLOR_BGRA2YUV_YUY2: case COLOR_RGBA2YUV_YVYU: case COLOR_BGRA2YUV_YVYU:
  196.         //http://www.fourcc.org/yuv.php#UYVY
  197.         //http://www.fourcc.org/yuv.php#YUY2
  198.         //http://www.fourcc.org/yuv.php#YVYU
  199.         if (dcn <= 0) dcn = 2;
  200.         ycn = (code == COLOR_RGB2YUV_UYVY || code == COLOR_BGR2YUV_UYVY || code == COLOR_RGBA2YUV_UYVY || code == COLOR_BGRA2YUV_UYVY) ? 1 : 0;
  201.         uidx = (code == COLOR_RGB2YUV_YVYU || code == COLOR_BGR2YUV_YVYU || code == COLOR_RGBA2YUV_YVYU || code == COLOR_BGRA2YUV_YVYU) ? 1 : 0;
  202.         CV_Assert(dcn == 2 && depth == CV_8U);
  203.         CV_Assert(scn == 3 || scn == 4);
  204.         CV_Assert(sz.width % 2 == 0);
  205.         _dst.create(sz, CV_8UC(dcn));
  206.         dst = _dst.getMat();
  207.         hal::cvtBGRtoOnePlaneYUV(src.data, src.step, dst.data, dst.step, src.cols, src.rows,
  208.             scn, swapBlue(code), uidx, ycn);
  209.         break;
  210.     default:
  211.         cv::cvtColor(_src, _dst, code, dcn);
  212.     };
  213. }
  214. // ============================================================================
  215. cv::Mat3b make_test_image()
  216. {
  217.     cv::Mat3b image(65536, 256);
  218.     cv::Mat3b::iterator it(image.begin());
  219.     for (uint32_t b(0); b < 256; ++b) {
  220.         for (uint32_t g(0); g < 256; ++g) {
  221.             for (uint32_t r(0); r < 256; ++r) {
  222.                 *(it++) = cv::Vec3b(b, g, r);
  223.             }
  224.         }
  225.     }
  226.     return image;
  227. }
  228. // ----------------------------------------------------------------------------
  229. void timeit()
  230. {
  231.     using std::chrono::high_resolution_clock;
  232.     using std::chrono::duration_cast;
  233.     using std::chrono::microseconds;
  234.  
  235.     cv::Mat3b image(make_test_image());
  236.     cv::Mat2b image_yuv(image.size());
  237.     cv::Mat3b image_bgr(image.size());
  238.  
  239.     int32_t const STEPS(256 / 8);
  240.  
  241.     double t1_ms(0), t1_ms_min(std::numeric_limits<float>::max());
  242.     double t2_ms(0), t2_ms_min(std::numeric_limits<float>::max());
  243.     for (uint32_t i(0); i < STEPS; ++i) {
  244.         high_resolution_clock::time_point t1 = high_resolution_clock::now();
  245.         my_cvtColor(image, image_yuv, COLOR_BGR2YUV_Y422);
  246.         high_resolution_clock::time_point t2 = high_resolution_clock::now();
  247.         cv::cvtColor(image_yuv, image_bgr, cv::COLOR_YUV2BGR_Y422);
  248.         high_resolution_clock::time_point t3 = high_resolution_clock::now();
  249.         auto duration1 = duration_cast<microseconds>(t2 - t1).count();
  250.         t1_ms += static_cast<double>(duration1) / 1000.0;
  251.         t1_ms_min = std::min(t1_ms_min, static_cast<double>(duration1) / 1000.0);
  252.         auto duration2 = duration_cast<microseconds>(t3 - t2).count();
  253.         t2_ms += static_cast<double>(duration2) / 1000.0;
  254.         t2_ms_min = std::min(t2_ms_min, static_cast<double>(duration2) / 1000.0);
  255.     }
  256.  
  257.     std::cout << "\nBGR->YUV\n"
  258.         << "Total = " << t1_ms << " ms\n"
  259.         << "Iteration = " << (t1_ms / STEPS) << " ms\n"
  260.         << "Min. Iteration = " << t1_ms_min << " ms\n"
  261.         << "FPS = " << (STEPS / t1_ms * 1000.0) << "\n";
  262.     std::cout << "\nYUV->BGR\n"
  263.         << "Total = " << t2_ms << " ms\n"
  264.         << "Iteration = " << (t2_ms / STEPS) << " ms\n"
  265.         << "Min. Iteration = " << t2_ms_min << " ms\n"
  266.         << "FPS = " << (STEPS / t2_ms * 1000.0) << "\n";
  267. }
  268. // ----------------------------------------------------------------------------
  269. void timeit_umat()
  270. {
  271.     using std::chrono::high_resolution_clock;
  272.     using std::chrono::duration_cast;
  273.     using std::chrono::microseconds;
  274.  
  275.     cv::Mat3b image(make_test_image());
  276.     cv::Mat2b image_yuv(image.size());
  277.     cv::Mat3b image_bgr(image.size());
  278.  
  279.     cv::UMat u_image(image.getUMat(cv::ACCESS_READ));
  280.     cv::UMat u_image_yuv(image.getUMat(cv::ACCESS_RW));
  281.     cv::UMat u_image_bgr(image.getUMat(cv::ACCESS_RW));
  282.  
  283.     int32_t const STEPS(256 / 8);
  284.  
  285.     double t1_ms(0), t1_ms_min(std::numeric_limits<float>::max());
  286.     double t2_ms(0), t2_ms_min(std::numeric_limits<float>::max());
  287.     for (uint32_t i(0); i < STEPS; ++i) {
  288.         high_resolution_clock::time_point t1 = high_resolution_clock::now();
  289.         my_cvtColor(u_image, u_image_yuv, COLOR_BGR2YUV_Y422);
  290.         high_resolution_clock::time_point t2 = high_resolution_clock::now();
  291.         cv::cvtColor(u_image_yuv, u_image_bgr, cv::COLOR_YUV2BGR_Y422);
  292.         high_resolution_clock::time_point t3 = high_resolution_clock::now();
  293.         auto duration1 = duration_cast<microseconds>(t2 - t1).count();
  294.         t1_ms += static_cast<double>(duration1) / 1000.0;
  295.         t1_ms_min = std::min(t1_ms_min, static_cast<double>(duration1) / 1000.0);
  296.         auto duration2 = duration_cast<microseconds>(t3 - t2).count();
  297.         t2_ms += static_cast<double>(duration2) / 1000.0;
  298.         t2_ms_min = std::min(t2_ms_min, static_cast<double>(duration2) / 1000.0);
  299.     }
  300.  
  301.     std::cout << "\nBGR->YUV\n"
  302.         << "Total = " << t1_ms << " ms\n"
  303.         << "Iteration = " << (t1_ms / STEPS) << " ms\n"
  304.         << "Min. Iteration = " << t1_ms_min << " ms\n"
  305.         << "FPS = " << (STEPS / t1_ms * 1000.0) << "\n";
  306.     std::cout << "\nYUV->BGR\n"
  307.         << "Total = " << t2_ms << " ms\n"
  308.         << "Iteration = " << (t2_ms / STEPS) << " ms\n"
  309.         << "Min. Iteration = " << t2_ms_min << " ms\n"
  310.         << "FPS = " << (STEPS / t2_ms * 1000.0) << "\n";
  311. }
  312. // ----------------------------------------------------------------------------
  313. int main()
  314. {
  315.     cv::Mat3b image(make_test_image());
  316.  
  317.     std::cout << "Image size = " << image.size() << "\n\n";
  318.  
  319.     cv::Mat2b image_yuv;
  320.     my_cvtColor(image, image_yuv, COLOR_BGR2YUV_Y422);
  321.  
  322.     cv::Mat3b image_bgr;
  323.     cv::cvtColor(image_yuv, image_bgr, cv::COLOR_YUV2BGR_Y422);
  324.  
  325.     cv::Mat delta_y422;
  326.     cv::absdiff(image, image_bgr, delta_y422);
  327.  
  328.     double minval, maxval;
  329.     cv::minMaxLoc(delta_y422, &minval, &maxval);
  330.     cv::Mat mean, std;
  331.     cv::meanStdDev(delta_y422, mean, std);
  332.     std::cout << minval << "\n" << maxval << "\n" << mean << "\n" << std << "\n";
  333.  
  334.     for (int i(1); i <= 12; ++i) {
  335.         std::cout << "\n\nNum Threads = " << i << "\n";
  336.         cv::setNumThreads(i);
  337.         timeit();
  338.         timeit_umat();
  339.     }
  340.  
  341.     return 0;
  342. }
  343. // ============================================================================
Add Comment
Please, Sign In to add comment