[meta-oe][PATCH] opencv: disable broken Intel FP16 detection

* [meta-oe][PATCH] opencv: disable broken Intel FP16 detection
@ 2017-04-27  1:33 Randy MacLeod
  2017-04-27  1:45 ` Randy MacLeod
  2017-05-02 14:57 ` Randy MacLeod
  0 siblings, 2 replies; 4+ messages in thread
From: Randy MacLeod @ 2017-04-27  1:33 UTC (permalink / raw)
  To: openembedded-devel

With opencv-3.2, the configuration detects if the target supports
half-precision floating-point format. This fails to compile for some
Intel targets such as skylake with an error such as:
   error: '_mm_cvtph_ps' was not declared in this scope

The configuration worked in opencv-3.1 so revert two commits to
drop the FP16 detection even though it may make opencv slower.
The only change in the configure log is:
--- FP16: Compiler support is available
+-- FP16: Compiler support is not available

The patch should be dropped when a newer version of opencv that includes commit:
   https://github.com/opencv/opencv/commit/e5d9b608c47d54e43496041595025fa282fa9de5
is available. Backporting that fix was involved many files and
a cherry-picking explosion so it's not worthwhile.

Signed-off-by: Randy MacLeod <Randy.MacLeod@windriver.com>
---
 .../0001-Revert-cuda-fix-fp16-compilation.patch    |  27 +++
 ...vert-check-FP16-build-condition-correctly.patch | 245 +++++++++++++++++++++
 meta-oe/recipes-support/opencv/opencv_3.2.bb       |   2 +
 3 files changed, 274 insertions(+)
 create mode 100644 meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch
 create mode 100644 meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch

diff --git a/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch
new file mode 100644
index 0000000..507d796
--- /dev/null
+++ b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch
@@ -0,0 +1,27 @@
+From 69f9707678190f6a0948a547dce948251f972676 Mon Sep 17 00:00:00 2001
+From: Randy MacLeod <Randy.MacLeod@windriver.com>
+Date: Wed, 26 Apr 2017 14:57:30 -0400
+Subject: [PATCH 1/2] Revert "cuda: fix fp16 compilation"
+
+This reverts commit 12e00827be40576b686ea4438a6e6ef85208743d.
+---
+ modules/core/include/opencv2/core/cvdef.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
+index 699b166..efc24ca 100644
+--- a/modules/core/include/opencv2/core/cvdef.h
++++ b/modules/core/include/opencv2/core/cvdef.h
+@@ -303,8 +303,7 @@ enum CpuFeatures {
+ #define CV_2PI 6.283185307179586476925286766559
+ #define CV_LOG2 0.69314718055994530941723212145818
+ 
+-#if defined __ARM_FP16_FORMAT_IEEE \
+-    && !defined __CUDACC__
++#if defined (__ARM_FP16_FORMAT_IEEE)
+ #  define CV_FP16_TYPE 1
+ #else
+ #  define CV_FP16_TYPE 0
+-- 
+2.9.3
+
diff --git a/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch
new file mode 100644
index 0000000..d1950a9
--- /dev/null
+++ b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch
@@ -0,0 +1,245 @@
+From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001
+From: Randy MacLeod <Randy.MacLeod@windriver.com>
+Date: Wed, 26 Apr 2017 15:00:32 -0400
+Subject: [PATCH 2/2] Revert "check FP16 build condition correctly"
+
+This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13.
+
+Fix up revert conflicts to take previous behaviour.
+---
+ cmake/OpenCVCompilerOptions.cmake         | 45 +++++++++--------------
+ modules/core/include/opencv2/core/cvdef.h |  2 +-
+ modules/core/src/convert.cpp              | 11 +++---
+ modules/core/test/test_intrin.cpp         | 60 ++++++++++++++-----------------
+ 4 files changed, 48 insertions(+), 70 deletions(-)
+
+diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
+index 5bb0479..4b19fdb 100644
+--- a/cmake/OpenCVCompilerOptions.cmake
++++ b/cmake/OpenCVCompilerOptions.cmake
+@@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
+     add_extra_compiler_option("-mfp16-format=ieee")
+   endif(ARM)
+   if(ENABLE_NEON)
+-    add_extra_compiler_option("-mfpu=neon")
++    add_extra_compiler_option("-mfpu=neon-fp16")
+   endif()
+   if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
+     add_extra_compiler_option("-mfpu=vfpv3")
+@@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
+   add_extra_compiler_option(-fvisibility-inlines-hidden)
+ endif()
+ 
+-if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
+-  if(ARM AND ENABLE_NEON)
+-    set(FP16_OPTION "-mfpu=neon-fp16")
+-  elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX)
+-    set(FP16_OPTION "-mf16c")
+-  endif()
+-  try_compile(__VALID_FP16
+-    "${OpenCV_BINARY_DIR}"
+-    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
+-    COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
+-    OUTPUT_VARIABLE TRY_OUT
+-    )
+-  if(NOT __VALID_FP16)
+-    if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX)
+-      # GCC enables AVX when mf16c is passed
+-      message(STATUS "FP16: Feature disabled")
+-    else()
+-      message(STATUS "FP16: Compiler support is not available")
+-    endif()
+-  else()
+-    message(STATUS "FP16: Compiler support is available")
+-    set(HAVE_FP16 1)
+-    if(NOT ${FP16_OPTION} STREQUAL "")
+-      add_extra_compiler_option(${FP16_OPTION})
+-    endif()
+-  endif()
+-endif()
+-
+ #combine all "extra" options
+ set(CMAKE_C_FLAGS           "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
+ set(CMAKE_CXX_FLAGS         "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
+@@ -450,6 +422,21 @@ if(MSVC)
+   endif()
+ endif()
+ 
++if(NOT OPENCV_FP16_DISABLE)
++  try_compile(__VALID_FP16
++    "${OpenCV_BINARY_DIR}"
++    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
++    COMPILE_DEFINITIONS "-DCHECK_FP16"
++    OUTPUT_VARIABLE TRY_OUT
++    )
++  if(NOT __VALID_FP16)
++    message(STATUS "FP16: Compiler support is not available")
++  else()
++    message(STATUS "FP16: Compiler support is available")
++    set(HAVE_FP16 1)
++  endif()
++endif()
++
+ if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib")
+   link_directories("/usr/local/lib")
+ endif()
+diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
+index efc24ca..a10936b 100644
+--- a/modules/core/include/opencv2/core/cvdef.h
++++ b/modules/core/include/opencv2/core/cvdef.h
+@@ -312,7 +312,7 @@ enum CpuFeatures {
+ typedef union Cv16suf
+ {
+     short i;
+-#if CV_FP16_TYPE
++#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
+     __fp16 h;
+ #endif
+     struct _fp16Format
+diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
+index e04d89e..46db26f 100644
+--- a/modules/core/src/convert.cpp
++++ b/modules/core/src/convert.cpp
+@@ -44,7 +44,6 @@
+ #include "precomp.hpp"
+ 
+ #include "opencl_kernels_core.hpp"
+-#include "opencv2/core/hal/intrin.hpp"
+ 
+ #include "opencv2/core/openvx/ovx_defs.hpp"
+ 
+@@ -4382,7 +4381,7 @@ struct Cvt_SIMD<float, int>
+ 
+ #endif
+ 
+-#if !CV_FP16_TYPE
++#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) )
+ // const numbers for floating points format
+ const unsigned int kShiftSignificand    = 13;
+ const unsigned int kMaskFp16Significand = 0x3ff;
+@@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent    = 15;
+ const unsigned int kBiasFp32Exponent    = 127;
+ #endif
+ 
+-#if CV_FP16_TYPE
++#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
+ static float convertFp16SW(short fp16)
+ {
+     // Fp16 -> Fp32
+@@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16)
+ }
+ #endif
+ 
+-#if CV_FP16_TYPE
++#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
+ static short convertFp16SW(float fp32)
+ {
+     // Fp32 -> Fp16
+@@ -4560,7 +4559,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
+             if ( ( (intptr_t)dst & 0xf ) == 0 )
+ #endif
+             {
+-#if CV_FP16 && CV_SIMD128
++#if CV_FP16
+                 for ( ; x <= size.width - 4; x += 4)
+                 {
+                     v_float32x4 v_src = v_load(src + x);
+@@ -4606,7 +4605,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
+             if ( ( (intptr_t)src & 0xf ) == 0 )
+ #endif
+             {
+-#if CV_FP16 && CV_SIMD128
++#if CV_FP16
+                 for ( ; x <= size.width - 4; x += 4)
+                 {
+                     v_float16x4 v_src = v_load_f16(src + x);
+diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp
+index 66b2083..7349d48 100644
+--- a/modules/core/test/test_intrin.cpp
++++ b/modules/core/test/test_intrin.cpp
+@@ -729,56 +729,48 @@ template<typename R> struct TheTest
+         return *this;
+     }
+ 
++#if CV_FP16
+     TheTest & test_loadstore_fp16()
+     {
+-#if CV_FP16
+         AlignedData<R> data;
+         AlignedData<R> out;
+ 
+-        if(checkHardwareSupport(CV_CPU_FP16))
+-        {
+-            // check if addresses are aligned and unaligned respectively
+-            EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
+-            EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
+-            EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
+-            EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
+-
+-            // check some initialization methods
+-            R r1 = data.u;
+-            R r2 = v_load_f16(data.a.d);
+-            R r3(r2);
+-            EXPECT_EQ(data.u[0], r1.get0());
+-            EXPECT_EQ(data.a[0], r2.get0());
+-            EXPECT_EQ(data.a[0], r3.get0());
+-
+-            // check some store methods
+-            out.a.clear();
+-            v_store_f16(out.a.d, r1);
+-            EXPECT_EQ(data.a, out.a);
+-        }
++        // check if addresses are aligned and unaligned respectively
++        EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
++        EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
++        EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
++        EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
++
++        // check some initialization methods
++        R r1 = data.u;
++        R r2 = v_load_f16(data.a.d);
++        R r3(r2);
++        EXPECT_EQ(data.u[0], r1.get0());
++        EXPECT_EQ(data.a[0], r2.get0());
++        EXPECT_EQ(data.a[0], r3.get0());
++
++        // check some store methods
++        out.a.clear();
++        v_store_f16(out.a.d, r1);
++        EXPECT_EQ(data.a, out.a);
+ 
+         return *this;
+-#endif
+     }
+ 
+     TheTest & test_float_cvt_fp16()
+     {
+-#if CV_FP16
+         AlignedData<v_float32x4> data;
+ 
+-        if(checkHardwareSupport(CV_CPU_FP16))
+-        {
+-            // check conversion
+-            v_float32x4 r1 = v_load(data.a.d);
+-            v_float16x4 r2 = v_cvt_f16(r1);
+-            v_float32x4 r3 = v_cvt_f32(r2);
+-            EXPECT_EQ(0x3c00, r2.get0());
+-            EXPECT_EQ(r3.get0(), r1.get0());
+-        }
++        // check conversion
++        v_float32x4 r1 = v_load(data.a.d);
++        v_float16x4 r2 = v_cvt_f16(r1);
++        v_float32x4 r3 = v_cvt_f32(r2);
++        EXPECT_EQ(0x3c00, r2.get0());
++        EXPECT_EQ(r3.get0(), r1.get0());
+ 
+         return *this;
+-#endif
+     }
++#endif
+ 
+ };
+ 
+-- 
+2.9.3
+
diff --git a/meta-oe/recipes-support/opencv/opencv_3.2.bb b/meta-oe/recipes-support/opencv/opencv_3.2.bb
index 2cff212..98b6b06 100644
--- a/meta-oe/recipes-support/opencv/opencv_3.2.bb
+++ b/meta-oe/recipes-support/opencv/opencv_3.2.bb
@@ -27,6 +27,8 @@ SRC_URI = "git://github.com/opencv/opencv.git;name=opencv \
     file://fixpkgconfig.patch \
     file://uselocalxfeatures.patch;patchdir=../contrib/ \
     file://useoeprotobuf.patch;patchdir=../contrib/ \
+    file://0001-Revert-cuda-fix-fp16-compilation.patch \
+    file://0002-Revert-check-FP16-build-condition-correctly.patch \
 "
 
 PV = "3.2+git${SRCPV}"
-- 
2.9.3



^ permalink raw reply related	[flat|nested] 4+ messages in thread