Use native support for fp16 where available

Use F16C or ARM FP16 if available at compile time. A configure check was added because older clang compilers have the F16C defines and flags but not all of the intrinsics. Change-Id: I71f358b8fd003e70ab8fcf35097414591e485112 Reviewed-by: Thiago Macieira <[email protected]>
xiers · Feb 16, 2017 · 925a3c6 · 925a3c6
1 parent 5486882
commit 925a3c6
Show file tree

Hide file tree

Showing 6 changed files with 95 additions and 1 deletion.
diff --git a/config.tests/common/f16c/f16c.cpp b/config.tests/common/f16c/f16c.cpp
@@ -0,0 +1,54 @@
+/****************************************************************************
+**
+** Copyright (C) 2017 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the config.tests of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <immintrin.h>
+
+int main(int, char**) // Compile-only configure probe: it builds only if the F16C intrinsics used below are available.
+{
+    float f = 1.f;
+    unsigned short s = _cvtss_sh(f, 0); // scalar intrinsic: float -> 16-bit half value
+    float g = _cvtsh_ss(s); // scalar intrinsic: 16-bit half value -> float
+    bool result = f == g;
+    (void)result; // cast to void marks the value as deliberately unused
+    __m128i a = _mm_setzero_si128();
+    __m256 b = _mm256_cvtph_ps(a); // packed intrinsic: __m128i input, __m256 result
+    __m128i c = _mm256_cvtps_ph(b, 0); // packed intrinsic in the other direction: __m256 input, __m128i result
+    (void)c; // results are discarded; main returns 0 unconditionally
+    return 0;
+}
diff --git a/config.tests/common/f16c/f16c.pro b/config.tests/common/f16c/f16c.pro
@@ -0,0 +1,5 @@
+SOURCES = f16c.cpp
+CONFIG -= qt dylib release debug_and_release # remove the qt, dylib and release-related entries from CONFIG
+CONFIG += debug console
+!defined(QMAKE_CFLAGS_F16C, "var"):error("This compiler does not support F16C") # abort when no QMAKE_CFLAGS_F16C variable is defined
+else:QMAKE_CXXFLAGS += $$QMAKE_CFLAGS_F16C # otherwise compile the probe with the F16C flag(s)
diff --git a/configure.json b/configure.json
@@ -74,6 +74,7 @@
             "developer-build": "void",
             "device": "string",
             "device-option": "addString",
+            "f16c": "boolean",
             "force-asserts": { "type": "boolean", "name": "force_asserts" },
             "force-debug-info": { "type": "boolean", "name": "force_debug_info" },
             "force-pkg-config": { "type": "void", "name": "pkg-config" },
@@ -316,6 +317,11 @@
             "type": "compile",
             "test": "common/sse4_2"
         },
+        "f16c": {
+            "label": "F16C instructions",
+            "type": "compile",
+            "test": "common/f16c"
+        },
         "avx": {
             "label": "AVX instructions",
             "type": "compile",
@@ -777,6 +783,14 @@
                 { "type": "define", "name": "QT_COMPILER_SUPPORTS_AVX", "value": 1 }
             ]
         },
+        "f16c": {
+            "label": "F16C",
+            "condition": "features.avx && tests.f16c",
+            "output": [
+                "privateConfig",
+                { "type": "define", "name": "QT_COMPILER_SUPPORTS_F16C", "value": 1 }
+            ]
+        },
         "avx2": {
             "label": "AVX2",
             "condition": "features.avx && tests.avx2",
@@ -1120,7 +1134,7 @@ Configure with '-qreal float' to create a build that is binary-compatible with 5
                         {
                             "message": "AVX",
                             "type": "featureList",
-                            "args": "avx avx2",
+                            "args": "avx avx2 f16c",
                             "condition": "(arch.i386 || arch.x86_64)"
                         },
                         {

diff --git a/mkspecs/common/gcc-base.conf b/mkspecs/common/gcc-base.conf
@@ -85,6 +85,7 @@ QMAKE_CFLAGS_SSE3      += -msse3
 QMAKE_CFLAGS_SSSE3     += -mssse3
 QMAKE_CFLAGS_SSE4_1    += -msse4.1
 QMAKE_CFLAGS_SSE4_2    += -msse4.2
+QMAKE_CFLAGS_F16C      += -mf16c
 QMAKE_CFLAGS_AVX       += -mavx
 QMAKE_CFLAGS_AVX2      += -mavx2
 QMAKE_CFLAGS_AVX512F   += -mavx512f

diff --git a/mkspecs/features/simd.prf b/mkspecs/features/simd.prf
@@ -104,6 +104,7 @@ addSimdCompiler(avx512bw)
 addSimdCompiler(avx512vl)
 addSimdCompiler(avx512ifma)
 addSimdCompiler(avx512vbmi)
+addSimdCompiler(f16c)
 addSimdCompiler(neon)
 addSimdCompiler(mips_dsp)
 addSimdCompiler(mips_dspr2)

diff --git a/src/corelib/global/qfloat16.h b/src/corelib/global/qfloat16.h
@@ -44,6 +44,10 @@
 #include <QtCore/qmetatype.h>
 #include <string.h>
 
+#if defined __F16C__
+#include <immintrin.h>
+#endif
+
 QT_BEGIN_NAMESPACE
 
 #if 0
@@ -111,19 +115,34 @@ inline int qIntCast(qfloat16 f) Q_DECL_NOTHROW
 
 inline qfloat16::qfloat16(float f) Q_DECL_NOTHROW // converts f and stores the resulting 16-bit value in b16
 {
+#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) // both the configure-time define and the compiler's own F16C macro are required
+    b16 = _cvtss_sh(f, 0); // F16C intrinsic: float -> 16-bit half value
+#elif defined (__ARM_FP16_FORMAT_IEEE) // ARM path using the __fp16 type
+    __fp16 f16 = f; // the compiler performs the float -> __fp16 conversion
+    memcpy(&b16, &f16, sizeof(quint16)); // copy the raw 16 bits into b16
+#else // fallback: table-based conversion
     quint32 u;
     memcpy(&u, &f, sizeof(quint32)); // u receives the raw bit pattern of f
     b16 = basetable[(u >> 23) & 0x1ff] // table index is bits 23..31 of u (sign and exponent, assuming IEEE 754 binary32)
           + ((u & 0x007fffff) >> shifttable[(u >> 23) & 0x1ff]); // low 23 bits of u, shifted right by a per-index amount
+#endif
 }
 
 inline qfloat16::operator float() const Q_DECL_NOTHROW // converts the 16-bit value held in b16 to float
 {
+#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) // both the configure-time define and the compiler's own F16C macro are required
+    return _cvtsh_ss(b16); // F16C intrinsic: 16-bit half value -> float
+#elif defined (__ARM_FP16_FORMAT_IEEE) // ARM path using the __fp16 type
+    __fp16 f16;
+    memcpy(&f16, &b16, sizeof(quint16)); // copy the stored 16 bits into an __fp16 object
+    return f16; // implicit __fp16 -> float conversion on return
+#else // fallback: table-based conversion
     quint32 u = mantissatable[offsettable[b16 >> 10] + (b16 & 0x3ff)] // low 10 bits of b16, offset by a value looked up from the bits above them
                 + exponenttable[b16 >> 10]; // the bits of b16 above the low 10 index the exponent table
     float f;
     memcpy(&f, &u, sizeof(quint32)); // reinterpret the assembled 32-bit pattern as a float
     return f;
+#endif
 }
 
 inline qfloat16::operator double() const Q_DECL_NOTHROW