128-bit quad floating point (unitaryfund#965)

* Mostly compiles, but does not link * Fix linking * Fix FP_NORM_EPSILON * Tests compile/link * Compiles and links * Avoid fp truncation * Avoid truncation and fix fp16 * Always use <limits>
mphschmitt · Mar 26, 2022 · 9392eee · 9392eee
1 parent 1452f53
commit 9392eee
Show file tree

Hide file tree

Showing 43 changed files with 454 additions and 419 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -59,6 +59,10 @@ if (ENABLE_PTHREAD)
 	endif (NOT ANDROID AND NOT MSVC)
 endif (ENABLE_PTHREAD)
 
+if (FPPOW GREATER 6)
+    set(QRACK_LIBS ${QRACK_LIBS} quadmath)
+endif (FPPOW GREATER 6)
+
 target_link_libraries (qrack_pinvoke ${QRACK_LIBS})
 if (NOT ENABLE_EMIT_LLVM)
     # Declare the unittest executable

diff --git a/cmake/FpPow.cmake b/cmake/FpPow.cmake
@@ -4,9 +4,9 @@ if (FPPOW LESS 4)
     message(FATAL_ERROR "FPPOW must be at least 4, equivalent to \"half\" precision!")
 endif (FPPOW LESS 4)
 
-if (FPPOW GREATER 6)
-    message(FATAL_ERROR "FPPOW must be no greater than 6, equivalent to \"double\" precision!")
-endif (FPPOW GREATER 6)
+if (FPPOW GREATER 7)
+    message(FATAL_ERROR "FPPOW must be no greater than 7, equivalent to 128-bit precision!")
+endif (FPPOW GREATER 7)
 
 if (FPPOW LESS 5)
     set(ENABLE_COMPLEX_X2 OFF)

diff --git a/examples/cosmology.cpp b/examples/cosmology.cpp
@@ -17,9 +17,9 @@ using namespace Qrack;
 
 void StatePrep(QInterfacePtr qReg)
 {
-    real1_f theta = 2 * PI_R1 * qReg->Rand();
-    real1_f phi = 2 * PI_R1 * qReg->Rand();
-    real1_f lambda = 2 * PI_R1 * qReg->Rand();
+    real1_f theta = (real1_f)(2 * PI_R1 * qReg->Rand());
+    real1_f phi = (real1_f)(2 * PI_R1 * qReg->Rand());
+    real1_f lambda = (real1_f)(2 * PI_R1 * qReg->Rand());
     qReg->U(0, theta, phi, lambda);
 }
 

diff --git a/examples/qneuron_classification.cpp b/examples/qneuron_classification.cpp
@@ -157,11 +157,11 @@ void train(std::vector<std::vector<BoolH>>& rawYX, std::vector<real1>& etas, QIn
 
         if (permH.size() == 0) {
             for (i = 0; i < outputLayer.size(); i++) {
-                outputLayer[i]->LearnPermutation(row[0], etas[i] / rowCount);
+                outputLayer[i]->LearnPermutation(row[0], (real1_f)(etas[i] / rowCount));
             }
         } else {
             for (i = 0; i < outputLayer.size(); i++) {
-                outputLayer[i]->Learn(row[0], etas[i] / (rowCount * pow2Ocl(permH.size())));
+                outputLayer[i]->Learn(row[0], (real1_f)(etas[i] / (rowCount * pow2Ocl(permH.size()))));
             }
         }
     }
@@ -243,16 +243,16 @@ real1_f calculateAuc(std::vector<std::vector<BoolH>>& rawYX, std::vector<dfObser
     oFp = fp;
     totT = tp;
     totF = fp;
-    err = ((real1)fp) / rowCount;
+    err = ((real1_f)fp) / rowCount;
     optimumErr = err;
 
     std::set<real1>::iterator it = dfVals.begin();
     while (it != dfVals.end()) {
-        cutoff = *it;
+        cutoff = (real1_f)*it;
         it++;
 
-        lTp = (real1)tp / totT;
-        lFp = (real1)fp / totF;
+        lTp = (real1_f)tp / totT;
+        lFp = (real1_f)fp / totF;
 
         tp = 0;
         fp = 0;
@@ -275,18 +275,18 @@ real1_f calculateAuc(std::vector<std::vector<BoolH>>& rawYX, std::vector<dfObser
             }
         }
 
-        dTp = lTp - ((real1)tp / totT);
-        dFp = lFp - ((real1)fp / totF);
-        auc += dFp * (((real1)tp / totT) + (dTp / 2));
+        dTp = lTp - ((real1_f)tp / totT);
+        dFp = lFp - ((real1_f)fp / totF);
+        auc += dFp * (((real1_f)tp / totT) + (dTp / 2));
 
-        err = ((real1)((fp * fp) + (fn * fn))) / (rowCount * rowCount);
+        err = ((real1_f)((fp * fp) + (fn * fn))) / (rowCount * rowCount);
         if (err < optimumErr) {
             optimumErr = err;
 
             if (it == dfVals.end()) {
                 optimumCutoff = 1;
             } else {
-                optimumCutoff = cutoff + (((*it) - cutoff) / 2);
+                optimumCutoff = cutoff + ((((real1_f)*it) - cutoff) / 2);
             }
 
             oTp = tp;

diff --git a/examples/quantum_associative_memory.cpp b/examples/quantum_associative_memory.cpp
@@ -52,7 +52,7 @@ int main()
         for (bitLenInt i = 0; i < OutputCount; i++) {
             qReg->SetPermutation(perm);
             bit = (comp & pow2(i)) != 0;
-            outputLayer[i]->LearnPermutation(bit, eta);
+            outputLayer[i]->LearnPermutation(bit, (real1_f)eta);
         }
     }
 

diff --git a/examples/quantum_perceptron.cpp b/examples/quantum_perceptron.cpp
@@ -44,7 +44,7 @@ int main()
         std::cout << "Epoch " << (perm + 1U) << " out of " << ControlPower << std::endl;
         qReg->SetPermutation(perm);
         isPowerOf2 = ((perm != 0) && ((perm & (perm - 1U)) == 0));
-        qPerceptron->LearnPermutation(isPowerOf2, eta);
+        qPerceptron->LearnPermutation(isPowerOf2, (real1_f)eta);
     }
 
     std::cout << "Should be close to 1 for powers of two, and close to 0 for all else..." << std::endl;

diff --git a/examples/teleport.cpp b/examples/teleport.cpp
@@ -23,9 +23,9 @@ void StatePrep(QInterfacePtr qReg)
     // "Alice" has a qubit to teleport.
 
     // (To test consistency, comment out this U() gate.)
-    real1_f theta = 2 * PI_R1 * qReg->Rand();
-    real1_f phi = 2 * PI_R1 * qReg->Rand();
-    real1_f lambda = 2 * PI_R1 * qReg->Rand();
+    real1_f theta = (real1_f)(2 * PI_R1 * qReg->Rand());
+    real1_f phi = (real1_f)(2 * PI_R1 * qReg->Rand());
+    real1_f lambda = (real1_f)(2 * PI_R1 * qReg->Rand());
     qReg->U(0, theta, phi, lambda);
     std::cout << "Alice is sending: U(theta=" << theta << ", phi=" << phi << ", lambda=" << lambda << ")" << std::endl;
 

diff --git a/include/common/parallel_for.hpp b/include/common/parallel_for.hpp
@@ -76,7 +76,7 @@ class ParallelFor {
     void par_for_qbdt(const bitCapInt begin, const bitCapInt end, BdtFunc fn);
 
     /** Calculate the normal for the array, (with flooring). */
-    real1_f par_norm(const bitCapIntOcl maxQPower, const StateVectorPtr stateArray, real1_f norm_thresh = ZERO_R1);
+    real1_f par_norm(const bitCapIntOcl maxQPower, const StateVectorPtr stateArray, real1_f norm_thresh = ZERO_R1_F);
 
     /** Calculate the normal for the array, (without flooring.) */
     real1_f par_norm_exact(const bitCapIntOcl maxQPower, const StateVectorPtr stateArray);

diff --git a/include/common/qrack_types.hpp b/include/common/qrack_types.hpp
@@ -19,6 +19,7 @@
 #include <cmath>
 #include <complex>
 #include <functional>
+#include <limits>
 #include <memory>
 #include <random>
 
@@ -83,29 +84,29 @@ typedef std::complex<__fp16> complex;
 typedef __fp16 real1;
 typedef float real1_f;
 #define ZERO_R1 0.0f
+#define ZERO_R1_F 0.0f
 #define ONE_R1 1.0f
+#define ONE_R1_F 1.0f
 #define PI_R1 ((real1_f)M_PI)
 #define SQRT2_R1 ((real1_f)M_SQRT2)
 #define SQRT1_2_R1 ((real1_f)M_SQRT2)
 #define REAL1_DEFAULT_ARG -999.0f
 // Half of the amplitude of 16 maximally superposed qubits in any permutation
 #define REAL1_EPSILON 2e-17f
-// Minimum representable difference from 1
-#define FP_NORM_EPSILON 0.0009765625f
 #else
 typedef std::complex<half_float::half> complex;
 typedef half_float::half real1;
 typedef float real1_f;
 #define ZERO_R1 ((real1)0.0f)
+#define ZERO_R1_F 0.0f
 #define ONE_R1 ((real1)1.0f)
+#define ONE_R1_F 1.0f
 #define PI_R1 ((real1)M_PI)
 #define SQRT2_R1 ((real1)M_SQRT2)
 #define SQRT1_2_R1 ((real1)M_SQRT1_2)
 #define REAL1_DEFAULT_ARG ((real1)-999.0f)
 // Half of the amplitude of 16 maximally superposed qubits in any permutation
 #define REAL1_EPSILON ((real1)2e-17f)
-// Minimum representable difference from 1
-#define FP_NORM_EPSILON ((real1)0.0009765625f)
 #endif
 } // namespace Qrack
 #elif FPPOW < 6
@@ -114,39 +115,60 @@ typedef std::complex<float> complex;
 typedef float real1;
 typedef float real1_f;
 #define ZERO_R1 0.0f
+#define ZERO_R1_F 0.0f
 #define ONE_R1 1.0f
+#define ONE_R1_F 1.0f
 #define PI_R1 ((real1_f)M_PI)
 #define SQRT2_R1 ((real1_f)M_SQRT2)
 #define SQRT1_2_R1 ((real1_f)M_SQRT1_2)
 #define REAL1_DEFAULT_ARG -999.0f
 // Half of the amplitude of 32 maximally superposed qubits in any permutation
 #define REAL1_EPSILON 2e-33f
-// Minimum representable difference from 1
-#define FP_NORM_EPSILON 1.192092896e-07f
 } // namespace Qrack
-#else
+#elif FPPOW < 7
 namespace Qrack {
 typedef std::complex<double> complex;
 typedef double real1;
 typedef double real1_f;
 #define ZERO_R1 0.0
+#define ZERO_R1_F 0.0
 #define ONE_R1 1.0
+#define ONE_R1_F 1.0
 #define PI_R1 M_PI
 #define SQRT2_R1 M_SQRT2
 #define SQRT1_2_R1 M_SQRT1_2
 #define REAL1_DEFAULT_ARG -999.0
 // Half of the amplitude of 64 maximally superposed qubits in any permutation
 #define REAL1_EPSILON 2e-65
+} // namespace Qrack
+#else
+#include <boost/multiprecision/float128.hpp>
+#include <quadmath.h>
+namespace Qrack {
+typedef std::complex<boost::multiprecision::float128> complex;
+typedef boost::multiprecision::float128 real1;
+typedef double real1_f;
+#define ZERO_R1 ((real1)0.0)
+#define ZERO_R1_F 0.0
+#define ONE_R1 ((real1)1.0)
+#define ONE_R1_F 1.0
+#define PI_R1 ((real1)M_PI)
+#define SQRT2_R1 ((real1)M_SQRT2)
+#define SQRT1_2_R1 ((real1)M_SQRT1_2)
+#define REAL1_DEFAULT_ARG -999.0
+// Half of the amplitude of 64 maximally superposed qubits in any permutation
+#define REAL1_EPSILON 2e-129
 // Minimum representable difference from 1
-#define FP_NORM_EPSILON 2.2204460492503131e-16
 } // namespace Qrack
 #endif
 
 #define ONE_CMPLX complex(ONE_R1, ZERO_R1)
 #define ZERO_CMPLX complex(ZERO_R1, ZERO_R1)
 #define I_CMPLX complex(ZERO_R1, ONE_R1)
 #define CMPLX_DEFAULT_ARG complex(REAL1_DEFAULT_ARG, REAL1_DEFAULT_ARG)
-#define TRYDECOMPOSE_EPSILON (8 * FP_NORM_EPSILON)
+#define FP_NORM_EPSILON std::numeric_limits<real1>::epsilon()
+#define FP_NORM_EPSILON_F ((real1_f)FP_NORM_EPSILON)
+#define TRYDECOMPOSE_EPSILON ((real1_f)(8 * FP_NORM_EPSILON))
 
 namespace Qrack {
 typedef std::shared_ptr<complex> BitOp;

diff --git a/include/qbdt.hpp b/include/qbdt.hpp
@@ -130,13 +130,13 @@ class QBdt : public QParity, public QInterface {
         qrack_rand_gen_ptr rgp = nullptr, complex phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false,
         bool randomGlobalPhase = true, bool useHostMem = false, int deviceId = -1, bool useHardwareRNG = true,
         bool useSparseStateVec = false, real1_f norm_thresh = REAL1_EPSILON, std::vector<int> ignored = {},
-        bitLenInt qubitThreshold = 0, real1_f separation_thresh = FP_NORM_EPSILON);
+        bitLenInt qubitThreshold = 0, real1_f separation_thresh = FP_NORM_EPSILON_F);
 
     QBdt(bitLenInt qBitCount, bitCapInt initState = 0, qrack_rand_gen_ptr rgp = nullptr,
         complex phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
         bool useHostMem = false, int deviceId = -1, bool useHardwareRNG = true, bool useSparseStateVec = false,
         real1_f norm_thresh = REAL1_EPSILON, std::vector<int> ignored = {}, bitLenInt qubitThreshold = 0,
-        real1_f separation_thresh = FP_NORM_EPSILON)
+        real1_f separation_thresh = FP_NORM_EPSILON_F)
         : QBdt({ QINTERFACE_OPTIMAL_SCHROEDINGER }, qBitCount, initState, rgp, phaseFac, doNorm, randomGlobalPhase,
               useHostMem, deviceId, useHardwareRNG, useSparseStateVec, norm_thresh, ignored, qubitThreshold,
               separation_thresh)
@@ -151,7 +151,7 @@ class QBdt : public QParity, public QInterface {
     }
 
     virtual void NormalizeState(
-        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1)
+        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F)
     {
         root->Normalize(bdtQubitCount);
     }

diff --git a/include/qengine.hpp b/include/qengine.hpp
@@ -80,7 +80,7 @@ class QEngine : public QParity, public QInterface {
     virtual real1_f GetRunningNorm()
     {
         Finish();
-        return runningNorm;
+        return (real1_f)runningNorm;
     }
 
     /** Set all amplitudes to 0, and optionally temporarily deallocate state vector RAM */
@@ -110,7 +110,7 @@ class QEngine : public QParity, public QInterface {
      * for the next normalization operation. */
     virtual void QueueSetRunningNorm(real1_f runningNrm) = 0;
 
-    virtual void ZMask(bitCapInt mask) { PhaseParity(PI_R1, mask); }
+    virtual void ZMask(bitCapInt mask) { PhaseParity((real1_f)PI_R1, mask); }
 
     virtual bool ForceM(bitLenInt qubitIndex, bool result, bool doForce = true, bool doApply = true);
     virtual bitCapInt ForceM(const bitLenInt* bits, bitLenInt length, const bool* values, bool doApply = true);

diff --git a/include/qengine_cpu.hpp b/include/qengine_cpu.hpp
@@ -48,7 +48,7 @@ class QEngineCPU : virtual public QEngine {
         complex phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true, bool ignored = false,
         int ignored2 = -1, bool useHardwareRNG = true, bool useSparseStateVec = false,
         real1_f norm_thresh = REAL1_EPSILON, std::vector<int> ignored3 = {}, bitLenInt ignored4 = 0,
-        real1_f ignored5 = FP_NORM_EPSILON);
+        real1_f ignored5 = FP_NORM_EPSILON_F);
 
     virtual ~QEngineCPU() { Dump(); }
 
@@ -78,7 +78,7 @@ class QEngineCPU : virtual public QEngine {
     virtual real1_f FirstNonzeroPhase()
     {
         if (!stateVec) {
-            return ZERO_R1;
+            return ZERO_R1_F;
         }
 
         return QInterface::FirstNonzeroPhase();
@@ -312,7 +312,7 @@ class QEngineCPU : virtual public QEngine {
     virtual real1_f ProbParity(bitCapInt mask);
     virtual bool ForceMParity(bitCapInt mask, bool result, bool doForce = true);
     virtual void NormalizeState(
-        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1);
+        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F);
     virtual real1_f SumSqrDiff(QInterfacePtr toCompare)
     {
         return SumSqrDiff(std::dynamic_pointer_cast<QEngineCPU>(toCompare));

diff --git a/include/qengine_opencl.hpp b/include/qengine_opencl.hpp
@@ -206,7 +206,7 @@ class QEngineOCL : virtual public QEngine {
         complex phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
         bool useHostMem = false, int devID = -1, bool useHardwareRNG = true, bool ignored = false,
         real1_f norm_thresh = REAL1_EPSILON, std::vector<int> ignored2 = {}, bitLenInt ignored4 = 0,
-        real1_f ignored3 = FP_NORM_EPSILON);
+        real1_f ignored3 = FP_NORM_EPSILON_F);
 
     virtual ~QEngineOCL() { FreeAll(); }
 
@@ -282,7 +282,7 @@ class QEngineOCL : virtual public QEngine {
     virtual real1_f FirstNonzeroPhase()
     {
         if (!stateBuffer) {
-            return ZERO_R1;
+            return ZERO_R1_F;
         }
 
         return QInterface::FirstNonzeroPhase();
@@ -439,7 +439,7 @@ class QEngineOCL : virtual public QEngine {
     virtual real1_f SumSqrDiff(QEngineOCLPtr toCompare);
 
     virtual void NormalizeState(
-        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1);
+        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F);
     ;
     virtual void UpdateRunningNorm(real1_f norm_thresh = REAL1_DEFAULT_ARG);
     virtual void Finish() { clFinish(); };
@@ -561,7 +561,7 @@ class QEngineOCL : virtual public QEngine {
         const bitCapIntOcl* qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special,
         real1_f norm_thresh = REAL1_DEFAULT_ARG);
 
-    virtual void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase = PI_R1);
+    virtual void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase = (real1_f)PI_R1);
 
     virtual void ApplyM(bitCapInt mask, bool result, complex nrm);
     virtual void ApplyM(bitCapInt mask, bitCapInt result, complex nrm);

diff --git a/include/qengineshard.hpp b/include/qengineshard.hpp
@@ -667,7 +667,7 @@ class QEngineShard {
     real1_f Prob()
     {
         if (!isProbDirty || !unit) {
-            return norm(amp1);
+            return (real1_f)norm(amp1);
         }
 
         return unit->Prob(mapped);

diff --git a/include/qhybrid.hpp b/include/qhybrid.hpp
@@ -42,7 +42,7 @@ class QHybrid : public QEngine {
         complex phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
         bool useHostMem = false, int deviceId = -1, bool useHardwareRNG = true, bool useSparseStateVec = false,
         real1_f norm_thresh = REAL1_EPSILON, std::vector<int> ignored = {}, bitLenInt qubitThreshold = 0,
-        real1_f ignored2 = FP_NORM_EPSILON);
+        real1_f ignored2 = FP_NORM_EPSILON_F);
 
     QEnginePtr MakeEngine(bool isOpenCL, bitCapInt initState = 0);
 
@@ -420,7 +420,7 @@ class QHybrid : public QEngine {
 
     virtual void UpdateRunningNorm(real1_f norm_thresh = REAL1_DEFAULT_ARG) { engine->UpdateRunningNorm(norm_thresh); }
     virtual void NormalizeState(
-        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1)
+        real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F)
     {
         engine->NormalizeState(nrm, norm_thresh, phaseArg);
     }