Release 0.3.0.0.

Haskell-Things · Oct 18, 2023 · 37b6753 · 37b6753
1 parent 4a16232
commit 37b6753
Show file tree

Hide file tree

Showing 9 changed files with 983 additions and 137 deletions.
diff --git a/cbits/PrimOps.cmm b/cbits/PrimOps.cmm
@@ -2,31 +2,82 @@
 #include "MachDeps.h"
 
 /*
- * Prim ops to convert floating point values to integral values of the same size
- * preserving the bits of the floating point values and back again.
+ * Prim-ops for coercing between floating point values and integral values.
+ *
  * Currently we do this by storing the value on the stack and reading it back
- * again (to get from one set of registers to another).
+ * again (to get from one set of registers to another). To avoid dealing with
+ * the possibility of a stack overflow (and the need to trigger a GC), we borrow
+ * the memory of the continuation pointer. This should be faster than any temporary
+ * allocated memory as we do not have to allocate anything.
+ *
+ * Note [Stack order after prim op (x86_64)]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Even if we use the stack during a prim op, it should be exactly in the state
+ * as it was before the prim op after the prim op. Especially this means that
+ * we have to restore the continuation on the stack before jumping to it. I do not
+ * know the exact reason, but several are possible:
+ *   - The continuation calls into the RTS, which may expect the continuation to
+ *     be on top of the stack on return. This seems like the most likely reason.
+ *   - The continuation has to access its info table. Rather than using a static
+ *     (long) address, the address from the stack is taken.
+ *   - The continuation calls itself recursively by calling the continuation from
+ *     the top of the stack. But again it could be written differently.
+ *
+ * Whatever the real reason is, it is important to leave the stack unchanged,
+ * otherwise SIGSEGV awaits.
+ *
+ * Note [Calling conventions]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The calling convention is different depending on the architecture the code runs on.
+ *
+ * The return conventions seems to be the same for both x86 and x86_64:
+ *   - Float# values are returned in F1.
+ *   - Double# values are returned in D1.
+ *   - 32 bit integral values are returned in R1.
+ *   - 64 bit integral values are returned in R1 or L1 (x86, R1 is only 32 bit wide).
+ *   - Sp points to the continuation we jump to, so if something was pushed on the
+ *     stack we have to remove it first.
+ *
+ * x86_64:
+ *   - The first Double#/Float#/32 or 64 bit integral number is passed via D1/F1/R1
+ *
+ * x86:
+ *   - The first Double#/Float#/64 bit integral number is passed via the stack.
+ *   - We have to clean up the arguments passed to us via the stack.
+ *   - The first 32 bit integral number is passed via R1.
+ *
+ * Note [GHC Version]
+ * ~~~~~~~~~~~~~~~~~~
+ *
+ * Between GHC 7.6 and 7.8 there seems to have been a change in the calling
+ * convention, so this code will only work for GHC 7.8 and later.
+ * I tested this with
+ *   - GHC 7.8.3 on Windows, both 32 and 64 bit
+ *   - GHC 7.10.1 on Linux, both 32 and 64 bit
+ *
  */
 
 #if SIZEOF_DOUBLE != 8
-#error "SIZEOF_DOUBLE != 8"
+    #error "SIZEOF_DOUBLE != 8"
 #endif
 
 #if SIZEOF_FLOAT != 4
-#error "SIZEOF_FLOAT != 4"
+    #error "SIZEOF_FLOAT != 4"
 #endif
 
 #if SIZEOF_VOID_P == 8
-#define __64BITS
+    #define __64BITS
 #elif SIZEOF_VOID_P == 4
-#define __32BITS
-#error "32 Bit currently does not really work (although it should). If you want to try nonetheless, remove this line."
+    #define __32BITS
 #else
-#error "Only 32 or 64 bit targets are supported"
+    #error "Only 32 or 64 bit targets are supported"
 #endif
 
 /*
  * Get the bits of a double by saving it to the stack and reading it back.
+ * On the x86 the argument is passed via the stack, so we just pop it of the stack.
  */
 double2WordBwzh
 {
@@ -35,21 +86,18 @@ double2WordBwzh
     j = P_[Sp];
     D_[Sp] = D1;
     R1 = I64[Sp];
+    P_[Sp] = j;
     jump (j) [R1];
 #else
-    P_ j;
-    I32 tmp;
-    j = P_[Sp];
-    tmp = I32[Sp-4];
-    D_[Sp-4] = D1;
-    R1 = I64[Sp-4];
-    I32[Sp-4] = tmp;
-    jump (j) [R1];
+    L1 = I64[Sp];
+    Sp = Sp + 8;
+    jump (P_[Sp]) [L1];
 #endif
 }
 
 /*
  * Convert some bits to a double by saving it to the stack and reading it back.
+ * On the x86 the argument is passed via the stack, so we just pop it of the stack.
  */
 word2DoubleBwzh
 {
@@ -58,39 +106,48 @@ word2DoubleBwzh
     j = P_[Sp];
     I64[Sp] = R1;
     D1 = D_[Sp];
+    P_[Sp] = j;
     jump (j) [D1];
 #else
-    P_ j;
-    I32 tmp;
-    j = P_[Sp];
-    tmp = I32[Sp-4];
-    I64[Sp-4] = R1;
-    D1 = D_[Sp-4];
-    I32[Sp-4] = tmp;
-    jump (j) [R1];
+    D1 = D_[Sp];
+    Sp = Sp + 8;
+    jump (P_[Sp]) [D1];
 #endif
 }
 
 /*
  * Get the bits of a float by saving it to the stack and reading it back.
+ * On the x86 the argument is passed via the stack, so we just pop it of the stack.
  */
 float2WordBwzh
 {
+#ifdef __64BITS
     P_ j;
     j = P_[Sp];
     F_[Sp] = F1;
-    R1 = I32[Sp];
+    I32 tmp;
+    tmp = I32[Sp];
+    R1 = %zx64(tmp);
+    P_[Sp] = j;
     jump (j) [R1];
+#else
+    R1 = I32[Sp];
+    Sp = Sp + 4;
+    jump (P_[Sp]) [R1];
+#endif
 }
 
 /*
  * Convert some bits to a float by saving it to the stack and reading it back.
+ * In contrast to our other prim ops, this has the same code for x86 and x86_64:
+ * x86 passes a 32-bit integer argument via R1 just like x86_64.
  */
 word2FloatBwzh
 {
     P_ j;
     j = P_[Sp];
     I32[Sp] = R1;
     F1 = F_[Sp];
+    P_[Sp] = j;
     jump (j) [F1];
 }
diff --git a/cbits/ref.c b/cbits/ref.c
@@ -0,0 +1,131 @@
+
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+#include <assert.h>
+
+uint32_t float2word_ref(float val)
+{
+    uint32_t rep;
+    assert(sizeof rep == sizeof val);
+    memcpy(&rep, &val, sizeof rep);
+    return rep;
+}
+
+float word2float_ref(uint32_t rep)
+{
+    float val;
+    assert(sizeof rep == sizeof val);
+    memcpy(&val, &rep, sizeof val);
+    return val;
+}
+
+uint64_t double2word_ref(double val)
+{
+    uint64_t rep;
+    assert(sizeof rep == sizeof val);
+    memcpy(&rep, &val, sizeof rep);
+    return rep;
+}
+
+double word2double_ref(uint64_t rep)
+{
+    double val;
+    assert(sizeof rep == sizeof val);
+    memcpy(&val, &rep, sizeof val);
+    return val;
+}
+
+// ulp reference implementation copied from java
+
+#define DOUBLE_MIN_VALUE 4.94065645841246544176568792868E-324
+#define DOUBLE_SIGNIFICAND_WIDTH 53L
+#define DOUBLE_MAX_EXPONENT 1023L
+#define DOUBLE_MIN_EXPONENT -1022L
+#define DOUBLE_EXP_BIAS 1023L
+#define DOUBLE_EXP_BIT_MASK 0x7FF0000000000000L
+
+#define FLOAT_MIN_VALUE 1.40129846432481707092372958329E-45
+#define FLOAT_SIGNIFICAND_WIDTH 24
+#define FLOAT_MAX_EXPONENT 127
+#define FLOAT_MIN_EXPONENT -126
+#define FLOAT_EXP_BIAS 127
+#define FLOAT_EXP_BIT_MASK 0x7F800000
+
+static int64_t get_exponent_d(double d)
+{
+    return (int64_t)(((double2word_ref(d) & DOUBLE_EXP_BIT_MASK) >> (DOUBLE_SIGNIFICAND_WIDTH - 1L)) - DOUBLE_EXP_BIAS);
+}
+
+static int32_t get_exponent_f(float f)
+{
+    return (int32_t)((float2word_ref(f) & FLOAT_EXP_BIT_MASK) >> (FLOAT_SIGNIFICAND_WIDTH - 1)) - FLOAT_EXP_BIAS;
+}
+
+static double power_of_two_d(int64_t n)
+{
+    assert((n >= DOUBLE_MIN_EXPONENT) && (n <= DOUBLE_MAX_EXPONENT));
+    return word2double_ref(((uint64_t)(n + DOUBLE_EXP_BIAS) << (DOUBLE_SIGNIFICAND_WIDTH - 1L)) & DOUBLE_EXP_BIT_MASK);
+}
+
+static float power_of_two_f(int32_t n)
+{
+    assert((n >= FLOAT_MIN_EXPONENT) && (n <= FLOAT_MAX_EXPONENT));
+    return word2float_ref(((uint32_t)(n + FLOAT_EXP_BIAS) << (FLOAT_SIGNIFICAND_WIDTH - 1)) & FLOAT_EXP_BIT_MASK);
+}
+
+float float_ulp_ref(float f)
+{
+    int32_t e = get_exponent_f(f);
+
+    switch(e) {
+    case FLOAT_MAX_EXPONENT+1:        // NaN or infinity
+        return fabsf(f);
+
+    case FLOAT_MIN_EXPONENT-1:        // zero or subnormal
+        return FLOAT_MIN_VALUE;
+
+    default:
+        assert((e <= FLOAT_MAX_EXPONENT) && (e >= FLOAT_MIN_EXPONENT));
+
+        // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
+        e = e - (FLOAT_SIGNIFICAND_WIDTH - 1);
+        if (e >= FLOAT_MIN_EXPONENT) {
+            return power_of_two_f(e);
+        }
+        else {
+            // return a subnormal result; left shift integer
+            // representation of FLOAT_MIN_VALUE appropriate
+            // number of positions
+            return word2float_ref(1 << (e - (FLOAT_MIN_EXPONENT - (FLOAT_SIGNIFICAND_WIDTH - 1))));
+        }
+    }
+}
+
+double double_ulp_ref(double d)
+{
+    int64_t e = get_exponent_d(d);
+
+    switch(e) {
+    case DOUBLE_MAX_EXPONENT+1:       // NaN or infinity
+        return fabs(d);
+
+    case DOUBLE_MIN_EXPONENT-1:       // zero or subnormal
+        return DOUBLE_MIN_VALUE;
+
+    default:
+        assert((e <= DOUBLE_MAX_EXPONENT) && (e >= DOUBLE_MIN_EXPONENT));
+
+        // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
+        e = e - (DOUBLE_SIGNIFICAND_WIDTH - 1);
+        if (e >= DOUBLE_MIN_EXPONENT) {
+            return power_of_two_d(e);
+        }
+        else {
+            // return a subnormal result; left shift integer
+            // representation of Double.MIN_VALUE appropriate
+            // number of positions
+            return word2double_ref(1L << (e - (DOUBLE_MIN_EXPONENT - (DOUBLE_SIGNIFICAND_WIDTH - 1))));
+        }
+    }
+}
diff --git a/floating-bits.cabal b/floating-bits.cabal
@@ -1,5 +1,5 @@
 name:                floating-bits
-version:             0.2.0.0
+version:             0.3.0.0
 synopsis:            Conversions between floating and integral values.
 description:         A small library to cast floating point values to integral values and back preserving the bit-pattern.
 license:             BSD3
@@ -12,10 +12,20 @@ build-type:          Simple
 cabal-version:       >=1.10
 
 library
-  exposed-modules:     Data.Bits.Floating
+  exposed-modules:
+    Data.Bits.Floating
+    Data.Bits.Floating.Ulp
+    Data.Bits.Floating.Prim
   c-sources:           cbits/PrimOps.cmm
-  other-extensions:    ForeignFunctionInterface, MagicHash, GHCForeignImportPrim, UnliftedFFITypes
-  build-depends:       base >=4.6 && < 5
+  other-extensions:    ForeignFunctionInterface,
+                       MagicHash,
+                       GHCForeignImportPrim,
+                       UnliftedFFITypes,
+                       MultiParamTypeClasses,
+                       FunctionalDependencies,
+                       ScopedTypeVariables,
+                       CPP
+  build-depends:       base >=4.7 && < 5
   hs-source-dirs:      src
   ghc-options:         -Wall -fwarn-tabs -fwarn-incomplete-record-updates -fwarn-monomorphism-restriction -fwarn-incomplete-uni-patterns
   default-language:    Haskell2010
@@ -24,6 +34,28 @@ test-suite test
   type:                exitcode-stdio-1.0
   hs-source-dirs:      test
   main-is:             Test.hs
+  other-modules:       TestUtils
+  c-sources:           cbits/ref.c
+  cc-options:          -fPIC
   default-language:    Haskell2010
   ghc-options:         -Wall -fwarn-tabs -fwarn-incomplete-record-updates -fwarn-monomorphism-restriction -fwarn-incomplete-uni-patterns
-  build-depends:       base >= 4.6 && < 5, floating-bits
+  ghc-prof-options:    -prof -fprof-auto
+  ghc-options:         -threaded -with-rtsopts=-N
+  other-extensions:    BangPatterns
+  build-depends:       base >= 4.6 && < 5,
+                       floating-bits
+
+benchmark bench
+  type:                exitcode-stdio-1.0
+  hs-source-dirs:      test
+  main-is:             Bench.hs
+  other-modules:       TestUtils
+  c-sources:           cbits/ref.c
+  cc-options:          -fPIC
+  default-language:    Haskell2010
+  ghc-options:         -Wall -fwarn-tabs -fwarn-incomplete-record-updates -fwarn-monomorphism-restriction -fwarn-incomplete-uni-patterns
+  ghc-prof-options:    -prof -fprof-auto
+  ghc-options:         -threaded -with-rtsopts=-N
+  build-depends:       base >= 4.6 && < 5,
+                       floating-bits,
+                       criterion