Skip to content

Commit

Permalink
Merge pull request emscripten-core#4075 from juj/simd_intrinsics_fixes
Browse files Browse the repository at this point in the history
Simd intrinsics fixes
  • Loading branch information
juj committed Feb 3, 2016
2 parents ab98ff7 + 0cf6d39 commit 7bedc32
Show file tree
Hide file tree
Showing 14 changed files with 810 additions and 19 deletions.
30 changes: 21 additions & 9 deletions emscripten.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,15 +483,24 @@ def make_emulated_param(i):
'select', 'swizzle', 'shuffle',
'load', 'store', 'load1', 'store1', 'load2', 'store2', 'load3', 'store3']
simdintboolfuncs = ['and', 'xor', 'or', 'not']
if metadata['simdUint8x16']:
simdinttypes += ['Uint8x16']
simdintfloatfuncs += ['fromUint8x16Bits']
if metadata['simdInt8x16']:
simdinttypes += ['Int8x16', 'Uint8x16']
simdintfloatfuncs += ['fromInt8x16Bits', 'fromUint8x16Bits']
simdinttypes += ['Int8x16']
simdintfloatfuncs += ['fromInt8x16Bits']
if metadata['simdUint16x8']:
simdinttypes += ['Uint16x8']
simdintfloatfuncs += ['fromUint16x8Bits']
if metadata['simdInt16x8']:
simdinttypes += ['Int16x8', 'Uint16x8']
simdintfloatfuncs += ['fromInt16x8Bits', 'fromUint16x8Bits']
simdinttypes += ['Int16x8']
simdintfloatfuncs += ['fromInt16x8Bits']
if metadata['simdUint32x4']:
simdinttypes += ['Uint32x4']
simdintfloatfuncs += ['fromUint32x4', 'fromUint32x4Bits']
if metadata['simdInt32x4']:
simdinttypes += ['Int32x4', 'Uint32x4']
simdintfloatfuncs += ['fromInt32x4', 'fromInt32x4Bits', 'fromUint32x4Bits']
simdinttypes += ['Int32x4']
simdintfloatfuncs += ['fromInt32x4', 'fromInt32x4Bits']
if metadata['simdFloat32x4']:
simdfloattypes += ['Float32x4']
simdintfloatfuncs += ['fromFloat32x4', 'fromFloat32x4Bits']
Expand All @@ -508,8 +517,8 @@ def make_emulated_param(i):
simdbooltypes += ['Bool64x2']

simdfloatfuncs = simdfuncs + simdintfloatfuncs + ['div', 'min', 'max', 'minNum', 'maxNum', 'sqrt',
'abs', 'reciprocalApproximation', 'reciprocalSqrtApproximation'];
simdintfuncs = simdfuncs + simdintfloatfuncs + simdintboolfuncs + ['shiftLeftByScalar', 'shiftRightByScalar'];
'abs', 'reciprocalApproximation', 'reciprocalSqrtApproximation']
simdintfuncs = simdfuncs + simdintfloatfuncs + simdintboolfuncs + ['shiftLeftByScalar', 'shiftRightByScalar', 'addSaturate', 'subSaturate']
simdboolfuncs = simdfuncs + simdintboolfuncs + ['anyTrue', 'allTrue']
simdtypes = simdfloattypes + simdinttypes + simdbooltypes

Expand Down Expand Up @@ -735,7 +744,10 @@ def string_contains_any(s, str_list):
if sub in s:
return True
return False
nonexisting_simd_symbols = ['Int8x16_fromInt8x16', 'Int16x8_fromInt16x8', 'Int32x4_fromInt32x4', 'Float32x4_fromFloat32x4', 'Float64x2_fromFloat64x2']
nonexisting_simd_symbols = ['Int8x16_fromInt8x16', 'Uint8x16_fromUint8x16', 'Int16x8_fromInt16x8', 'Uint16x8_fromUint16x8', 'Int32x4_fromInt32x4', 'Uint32x4_fromUint32x4', 'Float32x4_fromFloat32x4', 'Float64x2_fromFloat64x2']
nonexisting_simd_symbols += ['Int32x4_addSaturate', 'Int32x4_subSaturate', 'Uint32x4_addSaturate', 'Uint32x4_subSaturate']
nonexisting_simd_symbols += [(x + '_' + y) for x in ['Int8x16', 'Uint8x16', 'Int16x8', 'Uint16x8', 'Float64x2'] for y in ['load2', 'load3', 'store2', 'store3']]
# Single-lane accessors are excluded for the 8-bit and 16-bit integer types. The inner list
# previously repeated 'load1', so the '*_store1' symbols were never added (and '*_load1' was added twice).
nonexisting_simd_symbols += [(x + '_' + y) for x in ['Int8x16', 'Uint8x16', 'Int16x8', 'Uint16x8'] for y in ['load1', 'store1']]

asm_global_funcs += ''.join([' var SIMD_' + ty + '=global' + access_quote('SIMD') + access_quote(ty) + ';\n' for ty in simdtypes])

Expand Down
5 changes: 5 additions & 0 deletions src/ecmascript_simd.js
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,11 @@ if (typeof simdPhase2 !== 'undefined') {
"load", "store"],
}

// XXX Emscripten: Need these functions for intrinsics, see https://github.com/tc39/ecmascript_simd/issues/316.
float64x2.fns.push("load1");
float64x2.fns.push("store1");
// XXX Emscripten

var bool64x2 = {
name: "Bool64x2",
fn: SIMD.Bool64x2,
Expand Down
20 changes: 10 additions & 10 deletions system/include/emscripten/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ float64x2 emscripten_float64x2_select(bool64x2 __a, float64x2 __b, float64x2 __c
// n.b. No emscripten_float64x2_subSaturate, only defined on 8-bit and 16-bit integer SIMD types.
// n.b. No emscripten_float64x2_shiftLeftByScalar, only defined on integer SIMD types.
// n.b. No emscripten_float64x2_shiftRightByScalar, only defined on integer SIMD types.
inline float emscripten_float64x2_extractLane(float64x2 __a, int __lane) __attribute__((__nothrow__, __const__)) { return __a[__lane]; }
inline float64x2 emscripten_float64x2_replaceLane(float64x2 __a, int __lane, float __s) __attribute__((__nothrow__, __const__)) { __a[__lane] = __s; return __a; }
inline double emscripten_float64x2_extractLane(float64x2 __a, int __lane) __attribute__((__nothrow__, __const__)) { return __a[__lane]; }
inline float64x2 emscripten_float64x2_replaceLane(float64x2 __a, int __lane, double __s) __attribute__((__nothrow__, __const__)) { __a[__lane] = __s; return __a; }
void emscripten_float64x2_store(const void *__p, float64x2 __a) __attribute__((__nothrow__));
void emscripten_float64x2_store1(const void *__p, float64x2 __a) __attribute__((__nothrow__));
float64x2 emscripten_float64x2_load(const void *__p) __attribute__((__nothrow__, __pure__));
Expand Down Expand Up @@ -450,20 +450,20 @@ uint8x16 emscripten_uint8x16_swizzle(uint8x16 __a, int __lane0, int __lane1, int
uint8x16 emscripten_uint8x16_shuffle(uint8x16 __a, uint8x16 __b, int __lane0, int __lane1, int __lane2, int __lane3, int __lane4, int __lane5, int __lane6, int __lane7, int __lane8, int __lane9, int __lane10, int __lane11, int __lane12, int __lane13, int __lane14, int __lane15) __attribute__((__nothrow__, __const__));

// Bool64x2
bool emscripten_bool64x2_anyTrue(bool64x2 __a, bool64x2 __b) __attribute__((__nothrow__, __const__));
bool emscripten_bool64x2_allTrue(bool64x2 __a, bool64x2 __b) __attribute__((__nothrow__, __const__));
int emscripten_bool64x2_anyTrue(bool64x2 __a) __attribute__((__nothrow__, __const__));
int emscripten_bool64x2_allTrue(bool64x2 __a) __attribute__((__nothrow__, __const__));

// Bool32x4
bool emscripten_bool32x4_anyTrue(bool32x4 __a, bool32x4 __b) __attribute__((__nothrow__, __const__));
bool emscripten_bool32x4_allTrue(bool32x4 __a, bool32x4 __b) __attribute__((__nothrow__, __const__));
int emscripten_bool32x4_anyTrue(bool32x4 __a) __attribute__((__nothrow__, __const__));
int emscripten_bool32x4_allTrue(bool32x4 __a) __attribute__((__nothrow__, __const__));

// Bool16x8
bool emscripten_bool16x8_anyTrue(bool16x8 __a, bool16x8 __b) __attribute__((__nothrow__, __const__));
bool emscripten_bool16x8_allTrue(bool16x8 __a, bool16x8 __b) __attribute__((__nothrow__, __const__));
int emscripten_bool16x8_anyTrue(bool16x8 __a) __attribute__((__nothrow__, __const__));
int emscripten_bool16x8_allTrue(bool16x8 __a) __attribute__((__nothrow__, __const__));

// Bool8x16
bool emscripten_bool8x16_anyTrue(bool8x16 __a, bool8x16 __b) __attribute__((__nothrow__, __const__));
bool emscripten_bool8x16_allTrue(bool8x16 __a, bool8x16 __b) __attribute__((__nothrow__, __const__));
int emscripten_bool8x16_anyTrue(bool8x16 __a) __attribute__((__nothrow__, __const__));
int emscripten_bool8x16_allTrue(bool8x16 __a) __attribute__((__nothrow__, __const__));

#ifdef __cplusplus
}
Expand Down
98 changes: 98 additions & 0 deletions tests/core/test_simd_float32x4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#include <emscripten/vector.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

// Prints `name`, then the four lanes of `vec` in lane order (0..3) with %f, on one line.
void dump(const char *name, float32x4 vec)
{
  // Lanes are widened to double here; printf would apply the same promotion to a float argument.
  const double lane0 = emscripten_float32x4_extractLane(vec, 0);
  const double lane1 = emscripten_float32x4_extractLane(vec, 1);
  const double lane2 = emscripten_float32x4_extractLane(vec, 2);
  const double lane3 = emscripten_float32x4_extractLane(vec, 3);
  printf("%s: %f %f %f %f\n", name, lane0, lane1, lane2, lane3);
}
#define DUMP(V) dump(#V, (V))

// Prints `name` followed by a colon, then the first `n` bytes at `bytes` as
// space-separated two-digit uppercase hex values, then a newline.
// With n <= 0 only the label and the newline are printed.
void dumpBytes(const char *name, const void *bytes, int n)
{
  // The buffer is only read, so keep the const qualifier instead of casting it away.
  const uint8_t *data = (const uint8_t *)bytes;
  printf("%s:", name);
  for(int i = 0; i < n; ++i) {
    // %X expects an unsigned int; convert explicitly rather than relying on promotion to int.
    printf(" %02X", (unsigned)data[i]);
  }
  printf("\n");
}
#define DUMPBYTES(name, bytes) dumpBytes(name, bytes, sizeof(bytes))

// Calls the emscripten_float32x4_* intrinsics one by one and prints every result.
// DUMP(V) prints the literal text of its argument (via #V) before the lanes, so the
// spelling of each expression below is part of the program output: reformatting an
// argument changes what is printed. The expected lines are in test_simd_float32x4.out.
int main()
{
// v has four distinct lanes (negative, zero, positive, fractional); w is a uniform vector.
float32x4 v = emscripten_float32x4_set(-1.f, 0.f, 1.f, 3.5f);
DUMP(v);
float32x4 w = emscripten_float32x4_splat(2.f);
DUMP(w);
// Arithmetic and min/max.
DUMP(emscripten_float32x4_add(v, w));
DUMP(emscripten_float32x4_sub(v, w));
DUMP(emscripten_float32x4_mul(v, w));
DUMP(emscripten_float32x4_div(v, w));
DUMP(emscripten_float32x4_max(v, w));
DUMP(emscripten_float32x4_min(v, w));
DUMP(emscripten_float32x4_maxNum(v, w));
DUMP(emscripten_float32x4_minNum(v, w));
// Unary operations. Lane 0 of v is negative and lane 1 is zero, so the expected
// output contains nan and inf lanes for sqrt and the reciprocal approximations.
DUMP(emscripten_float32x4_neg(v));
DUMP(emscripten_float32x4_sqrt(v));
DUMP(emscripten_float32x4_reciprocalApproximation(v));
DUMP(emscripten_float32x4_reciprocalSqrtApproximation(v));
DUMP(emscripten_float32x4_abs(v));
// Bitwise operations; the results are printed as floats by dump().
DUMP(emscripten_float32x4_and(v, w));
DUMP(emscripten_float32x4_xor(v, w));
DUMP(emscripten_float32x4_or(v, w));
DUMP(emscripten_float32x4_not(v));
// Comparisons; the results are also printed as floats by dump().
// NOTE(review): the expected output shows nan for lanes where the comparison holds and
// 0.000000 where it does not - presumably an all-ones lane mask reinterpreted as float; confirm.
DUMP(emscripten_float32x4_lessThan(v, w));
DUMP(emscripten_float32x4_lessThanOrEqual(v, w));
DUMP(emscripten_float32x4_greaterThan(v, w));
DUMP(emscripten_float32x4_greaterThanOrEqual(v, w));
DUMP(emscripten_float32x4_equal(v, w));
DUMP(emscripten_float32x4_notEqual(v, w));
// Per the expected output, lanes set to -1 in b take the value from v and lanes set to 0 take it from w.
// NOTE(review): b is declared bool32x4 but initialised from an int32x4 constructor - confirm this is intended.
bool32x4 b = emscripten_int32x4_set(0, -1, 0, -1);
DUMP(emscripten_float32x4_select(b, v, w));
// replaceLane returns a modified copy; v itself is not reassigned, so each call starts from the original v.
DUMP(emscripten_float32x4_replaceLane(v, 0, 9.f));
DUMP(emscripten_float32x4_replaceLane(v, 1, -3.f));
DUMP(emscripten_float32x4_replaceLane(v, 2, 0.f));
DUMP(emscripten_float32x4_replaceLane(v, 3, -0.f));
// The buffer is refilled with 0xFF before every store so that bytes a store leaves
// untouched remain visible as FF in the hex dump.
uint8_t bytes[16];
memset(bytes, 0xFF, sizeof(bytes));
emscripten_float32x4_store(bytes, v);
DUMPBYTES("emscripten_float32x4_store", bytes);
memset(bytes, 0xFF, sizeof(bytes));
emscripten_float32x4_store1(bytes, v);
DUMPBYTES("emscripten_float32x4_store1", bytes);
memset(bytes, 0xFF, sizeof(bytes));
emscripten_float32x4_store2(bytes, v);
DUMPBYTES("emscripten_float32x4_store2", bytes);
memset(bytes, 0xFF, sizeof(bytes));
emscripten_float32x4_store3(bytes, v);
DUMPBYTES("emscripten_float32x4_store3", bytes);

// Write all of v back into the buffer so the loads below read v's bytes rather than leftover 0xFF filler.
emscripten_float32x4_store(bytes, v);
DUMP(emscripten_float32x4_load(bytes));
// NOTE(review): lane 1 of v is 0.0, so load1 and load2 print identical values in the
// expected output; this data cannot tell the two apart.
DUMP(emscripten_float32x4_load1(bytes));
DUMP(emscripten_float32x4_load2(bytes));
DUMP(emscripten_float32x4_load3(bytes));
// Conversions that are not exercised by this test yet:
// TODO: emscripten_float32x4_fromFloat64x2Bits
// TODO: emscripten_float32x4_fromInt32x4Bits
// TODO: emscripten_float32x4_fromUint32x4Bits
// TODO: emscripten_float32x4_fromInt16x8Bits
// TODO: emscripten_float32x4_fromUint16x8Bits
// TODO: emscripten_float32x4_fromInt8x16Bits
// TODO: emscripten_float32x4_fromUint8x16Bits
// TODO: emscripten_float32x4_fromInt32x4
// TODO: emscripten_float32x4_fromUint32x4
// Swizzle: identity, reversed, broadcast of a single lane, and a mixed pattern.
DUMP(emscripten_float32x4_swizzle(v, 0, 1, 2, 3));
DUMP(emscripten_float32x4_swizzle(v, 3, 2, 1, 0));
DUMP(emscripten_float32x4_swizzle(v, 0, 0, 0, 0));
DUMP(emscripten_float32x4_swizzle(v, 0, 3, 0, 3));
DUMP(emscripten_float32x4_swizzle(v, 3, 3, 3, 3));
// Shuffle: per the expected output, indices 0-3 pick lanes of v and indices 4-7 pick lanes of z.
float32x4 z = emscripten_float32x4_set(-5.f, 20.f, 14.f, 9.f);
DUMP(z);
DUMP(emscripten_float32x4_shuffle(v, z, 0, 0, 0, 0));
DUMP(emscripten_float32x4_shuffle(v, z, 4, 4, 4, 4));
DUMP(emscripten_float32x4_shuffle(v, z, 7, 7, 7, 7));
DUMP(emscripten_float32x4_shuffle(v, z, 0, 2, 4, 6));
DUMP(emscripten_float32x4_shuffle(v, z, 7, 0, 3, 5));

// Final marker line of the output.
printf("Done!\n");
}
50 changes: 50 additions & 0 deletions tests/core/test_simd_float32x4.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
v: -1.000000 0.000000 1.000000 3.500000
w: 2.000000 2.000000 2.000000 2.000000
emscripten_float32x4_add(v, w): 1.000000 2.000000 3.000000 5.500000
emscripten_float32x4_sub(v, w): -3.000000 -2.000000 -1.000000 1.500000
emscripten_float32x4_mul(v, w): -2.000000 0.000000 2.000000 7.000000
emscripten_float32x4_div(v, w): -0.500000 0.000000 0.500000 1.750000
emscripten_float32x4_max(v, w): 2.000000 2.000000 2.000000 3.500000
emscripten_float32x4_min(v, w): -1.000000 0.000000 1.000000 2.000000
emscripten_float32x4_maxNum(v, w): 2.000000 2.000000 2.000000 3.500000
emscripten_float32x4_minNum(v, w): -1.000000 0.000000 1.000000 2.000000
emscripten_float32x4_neg(v): 1.000000 -0.000000 -1.000000 -3.500000
emscripten_float32x4_sqrt(v): nan 0.000000 1.000000 1.870829
emscripten_float32x4_reciprocalApproximation(v): -1.000000 inf 1.000000 0.285714
emscripten_float32x4_reciprocalSqrtApproximation(v): nan inf 1.000000 0.534522
emscripten_float32x4_abs(v): 1.000000 0.000000 1.000000 3.500000
emscripten_float32x4_and(v, w): 0.000000 0.000000 0.000000 2.000000
emscripten_float32x4_xor(v, w): -inf 2.000000 inf 0.000000
emscripten_float32x4_or(v, w): -inf 2.000000 inf 3.500000
emscripten_float32x4_not(v): 4.000000 nan -4.000000 -1.250000
emscripten_float32x4_lessThan(v, w): nan nan nan 0.000000
emscripten_float32x4_lessThanOrEqual(v, w): nan nan nan 0.000000
emscripten_float32x4_greaterThan(v, w): 0.000000 0.000000 0.000000 nan
emscripten_float32x4_greaterThanOrEqual(v, w): 0.000000 0.000000 0.000000 nan
emscripten_float32x4_equal(v, w): 0.000000 0.000000 0.000000 0.000000
emscripten_float32x4_notEqual(v, w): nan nan nan nan
emscripten_float32x4_select(b, v, w): 2.000000 0.000000 2.000000 3.500000
emscripten_float32x4_replaceLane(v, 0, 9.f): 9.000000 0.000000 1.000000 3.500000
emscripten_float32x4_replaceLane(v, 1, -3.f): -1.000000 -3.000000 1.000000 3.500000
emscripten_float32x4_replaceLane(v, 2, 0.f): -1.000000 0.000000 0.000000 3.500000
emscripten_float32x4_replaceLane(v, 3, -0.f): -1.000000 0.000000 1.000000 -0.000000
emscripten_float32x4_store: 00 00 80 BF 00 00 00 00 00 00 80 3F 00 00 60 40
emscripten_float32x4_store1: 00 00 80 BF FF FF FF FF FF FF FF FF FF FF FF FF
emscripten_float32x4_store2: 00 00 80 BF 00 00 00 00 FF FF FF FF FF FF FF FF
emscripten_float32x4_store3: 00 00 80 BF 00 00 00 00 00 00 80 3F FF FF FF FF
emscripten_float32x4_load(bytes): -1.000000 0.000000 1.000000 3.500000
emscripten_float32x4_load1(bytes): -1.000000 0.000000 0.000000 0.000000
emscripten_float32x4_load2(bytes): -1.000000 0.000000 0.000000 0.000000
emscripten_float32x4_load3(bytes): -1.000000 0.000000 1.000000 0.000000
emscripten_float32x4_swizzle(v, 0, 1, 2, 3): -1.000000 0.000000 1.000000 3.500000
emscripten_float32x4_swizzle(v, 3, 2, 1, 0): 3.500000 1.000000 0.000000 -1.000000
emscripten_float32x4_swizzle(v, 0, 0, 0, 0): -1.000000 -1.000000 -1.000000 -1.000000
emscripten_float32x4_swizzle(v, 0, 3, 0, 3): -1.000000 3.500000 -1.000000 3.500000
emscripten_float32x4_swizzle(v, 3, 3, 3, 3): 3.500000 3.500000 3.500000 3.500000
z: -5.000000 20.000000 14.000000 9.000000
emscripten_float32x4_shuffle(v, z, 0, 0, 0, 0): -1.000000 -1.000000 -1.000000 -1.000000
emscripten_float32x4_shuffle(v, z, 4, 4, 4, 4): -5.000000 -5.000000 -5.000000 -5.000000
emscripten_float32x4_shuffle(v, z, 7, 7, 7, 7): 9.000000 9.000000 9.000000 9.000000
emscripten_float32x4_shuffle(v, z, 0, 2, 4, 6): -1.000000 1.000000 -5.000000 14.000000
emscripten_float32x4_shuffle(v, z, 7, 0, 3, 5): 9.000000 -1.000000 3.500000 20.000000
Done!
87 changes: 87 additions & 0 deletions tests/core/test_simd_float64x2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include <emscripten/vector.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

// Prints `name`, then the two lanes of `vec` in lane order (0, 1) with %f, on one line.
void dump(const char *name, float64x2 vec)
{
  const double lane0 = emscripten_float64x2_extractLane(vec, 0);
  const double lane1 = emscripten_float64x2_extractLane(vec, 1);
  printf("%s: %f %f\n", name, lane0, lane1);
}
#define DUMP(V) dump(#V, (V))

// Prints `name` followed by a colon, then the first `n` bytes at `bytes` as
// space-separated two-digit uppercase hex values, then a newline.
// With n <= 0 only the label and the newline are printed.
void dumpBytes(const char *name, const void *bytes, int n)
{
  // The buffer is only read, so keep the const qualifier instead of casting it away.
  const uint8_t *data = (const uint8_t *)bytes;
  printf("%s:", name);
  for(int i = 0; i < n; ++i) {
    // %X expects an unsigned int; convert explicitly rather than relying on promotion to int.
    printf(" %02X", (unsigned)data[i]);
  }
  printf("\n");
}
#define DUMPBYTES(name, bytes) dumpBytes(name, bytes, sizeof(bytes))

// Calls the emscripten_float64x2_* intrinsics one by one and prints every result.
// DUMP(V) prints the literal text of its argument (via #V) before the lanes, so the
// spelling of each expression below is part of the program output: reformatting an
// argument changes what is printed.
int main()
{
// v has one negative and one positive lane; w is a uniform vector.
float64x2 v = emscripten_float64x2_set(-1.5f, 2.5f);
DUMP(v);
float64x2 w = emscripten_float64x2_splat(1.5f);
DUMP(w);
// Arithmetic and min/max.
DUMP(emscripten_float64x2_add(v, w));
DUMP(emscripten_float64x2_sub(v, w));
DUMP(emscripten_float64x2_mul(v, w));
DUMP(emscripten_float64x2_div(v, w));
DUMP(emscripten_float64x2_max(v, w));
DUMP(emscripten_float64x2_min(v, w));
DUMP(emscripten_float64x2_maxNum(v, w));
DUMP(emscripten_float64x2_minNum(v, w));
// Unary operations; lane 0 of v is negative.
DUMP(emscripten_float64x2_neg(v));
DUMP(emscripten_float64x2_sqrt(v));
DUMP(emscripten_float64x2_reciprocalApproximation(v));
DUMP(emscripten_float64x2_reciprocalSqrtApproximation(v));
DUMP(emscripten_float64x2_abs(v));
// Bitwise operations; the results are printed as doubles by dump().
DUMP(emscripten_float64x2_and(v, w));
DUMP(emscripten_float64x2_xor(v, w));
DUMP(emscripten_float64x2_or(v, w));
DUMP(emscripten_float64x2_not(v));
// Comparisons; the results are passed straight to dump(), i.e. printed as doubles.
DUMP(emscripten_float64x2_lessThan(v, w));
DUMP(emscripten_float64x2_lessThanOrEqual(v, w));
DUMP(emscripten_float64x2_greaterThan(v, w));
DUMP(emscripten_float64x2_greaterThanOrEqual(v, w));
DUMP(emscripten_float64x2_equal(v, w));
DUMP(emscripten_float64x2_notEqual(v, w));
// select is not exercised: the mask cannot be built yet (see the TODO on the next line).
//bool64x2 b = emscripten_int64x2_set(0, -1); // TODO: Can't yet use this form, no int64x2.
//DUMP(emscripten_float64x2_select(b, v, w));
// replaceLane returns a modified copy; v itself is not reassigned, so each call starts from the original v.
DUMP(emscripten_float64x2_replaceLane(v, 0, 9.f));
DUMP(emscripten_float64x2_replaceLane(v, 1, -3.f));
// The buffer is refilled with 0xFF before every store so that bytes a store leaves
// untouched remain visible as FF in the hex dump.
uint8_t bytes[16];
memset(bytes, 0xFF, sizeof(bytes));
emscripten_float64x2_store(bytes, v);
DUMPBYTES("emscripten_float64x2_store", bytes);
memset(bytes, 0xFF, sizeof(bytes));
emscripten_float64x2_store1(bytes, v);
DUMPBYTES("emscripten_float64x2_store1", bytes);

// Write all of v back into the buffer so the loads below read v's bytes rather than leftover 0xFF filler.
emscripten_float64x2_store(bytes, v);
DUMP(emscripten_float64x2_load(bytes));
DUMP(emscripten_float64x2_load1(bytes));
// Conversions that are not exercised by this test yet.
// NOTE(review): this list looks mechanically adapted from the float32x4 test. It names
// Int64x2/Uint64x2 sources although the TODO above says there is no int64x2, and
// fromFloat64x2Bits would convert the type to itself - confirm which conversions are meant.
// TODO: emscripten_float64x2_fromFloat64x2Bits
// TODO: emscripten_float64x2_fromInt64x2Bits
// TODO: emscripten_float64x2_fromUint64x2Bits
// TODO: emscripten_float64x2_fromInt16x8Bits
// TODO: emscripten_float64x2_fromUint16x8Bits
// TODO: emscripten_float64x2_fromInt8x16Bits
// TODO: emscripten_float64x2_fromUint8x16Bits
// TODO: emscripten_float64x2_fromInt64x2
// TODO: emscripten_float64x2_fromUint64x2
// Swizzle: all four index combinations of the two lanes.
DUMP(emscripten_float64x2_swizzle(v, 0, 1));
DUMP(emscripten_float64x2_swizzle(v, 1, 0));
DUMP(emscripten_float64x2_swizzle(v, 0, 0));
DUMP(emscripten_float64x2_swizzle(v, 1, 1));
// Shuffle over the pair (v, z) with indices in the range 0-3.
// NOTE(review): presumably 0-1 select from v and 2-3 from z - confirm against the expected output.
float64x2 z = emscripten_float64x2_set(-5.5f, 20.5f);
DUMP(z);
DUMP(emscripten_float64x2_shuffle(v, z, 0, 0));
DUMP(emscripten_float64x2_shuffle(v, z, 2, 2));
DUMP(emscripten_float64x2_shuffle(v, z, 3, 3));
DUMP(emscripten_float64x2_shuffle(v, z, 0, 2));
DUMP(emscripten_float64x2_shuffle(v, z, 3, 1));

// Final marker line of the output.
printf("Done!\n");
}
Loading

0 comments on commit 7bedc32

Please sign in to comment.