Improve handling of various FP CPU features, especially on x64

This includes, for instance, basic support for single-precision SSE floats and x87 FP on x64. Some of the FPU control word (FPUCW) improvements apply to 32-bit x86 as well.
ht13 · Feb 7, 2016 · 2e6711d · 2e6711d
1 parent 3862246
commit 2e6711d
Show file tree

Hide file tree

Showing 9 changed files with 515 additions and 76 deletions.
diff --git a/examples/fp-small/.gitignore b/examples/fp-small/.gitignore
@@ -1,3 +1,4 @@
 fp-assoc
 fp-round
 fp-ten
+fpucw-test
diff --git a/examples/fp-small/fpucw-test.c b/examples/fp-small/fpucw-test.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+int main(int argc, char **argv) {
+    float f;
+    double d;
+    long double ld;
+    if (argc != 2) {
+	fprintf(stderr, "Usage: fpucw-test <float>\n");
+	return 1;
+    }
+    f = strtof(argv[1], 0);
+    d = strtod(argv[1], 0);
+    ld = strtold(argv[1], 0);
+    printf("32-bit %a: %snegative\n", f, (signbit(f) ? "" : "non-"));
+    printf("64-bit %a: %snegative\n", d, (signbit(d) ? "" : "non-"));
+    printf("80-bit %La: %snegative\n", ld, (signbit(ld) ? "" : "non-"));
+    return 0;
+}
diff --git a/execution/exec_run_common.ml b/execution/exec_run_common.ml
@@ -123,9 +123,10 @@ let known_unknowns = (
     Hashtbl.replace h "Floating point binop" ();
     Hashtbl.replace h "Floating point triop" ();
     Hashtbl.replace h "floatcast" ();
-    Hashtbl.replace h "CCall: x86g_create_fpucw" ();
+    Hashtbl.replace h "CCall: x86g_create_fpucw" (); (* s/b handled now *)
     Hashtbl.replace h "CCall: x86g_calculate_FXAM" ();
-    Hashtbl.replace h "CCall: x86g_check_fldcw" ();
+    Hashtbl.replace h "CCall: x86g_check_fldcw" (); (* s/b handled now *)
+    Hashtbl.replace h "loadF80" ();
     h)
 
 (* Disable "unknown" statments it seems safe to ignore *)

diff --git a/execution/fragment_machine.ml b/execution/fragment_machine.ml
@@ -809,7 +809,8 @@ struct
 	reg R_CC_DEP1 (D.from_concrete_32 0L);
 	reg R_CC_DEP2 (D.from_concrete_32 0L);
 	reg R_CC_NDEP (D.from_concrete_32 0L);
-	reg R_SSEROUND (D.from_concrete_32 0L);
+	reg R_FPROUND (D.from_concrete_32 0L); (* to nearest *)
+	reg R_SSEROUND (D.from_concrete_32 0L); (* to nearest *)
 	reg R_XMM0L (D.from_concrete_64 0L);
 	reg R_XMM0H (D.from_concrete_64 0L);
 	reg R_XMM1L (D.from_concrete_64 0L);
@@ -866,7 +867,8 @@ struct
 	reg R_CC_DEP1 (D.from_concrete_64 0L);
 	reg R_CC_DEP2 (D.from_concrete_64 0L);
 	reg R_CC_NDEP (D.from_concrete_64 0L);
-	reg R_SSEROUND (D.from_concrete_64 0L);
+	reg R_FPROUND (D.from_concrete_64 0L); (* to nearest *)
+	reg R_SSEROUND (D.from_concrete_64 0L); (* to nearest *)
 	()
 
     method private make_arm_regs_zero =

diff --git a/libasmir/src/include/irtoir-internal.h b/libasmir/src/include/irtoir-internal.h
@@ -68,7 +68,9 @@ void  i386_modify_flags( asm_program_t *prog, vine_block_t *block );
 vector<VarDecl *> x64_get_reg_decls();
 IRStmt *x64_make_pc_put_stmt(Addr64 addr);
 Exp  *x64_translate_get( IRExpr *expr, IRSB *irbb, vector<Stmt *> *irout );
+Exp  *x64_translate_geti( IRExpr *expr, IRSB *irbb, vector<Stmt *> *irout );
 Stmt *x64_translate_put( IRStmt *stmt, IRSB *irbb, vector<Stmt *> *irout );
+Stmt *x64_translate_puti( IRStmt *stmt, IRSB *irbb, vector<Stmt *> *irout );
 Exp  *x64_translate_ccall( IRExpr *expr, IRSB *irbb, vector<Stmt *> *irout );
 Stmt *x64_translate_dirty( IRStmt *stmt, IRSB *irbb, vector<Stmt *> *irout );
 void  x64_modify_flags( asm_program_t *prog, vine_block_t *block );

diff --git a/libasmir/src/vex/irtoir-i386.cpp b/libasmir/src/vex/irtoir-i386.cpp
@@ -890,7 +890,9 @@ Stmt *i386_translate_dirty( IRStmt *stmt, IRSB *irbb, vector<Stmt *> *irout )
 	result = new Special("cpuid");
     }
     else if (func == "x86g_dirtyhelper_loadF80le") {
-        result = new ExpStmt(new Unknown("Unknown: loadF80"));
+	IRTemp lhs = dirty->tmp;
+	assert(lhs != IRTemp_INVALID);
+	result = mk_assign_tmp(lhs, new Unknown("loadF80"), irbb, irout);
     }
     else if (func == "x86g_dirtyhelper_storeF80le") {
         result = new ExpStmt(new Unknown("Unknown: storeF80"));
@@ -1246,6 +1248,12 @@ Exp *i386_translate_ccall( IRExpr *expr, IRSB *irbb, vector<Stmt *> *irout )
 	result = _ex_or(ex_const(0x1f80),
 			_ex_shl(arg, ex_const(13)));
     }
+    else if ( func == "x86g_create_fpucw" )
+    {
+	Exp *arg = translate_expr(expr->Iex.CCall.args[0], irbb, irout);
+	result = _ex_or(ex_const(0x037f),
+			_ex_shl(arg, ex_const(10)));
+    }
     else if ( func == "x86g_check_ldmxcsr" )
     {
 	Exp *arg = translate_expr(expr->Iex.CCall.args[0], irbb, irout);
@@ -1255,6 +1263,15 @@ Exp *i386_translate_ccall( IRExpr *expr, IRSB *irbb, vector<Stmt *> *irout )
 	/* The high word is for emulation warnings: skip it */
 	result = _ex_u_cast(rmode, REG_64);
     }
+    else if ( func == "x86g_check_fldcw" )
+    {
+	Exp *arg = translate_expr(expr->Iex.CCall.args[0], irbb, irout);
+	/* Extract the rounding mode */
+	Exp *rmode = _ex_and(_ex_shr(arg, ex_const(10)),
+			     ex_const(3));
+	/* The high word is for emulation warnings: skip it */
+	result = _ex_u_cast(rmode, REG_64);
+    }
     else
     {
         result = new Unknown("CCall: " + func);