diff --git a/simd/simd-sse.h b/simd/simd-sse.h
index a80a81bd0..556655bfa 100644
--- a/simd/simd-sse.h
+++ b/simd/simd-sse.h
@@ -53,18 +53,18 @@ typedef __m128 V;
 #define UNPCKL _mm_unpacklo_ps
 
 #ifdef __GNUC__
-#define DVK(var, val) const V var = __extension__ ({		\
+#  define DVK(var, val) const V var = __extension__ ({		\
      static const union fvec _var = { {val, val, val, val} };	\
      _var.v;							\
-})
-#define LDK(x) x
+   })
+#  define LDK(x) x
 
-/* we use inline asm because gcc generates slow code for
-   _mm_loadh_pi().  gcc insists upon having an existing variable for
-   VAL, which is however never used.  Thus, it generates code to move
-   values in and out the variable.  Worse still, gcc-4.0 stores VAL on
-   the stack, causing valgrind to complain about uninitialized reads.
-*/   
+  /* we use inline asm because gcc generates slow code for
+     _mm_loadh_pi().  gcc insists upon having an existing variable for
+     VAL, which is however never used.  Thus, it generates code to move
+     values in and out of the variable.  Worse still, gcc-4.0 stores VAL
+     on the stack, causing valgrind to complain about uninitialized reads.
+  */
 
   static inline V LD(const R *x, INT ivs, const R *aligned_like)
   {
@@ -75,19 +75,6 @@ typedef __m128 V;
        return var;
   }
 
-static inline V LOADL0(const R *addr, V val)
-{
-     V retval;
-     /* gcc-3.3 -O3 produces wrong code with the ``obvious'' coding
-
-          __asm__("movlps %1, %0" : "=x"(retval) : "m"(*addr));
-
-        So we are back to the uninitialized variable nonsense.  Grrr... 
-     */
-     __asm__("movlps %1, %0" : "=x"(retval) : "m"(*addr), "x"(val));
-     return retval;
-}
-
 #else
 
 # define DVK(var, val) const R var = K(val)