diff --git a/Makefile b/Makefile
index 0d7ded3fa040611ac93e39ca6b03e1cee0271df9..7f0f8be2baa04d8a5e3e18110b3173327465e005 100644
--- a/Makefile
+++ b/Makefile
@@ -90,7 +90,7 @@ i386.ld ppc.ld exec-i386.h exec-i386.c configure VERSION \
 tests/Makefile\
 tests/test-i386.c tests/test-i386-shift.h tests/test-i386.h\
 tests/test-i386-muldiv.h\
-tests/test2.c tests/hello.c tests/sha1.c
+tests/test2.c tests/hello.c tests/hello tests/sha1.c
 
 FILE=gemu-$(VERSION)
 
diff --git a/configure b/configure
index 62b1e712220be51d7e888c8c4bbad956a2ed3783..d9053cf764fdba5ff673d0f61b9a80627b49a1ba 100755
--- a/configure
+++ b/configure
@@ -11,11 +11,11 @@ else
     TMPDIR1="/tmp"
 fi
 
-TMPC="${TMPDIR1}/qemacs-conf-${RANDOM}-$$-${RANDOM}.c"
-TMPO="${TMPDIR1}/qemacs-conf-${RANDOM}-$$-${RANDOM}.o"
-TMPE="${TMPDIR1}/qemacs-conf-${RANDOM}-$$-${RANDOM}"
-TMPS="${TMPDIR1}/qemacs-conf-${RANDOM}-$$-${RANDOM}.S"
-TMPH="${TMPDIR1}/qemacs-conf-${RANDOM}-$$-${RANDOM}.h"
+TMPC="${TMPDIR1}/gemu-conf-${RANDOM}-$$-${RANDOM}.c"
+TMPO="${TMPDIR1}/gemu-conf-${RANDOM}-$$-${RANDOM}.o"
+TMPE="${TMPDIR1}/gemu-conf-${RANDOM}-$$-${RANDOM}"
+TMPS="${TMPDIR1}/gemu-conf-${RANDOM}-$$-${RANDOM}.S"
+TMPH="${TMPDIR1}/gemu-conf-${RANDOM}-$$-${RANDOM}.h"
 
 # default parameters
 prefix="/usr/local"
@@ -144,12 +144,20 @@ fi
 fi
 
 # check gcc version
+cat > $TMPC <<EOF
+int main(void) {
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)
+return 0;
+#else
+#error gcc < 3.2
+#endif
+}
+EOF
+
 gcc_major="2"
-gcc_version="$($cc -v 2>&1 | grep version | cut -d ' ' -f3-)"
-case "$gcc_version" in
-3.*) gcc_major="3";
-;;
-esac
+if $cc -c -o "$TMPO" "$TMPC" 2> /dev/null ; then
+   gcc_major="3" # the probe only compiles when gcc is >= 3.2 (older gcc hits #error)
+fi
 
 if test x"$1" = x"-h" -o x"$1" = x"--help" ; then
 cat << EOF
@@ -201,9 +209,9 @@ if test "$cpu" = "x86" ; then
 elif test "$cpu" = "armv4l" ; then
   echo "ARCH=arm" >> config.mak
 elif test "$cpu" = "powerpc" ; then
-  echo "ARCH=ppc" > config.mak
+  echo "ARCH=ppc" >> config.mak
 elif test "$cpu" = "mips" ; then
-  echo "ARCH=mips" > config.mak
+  echo "ARCH=mips" >> config.mak
 else
   echo "Unsupported CPU"
   exit 1
diff --git a/cpu-i386.h b/cpu-i386.h
index fc68a91d920c09061d2fea6af0c2a2721f6b918a..550e18387f77b2c3c3d170f8b354349caea8806b 100644
--- a/cpu-i386.h
+++ b/cpu-i386.h
@@ -4,6 +4,7 @@
 #ifndef CPU_I386_H
 #define CPU_I386_H
 
+#include "config.h"
 #include <setjmp.h>
 
 #define R_EAX 0
@@ -174,6 +175,7 @@ typedef struct CPUX86State {
     int exception_index;
 } CPUX86State;
 
+/* all CPU memory accesses use these functions */
 static inline int ldub(void *ptr)
 {
     return *(uint8_t *)ptr;
@@ -184,6 +186,134 @@ static inline int ldsb(void *ptr)
     return *(int8_t *)ptr;
 }
 
+static inline void stb(void *ptr, int v)
+{
+    ((uint8_t *)ptr)[0] = v; /* only the low 8 bits of v are stored */
+}
+
+#ifdef WORDS_BIGENDIAN
+
+/* conservative code for little endian unaligned accesses */
+static inline int lduw(void *ptr)
+{
+#ifdef __powerpc__
+    int val; /* byte-reversed halfword load; the "m" input tells gcc that *ptr is read */
+    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr), "m" (*(uint16_t *)ptr));
+    return val;
+#else
+    uint8_t *p = ptr; /* portable path: combine the two bytes, low byte first */
+    return p[0] | (p[1] << 8);
+#endif
+}
+
+static inline int ldsw(void *ptr)
+{
+#ifdef __powerpc__
+    int val; /* byte-reversed halfword load; the "m" input tells gcc that *ptr is read */
+    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr), "m" (*(uint16_t *)ptr));
+    return (int16_t)val;
+#else
+    uint8_t *p = ptr; /* portable path: combine the bytes, then sign extend from 16 bits */
+    return (int16_t)(p[0] | (p[1] << 8));
+#endif
+}
+
+static inline int ldl(void *ptr)
+{
+#ifdef __powerpc__
+    int val; /* byte-reversed word load; the "m" input tells gcc that *ptr is read */
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr), "m" (*(uint32_t *)ptr));
+    return val;
+#else
+    uint8_t *p = ptr; /* p[3] is widened to unsigned first: an int shifted by 24 can overflow */
+    return p[0] | (p[1] << 8) | (p[2] << 16) | ((uint32_t)p[3] << 24);
+#endif
+}
+
+/* load a 64 bit little endian value as two 32 bit words, low word first */
+static inline uint64_t ldq(void *ptr)
+{
+    uint8_t *base = ptr;
+    uint32_t lo = ldl(base);
+    uint32_t hi = ldl(base + 4);
+    return ((uint64_t)hi << 32) | lo;
+}
+
+/* store the low 16 bits of v in little endian byte order */
+static inline void stw(void *ptr, int v)
+{
+#ifndef __powerpc__
+    ((uint8_t *)ptr)[0] = v;
+    ((uint8_t *)ptr)[1] = v >> 8;
+#else
+    __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr));
+#endif
+}
+
+/* store a 32 bit value in little endian byte order */
+static inline void stl(void *ptr, int v)
+{
+#ifndef __powerpc__
+    uint8_t *out = ptr;
+    int i;
+    for (i = 0; i < 4; i++)
+        out[i] = v >> (8 * i);
+#else
+    __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr));
+#endif
+}
+
+/* store a 64 bit value as two little endian 32 bit words, low word first */
+static inline void stq(void *ptr, uint64_t v)
+{
+    stl(ptr, (uint32_t)v);
+    stl((uint8_t *)ptr + 4, v >> 32);
+}
+
+/* float access */
+
+/* load a 32 bit float stored in little endian byte order */
+static inline float ldfl(void *ptr)
+{
+    union { uint32_t bits; float value; } conv;
+
+    /* reinterpret the loaded integer bit pattern through a union */
+    conv.bits = ldl(ptr);
+    return conv.value;
+}
+
+/* load a 64 bit double stored in little endian byte order */
+static inline double ldfq(void *ptr)
+{
+    union { uint64_t bits; double value; } conv;
+
+    /* reinterpret the loaded integer bit pattern through a union */
+    conv.bits = ldq(ptr);
+    return conv.value;
+}
+
+/* store a float as a 32 bit little endian value */
+static inline void stfl(void *ptr, float v)
+{
+    union { uint32_t bits; float value; } conv;
+
+    /* take the bit pattern of v through a union */
+    conv.value = v;
+    stl(ptr, conv.bits);
+}
+
+/* store a double as a 64 bit little endian value */
+static inline void stfq(void *ptr, double v)
+{
+    union { uint64_t bits; double value; } conv;
+
+    /* take the bit pattern of v through a union */
+    conv.value = v;
+    stq(ptr, conv.bits);
+}
+
+#else
+
 static inline int lduw(void *ptr)
 {
     return *(uint16_t *)ptr;
@@ -204,11 +334,6 @@ static inline uint64_t ldq(void *ptr)
     return *(uint64_t *)ptr;
 }
 
-static inline void stb(void *ptr, int v)
-{
-    *(uint8_t *)ptr = v;
-}
-
 static inline void stw(void *ptr, int v)
 {
     *(uint16_t *)ptr = v;
@@ -245,6 +370,7 @@ static inline void stfq(void *ptr, double v)
 {
     *(double *)ptr = v;
 }
+#endif
 
 #ifndef IN_OP_I386
 void cpu_x86_outb(int addr, int val);
diff --git a/dis-asm.h b/dis-asm.h
index bd7e47844c913bec631a6681a86742a52eada414..20ca8e26a34c12586087b40c9e1afb954e96f65d 100644
--- a/dis-asm.h
+++ b/dis-asm.h
@@ -10,6 +10,7 @@
 #define DIS_ASM_H
 
 #include <stdio.h>
+#include <string.h>
 #include "bfd.h"
 
 typedef int (*fprintf_ftype) PARAMS((FILE*, const char*, ...));
diff --git a/dyngen.c b/dyngen.c
index 9b2889b67aaed76d6f8d28c9993f26c7d454b9bb..ed6861063995bd9f79e2299d76e18476ec8d7b8a 100644
--- a/dyngen.c
+++ b/dyngen.c
@@ -19,6 +19,7 @@
  */
 #include <stdlib.h>
 #include <stdio.h>
+#include <string.h>
 #include <stdarg.h>
 #include <inttypes.h>
 #include <elf.h>
@@ -228,14 +229,10 @@ void gen_code(const char *name, unsigned long offset, unsigned long size,
         {
             uint8_t *p;
             p = (void *)(p_end - 4);
-            /* find ret */
-            while (p > p_start && get32((uint32_t *)p) != 0x4e800020)
-                p -= 4;
-            /* skip double ret */
-            if (p > p_start && get32((uint32_t *)(p - 4)) == 0x4e800020)
-                p -= 4;
             if (p == p_start)
                 error("empty code for %s", name);
+            if (get32((uint32_t *)p) != 0x4e800020)
+                error("blr expected at the end of %s", name);
             copy_size = p - p_start;
         }
         break;
@@ -361,6 +358,51 @@ void gen_code(const char *name, unsigned long offset, unsigned long size,
                 }
             }
             break;
+        case EM_PPC:
+            {
+                Elf32_Rela *rel;
+                char name[256];
+                int type;
+                long addend;
+                for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                    if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                        sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name;
+                        if (strstart(sym_name, "__op_param", &p)) {
+                            snprintf(name, sizeof(name), "param%s", p);
+                        } else {
+                            snprintf(name, sizeof(name), "(long)(&%s)", sym_name);
+                        }
+                        type = ELF32_R_TYPE(rel->r_info);
+                        addend = rel->r_addend;
+                        switch(type) {
+                        case R_PPC_ADDR32:
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %ld) = %s + %ld;\n", 
+                                    rel->r_offset - offset, name, addend);
+                            break;
+                        case R_PPC_ADDR16_LO:
+                            fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %ld) = (%s + %ld);\n", 
+                                    rel->r_offset - offset, name, addend);
+                            break;
+                        case R_PPC_ADDR16_HI:
+                            fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %ld) = (%s + %ld) >> 16;\n", 
+                                    rel->r_offset - offset, name, addend);
+                            break;
+                        case R_PPC_ADDR16_HA:
+                            fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %ld) = (%s + %ld + 0x8000) >> 16;\n", 
+                                    rel->r_offset - offset, name, addend);
+                            break;
+                        case R_PPC_REL24:
+                            /* warning: the target must be within 32 MB of the branch */
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %ld) = (*(uint32_t *)(gen_code_ptr + %ld) & ~0x03fffffc) | ((%s - (long)(gen_code_ptr + %ld) + %ld) & 0x03fffffc);\n", 
+                                    rel->r_offset - offset, rel->r_offset - offset, name, rel->r_offset - offset, addend);
+                            break;
+                        default:
+                            error("unsupported powerpc relocation (%d)", type);
+                        }
+                    }
+                }
+            }
+            break;
         default:
             error("unsupported CPU for relocations (%d)", e_machine);
         }
@@ -569,6 +611,9 @@ fprintf(outfile,
     case EM_386:
         fprintf(outfile, "*gen_code_ptr++ = 0xc3; /* ret */\n");
         break;
+    case EM_PPC: /* emit blr with a plain store and pointer bump (no cast-as-lvalue gcc extension) */
+        fprintf(outfile, "*(uint32_t *)gen_code_ptr = 0x4e800020; gen_code_ptr += 4; /* blr */\n");
+        break;
     default:
         error("no return generation for cpu '%s'", cpu_name);
     }
diff --git a/exec-i386.c b/exec-i386.c
index 538ebe0d6361db300da39c373fd6f40262aaa339..0dbaccc8300c28772e9ef609cb3452c218f7db6d 100644
--- a/exec-i386.c
+++ b/exec-i386.c
@@ -171,6 +171,30 @@ int cpu_x86_exec(CPUX86State *env1)
 {
     int saved_T0, saved_T1, saved_A0;
     CPUX86State *saved_env;
+#ifdef reg_EAX
+    int saved_EAX;
+#endif
+#ifdef reg_ECX
+    int saved_ECX;
+#endif
+#ifdef reg_EDX
+    int saved_EDX;
+#endif
+#ifdef reg_EBX
+    int saved_EBX;
+#endif
+#ifdef reg_ESP
+    int saved_ESP;
+#endif
+#ifdef reg_EBP
+    int saved_EBP;
+#endif
+#ifdef reg_ESI
+    int saved_ESI;
+#endif
+#ifdef reg_EDI
+    int saved_EDI;
+#endif
     int code_gen_size, ret;
     void (*gen_func)(void);
     TranslationBlock *tb;
@@ -183,6 +207,38 @@ int cpu_x86_exec(CPUX86State *env1)
     saved_A0 = A0;
     saved_env = env;
     env = env1;
+#ifdef reg_EAX
+    saved_EAX = EAX;
+    EAX = env->regs[R_EAX];
+#endif
+#ifdef reg_ECX
+    saved_ECX = ECX;
+    ECX = env->regs[R_ECX];
+#endif
+#ifdef reg_EDX
+    saved_EDX = EDX;
+    EDX = env->regs[R_EDX];
+#endif
+#ifdef reg_EBX
+    saved_EBX = EBX;
+    EBX = env->regs[R_EBX];
+#endif
+#ifdef reg_ESP
+    saved_ESP = ESP;
+    ESP = env->regs[R_ESP];
+#endif
+#ifdef reg_EBP
+    saved_EBP = EBP;
+    EBP = env->regs[R_EBP];
+#endif
+#ifdef reg_ESI
+    saved_ESI = ESI;
+    ESI = env->regs[R_ESI];
+#endif
+#ifdef reg_EDI
+    saved_EDI = EDI;
+    EDI = env->regs[R_EDI];
+#endif
     
     /* prepare setjmp context for exception handling */
     if (setjmp(env->jmp_env) == 0) {
@@ -217,6 +273,30 @@ int cpu_x86_exec(CPUX86State *env1)
     ret = env->exception_index;
 
     /* restore global registers */
+#ifdef reg_EAX
+    EAX = saved_EAX;
+#endif
+#ifdef reg_ECX
+    ECX = saved_ECX;
+#endif
+#ifdef reg_EDX
+    EDX = saved_EDX;
+#endif
+#ifdef reg_EBX
+    EBX = saved_EBX;
+#endif
+#ifdef reg_ESP
+    ESP = saved_ESP;
+#endif
+#ifdef reg_EBP
+    EBP = saved_EBP;
+#endif
+#ifdef reg_ESI
+    ESI = saved_ESI;
+#endif
+#ifdef reg_EDI
+    EDI = saved_EDI;
+#endif
     T0 = saved_T0;
     T1 = saved_T1;
     A0 = saved_A0;
diff --git a/exec-i386.h b/exec-i386.h
index 0e0cae2756ce72cd5d019d9e045beb249eacdd7a..0384d0bf22c5a16760282c08b58dd67c6decc700 100644
--- a/exec-i386.h
+++ b/exec-i386.h
@@ -36,10 +36,27 @@ register unsigned int A0 asm("edi");
 register struct CPUX86State *env asm("ebp");
 #endif
 #ifdef __powerpc__
+register unsigned int EAX asm("r16"); /* on PowerPC hosts the eight x86 integer registers live in r16-r23 */
+register unsigned int ECX asm("r17");
+register unsigned int EDX asm("r18");
+register unsigned int EBX asm("r19");
+register unsigned int ESP asm("r20");
+register unsigned int EBP asm("r21");
+register unsigned int ESI asm("r22");
+register unsigned int EDI asm("r23");
 register unsigned int T0 asm("r24");
 register unsigned int T1 asm("r25");
 register unsigned int A0 asm("r26");
 register struct CPUX86State *env asm("r27");
+#define USE_INT_TO_FLOAT_HELPERS /* op-i386.c: do the integer to float loads in helper functions */
+#define reg_EAX /* reg_XXX defined: XXX is a global register variable, so the env->regs[] macro is not defined */
+#define reg_ECX
+#define reg_EDX
+#define reg_EBX
+#define reg_ESP
+#define reg_EBP
+#define reg_ESI
+#define reg_EDI
 #endif
 #ifdef __arm__
 register unsigned int T0 asm("r4");
@@ -70,14 +87,30 @@ register struct CPUX86State *env asm("l3");
 #define xglue(x, y) x ## y
 #define glue(x, y) xglue(x, y)
 
+#ifndef reg_EAX
 #define EAX (env->regs[R_EAX])
+#endif
+#ifndef reg_ECX
 #define ECX (env->regs[R_ECX])
+#endif
+#ifndef reg_EDX
 #define EDX (env->regs[R_EDX])
+#endif
+#ifndef reg_EBX
 #define EBX (env->regs[R_EBX])
+#endif
+#ifndef reg_ESP
 #define ESP (env->regs[R_ESP])
+#endif
+#ifndef reg_EBP
 #define EBP (env->regs[R_EBP])
+#endif
+#ifndef reg_ESI
 #define ESI (env->regs[R_ESI])
+#endif
+#ifndef reg_EDI
 #define EDI (env->regs[R_EDI])
+#endif
 #define PC  (env->pc)
 #define DF  (env->df)
 
diff --git a/linux-user/main.c b/linux-user/main.c
index b59c85d9c5cb7a47be7ee5025c560cd25edee3f2..45e81b207c0e63b499ebcf9752430c2100a6fb9c 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdarg.h>
+#include <string.h>
 #include <errno.h>
 #include <unistd.h>
 
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index e6f04a8408d4bc6bc94eb0e1c42d5a8deb828390..c0bee47f766ae9bbc3a0b25264e3a4973639ca5c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdarg.h>
+#include <string.h>
 #include <elf.h>
 #include <endian.h>
 #include <errno.h>
@@ -42,6 +43,9 @@
 #define termios host_termios
 #define winsize host_winsize
 #define termio host_termio
+#define sgttyb host_sgttyb /* same as target */
+#define tchars host_tchars /* same as target */
+#define ltchars host_ltchars /* same as target */
 
 #include <linux/termios.h>
 #include <linux/unistd.h>
@@ -904,7 +908,8 @@ long do_syscall(void *cpu_env, int num, long arg1, long arg2, long arg3,
     case TARGET_NR_ftime:
         goto unimplemented;
     case TARGET_NR_sync:
-        ret = get_errno(sync());
+        sync();
+        ret = 0;
         break;
     case TARGET_NR_kill:
         ret = get_errno(kill(arg1, arg2));
diff --git a/op-i386.c b/op-i386.c
index 002ce96928dc445d12e5629f94e6843c4cb2b3e0..70b1d7449004d1bf6895fdef213753232d6f9c61 100644
--- a/op-i386.c
+++ b/op-i386.c
@@ -123,6 +123,32 @@ static inline int lshift(int x, int n)
 /* NOTE: not static to force relocation generation by GCC */
 void raise_exception(int exception_index)
 {
+    /* NOTE: the registers must be saved by hand at this point because
+       longjmp restores them */
+#ifdef reg_EAX
+    env->regs[R_EAX] = EAX;
+#endif
+#ifdef reg_ECX
+    env->regs[R_ECX] = ECX;
+#endif
+#ifdef reg_EDX
+    env->regs[R_EDX] = EDX;
+#endif
+#ifdef reg_EBX
+    env->regs[R_EBX] = EBX;
+#endif
+#ifdef reg_ESP
+    env->regs[R_ESP] = ESP;
+#endif
+#ifdef reg_EBP
+    env->regs[R_EBP] = EBP;
+#endif
+#ifdef reg_ESI
+    env->regs[R_ESI] = ESI;
+#endif
+#ifdef reg_EDI
+    env->regs[R_EDI] = EDI;
+#endif
     env->exception_index = exception_index;
     longjmp(env->jmp_env, 1);
 }
@@ -1341,6 +1367,41 @@ void OPPROTO op_fldl_FT0_A0(void)
     FT0 = ldfq((void *)A0);
 }
 
+/* helpers are needed to avoid static constant reference: each op only calls its helper, which loads and converts. XXX: find a better way */
+#ifdef USE_INT_TO_FLOAT_HELPERS
+
+void helper_fild_FT0_A0(void)
+{
+    FT0 = (CPU86_LDouble)ldsw((void *)A0); /* signed 16 bit integer at address A0 */
+}
+
+void helper_fildl_FT0_A0(void)
+{
+    FT0 = (CPU86_LDouble)((int32_t)ldl((void *)A0)); /* signed 32 bit integer at address A0 */
+}
+
+void helper_fildll_FT0_A0(void)
+{
+    FT0 = (CPU86_LDouble)((int64_t)ldq((void *)A0)); /* signed 64 bit integer at address A0 */
+}
+
+void OPPROTO op_fild_FT0_A0(void)
+{
+    helper_fild_FT0_A0(); /* the conversion itself is done out of line */
+}
+
+void OPPROTO op_fildl_FT0_A0(void)
+{
+    helper_fildl_FT0_A0(); /* the conversion itself is done out of line */
+}
+
+void OPPROTO op_fildll_FT0_A0(void)
+{
+    helper_fildll_FT0_A0(); /* the conversion itself is done out of line */
+}
+
+#else
+
 void OPPROTO op_fild_FT0_A0(void)
 {
     FT0 = (CPU86_LDouble)ldsw((void *)A0);
@@ -1355,6 +1416,7 @@ void OPPROTO op_fildll_FT0_A0(void)
 {
     FT0 = (CPU86_LDouble)((int64_t)ldq((void *)A0));
 }
+#endif
 
 /* fp load ST0 */
 
@@ -1393,6 +1455,41 @@ void OPPROTO op_fldt_ST0_A0(void)
 }
 #endif
 
+/* helpers are needed to avoid static constant reference: each op only calls its helper, which loads and converts. XXX: find a better way */
+#ifdef USE_INT_TO_FLOAT_HELPERS
+
+void helper_fild_ST0_A0(void)
+{
+    ST0 = (CPU86_LDouble)ldsw((void *)A0); /* signed 16 bit integer at address A0 */
+}
+
+void helper_fildl_ST0_A0(void)
+{
+    ST0 = (CPU86_LDouble)((int32_t)ldl((void *)A0)); /* signed 32 bit integer at address A0 */
+}
+
+void helper_fildll_ST0_A0(void)
+{
+    ST0 = (CPU86_LDouble)((int64_t)ldq((void *)A0)); /* signed 64 bit integer at address A0 */
+}
+
+void OPPROTO op_fild_ST0_A0(void)
+{
+    helper_fild_ST0_A0(); /* the conversion itself is done out of line */
+}
+
+void OPPROTO op_fildl_ST0_A0(void)
+{
+    helper_fildl_ST0_A0(); /* the conversion itself is done out of line */
+}
+
+void OPPROTO op_fildll_ST0_A0(void)
+{
+    helper_fildll_ST0_A0(); /* the conversion itself is done out of line */
+}
+
+#else
+
 void OPPROTO op_fild_ST0_A0(void)
 {
     ST0 = (CPU86_LDouble)ldsw((void *)A0);
@@ -1408,6 +1505,8 @@ void OPPROTO op_fildll_ST0_A0(void)
     ST0 = (CPU86_LDouble)((int64_t)ldq((void *)A0));
 }
 
+#endif
+
 /* fp store */
 
 void OPPROTO op_fsts_ST0_A0(void)
diff --git a/ops_template.h b/ops_template.h
index f8cd5e54ca57586e0fb98d48c62d2ba13b3ce0fb..bc96f651a29e50578a8688993b4e2af654fec930 100644
--- a/ops_template.h
+++ b/ops_template.h
@@ -809,6 +809,7 @@ void OPPROTO glue(op_rep_movs, SUFFIX)(void)
         EDI += inc;
         ECX--;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_stos, SUFFIX)(void)
@@ -826,6 +827,7 @@ void OPPROTO glue(op_rep_stos, SUFFIX)(void)
         EDI += inc;
         ECX--;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_lods, SUFFIX)(void)
@@ -859,6 +861,7 @@ void OPPROTO glue(op_rep_lods, SUFFIX)(void)
         ESI += inc;
         ECX--;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_scas, SUFFIX)(void)
@@ -890,6 +893,7 @@ void OPPROTO glue(op_repz_scas, SUFFIX)(void)
         CC_DST = v1 - v2;
         CC_OP = CC_OP_SUBB + SHIFT;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_repnz_scas, SUFFIX)(void)
@@ -911,6 +915,7 @@ void OPPROTO glue(op_repnz_scas, SUFFIX)(void)
         CC_DST = v1 - v2;
         CC_OP = CC_OP_SUBB + SHIFT;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_cmps, SUFFIX)(void)
@@ -942,6 +947,7 @@ void OPPROTO glue(op_repz_cmps, SUFFIX)(void)
         CC_DST = v1 - v2;
         CC_OP = CC_OP_SUBB + SHIFT;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_repnz_cmps, SUFFIX)(void)
@@ -962,6 +968,7 @@ void OPPROTO glue(op_repnz_cmps, SUFFIX)(void)
         CC_DST = v1 - v2;
         CC_OP = CC_OP_SUBB + SHIFT;
     }
+    FORCE_RET();
 }
 
 /* port I/O */
@@ -986,6 +993,7 @@ void OPPROTO glue(op_rep_outs, SUFFIX)(void)
         ESI += inc;
         ECX--;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(op_ins, SUFFIX)(void)
@@ -1008,6 +1016,7 @@ void OPPROTO glue(op_rep_ins, SUFFIX)(void)
         EDI += (DF << SHIFT);
         ECX--;
     }
+    FORCE_RET();
 }
 
 void OPPROTO glue(glue(op_out, SUFFIX), _T0_T1)(void)
diff --git a/tests/Makefile b/tests/Makefile
index c7d1154c00a6d2a3cb44ec565b13ba3081dd40c6..72b559dfd55c4f8b1a47692fff668d9bd7a6d715 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -4,8 +4,9 @@ CFLAGS=-Wall -O2 -g
 LDFLAGS=
 
 ifeq ($(ARCH),i386)
-TESTS=hello test2 sha1 test-i386
+TESTS=test2 sha1-i386 test-i386
 endif
+TESTS+=sha1
 
 GEMU=../gemu
 
@@ -13,26 +14,32 @@ all: $(TESTS)
 
 hello: hello.c
 	$(CC) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $<
+	strip hello
 
 test2: test2.c
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
 
-# i386 emulation test (dump various opcodes) */
+# i386 emulation test (test various opcodes)
 test-i386: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
 	$(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ $< -lm
 
 test: test-i386
+ifeq ($(ARCH),i386)
 	./test-i386 > test-i386.ref
+endif
 	$(GEMU) test-i386 > test-i386.out
 	@if diff -u test-i386.ref test-i386.out ; then echo "Auto Test OK"; fi
 
 # speed test
-sha1: sha1.c
+sha1-i386: sha1.c
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
 
-speed: sha1
+sha1: sha1.c
+	$(HOST_CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+speed: sha1 sha1-i386
 	time ./sha1
-	time $(GEMU) sha1
+	time $(GEMU) ./sha1-i386
 
 clean:
 	rm -f *~ *.o $(TESTS)
diff --git a/tests/test-i386.c b/tests/test-i386.c
index 86aa94915a393b50831e4fcb4cb8f49eeb184eaa..f9a5991863571eea0291575fc829f7917ba5b3b5 100644
--- a/tests/test-i386.c
+++ b/tests/test-i386.c
@@ -653,7 +653,7 @@ void test_segs(void)
 {
     struct modify_ldt_ldt_s ldt;
     long long ldt_table[3];
-    int i, res, res2;
+    int res, res2;
     char tmp;
 
     ldt.entry_number = 1;
@@ -679,9 +679,13 @@ void test_segs(void)
     modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
 
     modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */
-    for(i=0;i<3;i++)
-        printf("%d: %016Lx\n", i, ldt_table[i]);
-
+#if 0
+    {
+        int i;
+        for(i=0;i<3;i++)
+            printf("%d: %016Lx\n", i, ldt_table[i]);
+    }
+#endif
     /* do some tests with fs or gs */
     asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
     asm volatile ("movl %0, %%gs" : : "r" (MK_SEL(2)));
diff --git a/translate-i386.c b/translate-i386.c
index f02c5ea67704d96f5414da0965aa3fe5d430c2d0..4c052a4e47ed4b402fd985713fa431d72d354ebd 100644
--- a/translate-i386.c
+++ b/translate-i386.c
@@ -38,10 +38,40 @@
 #define offsetof(type, field) ((size_t) &((type *)0)->field)
 #endif
 
+/* XXX: move that elsewhere */
 static uint16_t *gen_opc_ptr;
 static uint32_t *gen_opparam_ptr;
 int __op_param1, __op_param2, __op_param3;
 
+#ifdef __i386__
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{ /* no-op on i386 hosts; NOTE(review): presumably x86 keeps instruction fetch coherent with stores - confirm */
+}
+#endif
+
+#ifdef __powerpc__
+
+#define MIN_CACHE_LINE_SIZE 8 /* conservative value */
+
+/* write back the data cache and invalidate the instruction cache for [start, stop) */
+static inline void flush_icache_range(unsigned long start, unsigned long stop)
+{
+    unsigned long p;
+    /* round both bounds out to cache line boundaries */
+    start &= ~(MIN_CACHE_LINE_SIZE - 1);
+    stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1);
+    
+    for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
+        asm volatile ("dcbst 0,%0;" : : "r"(p) : "memory");
+    }
+    asm volatile ("sync" : : : "memory");
+    for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
+        asm volatile ("icbi 0,%0; sync;" : : "r"(p) : "memory");
+    }
+    asm volatile ("sync" : : : "memory");
+    asm volatile ("isync" : : : "memory");
+}
+#endif
+
 extern FILE *logfile;
 extern int loglevel;
 
@@ -3179,6 +3209,7 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
     DisasContext dc1, *dc = &dc1;
     uint8_t *pc_ptr;
     uint16_t *gen_opc_end;
+    int gen_code_size;
     long ret;
 #ifdef DEBUG_DISAS
     struct disassemble_info disasm_info;
@@ -3264,7 +3295,9 @@ int cpu_x86_gen_code(uint8_t *gen_code_buf, int max_code_size,
 #endif
 
     /* generate machine code */
-    *gen_code_size_ptr = dyngen_code(gen_code_buf, gen_opc_buf, gen_opparam_buf);
+    gen_code_size = dyngen_code(gen_code_buf, gen_opc_buf, gen_opparam_buf);
+    flush_icache_range((unsigned long)gen_code_buf, (unsigned long)(gen_code_buf + gen_code_size));
+    *gen_code_size_ptr = gen_code_size;
 
 #ifdef DEBUG_DISAS
     if (loglevel) {