Check-in rth's support for 64-bit arithmetic, with a few mods to make it build on Win32.
fur%netscape.com 1998-12-23 20:11:59 +00:00
parent 5c48535a90
commit e3d43ac36b
2 changed files with 249 additions and 327 deletions


@@ -695,15 +695,15 @@ emit_Logic_L(Primitive& inPrimitive, x86DoubleOpDirCode insnType)
}
#if defined(WIN32)
extern void x86Mul64Bit();
extern void x86Div64Bit();
extern void x86Mod64Bit();
extern void x86Shl64Bit();
extern void x86Shr64Bit();
extern void x86Sar64Bit();
extern void x86ThreeWayCMP_L();
extern void x86ThreeWayCMPC_L();
extern void x86Extract64Bit();
extern int64 __stdcall x86Mul64Bit(int64 a, int64 b);
extern int64 __stdcall x86Div64Bit(int64 a, int64 b);
extern int64 __stdcall x86Mod64Bit(int64 a, int64 b);
extern int64 __stdcall x86Shl64Bit(int64 a, int b);
extern uint64 __stdcall x86Shr64Bit(uint64 a, int b);
extern int64 __stdcall x86Sar64Bit(int64 a, int b);
extern int64 __stdcall x86ThreeWayCMP_L(int64 a, int64 b);
extern int64 __stdcall x86ThreeWayCMPC_L(int64 a, int64 b);
extern int64 __stdcall x86Extract64Bit(int64 a, int b);
#elif defined(LINUX) || defined(FREEBSD)
extern "C" {
extern void x86Mul64Bit(void);
@@ -733,55 +733,55 @@ static void x86Extract64Bit() {trespass("Not implemented");}
void x86Win32Emitter::
emit_Mul_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86Mul64Bit);
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86Mul64Bit);
}
void x86Win32Emitter::
emit_Div_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86Div64Bit);
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86Div64Bit);
}
void x86Win32Emitter::
emit_Mod_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86Mod64Bit);
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86Mod64Bit);
}
void x86Win32Emitter::
emit_3wayCmpL_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86ThreeWayCMP_L, &(inPrimitive.nthInputVariable(0)));
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86ThreeWayCMP_L, &(inPrimitive.nthInputVariable(0)));
}
void x86Win32Emitter::
emit_3wayCmpCL_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86ThreeWayCMP_L, &(inPrimitive.nthInputVariable(0)));
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86ThreeWayCMP_L, &(inPrimitive.nthInputVariable(0)));
}
void x86Win32Emitter::
emit_Shl_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86Shl64Bit);
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86Shl64Bit);
}
void x86Win32Emitter::
emit_Shr_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86Shr64Bit);
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86Shr64Bit);
}
void x86Win32Emitter::
emit_Sar_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86Sar64Bit);
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86Sar64Bit);
}
void x86Win32Emitter::
emit_Ext_L(Primitive& inPrimitive)
{
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, &x86Extract64Bit);
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)())&x86Extract64Bit);
}
void x86Win32Emitter::


@@ -16,10 +16,10 @@
* Reserved.
*/
//
// File: x86Win32_support.cpp
// File: x86Win32_support.cpp
//
// Authors: Peter DeSantis
// Simon Holmes a Court
// Authors: Peter DeSantis
// Simon Holmes a Court
//
#include "NativeCodeCache.h"
@@ -28,160 +28,173 @@
#include "MemoryAccess.h"
void* JNIenv = 0;
void* JNIenv = 0;
extern ClassWorld world;
#define Naked __declspec( naked )
/*
+-------------------------------+
| return address |
========+===============================+========
| EBP link |
+-------------------------------+
| Saved Non-volatiles |
| eg. EDI |
| ESI |
| EBX |
+-------------------------------+
#ifdef __GNUC__
/*
Following code written by rth@cygnus.org. Comment from fur@netscape.com.
I suspect that your code will work OK if there are no exceptions while a method is being
compiled, but you'll probably crash if the compilation terminates with an exception. That's
because the Java exception code relies on the presence of a "guard frame" when calling from
JIT'ed to native code so as to restore the callee-saved registers when unwinding the stack.
See x86ExceptionHandler.cpp for some details. This code is "temporary". What we
eventually hoped to do was to use a different calling convention (in terms of which registers
are volatile) for calls that make an exceptional return versus a normal return, so that the stack
unwinding code did not have to restore any registers. Until we can educate the register
allocator about these constraints, we'll need to retain this hacky guard frame and the various
bits of code that are used to create it. (See also x86Win32InvokeNative.cpp).
Some of the exception debugging code makes assumptions about the location of the call
instruction inside the static compile stub - assumptions that are broken by the new code. (And
the exception debugging code should probably be changed to get rid of that dependency.)
*/
// Function: staticCompileStub
//
// WARNING: if you change this method, you must change compileStubReEntryPoint below.
// It must point to the instruction after the invocation of compileAndBackPatchMethod
static Naked void staticCompileStub()
/* Go through one level of extra indirection to isolate ourselves from name
mangling and cdecl vs stdcall changes. */
static void* compileStub_1(const CacheEntry&, void *) __attribute__((regparm(2), unused));
static void* compileStub_1(const CacheEntry&, void *) __asm__("compileStub_1");
static void*
compileStub_1(const CacheEntry& inCacheEntry, void *retAddr)
{
_asm
{
// remove cache entry from the stack
pop eax
// make frame
push ebp
mov ebp,esp
// save all volatiles (especially for exception handler)
push edi
push esi
push ebx
// call compileAndBackPatchMethod with args
// third argument is not used
push [esp + 16] // second argument -- return address
push eax // first argument -- cacheEntry
call compileAndBackPatchMethod
// remove args
pop edx // <--- compileStubReEntryPoint
pop edx
pop ebx // Restore volatiles
pop esi
pop edi
// remove frame
mov esp,ebp
pop ebp
// jump to the compiled method
push eax // ret will jump to this address
ret // Jump to function leaving the return address at the top of the stack
}
return compileAndBackPatchMethod(inCacheEntry, retAddr);
}
extern void compileStub() __asm__("compileStub");
asm("\n\
compileStub:\n\
movl 0(%esp), %edx\n\
call compileStub_1\n\
jmpl *%eax");
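For orientation, here is a hedged C++ simulation of the trampoline's control flow. CacheEntrySim, compileStub_1_sim, and compiledMethod are hypothetical stand-ins; the real path carries the CacheEntry* in %eax and the return address in %edx (the regparm(2) convention) and finishes with an indirect jump, not a call.

#include <cstdio>

struct CacheEntrySim { const char* name; };

static void compiledMethod() { std::printf("running compiled code\n"); }

// Stand-in for compileAndBackPatchMethod: "compiles" the method and
// returns the entry point the trampoline should jump to.
static void* compileStub_1_sim(const CacheEntrySim& e, void* retAddr)
{
    std::printf("compiling %s; call site returns to %p\n", e.name, retAddr);
    return (void*)&compiledMethod;
}

int main()
{
    CacheEntrySim entry = { "Foo.bar" };
    void* retAddr = (void*)0;                          // 0(%esp) in the real stub
    void (*compiled)() = (void (*)())compileStub_1_sim(entry, retAddr);
    compiled();                                        // the trampoline's jmpl *%eax
    return 0;
}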
#else /* !__GNUC__ */
static void __declspec( naked )
compileStub()
{
_asm
{
// eax contains the cache entry.
// make frame
push ebp
mov ebp,esp
// save all volatiles (especially for exception handler)
// ??? Um, these are _non_ volatile, i.e. callee-saved.
// We shouldn't have to do anything with them.
push edi
push esi
push ebx
// call compileAndBackPatchMethod with args
// third argument is not used
push [esp + 16] // second argument -- return address
push eax // first argument -- cacheEntry
call compileAndBackPatchMethod
// remove args
pop edx // <--- compileStubReEntryPoint
pop edx
pop ebx // Restore volatiles
pop esi
pop edi
// remove frame
mov esp,ebp
pop ebp
// jump to the compiled method
jmp eax
}
}
#endif /* __GNUC__ */
#ifdef DEBUG
// Pointer to the instruction after the call (used by exception handler to check
// I wanted to use:
// void* compileStubReEntryPoint = (void*) ((Uint8*)staticCompileStub + 17);
// but MSDev appears to have a bug, in that compileStubReEntryPoint will be set == (void*)staticCompileStub
// which is clearly wrong.
void* compileStubAddress = (void*)staticCompileStub;
void* compileStubReEntryPoint = (Uint8*)compileStubAddress + 17;
void* compileStubAddress = (void*)compileStub;
// void* compileStubReEntryPoint = (Uint8*)compileStubAddress + 15; // Correct address ?
void* compileStubReEntryPoint = NULL;
#endif // DEBUG
static Naked void compileStub()
{
_asm {
push 0xEFBEADDE // This is a dummy immediate that will be filled in by
jmp staticCompileStub // generateCompileStub with the cacheEntry.
}
}
void *
generateNativeStub(NativeCodeCache& inCache, const CacheEntry& inCacheEntry, void *nativeFunction)
{
Method* method = inCacheEntry.descriptor.method;
//Uint32 nWords = method->getSignature().nArguments;
Uint32 nWords = method->getArgsSize()/sizeof(Int32);
Method* method = inCacheEntry.descriptor.method;
//Uint32 nWords = method->getSignature().nArguments;
Uint32 nWords = method->getArgsSize()/sizeof(Int32);
assert(method->getModifiers() & CR_METHOD_NATIVE);
assert(nWords <= 256);
assert(method->getModifiers() & CR_METHOD_NATIVE);
assert(nWords <= 256);
extern void *sysInvokeNativeStubs[];
Uint8 stubSize = 10;
void* stub;
// Write out the native stub
stub = inCache.acquireMemory(stubSize);
Uint8* where = (Uint8*)stub;
*where++ = 0x68; // pushl
writeLittleWordUnaligned(where, (uint32)(nativeFunction));
where += 4;
*where++ = 0xe9; // jmp
writeLittleWordUnaligned(where, (Uint8 *) sysInvokeNativeStubs[nWords] - (where + 4));
extern void *sysInvokeNativeStubs[];
Uint8 stubSize = 10;
void* stub;
// Return the address of the stub.
return ((void*)stub);
// Write out the native stub
stub = inCache.acquireMemory(stubSize);
Uint8* where = (Uint8*)stub;
*where++ = 0x68; // pushl
writeLittleWordUnaligned(where, (uint32)(nativeFunction));
where += 4;
*where++ = 0xe9; // jmp
writeLittleWordUnaligned(where, (Uint8 *) sysInvokeNativeStubs[nWords] - (where + 4));
// Return the address of the stub.
return ((void*)stub);
}
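For reference, a minimal sketch of the 10-byte stub emitted above, assuming a little-endian x86 target and a caller-supplied buffer in place of NativeCodeCache::acquireMemory; emitPushJmpStub is a hypothetical name.

#include <cstdint>
#include <cstring>

// pushl imm32 is 0x68 plus 4 bytes; jmp rel32 is 0xE9 plus 4 bytes. The
// rel32 displacement is measured from the end of the jmp, i.e. stub + 10.
static void emitPushJmpStub(uint8_t* stub, uint32_t nativeFunction, uint32_t invokeStub)
{
    stub[0] = 0x68;                                    // pushl $nativeFunction
    std::memcpy(stub + 1, &nativeFunction, 4);         // unaligned store, as writeLittleWordUnaligned does
    stub[5] = 0xE9;                                    // jmp sysInvokeNativeStubs[nWords]
    uint32_t rel = invokeStub - (uint32_t)(uintptr_t)(stub + 10);
    std::memcpy(stub + 6, &rel, 4);
}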
void*
void*
generateCompileStub(NativeCodeCache& inCache, const CacheEntry& inCacheEntry)
{
void* stub;
uint8 stubSize;
uint8 argumentOffset;
uint32 locationOfCompileStub;
stubSize = 10;
// Write out the dynamic compile stub
stub = inCache.acquireMemory(stubSize);
argumentOffset = 1;
locationOfCompileStub = (uint32)compileStub ;
{
void* stub;
uint8 *where;
// Copy the stub into the allocated memory
memcpy(stub, (void*)locationOfCompileStub, stubSize);
// Write out the dynamic compile stub
// Write your cacheEntry into the proper spot in the stub
uint8* loadCacheEntryInstruction = (uint8*)stub + argumentOffset;
writeLittleWordUnaligned((void*)loadCacheEntryInstruction, (uint32)(&inCacheEntry));
stub = inCache.acquireMemory(10);
where = (uint8 *)stub;
// Fix the new dynamic stub to jump to the static stub
uint32* relativeCallLocation = (uint32*)(loadCacheEntryInstruction + 5);
uint32 newRelativeDisplacement = locationOfCompileStub - (uint32)stub + *(uint32*)relativeCallLocation;
writeLittleWordUnaligned((void*)relativeCallLocation, newRelativeDisplacement);
// movl $inCacheEntry, %eax
*where++ = 0xb8;
writeLittleWordUnaligned(where, (uint32)&inCacheEntry);
where += 4;
// Return the address of the dynamic stub.
return ((void*)stub);
// jmp compileStub
*where++ = 0xe9;
writeLittleWordUnaligned(where, (Uint8 *) compileStub - (where + 4));
// Return the address of the dynamic stub.
return stub;
}
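The dynamic compile stub has the same 10-byte shape; a sketch under the same assumptions (emitDynamicCompileStub is a hypothetical name). Loading the CacheEntry* into %eax is exactly what satisfies the regparm(2) contract of compileStub_1 above.

#include <cstdint>
#include <cstring>

static void emitDynamicCompileStub(uint8_t* stub, uint32_t cacheEntry, uint32_t compileStubAddr)
{
    stub[0] = 0xB8;                                    // movl $inCacheEntry, %eax
    std::memcpy(stub + 1, &cacheEntry, 4);
    stub[5] = 0xE9;                                    // jmp compileStub
    uint32_t rel = compileStubAddr - (uint32_t)(uintptr_t)(stub + 10);
    std::memcpy(stub + 6, &rel, 4);
}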
void*
backPatchMethod(void* inMethodAddress, void* inLastPC, void* /*inUserDefined*/)
{
uint32 curAddress = (uint32) inLastPC;
uint32 methodAddress = (uint32) inMethodAddress;
// Compute the relative branch
uint32* relativeBranch = ((uint32*)inLastPC)-1;
int32 offset = methodAddress - curAddress;
// Backpatch the method.
writeLittleWordUnaligned((void*)relativeBranch, offset);
uint32 curAddress = (uint32) inLastPC;
uint32 methodAddress = (uint32) inMethodAddress;
return (inMethodAddress);
// Compute the relative branch
uint32* relativeBranch = ((uint32*)inLastPC)-1;
int32 offset = methodAddress - curAddress;
// Backpatch the method.
writeLittleWordUnaligned((void*)relativeBranch, offset);
return (inMethodAddress);
}
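A hedged sketch of the back-patch arithmetic: inLastPC is assumed to be the address just after a near call, so the call's rel32 operand occupies the preceding 4 bytes, and since rel32 displacements are relative to the next instruction the new displacement is simply target minus inLastPC.

#include <cstdint>
#include <cstring>

static void backPatchCallSite(uint32_t methodAddress, uint8_t* lastPC)
{
    uint32_t offset = methodAddress - (uint32_t)(uintptr_t)lastPC;  // target - next instruction
    std::memcpy(lastPC - 4, &offset, 4);               // overwrite the call's rel32 field
}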
// Warning silencing stuff
@@ -194,242 +207,151 @@ backPatchMethod(void* inMethodAddress, void* inLastPC, void* /*inUserDefined*/)
// x86Extract64Bit
//
// Purpose: signed right-aligned field extraction
// In: 64 bit source (on stack)
// 32 bit extraction size (on stack)
// Out: 64 bit result
// Note: Only works in range 1 <= b <= 63, b is extraction amount
Naked void x86Extract64Bit()
// Purpose: signed right-aligned field extraction
// In: 64 bit source (on stack)
// 32 bit extraction size (on stack)
// Out: 64 bit result
// Note: Only works in range 1 <= b <= 63, b is extraction amount
int64 __stdcall
x86Extract64Bit(int64 src, int b)
{
__asm
{
mov eax, [esp+4] // load low byte of a
mov ecx, [esp+12] // load shift amount
cmp ecx, 0x20
jg greater32
// extract <= than 32 bits
// shift amount = 32 - extract
neg ecx
add ecx, 0x20 // ecx = 32 - extract
shl eax, cl
sar eax, cl
cdq // sign extend into EDX:EAX
ret 12
greater32:
// ext > 32 bits
// shift amount = 64 - extract
mov edx, [esp+8] // load high byte of a
neg ecx
add ecx, 0x40 // ecx = 64 - extract
shl edx, cl
sar edx, cl
ret 12
}
if (b <= 32)
{
b = 32 - b;
return (int)src << b >> b;
}
else
{
b = 64 - b;
return src << b >> b; // full 64-bit shift keeps the low word and sign-extends the field
}
}
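A few hedged spot checks of the contract (the low b bits of src, sign-extended to 64 bits), assuming the definition above is linked in:

#include <cassert>

void extractExamples()
{
    assert(x86Extract64Bit(0x17F, 8) == 127);   // low 8 bits are 0x7F
    assert(x86Extract64Bit(0x1FF, 8) == -1);    // low 8 bits are 0xFF, sign bit set
    assert(x86Extract64Bit(-2, 63) == -2);      // a 63-bit field preserves -2
}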
// 3WayCompare
//
// Purpose: compare two longs
// In: two longs on the stack
// Out: depends on condition flags:
// less = -1
// equal = 0
// greater = 1
Naked void x86ThreeWayCMP_L()
// Purpose: compare two longs
// In: two longs on the stack
// Out: depends on condition flags:
// less = -1
// equal = 0
// greater = 1
int64 __stdcall
x86ThreeWayCMP_L(int64 a, int64 b)
{
// edx:eax is tos, ecx:ebx is nos
__asm
{
mov ecx,[esp+8]
mov edx,[esp+16]
cmp ecx,edx
jl lcmp_m1
jg lcmp_1
mov ecx,[esp+4]
mov edx,[esp+12]
cmp ecx,edx
ja lcmp_1
mov eax,0
jb lcmp_m1
ret 16
align 4
lcmp_m1:
mov eax,-1
ret 16
align 4
lcmp_1:
mov eax,1
ret 16
}
return (a > b) - (a < b);
}
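The branchless (a > b) - (a < b) idiom maps the three orderings to -1, 0, and 1 with no conditional jumps in the C source; a quick hedged check, assuming the definition above:

#include <cassert>

void threeWayExamples()
{
    assert(x86ThreeWayCMP_L(1, 2) == -1);   // less
    assert(x86ThreeWayCMP_L(2, 2) == 0);    // equal
    assert(x86ThreeWayCMP_L(3, 2) == 1);    // greater
}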
// 3WayCompare
//
// Purpose: compare two longs
// In: two longs on the stack
// Out: depends on condition flags:
// less = 1
// equal = 0
// greater = -1
Naked void x86ThreeWayCMPC_L()
// Purpose: compare two longs
// In: two longs on the stack
// Out: depends on condition flags:
// less = 1
// equal = 0
// greater = -1
int64 __stdcall
x86ThreeWayCMPC_L(int64 a, int64 b)
{
// edx:eax is tos, ecx:ebx is nos
__asm
{
mov ecx,[esp+8]
mov edx,[esp+16]
cmp ecx,edx
jl lcmp_m1
jg lcmp_1
mov ecx,[esp+4]
mov edx,[esp+12]
cmp ecx,edx
ja lcmp_1
mov eax,0
jb lcmp_m1
ret 16
align 4
lcmp_m1:
mov eax,1
ret 16
align 4
lcmp_1:
mov eax,-1
ret 16
}
return (a < b) - (a > b);
}
// llmul
//
// Purpose: long multiply (same for signed/unsigned)
// In: args are passed on the stack:
// 1st pushed: multiplier (QWORD)
// 2nd pushed: multiplicand (QWORD)
// Out: EDX:EAX - product of multiplier and multiplicand
// Note: parameters are removed from the stack
// Uses: ECX
Naked void x86Mul64Bit()
// In: args are passed on the stack:
// 1st pushed: multiplier (QWORD)
// 2nd pushed: multiplicand (QWORD)
// Out: EDX:EAX - product of multiplier and multiplicand
// Note: parameters are removed from the stack
// Uses: ECX
int64 __stdcall
x86Mul64Bit(int64 a, int64 b)
{
// IMPLEMENT: Needs to be written
_asm
{
int 3
}
return a * b;
}
// lldiv
//
// Purpose: signed long divide
// In: args are passed on the stack:
// 1st pushed: divisor (QWORD)
// 2nd pushed: dividend (QWORD)
// Out: EDX:EAX contains the quotient (dividend/divisor)
// Note: parameters are removed from the stack
// Uses: ECX
Naked void x86Div64Bit()
// In: args are passed on the stack:
// 1st pushed: divisor (QWORD)
// 2nd pushed: dividend (QWORD)
// Out: EDX:EAX contains the quotient (dividend/divisor)
// Note: parameters are removed from the stack
// Uses: ECX
int64 __stdcall
x86Div64Bit(int64 dividend, int64 divisor)
{
// IMPLEMENT: Needs to be written
_asm
{
int 3
}
return dividend / divisor;
}
// llrem
//
// Purpose: signed long remainder
// In: args are passed on the stack:
// 1st pushed: divisor (QWORD)
// 2nd pushed: dividend (QWORD)
// Out: EDX:EAX contains the quotient (dividend/divisor)
// Note: parameters are removed from the stack
// Uses: ECX
Naked void x86Mod64Bit()
// In: args are passed on the stack:
// 1st pushed: divisor (QWORD)
// 2nd pushed: dividend (QWORD)
// Out: EDX:EAX contains the remainder (dividend%divisor)
// Note: parameters are removed from the stack
// Uses: ECX
int64 __stdcall
x86Mod64Bit(int64 dividend, int64 divisor)
{
// IMPLEMENT: Needs to be written
_asm
{
int 3
}
return dividend % divisor;
}
// llshl
//
// Purpose: long shift left
// In: args are passed on the stack: (FIX make fastcall)
// 1st pushed: amount (int)
// 2nd pushed: source (long)
// Out: EDX:EAX contains the result
// Note: parameters are removed from the stack
// Uses: ECX, destroyed
Naked void x86Shl64Bit()
// In: args are passed on the stack: (FIX make fastcall)
// 1st pushed: amount (int)
// 2nd pushed: source (long)
// Out: EDX:EAX contains the result
// Note: parameters are removed from the stack
// Uses: ECX, destroyed
int64 __stdcall
x86Shl64Bit(int64 src, int amount)
{
// IMPLEMENT: Needs to be written
_asm
{
int 3
}
return src << amount;
}
// llshr
//
// Origin: MSDev. modified
// Purpose: long shift right
// In: args are passed on the stack: (FIX make fastcall)
// 1st pushed: amount (int)
// 2nd pushed: source (long)
// Out: EDX:EAX contains the result
// Note: parameters are removed from the stack
// Uses: ECX, destroyed
Naked void x86Shr64Bit()
// In: args are passed on the stack: (FIX make fastcall)
// 1st pushed: amount (int)
// 2nd pushed: source (long)
// Out: EDX:EAX contains the result
// Note: parameters are removed from the stack
// Uses: ECX, destroyed
uint64 __stdcall
x86Shr64Bit(uint64 src, int amount)
{
// IMPLEMENT: Needs to be written
_asm
{
int 3
}
return src >> amount;
}
// llsar
//
// Origin: MSDev. modified
// Purpose: long shift right signed
// In: args are passed on the stack: (FIX make fastcall)
// 1st pushed: amount (int)
// 2nd pushed: source (long)
// Out: EDX:EAX contains the result
// Note: parameters are removed from the stack
// Uses: ECX, destroyed
Naked void x86Sar64Bit()
// In: args are passed on the stack: (FIX make fastcall)
// 1st pushed: amount (int)
// 2nd pushed: source (long)
// Out: EDX:EAX contains the result
// Note: parameters are removed from the stack
// Uses: ECX, destroyed
int64 __stdcall
x86Sar64Bit(int64 src, int amount)
{
// IMPLEMENT: Needs to be written
_asm
{
int 3
}
return src >> amount;
}
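Note that Shr and Sar above differ only in operand type: shifting uint64 right is a logical (zero-filling) shift, while shifting int64 right is arithmetic (sign-filling) on the MSVC and GCC targets this file supports (strictly speaking the signed case is implementation-defined in C/C++). A hedged check, assuming the int64/uint64 typedefs used in this file:

#include <cassert>

void shiftExamples()
{
    assert(x86Shr64Bit(0x8000000000000000ULL, 63) == 1);   // zero fill from the left
    assert(x86Sar64Bit(-8, 1) == -4);                      // sign fill from the left
    assert(x86Shl64Bit(1, 40) == (int64)1 << 40);          // plain left shift
}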
//================================================================================