dos/exe improvements, not enabled yet.

This commit is contained in:
Markus F.X.J. Oberhumer 2007-03-06 15:08:24 +01:00
parent d9722ad6b3
commit d29c8df45e
5 changed files with 695 additions and 692 deletions

View File

@ -103,9 +103,6 @@ void __pascal p4fshlv_v(unsigned char v, uint32_t __far *a) { *a <<= v; }
/* Shift the 32-bit value stored at *a left by v bits in place and return the
 * updated value. Variant taking a __near pointer with __pascal linkage. */
uint32_t __pascal p4nshlv(unsigned char v, uint32_t __near *a) { return *a <<= v; }
/* Shift the 32-bit value stored at *a left by v bits in place and return the
 * updated value. Variant taking a __far pointer with __pascal linkage. */
uint32_t __pascal p4fshlv(unsigned char v, uint32_t __far *a) { return *a <<= v; }
/* Left-shift a 32-bit quantity that is passed as two 16-bit halves
 * (h = high word, l = low word) by v bits and return it as one uint32_t.
 *
 * x receives the bits that leave the top of l; they are OR-ed into the
 * already shifted h, and the result is assembled as h * 65536 + l.
 *
 * NOTE(review): for v == 0 the expression `l >> (16 - v)` uses a shift count
 * of 16, which is out of range where int is 16 bits wide; presumably callers
 * only pass 1..15 -- confirm. */
uint32_t __cdecl shlv_2(uint16_t h, uint16_t l, unsigned v)
{ uint16_t x = l >> (16 - v); l <<= v; h <<= v; h |= x; return h * 65536ul + l; }
/* Convert an hptr to an hptrdiff_t using a plain cast. */
hptrdiff_t __cdecl hptr2int(hptr a) { return (hptrdiff_t) a; }
/* Convert an hptrdiff_t to an hptr using a plain cast. */
hptr __cdecl int2hptr(hptrdiff_t a) { return (hptr) a; }

View File

@ -195,29 +195,42 @@ def main(argv):
if opts.call_rewrite and inst in ["call"]:
k, v = parse_label(inst, args)
if v[:2] == [1, 2]: # external 2-byte
if k == "__aNahdiff":
s = [
["push", "word ptr [bp+8]"],
["push", "word ptr [bp+6]"],
["push", r"word ptr \[bp([+-]\d+)\]$"],
["push", r"word ptr \[bp([+-]\d+)\]$"],
]
dpos = omatch(i-1, -4, s)
if dpos:
orewrite_inst(i, "*DEL*", "", dpos)
continue
if k in ["__LMUL", "__U4M",]:
s1 = [
["mov", "bx,0x300"],
["xor", "cx,cx"],
]
s2 = [
["shl", "ax,1"],
["rcl", "dx,1"],
]
dpos1 = omatch(i-1, -2, s1)
dpos2 = omatch(i+1, 2, s2)
if dpos1 and dpos2:
orewrite_inst(i, "M_U4M_dxax_0x0600", "", dpos1 + dpos2)
continue
s = [
["mov", "bx,word ptr [bx]"],
["xor", "cx,cx"],
]
dpos = omatch(i-1, -2, s, debug=0)
if 0 and dpos:
orewrite_inst(i, "M_LMUL_dxax_00bx_ptr", "", dpos)
orewrite_inst(i, "M_U4M_dxax_00bx_ptr", "", dpos)
continue
dpos = omatch(i-1, -1, s)
if dpos:
orewrite_inst(i, "M_LMUL_dxax_00bx", "", dpos)
continue
if k == "__aNahdiff":
s = [
["push", "word ptr [bp+8]"],
["push", "word ptr [bp+6]"],
["push", "word ptr [bp-66]"],
["push", "word ptr [bp-68]"],
]
dpos = omatch(i-1, -4, s)
if dpos:
orewrite_inst(i, "*DEL*", "", dpos)
orewrite_inst(i, "M_U4M_dxax_00bx", "", dpos)
continue
if k == "__PIA":
s = [
@ -257,6 +270,36 @@ def main(argv):
if dpos:
orewrite_inst(i, "M_shld_8", "", dpos)
continue
s1 = [
["mov", r"^c[lx],0x8$"],
["shl", "si,1"],
["rcl", "di,1"],
]
s2 = [
["les", r"^bx,dword ptr \[bp([+-]\d+)\]$"],
]
dpos1 = omatch(i-1, -3, s1)
dpos2 = omatch(i+1, 1, s2)
if 1 and dpos1 and dpos2:
# bx and cx are free for use
orewrite_inst(i, "M_shld_disi_8_bxcx", "", dpos1)
continue
s1 = [
["mov", "ax,si"],
["mov", r"^c[lx],0x8$"],
["shl", "ax,1"],
["rcl", "di,1"],
]
s2 = [
["mov", "si,ax"],
["les", r"^bx,dword ptr \[bp([+-]\d+)\]$"],
]
dpos1 = omatch(i-1, -4, s1)
dpos2 = omatch(i+1, 2, s2)
if 1 and dpos1 and dpos2:
# bx and cx are free for use
orewrite_inst(i, "M_shld_diax_8_bxcx", "", dpos1[-3:])
continue
s1 = [
["mov", r"^c[lx],0x8$"],
["shl", r"^word ptr \[bp([+-]\d+)\],1$"],

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -144,11 +144,11 @@ L1:
/*************************************************************************
// support macros: LMUL, shld, shrd
// support macros: U4M, shld, shrd
**************************************************************************/
// umul32: dx:ax = dx:ax * 00:bx
.macro M_LMUL_dxax_00bx
.macro M_U4M_dxax_00bx
// mult high-word
mov cx, ax // save ax
mov ax, dx
@ -162,7 +162,7 @@ L1:
// umul32: dx:ax = dx:ax * word ptr [bx]
.macro M_LMUL_dxax_00bx_ptr
.macro M_U4M_dxax_00bx_ptr
// mult high-word
mov cx, ax // save ax
mov ax, dx
@ -176,7 +176,7 @@ L1:
// umul32: dx:ax = ax:cx * 00:bx
.macro M_LMUL_axcx_00bx
.macro M_U4M_axcx_00bx
// mult high-word
mul bx
xchg ax, cx // save high-word result, get low
@ -187,14 +187,95 @@ L1:
.endm
// umul32: dx:ax = dx:ax * 0x0600
//
// Three interchangeable implementations are kept side by side and selected
// by the preprocessor. With the conditions as written, only the first one
// (multiply by 0x300, then double the result) is assembled.
//
// Register use: the two multiply-based variants load bx and expand
// M_U4M_dxax_00bx, which uses cx as scratch; the shift/add variant
// overwrites both bx and cx directly.
.macro M_U4M_dxax_0x0600
// FIXME: compute clocks and optimize this
#if 1
// variant 1 (active): dx:ax *= 0x300, then dx:ax <<= 1 gives * 0x600
// code size: 18 bytes
// i086: > 140 clocks (mul needs 70 clocks)
// i286: > 26 clocks (mul needs 13 clocks)
mov bx, 0x300
M_U4M_dxax_00bx
// 32-bit shift left by one: the bit leaving ax enters dx through the carry
shl ax
rcl dx
#elif 1
// variant 2 (not assembled while the #if above is 1): one multiply by 0x600
// code size: 14 bytes
// i086: > 140 clocks (mul needs 70 clocks)
// i286: > 26 clocks (mul needs 13 clocks)
mov bx, 0x600
M_U4M_dxax_00bx
#else
// variant 3 (not assembled): no mul at all; builds v * 6 from shifts and
// one 32-bit add, then multiplies by 0x100 with M_shld_8
// code size: 16+8 == 24 bytes
// i086: 18+9 == 27 clocks
// i286: 16+8 == 24 clocks
// FIXME: can we further optimize this ?
shl ax
rcl dx // dx:ax <<= 1 v * 2
mov cx, dx
mov bx, ax // cx:bx = dx:ax v * 2
shl ax
rcl dx // dx:ax <<= 1 v * 4
add ax, bx
adc dx, cx // dx:ax += cx:bx v * 6
M_shld_8 // dx:ax <<= 8 v * 0x600
#endif
.endm
// shld: dx:ax <<= 8
//
// Implemented as a byte shuffle instead of eight shift/rotate steps: every
// byte of dx:ax moves up one position, the previous contents of dh are
// dropped and al becomes zero. Only dx and ax are written.
.macro M_shld_8
// code size: 8 bytes
// i086: 9 clocks
// i286: 8 clocks
// the order matters: each destination byte is overwritten only after its
// old value has been copied upwards (dh first, al last)
mov dh, dl
mov dl, ah
mov ah, al
xor al, al
.endm
// shld: di:si <<= 8; bx and cx are free
//
// Shifts the 32-bit value held in di:si (di = high word) left by 8 bits.
// The caller guarantees that bx and cx may be overwritten.
// Two implementations are provided; with the condition as written only the
// loop variant is assembled. It uses cx as the loop counter and does not
// touch bx.
.macro M_shld_disi_8_bxcx
// FIXME: compute clocks and optimize this
#if 1
// variant 1 (active): eight single-bit 32-bit shifts driven by cx
// NOTE(review): `local L1` relies on the assembler supporting macro-local
// label declarations; a label named L1 also appears elsewhere in this file,
// so a clash is possible if the declaration is not honoured -- confirm.
local L1
mov cx, 8
L1: shl si
rcl di
loop L1
#else
// variant 2 (not assembled): byte shuffle through bx and cx, since the
// byte halves of di and si cannot be addressed directly
mov bx, di
mov cx, si
// bx = (low byte of di) : (high byte of si)
mov bh, bl
mov bl, ch
// cx = (low byte of si) : 0
mov ch, cl
xor cl, cl
mov di, bx
mov si, cx
#endif
.endm
// shld: di:ax <<= 8; bx and cx are free
//
// Shifts the 32-bit value held in di:ax (di = high word) left by 8 bits.
// The caller guarantees that bx and cx may be overwritten.
// Two implementations are provided; with the condition as written only the
// loop variant is assembled. It uses cx as the loop counter and does not
// touch bx; the disabled variant uses bx and leaves cx alone.
.macro M_shld_diax_8_bxcx
// FIXME: compute clocks and optimize this
#if 1
// variant 1 (active): eight single-bit 32-bit shifts driven by cx
// NOTE(review): `local L1` relies on the assembler supporting macro-local
// label declarations; a label named L1 also appears elsewhere in this file,
// so a clash is possible if the declaration is not honoured -- confirm.
local L1
mov cx, 8
L1: shl ax
rcl di
loop L1
#else
// variant 2 (not assembled): byte shuffle; only di needs a detour through
// bx because ah/al are directly addressable
mov bx, di
// bx = (low byte of di) : (high byte of ax)
mov bh, bl
mov bl, ah
// ax = (low byte of ax) : 0
mov ah, al
xor al, al
mov di, bx
#endif
.endm
.macro M_shld_8_bp h l
mov dx, word ptr[bp+h]
mov ax, word ptr[bp+l]