SOP2?34/ASHR fix

This commit is contained in:
red-prig 2023-04-16 17:12:10 +03:00
parent 110109bac4
commit c2a42658d5
2 changed files with 24 additions and 42 deletions

View File

@ -21,8 +21,7 @@ type
procedure emit_S_ADDC_U32;
procedure emit_S_MUL_I32;
procedure OpISccNotZero(src:PsrRegNode);
procedure emit_S_LSHL_B32;
procedure emit_S_LSHR_B32;
procedure emit_S_SH(OpId:DWORD;rtype:TsrDataType);
procedure emit_S_AND_B32;
procedure emit_S_AND_B64;
procedure emit_S_ANDN2_B64;
@ -112,38 +111,20 @@ begin
get_scc^.current^.dtype:=dtBool; //implicit cast (int != 0)
end;
procedure TEmit_SOP2.emit_S_LSHL_B32;
procedure TEmit_SOP2.emit_S_SH(OpId:DWORD;rtype:TsrDataType);
Var
dst:PsrRegSlot;
src:array[0..1] of PsrRegNode;
begin
dst:=get_sdst7(FSPI.SOP2.SDST);
src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,dtUInt32);
src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,rtype);
src[1]:=fetch_ssrc9(FSPI.SOP2.SSRC1,dtUInt32);
src[1]:=OpAndTo(src[1],31);
src[1]^.PrepType(ord(dtUInt32));
Op2(Op.OpShiftLeftLogical,src[0]^.dtype,dst,src[0],src[1]);
OpISccNotZero(dst^.current); //SCC = (sdst.u != 0)
end;
procedure TEmit_SOP2.emit_S_LSHR_B32;
Var
dst:PsrRegSlot;
src:array[0..1] of PsrRegNode;
begin
dst:=get_sdst7(FSPI.SOP2.SDST);
src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,dtUInt32);
src[1]:=fetch_ssrc9(FSPI.SOP2.SSRC1,dtUInt32);
src[1]:=OpAndTo(src[1],31);
src[1]^.PrepType(ord(dtUInt32));
Op2(Op.OpShiftRightLogical,src[0]^.dtype,dst,src[0],src[1]);
Op2(OpId,src[0]^.dtype,dst,src[0],src[1]);
OpISccNotZero(dst^.current); //SCC = (sdst.u != 0)
end;
@ -380,8 +361,8 @@ Var
begin
dst:=get_sdst7(FSPI.SOP2.SDST);
src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,dtUint32);
src[1]:=fetch_ssrc9(FSPI.SOP2.SSRC1,dtUint32);
src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,dtUInt32);
src[1]:=fetch_ssrc9(FSPI.SOP2.SSRC1,dtUInt32);
offset:=OpAndTo(src[1],31);
count :=OpShrTo(src[1],16);
@ -399,8 +380,8 @@ Var
begin
dst:=get_sdst7(FSPI.SOP2.SDST);
src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,dtUint32);
src[1]:=fetch_ssrc9(FSPI.SOP2.SSRC1,dtUint32);
src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,dtUInt32);
src[1]:=fetch_ssrc9(FSPI.SOP2.SSRC1,dtUInt32);
src[0]:=OpAndTo(src[0],31);
src[1]:=OpAndTo(src[1],31);
@ -408,12 +389,12 @@ begin
src[0]^.PrepType(ord(dtUInt32));
src[1]^.PrepType(ord(dtUInt32));
one:=NewReg_q(dtUint32,1);
one:=NewReg_q(dtUInt32,1);
src[0]:=OpShrTo(one,src[0]); //(1 << src0)
src[0]:=OpISubTo(src[0],1); //-1
Op2(Op.OpShiftRightLogical,dtUint32,dst,src[0],src[1]);
Op2(Op.OpShiftRightLogical,dtUInt32,dst,src[0],src[1]);
end;
procedure TEmit_SOP2.emit_SOP2;
@ -431,8 +412,9 @@ begin
S_MUL_I32: emit_S_MUL_I32;
S_LSHL_B32: emit_S_LSHL_B32;
S_LSHR_B32: emit_S_LSHR_B32;
S_LSHL_B32: emit_S_SH(Op.OpShiftLeftLogical ,dtUInt32);
S_LSHR_B32: emit_S_SH(Op.OpShiftRightLogical ,dtUInt32);
S_ASHR_I32: emit_S_SH(Op.OpShiftRightArithmetic,dtInt32);
S_AND_B32: emit_S_AND_B32;
S_AND_B64: emit_S_AND_B64;

View File

@ -20,7 +20,7 @@ type
procedure emit_V_AND_B32;
procedure emit_V_OR_B32;
procedure emit_V_XOR_B32;
procedure emit_V_SH(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_SHNRM(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_SHREV(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_ADD_I32;
procedure emit_V_SUB_I32;
@ -119,7 +119,7 @@ begin
OpBitwiseXor(dst,src[0],src[1]);
end;
procedure TEmit_VOP2.emit_V_SH(OpId:DWORD;rtype:TsrDataType);
procedure TEmit_VOP2.emit_V_SHNRM(OpId:DWORD;rtype:TsrDataType);
Var
dst:PsrRegSlot;
src:array[0..1] of PsrRegNode;
@ -127,7 +127,7 @@ begin
dst:=get_vdst8(FSPI.VOP2.VDST);
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,rtype);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUInt32);
src[1]:=OpAndTo(src[1],31);
src[1]^.PrepType(ord(dtUInt32));
@ -142,7 +142,7 @@ Var
begin
dst:=get_vdst8(FSPI.VOP2.VDST);
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,rtype);
src[0]:=OpAndTo(src[0],31);
@ -160,7 +160,7 @@ begin
dst:=get_vdst8(FSPI.VOP2.VDST);
car:=get_vcc0;
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
OpIAddExt(dst,car,src[0],src[1]);
@ -510,12 +510,12 @@ begin
V_OR_B32 : emit_V_OR_B32;
V_XOR_B32 : emit_V_XOR_B32;
V_LSHL_B32 : emit_V_SH(Op.OpShiftLeftLogical,dtUint32);
V_LSHLREV_B32: emit_V_SHREV(Op.OpShiftLeftLogical,dtUint32);
V_LSHR_B32 : emit_V_SH(Op.OpShiftRightLogical,dtUint32);
V_LSHRREV_B32: emit_V_SHREV(Op.OpShiftRightLogical,dtUint32);
V_ASHR_I32 : emit_V_SH(Op.OpShiftRightLogical,dtInt32);
V_ASHRREV_I32: emit_V_SHREV(Op.OpShiftRightLogical,dtInt32);
V_LSHL_B32 : emit_V_SHNRM(Op.OpShiftLeftLogical ,dtUint32);
V_LSHLREV_B32: emit_V_SHREV(Op.OpShiftLeftLogical ,dtUint32);
V_LSHR_B32 : emit_V_SHNRM(Op.OpShiftRightLogical ,dtUint32);
V_LSHRREV_B32: emit_V_SHREV(Op.OpShiftRightLogical ,dtUint32);
V_ASHR_I32 : emit_V_SHNRM(Op.OpShiftRightArithmetic,dtInt32);
V_ASHRREV_I32: emit_V_SHREV(Op.OpShiftRightArithmetic,dtInt32);
V_ADD_I32 : emit_V_ADD_I32;
V_SUB_I32 : emit_V_SUB_I32;