NOTE(review): this patch was recovered from a copy whose line breaks were
collapsed and whose text between "<" and ">" had been stripped, which fused
each emit_V_MBCNT_LO_U32_B32 body with the emit_V_MBCNT_HI_U32_B32 body that
follows it. The LO tails and HI heads below are reconstructed from the
surviving halves; indentation and blank context lines are best-effort, so
apply with whitespace tolerance. Hunk line counts were recomputed for the
added comments; the "index" hashes describe the original commit, not this text.

diff --git a/spirv/emit_vop2.pas b/spirv/emit_vop2.pas
index 9420290..490f1db 100644
--- a/spirv/emit_vop2.pas
+++ b/spirv/emit_vop2.pas
@@ -39,6 +39,8 @@ type
   procedure emit_V_MMX(OpId:DWORD;rtype:TsrDataType);
   procedure emit_V_LDEXP_F32;
   procedure emit_V_ADDC_U32;
+  procedure emit_V_MBCNT_LO_U32_B32;
+  procedure emit_V_MBCNT_HI_U32_B32;
  end;
 
 implementation
@@ -457,6 +459,48 @@ begin
  OpBitwiseAnd(car,src[0],exc); //carry_out & EXEC
 end;
 
+//V_MBCNT_LO_U32_B32 v1, -1, v1
+
+//Emits V_MBCNT_LO_U32_B32 (VOP2) as a plain copy of vaccum (VSRC1) to vdst.
+procedure TEmit_VOP2.emit_V_MBCNT_LO_U32_B32;
+Var
+ dst:PsrRegSlot;
+ src:array[0..1] of PsrRegNode;
+begin
+ //V_MBCNT_LO_U32_B32 vdst, vsrc, vaccum
+ //mask_lo_threads_before= (thread_id>32) ? 0xffffffff : (1<<thread_id)-1
+ //vdst = vaccum.u + bit_count(vsrc & mask_lo_threads_before)
+
+ dst:=get_vdst8(FSPI.VOP2.VDST);
+
+ //src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
+ src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
+
+ //Simplified: only the lowest thread_id is modelled, where the mask is 0,
+ //so the bit_count term vanishes and vdst is a plain copy of vaccum.
+ MakeCopy(dst,src[1]);
+end;
+
+//Emits V_MBCNT_HI_U32_B32 (VOP2) as a plain copy of vaccum (VSRC1) to vdst.
+procedure TEmit_VOP2.emit_V_MBCNT_HI_U32_B32;
+Var
+ dst:PsrRegSlot;
+ src:array[0..1] of PsrRegNode;
+begin
+ //V_MBCNT_HI_U32_B32 vdst, vsrc, vaccum
+ //mask_hi_threads_before= (thread_id>32) ? (1<<(thread_id-32))-1 : 0
+ //vdst = vaccum.u + bit_count(vsrc & mask_hi_threads_before)
+
+ dst:=get_vdst8(FSPI.VOP2.VDST);
+
+ //src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
+ src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
+
+ //Simplified: only the lowest thread_id is modelled, where the mask is 0,
+ //so the bit_count term vanishes and vdst is a plain copy of vaccum.
+ MakeCopy(dst,src[1]);
+end;
+
 procedure TEmit_VOP2.emit_VOP2;
 begin
 
@@ -515,6 +559,9 @@ begin
 
   V_ADDC_U32: emit_V_ADDC_U32;
 
+  V_MBCNT_LO_U32_B32: emit_V_MBCNT_LO_U32_B32;
+  V_MBCNT_HI_U32_B32: emit_V_MBCNT_HI_U32_B32;
+
   else
    Assert(false,'VOP2?'+IntToStr(FSPI.VOP2.OP));
  end;
diff --git a/spirv/emit_vop3.pas b/spirv/emit_vop3.pas
index 130c405..f7f90bb 100644
--- a/spirv/emit_vop3.pas
+++ b/spirv/emit_vop3.pas
@@ -41,6 +41,8 @@ type
   procedure emit_V_MUL_HI(rtype:TsrDataType);
   procedure emit_V_MAC_F32;
   procedure emit_V_LDEXP_F32;
+  procedure emit_V_MBCNT_LO_U32_B32;
+  procedure emit_V_MBCNT_HI_U32_B32;
 
   procedure emit_V_BFE_U32;
   procedure emit_V_BFI_B32;
@@ -456,6 +458,56 @@ begin
  emit_dst_clamp_f(dst);
 end;
 
+//Emits V_MBCNT_LO_U32_B32 (VOP3a) as a plain copy of vaccum (SRC1) to vdst.
+procedure TEmit_VOP3.emit_V_MBCNT_LO_U32_B32;
+Var
+ dst:PsrRegSlot;
+ src:array[0..1] of PsrRegNode;
+begin
+ //V_MBCNT_LO_U32_B32 vdst, vsrc, vaccum
+ //mask_lo_threads_before= (thread_id>32) ? 0xffffffff : (1<<thread_id)-1
+ //vdst = vaccum.u + bit_count(vsrc & mask_lo_threads_before)
+
+ dst:=get_vdst8(FSPI.VOP3a.VDST);
+
+ Assert(FSPI.VOP3a.OMOD =0,'FSPI.VOP3a.OMOD');
+ Assert(FSPI.VOP3a.ABS  =0,'FSPI.VOP3a.ABS');
+ Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP');
+ Assert(FSPI.VOP3a.NEG  =0,'FSPI.VOP3a.NEG');
+
+ //src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUint32);
+ src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUint32);
+
+ //Simplified: only the lowest thread_id is modelled, where the mask is 0,
+ //so the bit_count term vanishes and vdst is a plain copy of vaccum.
+ MakeCopy(dst,src[1]);
+end;
+
+//Emits V_MBCNT_HI_U32_B32 (VOP3a) as a plain copy of vaccum (SRC1) to vdst.
+procedure TEmit_VOP3.emit_V_MBCNT_HI_U32_B32;
+Var
+ dst:PsrRegSlot;
+ src:array[0..1] of PsrRegNode;
+begin
+ //V_MBCNT_HI_U32_B32 vdst, vsrc, vaccum
+ //mask_hi_threads_before= (thread_id>32) ? (1<<(thread_id-32))-1 : 0
+ //vdst = vaccum.u + bit_count(vsrc & mask_hi_threads_before)
+
+ dst:=get_vdst8(FSPI.VOP3a.VDST);
+
+ Assert(FSPI.VOP3a.OMOD =0,'FSPI.VOP3a.OMOD');
+ Assert(FSPI.VOP3a.ABS  =0,'FSPI.VOP3a.ABS');
+ Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP');
+ Assert(FSPI.VOP3a.NEG  =0,'FSPI.VOP3a.NEG');
+
+ //src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUint32);
+ src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUint32);
+
+ //Simplified: only the lowest thread_id is modelled, where the mask is 0,
+ //so the bit_count term vanishes and vdst is a plain copy of vaccum.
+ MakeCopy(dst,src[1]);
+end;
+
 procedure TEmit_VOP3.emit_V_BFE_U32;
 Var
  dst:PsrRegSlot;
@@ -1062,6 +1114,9 @@ begin
 
   256+V_LDEXP_F32: emit_V_LDEXP_F32;
 
+  256+V_MBCNT_LO_U32_B32: emit_V_MBCNT_LO_U32_B32;
+  256+V_MBCNT_HI_U32_B32: emit_V_MBCNT_HI_U32_B32;
+
   //VOP3 only
 
   V_MUL_LO_U32: emit_V_MUL_LO(dtUint32);