diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index b4b3de93e39..045a4ed8dca 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -93,11 +93,6 @@ class FixupBWInstPass : public MachineFunctionPass {
   /// OK, otherwise return nullptr.
   MachineInstr *tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const;
 
-  /// Change the MachineInstr \p MI into the equivalent 32-bit copy if it is
-  /// safe to do so. Return the replacement instruction if OK, otherwise return
-  /// nullptr.
-  MachineInstr *tryReplaceCopy(MachineInstr *MI) const;
-
 public:
   FixupBWInstPass() : MachineFunctionPass(ID) {}
 
@@ -224,45 +219,6 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
   return MIB;
 }
 
-MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
-  assert(MI->getNumExplicitOperands() == 2);
-  auto &OldDest = MI->getOperand(0);
-  auto &OldSrc = MI->getOperand(1);
-
-  unsigned NewDestReg;
-  if (!getSuperRegDestIfDead(MI, NewDestReg))
-    return nullptr;
-
-  unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
-
-  // This is only correct if we access the same subregister index: otherwise,
-  // we could try to replace "movb %ah, %al" with "movl %eax, %eax".
-  auto *TRI = &TII->getRegisterInfo();
-  if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) !=
-      TRI->getSubRegIndex(NewDestReg, OldDest.getReg()))
-    return nullptr;
-
-  // Safe to change the instruction.
-  // Don't set src flags, as we don't know if we're also killing the superreg.
-  MachineInstrBuilder MIB =
-      BuildMI(*MF, MI->getDebugLoc(), TII->get(X86::MOV32rr), NewDestReg)
-          .addReg(NewSrcReg);
-
-  // Make sure we don't drop implicit operands.
-  // We used to imp-def the super, but we don't need to anymore, as we turned
-  // it into an explicit def. However, we might still need to imp-def the GR64
-  // super-register.
-  for (auto &Op : MI->implicit_operands()) {
-    if (Op.getReg() == NewDestReg && Op.isDef())
-      continue;
-    assert((!Op.isDef() || TRI->isSubRegister(Op.getReg(), NewDestReg)) &&
-           "Copy imp-defs unrelated reg?");
-    MIB.addOperand(Op);
-  }
-
-  return MIB;
-}
-
 void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
                                         MachineBasicBlock &MBB) {
 
@@ -309,15 +265,6 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
       NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
       break;
 
-    case X86::MOV8rr:
-    case X86::MOV16rr:
-      // Always try to replace 8/16 bit copies with a 32 bit copy.
-      // Code size is either less (16) or equal (8), and there is sometimes a
-      // perf advantage from eliminating a false dependence on the upper portion
-      // of the register.
-      NewMI = tryReplaceCopy(MI);
-      break;
-
     default:
       // nothing to do here.
break; diff --git a/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll b/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll index c9b3df83613..114b985f71d 100644 --- a/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll +++ b/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll @@ -6,7 +6,7 @@ define i8 @f(i8 %v1, i8 %v2) nounwind { entry: ; CHECK: callq -; CHECK: movl %{{.*}}, %eax +; CHECK: movb %{{.*}}, %al ; CHECK: mulb ; CHECK: mulb %rval = tail call i8 @bar() nounwind diff --git a/test/CodeGen/X86/anyext.ll b/test/CodeGen/X86/anyext.ll index 3c53983fe4e..0117932035d 100644 --- a/test/CodeGen/X86/anyext.ll +++ b/test/CodeGen/X86/anyext.ll @@ -39,7 +39,7 @@ define i32 @bar(i32 %p, i16 zeroext %x) nounwind { ; X64-LABEL: bar: ; X64: # BB#0: ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movw %di, %ax ; X64-NEXT: divw %si ; X64-NEXT: andl $1, %eax ; X64-NEXT: retq diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll index 49a28319e48..858e061e887 100644 --- a/test/CodeGen/X86/avx512-calling-conv.ll +++ b/test/CodeGen/X86/avx512-calling-conv.ll @@ -461,7 +461,7 @@ define i32 @test12(i32 %a1, i32 %a2, i32 %b1) { ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: movl %edi, (%esp) ; KNL_X32-NEXT: calll _test11 -; KNL_X32-NEXT: movl %eax, %ebx +; KNL_X32-NEXT: movb %al, %bl ; KNL_X32-NEXT: movzbl %bl, %eax ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp) diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index f601dbcf0a6..de0c97cf0b5 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -81,7 +81,7 @@ define i16 @mand16(i16 %x, i16 %y) { ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movw %di, %ax ; CHECK-NEXT: retq %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll index 10da8facbfe..fd450f54356 100644 --- a/test/CodeGen/X86/avx512-select.ll +++ b/test/CodeGen/X86/avx512-select.ll @@ -72,7 +72,7 @@ define i8 @select05(i8 %a.0, i8 %m) { ; CHECK-LABEL: select05: ; CHECK: ## BB#0: ; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movb %dil, %al ; CHECK-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> @@ -102,7 +102,7 @@ define i8 @select06(i8 %a.0, i8 %m) { ; CHECK-LABEL: select06: ; CHECK: ## BB#0: ; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movb %dil, %al ; CHECK-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> diff --git a/test/CodeGen/X86/avx512dq-mask-op.ll b/test/CodeGen/X86/avx512dq-mask-op.ll index e83aa14d35e..1068bec38be 100644 --- a/test/CodeGen/X86/avx512dq-mask-op.ll +++ b/test/CodeGen/X86/avx512dq-mask-op.ll @@ -36,7 +36,7 @@ define i8 @mand8(i8 %x, i8 %y) { ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movb %dil, %al ; CHECK-NEXT: retq %ma = bitcast i8 %x to <8 x i1> %mb = bitcast i8 %y to <8 x i1> diff --git a/test/CodeGen/X86/cmovcmov.ll b/test/CodeGen/X86/cmovcmov.ll index 38ba308ecff..9363d31866d 100644 --- a/test/CodeGen/X86/cmovcmov.ll +++ b/test/CodeGen/X86/cmovcmov.ll @@ -250,14 +250,14 @@ attributes #0 = { nounwind } ; CMOV-DAG: movb $20, %al ; CMOV-DAG: movb $20, %dl ; CMOV: jl [[BB0:.LBB[0-9_]+]] -; CMOV: movl %ecx, 
%edx +; CMOV: movb %cl, %dl ; CMOV: [[BB0]]: ; CMOV: jg [[BB1:.LBB[0-9_]+]] -; CMOV: movl %edx, %eax +; CMOV: movb %dl, %al ; CMOV: [[BB1]]: ; CMOV: testl %edi, %edi ; CMOV: je [[BB2:.LBB[0-9_]+]] -; CMOV: movl %edx, %eax +; CMOV: movb %dl, %al ; CMOV: [[BB2]]: ; CMOV: movb %al, g8(%rip) ; CMOV: retq diff --git a/test/CodeGen/X86/fixup-bw-copy.ll b/test/CodeGen/X86/fixup-bw-copy.ll deleted file mode 100644 index f8fd8358fc7..00000000000 --- a/test/CodeGen/X86/fixup-bw-copy.ll +++ /dev/null @@ -1,70 +0,0 @@ -; NOTE: Assertions have been autogenerated by update_llc_test_checks.py -; RUN: llc -fixup-byte-word-insts=1 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWON64 %s -; RUN: llc -fixup-byte-word-insts=0 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWOFF64 %s -; RUN: llc -fixup-byte-word-insts=1 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWON32 %s -; RUN: llc -fixup-byte-word-insts=0 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWOFF32 %s - -target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" - -define i8 @test_movb(i8 %a0) { -; BWON64-LABEL: test_movb: -; BWON64: # BB#0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movb: -; BWOFF64: # BB#0: -; BWOFF64-NEXT: movb %dil, %al -; BWOFF64-NEXT: retq -; -; X32-LABEL: test_movb: -; X32: # BB#0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: retl - ret i8 %a0 -} - -define i16 @test_movw(i16 %a0) { -; BWON64-LABEL: test_movw: -; BWON64: # BB#0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movw: -; BWOFF64: # BB#0: -; BWOFF64-NEXT: movw %di, %ax -; BWOFF64-NEXT: retq -; -; BWON32-LABEL: test_movw: -; BWON32: # BB#0: -; BWON32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; BWON32-NEXT: retl -; -; BWOFF32-LABEL: test_movw: -; BWOFF32: # BB#0: -; BWOFF32-NEXT: movw {{[0-9]+}}(%esp), %ax -; BWOFF32-NEXT: retl - ret i16 %a0 -} - -; Verify we don't mess with H-reg copies (only generated in 32-bit mode). 
-define i8 @test_movb_hreg(i16 %a0) { -; X64-LABEL: test_movb_hreg: -; X64: # BB#0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shrl $8, %eax -; X64-NEXT: addb %dil, %al -; X64-NEXT: retq -; -; X32-LABEL: test_movb_hreg: -; X32: # BB#0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addb %al, %ah -; X32-NEXT: movb %ah, %al -; X32-NEXT: retl - %tmp0 = trunc i16 %a0 to i8 - %tmp1 = lshr i16 %a0, 8 - %tmp2 = trunc i16 %tmp1 to i8 - %tmp3 = add i8 %tmp0, %tmp2 - ret i8 %tmp3 -} diff --git a/test/CodeGen/X86/float-conv-elim.ll b/test/CodeGen/X86/float-conv-elim.ll index 7ccad2b80c8..45f46b29e75 100644 --- a/test/CodeGen/X86/float-conv-elim.ll +++ b/test/CodeGen/X86/float-conv-elim.ll @@ -21,7 +21,7 @@ define i32 @foo2(i8 %a) #0 { ; CHECK-LABEL: bar ; CHECK-NOT: cvt -; CHECK: movl +; CHECK: movb define zeroext i8 @bar(i8 zeroext %a) #0 { %conv = uitofp i8 %a to float %conv1 = fptoui float %conv to i8 diff --git a/test/CodeGen/X86/machine-combiner-int.ll b/test/CodeGen/X86/machine-combiner-int.ll index 47a83597f2d..bb5b552036c 100644 --- a/test/CodeGen/X86/machine-combiner-int.ll +++ b/test/CodeGen/X86/machine-combiner-int.ll @@ -60,7 +60,7 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-NEXT: subb %sil, %dil ; CHECK-NEXT: andb %cl, %dl ; CHECK-NEXT: andb %dil, %dl -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movb %dl, %al ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = and i8 %x2, %t0 @@ -107,7 +107,7 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-NEXT: subb %sil, %dil ; CHECK-NEXT: orb %cl, %dl ; CHECK-NEXT: orb %dil, %dl -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movb %dl, %al ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = or i8 %x2, %t0 @@ -154,7 +154,7 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-NEXT: subb %sil, %dil ; CHECK-NEXT: xorb %cl, %dl ; CHECK-NEXT: xorb %dil, %dl -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movb %dl, %al ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = xor i8 %x2, %t0 diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll index b654a81c11c..39e6fd0e6a5 100644 --- a/test/CodeGen/X86/opt-ext-uses.ll +++ b/test/CodeGen/X86/opt-ext-uses.ll @@ -2,8 +2,8 @@ ; This test should get one and only one register to register mov. 
; CHECK-LABEL: t: -; CHECK: movl -; CHECK-NOT: mov +; CHECK: movw +; CHECK-NOT: movw ; CHECK: ret define signext i16 @t() { diff --git a/test/CodeGen/X86/pr23664.ll b/test/CodeGen/X86/pr23664.ll index 155fc03de83..a501c0db837 100644 --- a/test/CodeGen/X86/pr23664.ll +++ b/test/CodeGen/X86/pr23664.ll @@ -9,6 +9,6 @@ define i2 @f(i32 %arg) { ; CHECK-LABEL: f: ; CHECK: addb %dil, %dil ; CHECK-NEXT: orb $1, %dil -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movb %dil, %al ; CHECK-NEXT: retq } diff --git a/test/CodeGen/X86/vector-bitreverse.ll b/test/CodeGen/X86/vector-bitreverse.ll index 16397bf1afb..46e73b008d8 100644 --- a/test/CodeGen/X86/vector-bitreverse.ll +++ b/test/CodeGen/X86/vector-bitreverse.ll @@ -9,68 +9,68 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; SSE-LABEL: test_bitreverse_i8: ; SSE: # BB#0: -; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: movb %dil, %al ; SSE-NEXT: shlb $7, %al -; SSE-NEXT: movl %edi, %ecx +; SSE-NEXT: movb %dil, %cl ; SSE-NEXT: shlb $5, %cl ; SSE-NEXT: andb $64, %cl -; SSE-NEXT: movl %edi, %edx +; SSE-NEXT: movb %dil, %dl ; SSE-NEXT: shlb $3, %dl ; SSE-NEXT: andb $32, %dl ; SSE-NEXT: orb %cl, %dl -; SSE-NEXT: movl %edi, %ecx +; SSE-NEXT: movb %dil, %cl ; SSE-NEXT: addb %cl, %cl ; SSE-NEXT: andb $16, %cl ; SSE-NEXT: orb %dl, %cl -; SSE-NEXT: movl %edi, %edx +; SSE-NEXT: movb %dil, %dl ; SSE-NEXT: shrb %dl ; SSE-NEXT: andb $8, %dl ; SSE-NEXT: orb %cl, %dl -; SSE-NEXT: movl %edi, %ecx +; SSE-NEXT: movb %dil, %cl ; SSE-NEXT: shrb $3, %cl ; SSE-NEXT: andb $4, %cl ; SSE-NEXT: orb %dl, %cl -; SSE-NEXT: movl %edi, %edx +; SSE-NEXT: movb %dil, %dl ; SSE-NEXT: shrb $5, %dl ; SSE-NEXT: andb $2, %dl ; SSE-NEXT: orb %cl, %dl ; SSE-NEXT: shrb $7, %dil ; SSE-NEXT: orb %dl, %dil ; SSE-NEXT: orb %al, %dil -; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: movb %dil, %al ; SSE-NEXT: retq ; ; AVX-LABEL: test_bitreverse_i8: ; AVX: # BB#0: -; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: movb %dil, %al ; AVX-NEXT: shlb $7, %al -; AVX-NEXT: movl %edi, %ecx +; AVX-NEXT: movb %dil, %cl ; AVX-NEXT: shlb $5, %cl ; AVX-NEXT: andb $64, %cl -; AVX-NEXT: movl %edi, %edx +; AVX-NEXT: movb %dil, %dl ; AVX-NEXT: shlb $3, %dl ; AVX-NEXT: andb $32, %dl ; AVX-NEXT: orb %cl, %dl -; AVX-NEXT: movl %edi, %ecx +; AVX-NEXT: movb %dil, %cl ; AVX-NEXT: addb %cl, %cl ; AVX-NEXT: andb $16, %cl ; AVX-NEXT: orb %dl, %cl -; AVX-NEXT: movl %edi, %edx +; AVX-NEXT: movb %dil, %dl ; AVX-NEXT: shrb %dl ; AVX-NEXT: andb $8, %dl ; AVX-NEXT: orb %cl, %dl -; AVX-NEXT: movl %edi, %ecx +; AVX-NEXT: movb %dil, %cl ; AVX-NEXT: shrb $3, %cl ; AVX-NEXT: andb $4, %cl ; AVX-NEXT: orb %dl, %cl -; AVX-NEXT: movl %edi, %edx +; AVX-NEXT: movb %dil, %dl ; AVX-NEXT: shrb $5, %dl ; AVX-NEXT: andb $2, %dl ; AVX-NEXT: orb %cl, %dl ; AVX-NEXT: shrb $7, %dil ; AVX-NEXT: orb %dl, %dil ; AVX-NEXT: orb %al, %dil -; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: movb %dil, %al ; AVX-NEXT: retq ; ; XOP-LABEL: test_bitreverse_i8: diff --git a/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/test/CodeGen/X86/vector-idiv-sdiv-512.ll index f91babe644c..600e3f2f52c 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-512.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-512.ll @@ -336,7 +336,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -346,7 +346,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind 
{ ; AVX512BW-NEXT: imull $-109, %ecx, %edx ; AVX512BW-NEXT: shrl $8, %edx ; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movl %ecx, %edx +; AVX512BW-NEXT: movb %cl, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %cl ; AVX512BW-NEXT: addb %dl, %cl @@ -358,7 +358,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -369,7 +369,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -380,7 +380,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -391,7 +391,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -402,7 +402,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -413,7 +413,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -424,7 +424,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -435,7 +435,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -446,7 +446,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -457,7 +457,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; 
AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -468,7 +468,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -479,7 +479,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -490,7 +490,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -501,7 +501,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -513,7 +513,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -523,7 +523,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %edx ; AVX512BW-NEXT: shrl $8, %edx ; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movl %ecx, %edx +; AVX512BW-NEXT: movb %cl, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %cl ; AVX512BW-NEXT: addb %dl, %cl @@ -535,7 +535,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -546,7 +546,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -557,7 +557,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -568,7 +568,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -579,7 +579,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; 
AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -590,7 +590,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -601,7 +601,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -612,7 +612,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -623,7 +623,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -634,7 +634,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -645,7 +645,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -656,7 +656,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -667,7 +667,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -678,7 +678,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -691,7 +691,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; 
AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -701,7 +701,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %edx ; AVX512BW-NEXT: shrl $8, %edx ; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movl %ecx, %edx +; AVX512BW-NEXT: movb %cl, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %cl ; AVX512BW-NEXT: addb %dl, %cl @@ -713,7 +713,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -724,7 +724,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -735,7 +735,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -746,7 +746,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -757,7 +757,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -768,7 +768,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -779,7 +779,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -790,7 +790,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -801,7 +801,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -812,7 +812,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; 
AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -823,7 +823,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -834,7 +834,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -845,7 +845,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -856,7 +856,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -867,7 +867,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -877,7 +877,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %edx ; AVX512BW-NEXT: shrl $8, %edx ; AVX512BW-NEXT: addb %dl, %cl -; AVX512BW-NEXT: movl %ecx, %edx +; AVX512BW-NEXT: movb %cl, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %cl ; AVX512BW-NEXT: addb %dl, %cl @@ -889,7 +889,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -900,7 +900,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -911,7 +911,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -922,7 +922,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; 
AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -933,7 +933,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -944,7 +944,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -955,7 +955,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -966,7 +966,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -977,7 +977,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -988,7 +988,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -999,7 +999,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1010,7 +1010,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1021,7 +1021,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1032,7 +1032,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %eax, %ecx ; AVX512BW-NEXT: shrl $8, %ecx ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1489,7 +1489,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { 
; AVX512BW-NEXT: imull $-109, %edx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1502,7 +1502,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %esi, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %sil, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1516,7 +1516,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1529,7 +1529,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1542,7 +1542,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1555,7 +1555,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1568,7 +1568,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1581,7 +1581,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1594,7 +1594,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1607,7 +1607,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1620,7 +1620,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; 
AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1633,7 +1633,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1646,7 +1646,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1659,7 +1659,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1672,7 +1672,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1685,7 +1685,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1699,7 +1699,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1711,7 +1711,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %edx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1725,7 +1725,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1738,7 +1738,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1751,7 +1751,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1764,7 +1764,7 @@ define <64 x 
i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1777,7 +1777,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1790,7 +1790,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1803,7 +1803,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1816,7 +1816,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1829,7 +1829,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1842,7 +1842,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1855,7 +1855,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1868,7 +1868,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1881,7 +1881,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1894,7 +1894,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; 
AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1909,7 +1909,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1921,7 +1921,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %edx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -1935,7 +1935,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1948,7 +1948,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1961,7 +1961,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1974,7 +1974,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -1987,7 +1987,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2000,7 +2000,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2013,7 +2013,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2026,7 +2026,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al 
@@ -2039,7 +2039,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2052,7 +2052,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2065,7 +2065,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2078,7 +2078,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2091,7 +2091,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2104,7 +2104,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2117,7 +2117,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2129,7 +2129,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %edx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %dl, %al -; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: movb %al, %cl ; AVX512BW-NEXT: shrb $7, %cl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %cl, %al @@ -2143,7 +2143,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2156,7 +2156,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; AVX512BW-NEXT: addb %cl, %al -; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: movb %al, %dl ; AVX512BW-NEXT: shrb $7, %dl ; AVX512BW-NEXT: sarb $2, %al ; AVX512BW-NEXT: addb %dl, %al @@ -2169,7 +2169,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: imull $-109, %ecx, %eax ; AVX512BW-NEXT: shrl $8, %eax ; 
AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2182,7 +2182,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2195,7 +2195,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2208,7 +2208,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2221,7 +2221,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2234,7 +2234,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2247,7 +2247,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2260,7 +2260,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2273,7 +2273,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2286,7 +2286,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2299,7 +2299,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
@@ -2312,7 +2312,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
 ; AVX512BW-NEXT: shrl $8, %eax
 ; AVX512BW-NEXT: addb %cl, %al
-; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: movb %al, %dl
 ; AVX512BW-NEXT: shrb $7, %dl
 ; AVX512BW-NEXT: sarb $2, %al
 ; AVX512BW-NEXT: addb %dl, %al
diff --git a/test/CodeGen/X86/vector-idiv-udiv-512.ll b/test/CodeGen/X86/vector-idiv-udiv-512.ll
index 35c902c5cc2..e6e7503dc6d 100644
--- a/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -1323,7 +1323,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %ecx
 ; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %cl, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %cl, %al
@@ -1335,7 +1335,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %esi
 ; AVX512BW-NEXT: imull $37, %esi, %edi
 ; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
+; AVX512BW-NEXT: movb %sil, %al
 ; AVX512BW-NEXT: subb %dil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %dil, %al
@@ -1348,7 +1348,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1360,7 +1360,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1372,7 +1372,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1384,7 +1384,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1396,7 +1396,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1408,7 +1408,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1420,7 +1420,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1432,7 +1432,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1444,7 +1444,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1456,7 +1456,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1468,7 +1468,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1480,7 +1480,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1492,7 +1492,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1504,7 +1504,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1517,7 +1517,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1528,7 +1528,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
 ; AVX512BW-NEXT: imull $37, %esi, %edi
 ; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
+; AVX512BW-NEXT: movb %sil, %al
 ; AVX512BW-NEXT: subb %dil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %dil, %al
@@ -1541,7 +1541,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1553,7 +1553,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1565,7 +1565,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1577,7 +1577,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1589,7 +1589,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1601,7 +1601,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1613,7 +1613,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1625,7 +1625,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1637,7 +1637,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1649,7 +1649,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1661,7 +1661,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1673,7 +1673,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1685,7 +1685,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1697,7 +1697,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1711,7 +1711,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1722,7 +1722,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
 ; AVX512BW-NEXT: imull $37, %esi, %edi
 ; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
+; AVX512BW-NEXT: movb %sil, %al
 ; AVX512BW-NEXT: subb %dil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %dil, %al
@@ -1735,7 +1735,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1747,7 +1747,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1759,7 +1759,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1771,7 +1771,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1783,7 +1783,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1795,7 +1795,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1807,7 +1807,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1819,7 +1819,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1831,7 +1831,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1843,7 +1843,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1855,7 +1855,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1867,7 +1867,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1879,7 +1879,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1891,7 +1891,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1903,7 +1903,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1914,7 +1914,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
 ; AVX512BW-NEXT: imull $37, %esi, %edi
 ; AVX512BW-NEXT: shrl $8, %edi
-; AVX512BW-NEXT: movl %esi, %eax
+; AVX512BW-NEXT: movb %sil, %al
 ; AVX512BW-NEXT: subb %dil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %dil, %al
@@ -1927,7 +1927,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1939,7 +1939,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1951,7 +1951,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1963,7 +1963,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1975,7 +1975,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1987,7 +1987,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -1999,7 +1999,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -2011,7 +2011,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -2023,7 +2023,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -2035,7 +2035,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -2047,7 +2047,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -2059,7 +2059,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -2071,7 +2071,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
@@ -2083,7 +2083,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %edx
 ; AVX512BW-NEXT: imull $37, %edx, %esi
 ; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: movl %edx, %eax
+; AVX512BW-NEXT: movb %dl, %al
 ; AVX512BW-NEXT: subb %sil, %al
 ; AVX512BW-NEXT: shrb %al
 ; AVX512BW-NEXT: addb %sil, %al
diff --git a/test/CodeGen/X86/xaluo.ll b/test/CodeGen/X86/xaluo.ll
index b24c43a3d0d..7c4b60d264c 100644
--- a/test/CodeGen/X86/xaluo.ll
+++ b/test/CodeGen/X86/xaluo.ll
@@ -295,7 +295,7 @@ entry:
 define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
 entry:
 ; CHECK-LABEL: smulo.i8
-; CHECK: movl %edi, %eax
+; CHECK: movb %dil, %al
 ; CHECK-NEXT: imulb %sil
 ; CHECK-NEXT: seto %cl
 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
@@ -345,7 +345,7 @@ entry:
 define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
 entry:
 ; CHECK-LABEL: umulo.i8
-; CHECK: movl %edi, %eax
+; CHECK: movb %dil, %al
 ; CHECK-NEXT: mulb %sil
 ; CHECK-NEXT: seto %cl
 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)