Mirror of https://github.com/FEX-Emu/FEX.git, synced 2024-11-23 06:30:01 +00:00
Merge pull request #4149 from Sonicadvance1/iropsize_convert_class
Some checks failed
Build + Test / build_plus_test ([self-hosted ARMv8.0]) (push) Has been cancelled
Build + Test / build_plus_test ([self-hosted ARMv8.2]) (push) Has been cancelled
Build + Test / build_plus_test ([self-hosted ARMv8.4]) (push) Has been cancelled
GLIBC fault test / glibc_fault_test ([self-hosted ARM64]) (push) Has been cancelled
Hostrunner tests / hostrunner_tests ([self-hosted x64]) (push) Has been cancelled
Instruction Count CI run / instcountci_tests ([self-hosted ARM64]) (push) Has been cancelled
Instruction Count CI run / instcountci_tests ([self-hosted x64]) (push) Has been cancelled
Mingw build / mingw_build ([self-hosted ARM64 mingw]) (push) Has been cancelled
Mingw build / mingw_build ([self-hosted ARM64EC mingw ARM64]) (push) Has been cancelled
Vixl Simulator run / vixl_simulator ([self-hosted ARMv8.4]) (push) Has been cancelled
Vixl Simulator run / vixl_simulator ([self-hosted x64]) (push) Has been cancelled
IR: Convert OpSize over to enum class
This commit is contained in:
commit 5ad7fdb2f3
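For context, the change replaces the raw byte-count IROp->Size with a scoped enum. The helpers used throughout the hunks below (OpSizeToSize, SizeToOpSize, OpSizeAsBits, NumElements, DivideOpSize, plus the size arithmetic such as DstSize / 2 and JcxGPRSize >> 1) suggest an interface roughly like the sketch that follows. This is an illustrative reconstruction inferred from the diff, not the actual FEXCore header; the enumerator values and the operator overloads are assumptions.

#include <cstdint>

namespace FEXCore::IR {
// Sketch only: operation sizes keyed by their byte width, so converting to and
// from the raw sizes the IR previously stored is a plain cast.
enum class OpSize : uint8_t {
  i8Bit = 1,
  i16Bit = 2,
  i32Bit = 4,
  i64Bit = 8,
  i128Bit = 16,
  i256Bit = 32,
  iInvalid = 0xFF,
};

// Raw byte count of an operation size.
constexpr uint16_t OpSizeToSize(OpSize Size) {
  return static_cast<uint16_t>(Size);
}

// Wrap a raw byte count back into the enum.
constexpr OpSize SizeToOpSize(uint16_t Size) {
  return static_cast<OpSize>(Size);
}

// Width in bits, used for shift amounts and bitfield widths in the backends.
constexpr uint16_t OpSizeAsBits(OpSize Size) {
  return OpSizeToSize(Size) * 8;
}

// How many ElementSize-wide lanes fit in a RegisterSize-wide vector.
constexpr uint16_t NumElements(OpSize RegisterSize, OpSize ElementSize) {
  return OpSizeToSize(RegisterSize) / OpSizeToSize(ElementSize);
}

// Halve or divide a size, e.g. for address-size overridden operations.
constexpr OpSize DivideOpSize(OpSize Size, uint8_t Divisor) {
  return SizeToOpSize(OpSizeToSize(Size) / Divisor);
}
constexpr OpSize operator/(OpSize Size, uint8_t Divisor) {
  return DivideOpSize(Size, Divisor);
}
constexpr OpSize operator>>(OpSize Size, int Shift) {
  return SizeToOpSize(OpSizeToSize(Size) >> Shift);
}
} // namespace FEXCore::IR

Because scoped enums keep their ordering, comparisons such as IROp->Size >= IR::OpSize::i32Bit in the hunks below need no extra operators; only the division and shift arithmetic does.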
@@ -632,12 +632,12 @@ def print_ir_allocator_helpers():

output_file.write("\tIR::OpSize GetOpSize(const OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
- output_file.write("\t\treturn IR::SizeToOpSize(HeaderOp->Size);\n")
+ output_file.write("\t\treturn HeaderOp->Size;\n")
output_file.write("\t}\n\n")

output_file.write("\tIR::OpSize GetOpElementSize(const OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
- output_file.write("\t\treturn IR::SizeToOpSize(HeaderOp->ElementSize);\n")
+ output_file.write("\t\treturn HeaderOp->ElementSize;\n")
output_file.write("\t}\n\n")

output_file.write("\tuint8_t GetOpElements(const OrderedNode *Op) const {\n")
@@ -79,7 +79,7 @@ void InterpreterOps::FillFallbackIndexPointers(uint64_t* Info) {
}

bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, const IR::IROp_Header* IROp, FallbackInfo* Info) {
- uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;
switch (IROp->Op) {
case IR::OP_F80CVTTO: {
auto Op = IROp->C<IR::IROp_F80CVTTo>();
@@ -99,11 +99,11 @@ bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, const IR::I
}
case IR::OP_F80CVT: {
switch (OpSize) {
- case 4: {
+ case IR::OpSize::i32Bit: {
*Info = {FABI_F32_I16_F80, (void*)&FEXCore::CPU::OpHandlers<IR::OP_F80CVT>::handle4, Core::OPINDEX_F80CVT_4, SupportsPreserveAllABI};
return true;
}
- case 8: {
+ case IR::OpSize::i64Bit: {
*Info = {FABI_F64_I16_F80, (void*)&FEXCore::CPU::OpHandlers<IR::OP_F80CVT>::handle8, Core::OPINDEX_F80CVT_8, SupportsPreserveAllABI};
return true;
}
@@ -115,7 +115,7 @@ bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, const IR::I
auto Op = IROp->C<IR::IROp_F80CVTInt>();

switch (OpSize) {
- case 2: {
+ case IR::OpSize::i16Bit: {
if (Op->Truncate) {
*Info = {FABI_I16_I16_F80, (void*)&FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2t, Core::OPINDEX_F80CVTINT_TRUNC2,
SupportsPreserveAllABI};
@@ -124,7 +124,7 @@ bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, const IR::I
}
return true;
}
- case 4: {
+ case IR::OpSize::i32Bit: {
if (Op->Truncate) {
*Info = {FABI_I32_I16_F80, (void*)&FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4t, Core::OPINDEX_F80CVTINT_TRUNC4,
SupportsPreserveAllABI};
@@ -133,7 +133,7 @@ bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, const IR::I
}
return true;
}
- case 8: {
+ case IR::OpSize::i64Bit: {
if (Op->Truncate) {
*Info = {FABI_I64_I16_F80, (void*)&FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8t, Core::OPINDEX_F80CVTINT_TRUNC8,
SupportsPreserveAllABI};
@@ -54,8 +54,8 @@ DEF_OP(EntrypointOffset) {
auto Constant = Entry + Op->Offset;
auto Dst = GetReg(Node);
uint64_t Mask = ~0ULL;
- uint8_t OpSize = IROp->Size;
- if (OpSize == 4) {
+ const auto OpSize = IROp->Size;
+ if (OpSize == IR::OpSize::i32Bit) {
Mask = 0xFFFF'FFFFULL;
}
@@ -92,10 +92,10 @@ DEF_OP(AddNZCV) {

uint64_t Const;
if (IsInlineConstant(Op->Src2, &Const)) {
- LOGMAN_THROW_AA_FMT(IROp->Size >= 4, "Constant not allowed here");
+ LOGMAN_THROW_AA_FMT(IROp->Size >= IR::OpSize::i32Bit, "Constant not allowed here");
cmn(EmitSize, Src1, Const);
- } else if (IROp->Size < 4) {
- unsigned Shift = 32 - (8 * IROp->Size);
+ } else if (IROp->Size < IR::OpSize::i32Bit) {
+ unsigned Shift = 32 - IR::OpSizeAsBits(IROp->Size);

lsl(ARMEmitter::Size::i32Bit, TMP1, Src1, Shift);
cmn(EmitSize, TMP1, GetReg(Op->Src2.ID()), ARMEmitter::ShiftType::LSL, Shift);
@@ -165,7 +165,7 @@ DEF_OP(TestNZ) {
// Shift the sign bit into place, clearing out the garbage in upper bits.
// Adding zero does an effective test, setting NZ according to the result and
// zeroing CV.
- if (IROp->Size < 4) {
+ if (IROp->Size < IR::OpSize::i32Bit) {
// Cheaper to and+cmn than to lsl+lsl+tst, so do the and ourselves if
// needed.
if (Op->Src1 != Op->Src2) {
@@ -179,7 +179,7 @@ DEF_OP(TestNZ) {
Src1 = TMP1;
}

- unsigned Shift = 32 - (IROp->Size * 8);
+ unsigned Shift = 32 - IR::OpSizeAsBits(IROp->Size);
cmn(EmitSize, ARMEmitter::Reg::zr, Src1, ARMEmitter::ShiftType::LSL, Shift);
} else {
if (IsInlineConstant(Op->Src2, &Const)) {
@@ -193,11 +193,11 @@ DEF_OP(TestNZ) {

DEF_OP(TestZ) {
auto Op = IROp->C<IR::IROp_TestZ>();
- LOGMAN_THROW_AA_FMT(IROp->Size < 4, "TestNZ used at higher sizes");
+ LOGMAN_THROW_AA_FMT(IROp->Size < IR::OpSize::i32Bit, "TestNZ used at higher sizes");
const auto EmitSize = ARMEmitter::Size::i32Bit;

uint64_t Const;
- uint64_t Mask = IROp->Size == 8 ? ~0ULL : ((1ull << (IROp->Size * 8)) - 1);
+ uint64_t Mask = IROp->Size == IR::OpSize::i64Bit ? ~0ULL : ((1ull << IR::OpSizeAsBits(IROp->Size)) - 1);
auto Src1 = GetReg(Op->Src1.ID());

if (IsInlineConstant(Op->Src2, &Const)) {
@@ -223,25 +223,25 @@ DEF_OP(SubShift) {

DEF_OP(SubNZCV) {
auto Op = IROp->C<IR::IROp_SubNZCV>();
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;
const auto EmitSize = ConvertSize(IROp);

uint64_t Const;
if (IsInlineConstant(Op->Src2, &Const)) {
- LOGMAN_THROW_AA_FMT(OpSize >= 4, "Constant not allowed here");
+ LOGMAN_THROW_AA_FMT(OpSize >= IR::OpSize::i32Bit, "Constant not allowed here");
cmp(EmitSize, GetReg(Op->Src1.ID()), Const);
} else {
- unsigned Shift = OpSize < 4 ? (32 - (8 * OpSize)) : 0;
+ unsigned Shift = OpSize < IR::OpSize::i32Bit ? (32 - IR::OpSizeAsBits(OpSize)) : 0;
ARMEmitter::Register ShiftedSrc1 = GetZeroableReg(Op->Src1);

// Shift to fix flags for <32-bit ops.
// Any shift of zero is still zero so optimize out silly zero shifts.
- if (OpSize < 4 && ShiftedSrc1 != ARMEmitter::Reg::zr) {
+ if (OpSize < IR::OpSize::i32Bit && ShiftedSrc1 != ARMEmitter::Reg::zr) {
lsl(ARMEmitter::Size::i32Bit, TMP1, ShiftedSrc1, Shift);
ShiftedSrc1 = TMP1;
}

- if (OpSize < 4) {
+ if (OpSize < IR::OpSize::i32Bit) {
cmp(EmitSize, ShiftedSrc1, GetReg(Op->Src2.ID()), ARMEmitter::ShiftType::LSL, Shift);
} else {
cmp(EmitSize, ShiftedSrc1, GetReg(Op->Src2.ID()));
@@ -286,10 +286,10 @@ DEF_OP(SetSmallNZV) {
auto Op = IROp->C<IR::IROp_SetSmallNZV>();
LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFlagM, "Unsupported flagm op");

- const uint8_t OpSize = IROp->Size;
- LOGMAN_THROW_AA_FMT(OpSize == 1 || OpSize == 2, "Unsupported {} size: {}", __func__, OpSize);
+ const auto OpSize = IROp->Size;
+ LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i8Bit || OpSize == IR::OpSize::i16Bit, "Unsupported {} size: {}", __func__, OpSize);

- if (OpSize == 1) {
+ if (OpSize == IR::OpSize::i8Bit) {
setf8(GetReg(Op->Src.ID()).W());
} else {
setf16(GetReg(Op->Src.ID()).W());
@@ -401,20 +401,20 @@ DEF_OP(Div) {

// Each source is OpSize in size
// So you can have up to a 128bit divide from x86-64
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;
const auto EmitSize = ConvertSize(IROp);

const auto Dst = GetReg(Node);
auto Src1 = GetReg(Op->Src1.ID());
auto Src2 = GetReg(Op->Src2.ID());

- if (OpSize == 1) {
+ if (OpSize == IR::OpSize::i8Bit) {
sxtb(EmitSize, TMP1, Src1);
sxtb(EmitSize, TMP2, Src2);

Src1 = TMP1;
Src2 = TMP2;
- } else if (OpSize == 2) {
+ } else if (OpSize == IR::OpSize::i16Bit) {
sxth(EmitSize, TMP1, Src1);
sxth(EmitSize, TMP2, Src2);

@@ -430,20 +430,20 @@ DEF_OP(UDiv) {

// Each source is OpSize in size
// So you can have up to a 128bit divide from x86-64
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;
const auto EmitSize = ConvertSize(IROp);

const auto Dst = GetReg(Node);
auto Src1 = GetReg(Op->Src1.ID());
auto Src2 = GetReg(Op->Src2.ID());

- if (OpSize == 1) {
+ if (OpSize == IR::OpSize::i8Bit) {
uxtb(EmitSize, TMP1, Src1);
uxtb(EmitSize, TMP2, Src2);

Src1 = TMP1;
Src2 = TMP2;
- } else if (OpSize == 2) {
+ } else if (OpSize == IR::OpSize::i16Bit) {
uxth(EmitSize, TMP1, Src1);
uxth(EmitSize, TMP2, Src2);

@@ -458,20 +458,20 @@ DEF_OP(Rem) {
auto Op = IROp->C<IR::IROp_Rem>();
// Each source is OpSize in size
// So you can have up to a 128bit divide from x86-64
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;
const auto EmitSize = ConvertSize(IROp);

const auto Dst = GetReg(Node);
auto Src1 = GetReg(Op->Src1.ID());
auto Src2 = GetReg(Op->Src2.ID());

- if (OpSize == 1) {
+ if (OpSize == IR::OpSize::i8Bit) {
sxtb(EmitSize, TMP1, Src1);
sxtb(EmitSize, TMP2, Src2);

Src1 = TMP1;
Src2 = TMP2;
- } else if (OpSize == 2) {
+ } else if (OpSize == IR::OpSize::i16Bit) {
sxth(EmitSize, TMP1, Src1);
sxth(EmitSize, TMP2, Src2);

@@ -487,20 +487,20 @@ DEF_OP(URem) {
auto Op = IROp->C<IR::IROp_URem>();
// Each source is OpSize in size
// So you can have up to a 128bit divide from x86-64
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;
const auto EmitSize = ConvertSize(IROp);

const auto Dst = GetReg(Node);
auto Src1 = GetReg(Op->Src1.ID());
auto Src2 = GetReg(Op->Src2.ID());

- if (OpSize == 1) {
+ if (OpSize == IR::OpSize::i8Bit) {
uxtb(EmitSize, TMP1, Src1);
uxtb(EmitSize, TMP2, Src2);

Src1 = TMP1;
Src2 = TMP2;
- } else if (OpSize == 2) {
+ } else if (OpSize == IR::OpSize::i16Bit) {
uxth(EmitSize, TMP1, Src1);
uxth(EmitSize, TMP2, Src2);

@@ -514,15 +514,15 @@ DEF_OP(URem) {

DEF_OP(MulH) {
auto Op = IROp->C<IR::IROp_MulH>();
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;

- LOGMAN_THROW_AA_FMT(OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize);
+ LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit, "Unsupported {} size: {}", __func__, OpSize);

const auto Dst = GetReg(Node);
const auto Src1 = GetReg(Op->Src1.ID());
const auto Src2 = GetReg(Op->Src2.ID());

- if (OpSize == 4) {
+ if (OpSize == IR::OpSize::i32Bit) {
sxtw(TMP1, Src1.W());
sxtw(TMP2, Src2.W());
mul(ARMEmitter::Size::i32Bit, Dst, TMP1, TMP2);
@@ -534,15 +534,15 @@ DEF_OP(MulH) {

DEF_OP(UMulH) {
auto Op = IROp->C<IR::IROp_UMulH>();
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;

- LOGMAN_THROW_AA_FMT(OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize);
+ LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit, "Unsupported {} size: {}", __func__, OpSize);

const auto Dst = GetReg(Node);
const auto Src1 = GetReg(Op->Src1.ID());
const auto Src2 = GetReg(Op->Src2.ID());

- if (OpSize == 4) {
+ if (OpSize == IR::OpSize::i32Bit) {
uxtw(ARMEmitter::Size::i64Bit, TMP1, Src1);
uxtw(ARMEmitter::Size::i64Bit, TMP2, Src2);
mul(ARMEmitter::Size::i64Bit, Dst, TMP1, TMP2);
@@ -593,7 +593,7 @@ DEF_OP(Ornror) {

DEF_OP(AndWithFlags) {
auto Op = IROp->C<IR::IROp_AndWithFlags>();
- const uint8_t OpSize = IROp->Size;
+ const auto OpSize = IROp->Size;
const auto EmitSize = ConvertSize(IROp);

uint64_t Const;
@@ -601,7 +601,7 @@ DEF_OP(AndWithFlags) {
auto Src1 = GetReg(Op->Src1.ID());

// See TestNZ
- if (OpSize < 4) {
+ if (OpSize < IR::OpSize::i32Bit) {
if (IsInlineConstant(Op->Src2, &Const)) {
and_(EmitSize, Dst, Src1, Const);
} else {
@@ -614,7 +614,7 @@ DEF_OP(AndWithFlags) {
}
}

- unsigned Shift = 32 - (OpSize * 8);
+ unsigned Shift = 32 - IR::OpSizeAsBits(OpSize);
cmn(EmitSize, ARMEmitter::Reg::zr, Dst, ARMEmitter::ShiftType::LSL, Shift);
} else {
if (IsInlineConstant(Op->Src2, &Const)) {
@@ -648,21 +648,21 @@ DEF_OP(Ashr) {

uint64_t Const;
if (IsInlineConstant(Op->Src2, &Const)) {
- if (OpSize >= 4) {
+ if (OpSize >= IR::OpSize::i32Bit) {
asr(EmitSize, Dst, Src1, (unsigned int)Const);
} else {
- sbfx(EmitSize, TMP1, Src1, 0, OpSize * 8);
+ sbfx(EmitSize, TMP1, Src1, 0, IR::OpSizeAsBits(OpSize));
asr(EmitSize, Dst, TMP1, (unsigned int)Const);
- ubfx(EmitSize, Dst, Dst, 0, OpSize * 8);
+ ubfx(EmitSize, Dst, Dst, 0, IR::OpSizeAsBits(OpSize));
}
} else {
const auto Src2 = GetReg(Op->Src2.ID());
- if (OpSize >= 4) {
+ if (OpSize >= IR::OpSize::i32Bit) {
asrv(EmitSize, Dst, Src1, Src2);
} else {
- sbfx(EmitSize, TMP1, Src1, 0, OpSize * 8);
+ sbfx(EmitSize, TMP1, Src1, 0, IR::OpSizeAsBits(OpSize));
asrv(EmitSize, Dst, TMP1, Src2);
- ubfx(EmitSize, Dst, Dst, 0, OpSize * 8);
+ ubfx(EmitSize, Dst, Dst, 0, IR::OpSizeAsBits(OpSize));
}
}
}
@ -897,7 +897,7 @@ DEF_OP(PDep) {
|
||||
DEF_OP(PExt) {
|
||||
auto Op = IROp->C<IR::IROp_PExt>();
|
||||
const auto OpSize = IROp->Size;
|
||||
const auto OpSizeBitsM1 = (OpSize * 8) - 1;
|
||||
const auto OpSizeBitsM1 = IR::OpSizeAsBits(OpSize) - 1;
|
||||
const auto EmitSize = ConvertSize48(IROp);
|
||||
|
||||
const auto Input = GetReg(Op->Input.ID());
|
||||
@ -952,8 +952,8 @@ DEF_OP(PExt) {
|
||||
|
||||
DEF_OP(LDiv) {
|
||||
auto Op = IROp->C<IR::IROp_LDiv>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= 4 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
const auto OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= IR::OpSize::i32Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Upper = GetReg(Op->Upper.ID());
|
||||
@ -963,14 +963,14 @@ DEF_OP(LDiv) {
|
||||
// Each source is OpSize in size
|
||||
// So you can have up to a 128bit divide from x86-64
|
||||
switch (OpSize) {
|
||||
case 2: {
|
||||
case IR::OpSize::i16Bit: {
|
||||
uxth(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 16, 16);
|
||||
sxth(EmitSize, TMP2, Divisor);
|
||||
sdiv(EmitSize, Dst, TMP1, TMP2);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
case IR::OpSize::i32Bit: {
|
||||
// TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits.
|
||||
mov(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 32, 32);
|
||||
@ -978,7 +978,7 @@ DEF_OP(LDiv) {
|
||||
sdiv(EmitSize, Dst, TMP1, TMP2);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
case IR::OpSize::i64Bit: {
|
||||
ARMEmitter::SingleUseForwardLabel Only64Bit {};
|
||||
ARMEmitter::SingleUseForwardLabel LongDIVRet {};
|
||||
|
||||
@ -1022,8 +1022,8 @@ DEF_OP(LDiv) {
|
||||
|
||||
DEF_OP(LUDiv) {
|
||||
auto Op = IROp->C<IR::IROp_LUDiv>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= 4 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
const auto OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= IR::OpSize::i32Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Upper = GetReg(Op->Upper.ID());
|
||||
@ -1033,20 +1033,20 @@ DEF_OP(LUDiv) {
|
||||
// Each source is OpSize in size
|
||||
// So you can have up to a 128bit divide from x86-64=
|
||||
switch (OpSize) {
|
||||
case 2: {
|
||||
case IR::OpSize::i16Bit: {
|
||||
uxth(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 16, 16);
|
||||
udiv(EmitSize, Dst, TMP1, Divisor);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
case IR::OpSize::i32Bit: {
|
||||
// TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits.
|
||||
mov(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 32, 32);
|
||||
udiv(EmitSize, Dst, TMP1, Divisor);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
case IR::OpSize::i64Bit: {
|
||||
ARMEmitter::SingleUseForwardLabel Only64Bit {};
|
||||
ARMEmitter::SingleUseForwardLabel LongDIVRet {};
|
||||
|
||||
@ -1086,8 +1086,8 @@ DEF_OP(LUDiv) {
|
||||
|
||||
DEF_OP(LRem) {
|
||||
auto Op = IROp->C<IR::IROp_LRem>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= 4 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
const auto OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= IR::OpSize::i32Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Upper = GetReg(Op->Upper.ID());
|
||||
@ -1097,7 +1097,7 @@ DEF_OP(LRem) {
|
||||
// Each source is OpSize in size
|
||||
// So you can have up to a 128bit divide from x86-64
|
||||
switch (OpSize) {
|
||||
case 2: {
|
||||
case IR::OpSize::i16Bit: {
|
||||
uxth(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 16, 16);
|
||||
sxth(EmitSize, TMP2, Divisor);
|
||||
@ -1105,7 +1105,7 @@ DEF_OP(LRem) {
|
||||
msub(EmitSize, Dst, TMP3, TMP2, TMP1);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
case IR::OpSize::i32Bit: {
|
||||
// TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits.
|
||||
mov(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 32, 32);
|
||||
@ -1114,7 +1114,7 @@ DEF_OP(LRem) {
|
||||
msub(EmitSize, Dst, TMP2, TMP3, TMP1);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
case IR::OpSize::i64Bit: {
|
||||
ARMEmitter::SingleUseForwardLabel Only64Bit {};
|
||||
ARMEmitter::SingleUseForwardLabel LongDIVRet {};
|
||||
|
||||
@ -1160,8 +1160,8 @@ DEF_OP(LRem) {
|
||||
|
||||
DEF_OP(LURem) {
|
||||
auto Op = IROp->C<IR::IROp_LURem>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= 4 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
const auto OpSize = IROp->Size;
|
||||
const auto EmitSize = OpSize >= IR::OpSize::i32Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Upper = GetReg(Op->Upper.ID());
|
||||
@ -1171,14 +1171,14 @@ DEF_OP(LURem) {
|
||||
// Each source is OpSize in size
|
||||
// So you can have up to a 128bit divide from x86-64
|
||||
switch (OpSize) {
|
||||
case 2: {
|
||||
case IR::OpSize::i16Bit: {
|
||||
uxth(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 16, 16);
|
||||
udiv(EmitSize, TMP2, TMP1, Divisor);
|
||||
msub(EmitSize, Dst, TMP2, Divisor, TMP1);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
case IR::OpSize::i32Bit: {
|
||||
// TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits.
|
||||
mov(EmitSize, TMP1, Lower);
|
||||
bfi(EmitSize, TMP1, Upper, 32, 32);
|
||||
@ -1186,7 +1186,7 @@ DEF_OP(LURem) {
|
||||
msub(EmitSize, Dst, TMP2, Divisor, TMP1);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
case IR::OpSize::i64Bit: {
|
||||
ARMEmitter::SingleUseForwardLabel Only64Bit {};
|
||||
ARMEmitter::SingleUseForwardLabel LongDIVRet {};
|
||||
|
||||
@ -1238,30 +1238,30 @@ DEF_OP(Not) {
|
||||
|
||||
DEF_OP(Popcount) {
|
||||
auto Op = IROp->C<IR::IROp_Popcount>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto OpSize = IROp->Size;
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Src = GetReg(Op->Src.ID());
|
||||
|
||||
switch (OpSize) {
|
||||
case 0x1:
|
||||
case IR::OpSize::i8Bit:
|
||||
fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src);
|
||||
// only use lowest byte
|
||||
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
|
||||
break;
|
||||
case 0x2:
|
||||
case IR::OpSize::i16Bit:
|
||||
fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src);
|
||||
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
|
||||
// only count two lowest bytes
|
||||
addp(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D(), VTMP1.D());
|
||||
break;
|
||||
case 0x4:
|
||||
case IR::OpSize::i32Bit:
|
||||
fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src);
|
||||
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
|
||||
// fmov has zero extended, unused bytes are zero
|
||||
addv(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
|
||||
break;
|
||||
case 0x8:
|
||||
case IR::OpSize::i64Bit:
|
||||
fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), Src);
|
||||
cnt(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());
|
||||
// fmov has zero extended, unused bytes are zero
|
||||
@ -1288,17 +1288,18 @@ DEF_OP(FindLSB) {
|
||||
|
||||
DEF_OP(FindMSB) {
|
||||
auto Op = IROp->C<IR::IROp_FindMSB>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto OpSize = IROp->Size;
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == 2 || OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize);
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
|
||||
"Unsupported {} size: {}", __func__, OpSize);
|
||||
const auto EmitSize = ConvertSize(IROp);
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Src = GetReg(Op->Src.ID());
|
||||
|
||||
movz(ARMEmitter::Size::i64Bit, TMP1, OpSize * 8 - 1);
|
||||
movz(ARMEmitter::Size::i64Bit, TMP1, IR::OpSizeAsBits(OpSize) - 1);
|
||||
|
||||
if (OpSize == 2) {
|
||||
if (OpSize == IR::OpSize::i16Bit) {
|
||||
lsl(EmitSize, Dst, Src, 16);
|
||||
clz(EmitSize, Dst, Dst);
|
||||
} else {
|
||||
@ -1310,9 +1311,10 @@ DEF_OP(FindMSB) {
|
||||
|
||||
DEF_OP(FindTrailingZeroes) {
|
||||
auto Op = IROp->C<IR::IROp_FindTrailingZeroes>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto OpSize = IROp->Size;
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == 2 || OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize);
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
|
||||
"Unsupported {} size: {}", __func__, OpSize);
|
||||
const auto EmitSize = ConvertSize(IROp);
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
@ -1320,7 +1322,7 @@ DEF_OP(FindTrailingZeroes) {
|
||||
|
||||
rbit(EmitSize, Dst, Src);
|
||||
|
||||
if (OpSize == 2) {
|
||||
if (OpSize == IR::OpSize::i16Bit) {
|
||||
// This orr does two things. First, if the (masked) source is zero, it
|
||||
// reverses to zero in the top so it forces clz to return 16. Second, it
|
||||
// ensures garbage in the upper bits of the source don't affect clz, because
|
||||
@ -1334,15 +1336,16 @@ DEF_OP(FindTrailingZeroes) {
|
||||
|
||||
DEF_OP(CountLeadingZeroes) {
|
||||
auto Op = IROp->C<IR::IROp_CountLeadingZeroes>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto OpSize = IROp->Size;
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == 2 || OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize);
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
|
||||
"Unsupported {} size: {}", __func__, OpSize);
|
||||
const auto EmitSize = ConvertSize(IROp);
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Src = GetReg(Op->Src.ID());
|
||||
|
||||
if (OpSize == 2) {
|
||||
if (OpSize == IR::OpSize::i16Bit) {
|
||||
// Expressing as lsl+orr+clz clears away any garbage in the upper bits
|
||||
// (alternatively could do uxth+clz+sub.. equal cost in total).
|
||||
lsl(EmitSize, Dst, Src, 16);
|
||||
@ -1355,16 +1358,17 @@ DEF_OP(CountLeadingZeroes) {
|
||||
|
||||
DEF_OP(Rev) {
|
||||
auto Op = IROp->C<IR::IROp_Rev>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto OpSize = IROp->Size;
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == 2 || OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize);
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
|
||||
"Unsupported {} size: {}", __func__, OpSize);
|
||||
const auto EmitSize = ConvertSize(IROp);
|
||||
|
||||
const auto Dst = GetReg(Node);
|
||||
const auto Src = GetReg(Op->Src.ID());
|
||||
|
||||
rev(EmitSize, Dst, Src);
|
||||
if (OpSize == 2) {
|
||||
if (OpSize == IR::OpSize::i16Bit) {
|
||||
lsr(EmitSize, Dst, Dst, 16);
|
||||
}
|
||||
}
|
||||
@ -1390,10 +1394,10 @@ DEF_OP(Bfi) {
|
||||
mov(EmitSize, TMP1, SrcDst);
|
||||
bfi(EmitSize, TMP1, Src, Op->lsb, Op->Width);
|
||||
|
||||
if (IROp->Size >= 4) {
|
||||
if (IROp->Size >= IR::OpSize::i32Bit) {
|
||||
mov(EmitSize, Dst, TMP1.R());
|
||||
} else {
|
||||
ubfx(EmitSize, Dst, TMP1, 0, IROp->Size * 8);
|
||||
ubfx(EmitSize, Dst, TMP1, 0, IR::OpSizeAsBits(IROp->Size));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1424,7 +1428,7 @@ DEF_OP(Bfxil) {
|
||||
|
||||
DEF_OP(Bfe) {
|
||||
auto Op = IROp->C<IR::IROp_Bfe>();
|
||||
LOGMAN_THROW_AA_FMT(IROp->Size <= 8, "OpSize is too large for BFE: {}", IROp->Size);
|
||||
LOGMAN_THROW_AA_FMT(IROp->Size <= IR::OpSize::i64Bit, "OpSize is too large for BFE: {}", IROp->Size);
|
||||
LOGMAN_THROW_AA_FMT(Op->Width != 0, "Invalid BFE width of 0");
|
||||
const auto EmitSize = ConvertSize(IROp);
|
||||
|
||||
@ -1434,7 +1438,7 @@ DEF_OP(Bfe) {
|
||||
if (Op->lsb == 0 && Op->Width == 32) {
|
||||
mov(ARMEmitter::Size::i32Bit, Dst, Src);
|
||||
} else if (Op->lsb == 0 && Op->Width == 64) {
|
||||
LOGMAN_THROW_AA_FMT(IROp->Size == 8, "Must be 64-bit wide register");
|
||||
LOGMAN_THROW_AA_FMT(IROp->Size == IR::OpSize::i64Bit, "Must be 64-bit wide register");
|
||||
mov(ARMEmitter::Size::i64Bit, Dst, Src);
|
||||
} else {
|
||||
ubfx(EmitSize, Dst, Src, Op->lsb, Op->Width);
|
||||
@ -1451,7 +1455,7 @@ DEF_OP(Sbfe) {
|
||||
|
||||
DEF_OP(Select) {
|
||||
auto Op = IROp->C<IR::IROp_Select>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
const auto OpSize = IROp->Size;
|
||||
const auto EmitSize = ConvertSize(IROp);
|
||||
const auto CompareEmitSize = Op->CompareSize == IR::OpSize::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
|
||||
|
||||
@ -1479,7 +1483,7 @@ DEF_OP(Select) {
|
||||
bool is_const_true = IsInlineConstant(Op->TrueVal, &const_true);
|
||||
bool is_const_false = IsInlineConstant(Op->FalseVal, &const_false);
|
||||
|
||||
uint64_t all_ones = OpSize == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
uint64_t all_ones = OpSize == IR::OpSize::i64Bit ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
|
||||
ARMEmitter::Register Dst = GetReg(Node);
|
||||
|
||||
@ -1508,7 +1512,7 @@ DEF_OP(NZCVSelect) {
|
||||
bool is_const_true = IsInlineConstant(Op->TrueVal, &const_true);
|
||||
bool is_const_false = IsInlineConstant(Op->FalseVal, &const_false);
|
||||
|
||||
uint64_t all_ones = IROp->Size == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
uint64_t all_ones = IROp->Size == IR::OpSize::i64Bit ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
|
||||
ARMEmitter::Register Dst = GetReg(Node);
|
||||
|
||||
@ -1547,7 +1551,7 @@ DEF_OP(VExtractToGPR) {
|
||||
|
||||
constexpr auto AVXRegBitSize = Core::CPUState::XMM_AVX_REG_SIZE * 8;
|
||||
constexpr auto SSERegBitSize = Core::CPUState::XMM_SSE_REG_SIZE * 8;
|
||||
const auto ElementSizeBits = Op->Header.ElementSize * 8;
|
||||
const auto ElementSizeBits = IR::OpSizeAsBits(Op->Header.ElementSize);
|
||||
|
||||
const auto Offset = ElementSizeBits * Op->Index;
|
||||
const auto Is256Bit = Offset >= SSERegBitSize;
|
||||
@ -1558,10 +1562,10 @@ DEF_OP(VExtractToGPR) {
|
||||
|
||||
const auto PerformMove = [&](const ARMEmitter::VRegister reg, int index) {
|
||||
switch (OpSize) {
|
||||
case 1: umov<ARMEmitter::SubRegSize::i8Bit>(Dst, Vector, index); break;
|
||||
case 2: umov<ARMEmitter::SubRegSize::i16Bit>(Dst, Vector, index); break;
|
||||
case 4: umov<ARMEmitter::SubRegSize::i32Bit>(Dst, Vector, index); break;
|
||||
case 8: umov<ARMEmitter::SubRegSize::i64Bit>(Dst, Vector, index); break;
|
||||
case IR::OpSize::i8Bit: umov<ARMEmitter::SubRegSize::i8Bit>(Dst, Vector, index); break;
|
||||
case IR::OpSize::i16Bit: umov<ARMEmitter::SubRegSize::i16Bit>(Dst, Vector, index); break;
|
||||
case IR::OpSize::i32Bit: umov<ARMEmitter::SubRegSize::i32Bit>(Dst, Vector, index); break;
|
||||
case IR::OpSize::i64Bit: umov<ARMEmitter::SubRegSize::i64Bit>(Dst, Vector, index); break;
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", OpSize); break;
|
||||
}
|
||||
};
|
||||
@ -1586,10 +1590,10 @@ DEF_OP(VExtractToGPR) {
|
||||
// upper half of the vector.
|
||||
const auto SanitizedIndex = [OpSize, Op] {
|
||||
switch (OpSize) {
|
||||
case 1: return Op->Index - 16;
|
||||
case 2: return Op->Index - 8;
|
||||
case 4: return Op->Index - 4;
|
||||
case 8: return Op->Index - 2;
|
||||
case IR::OpSize::i8Bit: return Op->Index - 16;
|
||||
case IR::OpSize::i16Bit: return Op->Index - 8;
|
||||
case IR::OpSize::i32Bit: return Op->Index - 4;
|
||||
case IR::OpSize::i64Bit: return Op->Index - 2;
|
||||
default: LOGMAN_MSG_A_FMT("Unhandled OpSize: {}", OpSize); return 0;
|
||||
}
|
||||
}();
|
||||
|
@ -15,18 +15,18 @@ DEF_OP(VInsGPR) {
|
||||
|
||||
const auto DestIdx = Op->DestIdx;
|
||||
const auto ElementSize = Op->Header.ElementSize;
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto SubEmitSize = ConvertSubRegSize8(IROp);
|
||||
const auto ElementsPer128Bit = 16 / ElementSize;
|
||||
const auto ElementsPer128Bit = IR::NumElements(IR::OpSize::i128Bit, ElementSize);
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
const auto DestVector = GetVReg(Op->DestVector.ID());
|
||||
const auto Src = GetReg(Op->Src.ID());
|
||||
|
||||
if (HostSupportsSVE256 && Is256Bit) {
|
||||
const auto ElementSizeBits = ElementSize * 8;
|
||||
const auto ElementSizeBits = IR::OpSizeAsBits(ElementSize);
|
||||
const auto Offset = ElementSizeBits * DestIdx;
|
||||
|
||||
const auto SSEBitSize = Core::CPUState::XMM_SSE_REG_SIZE * 8;
|
||||
@ -90,16 +90,16 @@ DEF_OP(VCastFromGPR) {
|
||||
auto Src = GetReg(Op->Src.ID());
|
||||
|
||||
switch (Op->Header.ElementSize) {
|
||||
case 1:
|
||||
case IR::OpSize::i8Bit:
|
||||
uxtb(ARMEmitter::Size::i32Bit, TMP1, Src);
|
||||
fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1);
|
||||
break;
|
||||
case 2:
|
||||
case IR::OpSize::i16Bit:
|
||||
uxth(ARMEmitter::Size::i32Bit, TMP1, Src);
|
||||
fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1);
|
||||
break;
|
||||
case 4: fmov(ARMEmitter::Size::i32Bit, Dst.S(), Src); break;
|
||||
case 8: fmov(ARMEmitter::Size::i64Bit, Dst.D(), Src); break;
|
||||
case IR::OpSize::i32Bit: fmov(ARMEmitter::Size::i32Bit, Dst.S(), Src); break;
|
||||
case IR::OpSize::i64Bit: fmov(ARMEmitter::Size::i64Bit, Dst.D(), Src); break;
|
||||
default: LOGMAN_MSG_A_FMT("Unknown castGPR element size: {}", Op->Header.ElementSize);
|
||||
}
|
||||
}
|
||||
@ -111,7 +111,7 @@ DEF_OP(VDupFromGPR) {
|
||||
const auto Dst = GetVReg(Node);
|
||||
const auto Src = GetReg(Op->Src.ID());
|
||||
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto SubEmitSize = ConvertSubRegSize8(IROp);
|
||||
@ -126,7 +126,7 @@ DEF_OP(VDupFromGPR) {
|
||||
DEF_OP(Float_FromGPR_S) {
|
||||
const auto Op = IROp->C<IR::IROp_Float_FromGPR_S>();
|
||||
|
||||
const uint16_t ElementSize = Op->Header.ElementSize;
|
||||
const uint16_t ElementSize = IR::OpSizeToSize(Op->Header.ElementSize);
|
||||
const uint16_t Conv = (ElementSize << 8) | IR::OpSizeToSize(Op->SrcElementSize);
|
||||
|
||||
auto Dst = GetVReg(Node);
|
||||
@ -165,7 +165,7 @@ DEF_OP(Float_FromGPR_S) {
|
||||
|
||||
DEF_OP(Float_FToF) {
|
||||
auto Op = IROp->C<IR::IROp_Float_FToF>();
|
||||
const uint16_t Conv = (Op->Header.ElementSize << 8) | IR::OpSizeToSize(Op->SrcElementSize);
|
||||
const uint16_t Conv = (IR::OpSizeToSize(Op->Header.ElementSize) << 8) | IR::OpSizeToSize(Op->SrcElementSize);
|
||||
|
||||
auto Dst = GetVReg(Node);
|
||||
auto Src = GetVReg(Op->Scalar.ID());
|
||||
@ -205,7 +205,7 @@ DEF_OP(Vector_SToF) {
|
||||
|
||||
const auto ElementSize = Op->Header.ElementSize;
|
||||
const auto SubEmitSize = ConvertSubRegSize248(IROp);
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
@ -215,15 +215,15 @@ DEF_OP(Vector_SToF) {
|
||||
scvtf(Dst.Z(), SubEmitSize, Mask.Merging(), Vector.Z(), SubEmitSize);
|
||||
} else {
|
||||
if (OpSize == ElementSize) {
|
||||
if (ElementSize == 8) {
|
||||
if (ElementSize == IR::OpSize::i64Bit) {
|
||||
scvtf(ARMEmitter::ScalarRegSize::i64Bit, Dst.D(), Vector.D());
|
||||
} else if (ElementSize == 4) {
|
||||
} else if (ElementSize == IR::OpSize::i32Bit) {
|
||||
scvtf(ARMEmitter::ScalarRegSize::i32Bit, Dst.S(), Vector.S());
|
||||
} else {
|
||||
scvtf(ARMEmitter::ScalarRegSize::i16Bit, Dst.H(), Vector.H());
|
||||
}
|
||||
} else {
|
||||
if (OpSize == 8) {
|
||||
if (OpSize == IR::OpSize::i64Bit) {
|
||||
scvtf(SubEmitSize, Dst.D(), Vector.D());
|
||||
} else {
|
||||
scvtf(SubEmitSize, Dst.Q(), Vector.Q());
|
||||
@ -238,7 +238,7 @@ DEF_OP(Vector_FToZS) {
|
||||
|
||||
const auto ElementSize = Op->Header.ElementSize;
|
||||
const auto SubEmitSize = ConvertSubRegSize248(IROp);
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
@ -248,15 +248,15 @@ DEF_OP(Vector_FToZS) {
|
||||
fcvtzs(Dst.Z(), SubEmitSize, Mask.Merging(), Vector.Z(), SubEmitSize);
|
||||
} else {
|
||||
if (OpSize == ElementSize) {
|
||||
if (ElementSize == 8) {
|
||||
if (ElementSize == IR::OpSize::i64Bit) {
|
||||
fcvtzs(ARMEmitter::ScalarRegSize::i64Bit, Dst.D(), Vector.D());
|
||||
} else if (ElementSize == 4) {
|
||||
} else if (ElementSize == IR::OpSize::i32Bit) {
|
||||
fcvtzs(ARMEmitter::ScalarRegSize::i32Bit, Dst.S(), Vector.S());
|
||||
} else {
|
||||
fcvtzs(ARMEmitter::ScalarRegSize::i16Bit, Dst.H(), Vector.H());
|
||||
}
|
||||
} else {
|
||||
if (OpSize == 8) {
|
||||
if (OpSize == IR::OpSize::i64Bit) {
|
||||
fcvtzs(SubEmitSize, Dst.D(), Vector.D());
|
||||
} else {
|
||||
fcvtzs(SubEmitSize, Dst.Q(), Vector.Q());
|
||||
@ -269,7 +269,7 @@ DEF_OP(Vector_FToS) {
|
||||
const auto Op = IROp->C<IR::IROp_Vector_FToS>();
|
||||
const auto OpSize = IROp->Size;
|
||||
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto SubEmitSize = ConvertSubRegSize248(IROp);
|
||||
@ -284,7 +284,7 @@ DEF_OP(Vector_FToS) {
|
||||
} else {
|
||||
const auto Dst = GetVReg(Node);
|
||||
const auto Vector = GetVReg(Op->Vector.ID());
|
||||
if (OpSize == 8) {
|
||||
if (OpSize == IR::OpSize::i64Bit) {
|
||||
frinti(SubEmitSize, Dst.D(), Vector.D());
|
||||
fcvtzs(SubEmitSize, Dst.D(), Dst.D());
|
||||
} else {
|
||||
@ -300,10 +300,10 @@ DEF_OP(Vector_FToF) {
|
||||
|
||||
const auto ElementSize = Op->Header.ElementSize;
|
||||
const auto SubEmitSize = ConvertSubRegSize248(IROp);
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto Conv = (ElementSize << 8) | IR::OpSizeToSize(Op->SrcElementSize);
|
||||
const auto Conv = (IR::OpSizeToSize(ElementSize) << 8) | IR::OpSizeToSize(Op->SrcElementSize);
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
const auto Vector = GetVReg(Op->Vector.ID());
|
||||
@ -403,7 +403,7 @@ DEF_OP(Vector_FToI) {
|
||||
|
||||
const auto ElementSize = Op->Header.ElementSize;
|
||||
const auto SubEmitSize = ConvertSubRegSize248(IROp);
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
@ -427,15 +427,15 @@ DEF_OP(Vector_FToI) {
|
||||
// frinti having AdvSIMD, AdvSIMD scalar, and an SVE version),
|
||||
// we can't just use a lambda without some seriously ugly casting.
|
||||
// This is fairly self-contained otherwise.
|
||||
#define ROUNDING_FN(name) \
|
||||
if (ElementSize == 2) { \
|
||||
name(Dst.H(), Vector.H()); \
|
||||
} else if (ElementSize == 4) { \
|
||||
name(Dst.S(), Vector.S()); \
|
||||
} else if (ElementSize == 8) { \
|
||||
name(Dst.D(), Vector.D()); \
|
||||
} else { \
|
||||
FEX_UNREACHABLE; \
|
||||
#define ROUNDING_FN(name) \
|
||||
if (ElementSize == IR::OpSize::i16Bit) { \
|
||||
name(Dst.H(), Vector.H()); \
|
||||
} else if (ElementSize == IR::OpSize::i32Bit) { \
|
||||
name(Dst.S(), Vector.S()); \
|
||||
} else if (ElementSize == IR::OpSize::i64Bit) { \
|
||||
name(Dst.D(), Vector.D()); \
|
||||
} else { \
|
||||
FEX_UNREACHABLE; \
|
||||
}
|
||||
|
||||
switch (Op->Round) {
|
||||
@ -464,7 +464,7 @@ DEF_OP(Vector_F64ToI32) {
|
||||
const auto OpSize = IROp->Size;
|
||||
const auto Round = Op->Round;
|
||||
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
|
||||
LOGMAN_THROW_A_FMT(!Is256Bit || (Is256Bit && HostSupportsSVE256), "Need SVE256 support in order to use {} with 256-bit operation", __func__);
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
|
@ -24,7 +24,7 @@ DEF_OP(VAESEnc) {
|
||||
const auto State = GetVReg(Op->State.ID());
|
||||
const auto ZeroReg = GetVReg(Op->ZeroReg.ID());
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations.");
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");
|
||||
|
||||
if (Dst == State && Dst != Key) {
|
||||
// Optimal case in which Dst already contains the starting state.
|
||||
@ -49,7 +49,7 @@ DEF_OP(VAESEncLast) {
|
||||
const auto State = GetVReg(Op->State.ID());
|
||||
const auto ZeroReg = GetVReg(Op->ZeroReg.ID());
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations.");
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");
|
||||
|
||||
if (Dst == State && Dst != Key) {
|
||||
// Optimal case in which Dst already contains the starting state.
|
||||
@ -72,7 +72,7 @@ DEF_OP(VAESDec) {
|
||||
const auto State = GetVReg(Op->State.ID());
|
||||
const auto ZeroReg = GetVReg(Op->ZeroReg.ID());
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations.");
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");
|
||||
|
||||
if (Dst == State && Dst != Key) {
|
||||
// Optimal case in which Dst already contains the starting state.
|
||||
@ -97,7 +97,7 @@ DEF_OP(VAESDecLast) {
|
||||
const auto State = GetVReg(Op->State.ID());
|
||||
const auto ZeroReg = GetVReg(Op->ZeroReg.ID());
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations.");
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");
|
||||
|
||||
if (Dst == State && Dst != Key) {
|
||||
// Optimal case in which Dst already contains the starting state.
|
||||
@ -193,7 +193,7 @@ DEF_OP(PCLMUL) {
|
||||
const auto Src1 = GetVReg(Op->Src1.ID());
|
||||
const auto Src2 = GetVReg(Op->Src2.ID());
|
||||
|
||||
LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations.");
|
||||
LOGMAN_THROW_AA_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");
|
||||
|
||||
switch (Op->Selector) {
|
||||
case 0b00000000: pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), Src1.D(), Src2.D()); break;
|
||||
|
@ -228,7 +228,7 @@ private:
|
||||
bool IsGPR(IR::NodeID Node) const;
|
||||
|
||||
[[nodiscard]]
|
||||
ARMEmitter::ExtendedMemOperand GenerateMemOperand(uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
|
||||
ARMEmitter::ExtendedMemOperand GenerateMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
|
||||
IR::MemOffsetType OffsetType, uint8_t OffsetScale);
|
||||
|
||||
// NOTE: Will use TMP1 as a way to encode immediates that happen to fall outside
|
||||
@ -237,7 +237,7 @@ private:
|
||||
// TMP1 is safe to use again once this memory operand is used with its
|
||||
// equivalent loads or stores that this was called for.
|
||||
[[nodiscard]]
|
||||
ARMEmitter::SVEMemOperand GenerateSVEMemOperand(uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
|
||||
ARMEmitter::SVEMemOperand GenerateSVEMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
|
||||
IR::MemOffsetType OffsetType, uint8_t OffsetScale);
|
||||
|
||||
[[nodiscard]]
|
||||
@ -318,15 +318,16 @@ private:
|
||||
|
||||
using ScalarFMAOpCaller =
|
||||
std::function<void(ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2, ARMEmitter::VRegister Src3)>;
|
||||
void VFScalarFMAOperation(uint8_t OpSize, uint8_t ElementSize, ScalarFMAOpCaller ScalarEmit, ARMEmitter::VRegister Dst,
|
||||
void VFScalarFMAOperation(IR::OpSize OpSize, IR::OpSize ElementSize, ScalarFMAOpCaller ScalarEmit, ARMEmitter::VRegister Dst,
|
||||
ARMEmitter::VRegister Upper, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2,
|
||||
ARMEmitter::VRegister Addend);
|
||||
using ScalarBinaryOpCaller = std::function<void(ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2)>;
|
||||
void VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit,
|
||||
void VFScalarOperation(IR::OpSize OpSize, IR::OpSize ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit,
|
||||
ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2);
|
||||
using ScalarUnaryOpCaller = std::function<void(ARMEmitter::VRegister Dst, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar)>;
|
||||
void VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarUnaryOpCaller ScalarEmit, ARMEmitter::VRegister Dst,
|
||||
ARMEmitter::VRegister Vector1, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> Vector2);
|
||||
void VFScalarUnaryOperation(IR::OpSize OpSize, IR::OpSize ElementSize, bool ZeroUpperBits, ScalarUnaryOpCaller ScalarEmit,
|
||||
ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1,
|
||||
std::variant<ARMEmitter::VRegister, ARMEmitter::Register> Vector2);
|
||||
|
||||
void Emulate128BitGather(IR::OpSize Size, IR::OpSize ElementSize, ARMEmitter::VRegister Dst, ARMEmitter::VRegister IncomingDst,
|
||||
std::optional<ARMEmitter::Register> BaseAddr, ARMEmitter::VRegister VectorIndexLow,
|
||||
|
Two file diffs suppressed because they are too large.
@ -168,7 +168,7 @@ void OpDispatchBuilder::RETOp(OpcodeArgs) {
|
||||
|
||||
if (Op->OP == 0xC2) {
|
||||
auto Offset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
|
||||
SP = _Add(IR::SizeToOpSize(GPRSize), SP, Offset);
|
||||
SP = _Add(GPRSize, SP, Offset);
|
||||
}
|
||||
|
||||
// Store the new stack pointer
|
||||
@ -297,7 +297,7 @@ void OpDispatchBuilder::ADCOp(OpcodeArgs, uint32_t SrcIndex) {
|
||||
HandledLock = true;
|
||||
|
||||
Ref DestMem = MakeSegmentAddress(Op, Op->Dest);
|
||||
Before = _AtomicFetchAdd(IR::SizeToOpSize(Size), ALUOp, DestMem);
|
||||
Before = _AtomicFetchAdd(Size, ALUOp, DestMem);
|
||||
} else {
|
||||
Before = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
|
||||
}
|
||||
@ -334,7 +334,7 @@ void OpDispatchBuilder::SBBOp(OpcodeArgs, uint32_t SrcIndex) {
|
||||
|
||||
Ref DestMem = MakeSegmentAddress(Op, Op->Dest);
|
||||
auto SrcPlusCF = IncrementByCarry(OpSize, Src);
|
||||
Before = _AtomicFetchSub(IR::SizeToOpSize(Size), SrcPlusCF, DestMem);
|
||||
Before = _AtomicFetchSub(Size, SrcPlusCF, DestMem);
|
||||
} else {
|
||||
Before = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
|
||||
}
|
||||
@ -494,7 +494,7 @@ void OpDispatchBuilder::POPAOp(OpcodeArgs) {
|
||||
StoreGPRRegister(X86State::REG_RBP, Pop(Size, SP), Size);
|
||||
|
||||
// Skip loading RSP because it'll be correct at the end
|
||||
SP = _RMWHandle(_Add(OpSize::i64Bit, SP, _InlineConstant(Size)));
|
||||
SP = _RMWHandle(_Add(OpSize::i64Bit, SP, _InlineConstant(IR::OpSizeToSize(Size))));
|
||||
|
||||
StoreGPRRegister(X86State::REG_RBX, Pop(Size, SP), Size);
|
||||
StoreGPRRegister(X86State::REG_RDX, Pop(Size, SP), Size);
|
||||
@ -567,7 +567,7 @@ void OpDispatchBuilder::CALLOp(OpcodeArgs) {
|
||||
uint64_t InstRIP = Op->PC + Op->InstSize;
|
||||
uint64_t TargetRIP = InstRIP + TargetOffset;
|
||||
|
||||
Ref NewRIP = _Add(IR::SizeToOpSize(GPRSize), ConstantPC, _Constant(TargetOffset));
|
||||
Ref NewRIP = _Add(GPRSize, ConstantPC, _Constant(TargetOffset));
|
||||
|
||||
// Push the return address.
|
||||
Push(GPRSize, ConstantPC);
|
||||
@ -715,7 +715,7 @@ void OpDispatchBuilder::CMOVOp(OpcodeArgs) {
|
||||
Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
|
||||
}
|
||||
|
||||
auto SrcCond = SelectCC(Op->OP & 0xF, IR::SizeToOpSize(std::max<uint8_t>(OpSize::i32Bit, GetSrcSize(Op))), Src, Dest);
|
||||
auto SrcCond = SelectCC(Op->OP & 0xF, std::max(OpSize::i32Bit, OpSizeFromSrc(Op)), Src, Dest);
|
||||
|
||||
StoreResult(GPRClass, Op, SrcCond, OpSize::iInvalid);
|
||||
}
|
||||
@ -731,7 +731,7 @@ void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) {
|
||||
uint64_t InstRIP = Op->PC + Op->InstSize;
|
||||
uint64_t Target = InstRIP + TargetOffset;
|
||||
|
||||
if (CTX->GetGPRSize() == OpSize::i32Bit) {
|
||||
if (CTX->GetGPROpSize() == OpSize::i32Bit) {
|
||||
// If the GPRSize is 4 then we need to be careful about PC wrapping
|
||||
if (TargetOffset < 0 && -TargetOffset > InstRIP) {
|
||||
// Invert the signed value if we are underflowing
|
||||
@ -802,7 +802,7 @@ void OpDispatchBuilder::CondJUMPRCXOp(OpcodeArgs) {
|
||||
|
||||
BlockSetRIP = true;
|
||||
auto JcxGPRSize = CTX->GetGPROpSize();
|
||||
JcxGPRSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) ? (IR::DivideOpSize(JcxGPRSize, 2)) : JcxGPRSize;
|
||||
JcxGPRSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) ? (JcxGPRSize >> 1) : JcxGPRSize;
|
||||
|
||||
uint64_t Target = Op->PC + Op->InstSize + Op->Src[0].Literal();
|
||||
|
||||
@ -937,7 +937,7 @@ void OpDispatchBuilder::JUMPOp(OpcodeArgs) {
|
||||
uint64_t InstRIP = Op->PC + Op->InstSize;
|
||||
uint64_t TargetRIP = InstRIP + TargetOffset;
|
||||
|
||||
if (CTX->GetGPRSize() == OpSize::i32Bit) {
|
||||
if (CTX->GetGPROpSize() == OpSize::i32Bit) {
|
||||
// If the GPRSize is 4 then we need to be careful about PC wrapping
|
||||
if (TargetOffset < 0 && -TargetOffset > InstRIP) {
|
||||
// Invert the signed value if we are underflowing
|
||||
@ -1000,18 +1000,18 @@ void OpDispatchBuilder::TESTOp(OpcodeArgs, uint32_t SrcIndex) {
|
||||
Ref Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true});
|
||||
Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
|
||||
|
||||
auto Size = GetDstSize(Op);
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
|
||||
uint64_t Const;
|
||||
bool AlwaysNonnegative = false;
|
||||
if (IsValueConstant(WrapNode(Src), &Const)) {
|
||||
// Optimize out masking constants
|
||||
if (Const == (Size == OpSize::i64Bit ? ~0ULL : ((1ull << Size * 8) - 1))) {
|
||||
if (Const == (Size == OpSize::i64Bit ? ~0ULL : ((1ull << IR::OpSizeAsBits(Size)) - 1))) {
|
||||
Src = Dest;
|
||||
}
|
||||
|
||||
// Optimize test with non-sign bits
|
||||
AlwaysNonnegative = (Const & (1ull << ((Size * 8) - 1))) == 0;
|
||||
AlwaysNonnegative = (Const & (1ull << (IR::OpSizeAsBits(Size) - 1))) == 0;
|
||||
}
|
||||
|
||||
if (Dest == Src) {
|
||||
@ -1024,7 +1024,7 @@ void OpDispatchBuilder::TESTOp(OpcodeArgs, uint32_t SrcIndex) {
|
||||
SetNZ_ZeroCV(OpSize::i32Bit, Res);
|
||||
} else {
|
||||
HandleNZ00Write();
|
||||
CalculatePF(_AndWithFlags(IR::SizeToOpSize(Size), Dest, Src));
|
||||
CalculatePF(_AndWithFlags(Size, Dest, Src));
|
||||
}
|
||||
|
||||
InvalidateAF();
|
||||
@ -1049,7 +1049,7 @@ void OpDispatchBuilder::MOVSXDOp(OpcodeArgs) {
|
||||
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, Size, OpSize::iInvalid);
|
||||
} else if (Sext) {
|
||||
// With REX.W then Sext
|
||||
Src = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src);
|
||||
Src = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(Size), 0, Src);
|
||||
StoreResult(GPRClass, Op, Src, OpSize::iInvalid);
|
||||
} else {
|
||||
// Without REX.W then Zext (store result implicitly zero extends)
|
||||
@ -1059,13 +1059,13 @@ void OpDispatchBuilder::MOVSXDOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::MOVSXOp(OpcodeArgs) {
|
||||
// Load garbage in upper bits, since we're sign extending anyway
|
||||
uint8_t Size = GetSrcSize(Op);
|
||||
const auto Size = OpSizeFromSrc(Op);
|
||||
Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

// Sign-extend to DstSize and zero-extend to the register size, using a fast
// path for 32-bit dests where the native 32-bit Sbfe zero extends the top.
uint8_t DstSize = GetDstSize(Op);
Src = _Sbfe(DstSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, Size * 8, 0, Src);
const auto DstSize = OpSizeFromDst(Op);
Src = _Sbfe(DstSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, Src);
StoreResult(GPRClass, Op, Op->Dest, Src, OpSize::iInvalid);
}

@ -1134,10 +1134,10 @@ void OpDispatchBuilder::XCHGOp(OpcodeArgs) {

void OpDispatchBuilder::CDQOp(OpcodeArgs) {
const auto DstSize = OpSizeFromDst(Op);
const auto SrcSize = IR::SizeToOpSize(IR::OpSizeToSize(DstSize) >> 1);
const auto SrcSize = DstSize / 2;
Ref Src = LoadGPRRegister(X86State::REG_RAX, SrcSize, 0, true);

Src = _Sbfe(DstSize <= OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, SrcSize * 8, 0, Src);
Src = _Sbfe(DstSize <= OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, IR::OpSizeAsBits(SrcSize), 0, Src);

StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, OpSize::iInvalid);
}
@ -1374,7 +1374,7 @@ void OpDispatchBuilder::XGetBVOp(OpcodeArgs) {
}

void OpDispatchBuilder::SHLOp(OpcodeArgs) {
const auto Size = GetSrcSize(Op);
const auto Size = OpSizeFromSrc(Op);
auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

@ -1398,7 +1398,7 @@ void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs, bool SHL1Bit) {

void OpDispatchBuilder::SHROp(OpcodeArgs) {
const auto Size = OpSizeFromSrc(Op);
auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = Size >= 4});
auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = Size >= OpSize::i32Bit});
auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

auto ALUOp = _Lshr(std::max(OpSize::i32Bit, Size), Dest, Src);
@ -1557,29 +1557,29 @@ void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) {
}

void OpDispatchBuilder::ASHROp(OpcodeArgs, bool Immediate, bool SHR1Bit) {
const auto Size = GetSrcSize(Op);
const auto Size = OpSizeFromSrc(Op);
const auto OpSize = std::max(OpSize::i32Bit, OpSizeFromDst(Op));

// If Size < 4, then we Sbfe the Dest so we can have garbage.
// Otherwise, if Size = Opsize, then both are 4 or 8 and match the a64
// semantics directly, so again we can have garbage. The only case where we
// need zero-extension here is when the sizes mismatch.
auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = (OpSize == Size) || (Size < 4)});
auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = (OpSize == Size) || (Size < OpSize::i32Bit)});

if (Size < OpSize::i32Bit) {
Dest = _Sbfe(OpSize::i64Bit, Size * 8, 0, Dest);
Dest = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(Size), 0, Dest);
}

if (Immediate) {
uint64_t Shift = LoadConstantShift(Op, SHR1Bit);
Ref Result = _Ashr(IR::SizeToOpSize(OpSize), Dest, _Constant(Shift));
Ref Result = _Ashr(OpSize, Dest, _Constant(Shift));

CalculateFlags_SignShiftRightImmediate(OpSizeFromSrc(Op), Result, Dest, Shift);
CalculateDeferredFlags();
StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
} else {
auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
Ref Result = _Ashr(IR::SizeToOpSize(OpSize), Dest, Src);
Ref Result = _Ashr(OpSize, Dest, Src);

HandleShift(Op, Result, Dest, ShiftType::ASR, Src);
}
@ -1660,12 +1660,12 @@ void OpDispatchBuilder::BEXTRBMIOp(OpcodeArgs) {
// Essentially (Src1 >> Start) & ((1 << Length) - 1)
// along with some edge-case handling and flag setting.

LOGMAN_THROW_A_FMT(Op->InstSize >= OpSize::i32Bit, "No masking needed");
LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
auto* Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto* Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

const auto Size = OpSizeFromSrc(Op);
const auto SrcSize = Size * 8;
const auto SrcSize = IR::OpSizeAsBits(Size);
const auto MaxSrcBit = SrcSize - 1;
auto MaxSrcBitOp = _Constant(Size, MaxSrcBit);

@ -1701,8 +1701,8 @@ void OpDispatchBuilder::BEXTRBMIOp(OpcodeArgs) {

void OpDispatchBuilder::BLSIBMIOp(OpcodeArgs) {
// Equivalent to performing: SRC & -SRC
LOGMAN_THROW_A_FMT(Op->InstSize >= OpSize::i32Bit, "No masking needed");
auto Size = OpSizeFromSrc(Op);
LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
const auto Size = OpSizeFromSrc(Op);

auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto NegatedSrc = _Neg(Size, Src);
@ -1715,15 +1715,15 @@ void OpDispatchBuilder::BLSIBMIOp(OpcodeArgs) {
// inverted ZF.
//
// ZF/SF/OF set as usual.
SetNZ_ZeroCV(GetSrcSize(Op), Result);
SetNZ_ZeroCV(Size, Result);
InvalidatePF_AF();
SetCFInverted(GetRFLAG(X86State::RFLAG_ZF_RAW_LOC));
}

void OpDispatchBuilder::BLSMSKBMIOp(OpcodeArgs) {
// Equivalent to: (Src - 1) ^ Src
LOGMAN_THROW_A_FMT(Op->InstSize >= OpSize::i32Bit, "No masking needed");
auto Size = OpSizeFromSrc(Op);
LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
const auto Size = OpSizeFromSrc(Op);

auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto Result = _Xor(Size, _Sub(Size, Src, _InlineConstant(1)), Src);
@ -1738,24 +1738,25 @@ void OpDispatchBuilder::BLSMSKBMIOp(OpcodeArgs) {

// The output of BLSMSK is always nonzero, so TST will clear Z (along with C
// and O) while setting S.
SetNZ_ZeroCV(GetSrcSize(Op), Result);
SetNZ_ZeroCV(Size, Result);
SetCFInverted(CFInv);
}

void OpDispatchBuilder::BLSRBMIOp(OpcodeArgs) {
// Equivalent to: (Src - 1) & Src
LOGMAN_THROW_A_FMT(Op->InstSize >= OpSize::i32Bit, "No masking needed");
auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto Size = OpSizeFromSrc(Op);
LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
const auto Size = OpSizeFromSrc(Op);

auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto Result = _And(Size, _Sub(Size, Src, _InlineConstant(1)), Src);

StoreResult(GPRClass, Op, Result, OpSize::iInvalid);

auto Zero = _Constant(0);
auto One = _Constant(1);
auto CFInv = _Select(IR::COND_NEQ, Src, Zero, One, Zero);

SetNZ_ZeroCV(GetSrcSize(Op), Result);
SetNZ_ZeroCV(Size, Result);
SetCFInverted(CFInv);
InvalidatePF_AF();
}
@ -1774,13 +1775,13 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
Ref Result;
if (Op->OP == 0x6F7) {
// SARX
Result = _Ashr(IR::SizeToOpSize(Size), Src, Shift);
Result = _Ashr(Size, Src, Shift);
} else if (Op->OP == 0x5F7) {
// SHLX
Result = _Lshl(IR::SizeToOpSize(Size), Src, Shift);
Result = _Lshl(Size, Src, Shift);
} else {
// SHRX
Result = _Lshr(IR::SizeToOpSize(Size), Src, Shift);
Result = _Lshr(Size, Src, Shift);
}

StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
@ -1788,7 +1789,7 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {

void OpDispatchBuilder::BZHI(OpcodeArgs) {
const auto Size = OpSizeFromSrc(Op);
const auto OperandSize = Size * 8;
const auto OperandSize = IR::OpSizeAsBits(Size);

// In 32-bit mode we only look at bottom 32-bit, no 8 or 16-bit BZHI so no
// need to zero-extend sources
@ -1853,13 +1854,12 @@ void OpDispatchBuilder::RORX(OpcodeArgs) {

void OpDispatchBuilder::MULX(OpcodeArgs) {
// RDX is the implied source operand in the instruction
const auto OperandSize = OpSizeFromSrc(Op);
const auto OpSize = IR::SizeToOpSize(OperandSize);
const auto OpSize = OpSizeFromSrc(Op);

// Src1 can be a memory operand, so ensure we constrain to the
// absolute width of the access in that scenario.
const auto GPRSize = CTX->GetGPROpSize();
const auto Src1Size = Op->Src[1].IsGPR() ? GPRSize : OperandSize;
const auto Src1Size = Op->Src[1].IsGPR() ? GPRSize : OpSize;

Ref Src1 = LoadSource_WithOpSize(GPRClass, Op, Op->Src[1], Src1Size, Op->Flags);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX, GPRSize);
@ -1880,7 +1880,7 @@ void OpDispatchBuilder::MULX(OpcodeArgs) {
}

void OpDispatchBuilder::PDEP(OpcodeArgs) {
LOGMAN_THROW_A_FMT(Op->InstSize >= OpSize::i32Bit, "No masking needed");
LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
auto* Input = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto* Mask = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
auto Result = _PDep(OpSizeFromSrc(Op), Input, Mask);
@ -1889,7 +1889,7 @@ void OpDispatchBuilder::PDEP(OpcodeArgs) {
}

void OpDispatchBuilder::PEXT(OpcodeArgs) {
LOGMAN_THROW_A_FMT(Op->InstSize >= OpSize::i32Bit, "No masking needed");
LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
auto* Input = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto* Mask = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
auto Result = _PExt(OpSizeFromSrc(Op), Input, Mask);
@ -2093,7 +2093,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {

StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
},
GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
OpSizeFromSrc(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
}

void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) {
@ -2315,7 +2315,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {

StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
},
GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
OpSizeFromSrc(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
}

void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) {
@ -2405,7 +2405,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {

// Get the bit selection from the src. We need to mask for 8/16-bit, but
// rely on the implicit masking of Lshr for native sizes.
unsigned LshrSize = std::max<uint8_t>(OpSize::i32Bit, Size / 8);
unsigned LshrSize = std::max<uint8_t>(IR::OpSizeToSize(OpSize::i32Bit), Size / 8);
auto BitSelect = (Size == (LshrSize * 8)) ? Src : _And(OpSize::i64Bit, Src, _Constant(Mask));

// OF/SF/ZF/AF/PF undefined.
@ -2458,7 +2458,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
// Load the address to the memory location
Ref Dest = MakeSegmentAddress(Op, Op->Dest);
// Get the bit selection from the src
Ref BitSelect = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Src))), 3, 0, Src);
Ref BitSelect = _Bfe(std::max(OpSize::i32Bit, GetOpSize(Src)), 3, 0, Src);

// Address is provided as bits we want BYTE offsets
// Extract Signed offset
@ -2523,7 +2523,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
}

// Now shift in to the correct bit location
Value = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Value))), Value, BitSelect);
Value = _Lshr(std::max(OpSize::i32Bit, GetOpSize(Value)), Value, BitSelect);

// OF/SF/ZF/AF/PF undefined.
SetCFDirect(Value, ConstantShift, true);
@ -2536,21 +2536,22 @@ void OpDispatchBuilder::IMUL1SrcOp(OpcodeArgs) {
Ref Src2 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

const auto Size = OpSizeFromSrc(Op);
const auto SizeBits = IR::OpSizeAsBits(Size);

Ref Dest {};
Ref ResultHigh {};
switch (Size) {
case OpSize::i8Bit:
case OpSize::i16Bit: {
Src1 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src1);
Src2 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src2);
Src1 = _Sbfe(OpSize::i64Bit, SizeBits, 0, Src1);
Src2 = _Sbfe(OpSize::i64Bit, SizeBits, 0, Src2);
Dest = _Mul(OpSize::i64Bit, Src1, Src2);
ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, Dest);
ResultHigh = _Sbfe(OpSize::i64Bit, SizeBits, SizeBits, Dest);
break;
}
case OpSize::i32Bit: {
ResultHigh = _SMull(Src1, Src2);
ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, ResultHigh);
ResultHigh = _Sbfe(OpSize::i64Bit, SizeBits, SizeBits, ResultHigh);
// Flipped order to save a move
Dest = _Mul(OpSize::i32Bit, Src1, Src2);
break;
@ -2573,6 +2574,7 @@ void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) {
Ref Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

const auto Size = OpSizeFromSrc(Op);
const auto SizeBits = IR::OpSizeAsBits(Size);

Ref Dest {};
Ref ResultHigh {};
@ -2580,15 +2582,15 @@ void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) {
switch (Size) {
case OpSize::i8Bit:
case OpSize::i16Bit: {
Src1 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src1);
Src2 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src2);
Src1 = _Sbfe(OpSize::i64Bit, SizeBits, 0, Src1);
Src2 = _Sbfe(OpSize::i64Bit, SizeBits, 0, Src2);
Dest = _Mul(OpSize::i64Bit, Src1, Src2);
ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, Dest);
ResultHigh = _Sbfe(OpSize::i64Bit, SizeBits, SizeBits, Dest);
break;
}
case OpSize::i32Bit: {
ResultHigh = _SMull(Src1, Src2);
ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, ResultHigh);
ResultHigh = _Sbfe(OpSize::i64Bit, SizeBits, SizeBits, ResultHigh);
// Flipped order to save a move
Dest = _Mul(OpSize::i32Bit, Src1, Src2);
break;
@ -2608,13 +2610,14 @@ void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) {

void OpDispatchBuilder::IMULOp(OpcodeArgs) {
const auto Size = OpSizeFromSrc(Op);
const auto SizeBits = IR::OpSizeAsBits(Size);

Ref Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
Ref Src2 = LoadGPRRegister(X86State::REG_RAX);

if (Size != OpSize::i64Bit) {
Src1 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src1);
Src2 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src2);
Src1 = _Sbfe(OpSize::i64Bit, SizeBits, 0, Src1);
Src2 = _Sbfe(OpSize::i64Bit, SizeBits, 0, Src2);
}

// 64-bit special cased to save a move
@ -2659,14 +2662,15 @@ void OpDispatchBuilder::IMULOp(OpcodeArgs) {

void OpDispatchBuilder::MULOp(OpcodeArgs) {
const auto Size = OpSizeFromSrc(Op);
const auto SizeBits = IR::OpSizeAsBits(Size);

Ref Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
Ref Src2 = LoadGPRRegister(X86State::REG_RAX);
Ref Result;

if (Size != OpSize::i64Bit) {
Src1 = _Bfe(OpSize::i64Bit, Size * 8, 0, Src1);
Src2 = _Bfe(OpSize::i64Bit, Size * 8, 0, Src2);
Src1 = _Bfe(OpSize::i64Bit, SizeBits, 0, Src1);
Src2 = _Bfe(OpSize::i64Bit, SizeBits, 0, Src2);
Result = _UMul(OpSize::i64Bit, Src1, Src2);
}
Ref ResultHigh {};
@ -2709,17 +2713,19 @@ void OpDispatchBuilder::MULOp(OpcodeArgs) {

void OpDispatchBuilder::NOTOp(OpcodeArgs) {
const auto Size = OpSizeFromSrc(Op);
const auto SizeBits = IR::OpSizeAsBits(Size);

Ref MaskConst {};
if (Size == OpSize::i64Bit) {
MaskConst = _Constant(~0ULL);
} else {
MaskConst = _Constant((1ULL << (Size * 8)) - 1);
MaskConst = _Constant((1ULL << SizeBits) - 1);
}

if (DestIsLockedMem(Op)) {
HandledLock = true;
Ref DestMem = MakeSegmentAddress(Op, Op->Dest);
_AtomicXor(IR::SizeToOpSize(Size), MaskConst, DestMem);
_AtomicXor(Size, MaskConst, DestMem);
} else if (!Op->Dest.IsGPR()) {
// GPR version plays fast and loose with sizes, be safe for memory tho.
Ref Src = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
@ -2742,13 +2748,13 @@ void OpDispatchBuilder::NOTOp(OpcodeArgs) {

// For 8/16-bit, use 64-bit invert so we invert in place, while getting
// insert behaviour. For 32-bit, use 32-bit invert to zero the upper bits.
unsigned EffectiveSize = Size == OpSize::i32Bit ? OpSize::i32Bit : GPRSize;
const auto EffectiveSize = Size == OpSize::i32Bit ? OpSize::i32Bit : GPRSize;

// If we're inverting the whole thing, use Not instead of Xor to save a constant.
if (Size >= OpSize::i32Bit) {
Src = _Not(IR::SizeToOpSize(EffectiveSize), Src);
Src = _Not(EffectiveSize, Src);
} else {
Src = _Xor(IR::SizeToOpSize(EffectiveSize), Src, MaskConst);
Src = _Xor(EffectiveSize, Src, MaskConst);
}

// Always store 64-bit, the Not/Xor correctly handle the upper bits and this
@ -2816,7 +2822,7 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) {

// SF, ZF, PF set according to result. CF set per above. OF undefined.
StoreGPRRegister(X86State::REG_RAX, AL, OpSize::i8Bit);
SetNZ_ZeroCV(1, AL);
SetNZ_ZeroCV(OpSize::i8Bit, AL);
SetCFInverted(CFInv);
CalculatePF(AL);
SetAFAndFixup(AF);
@ -2842,7 +2848,7 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) {

// SF, ZF, PF set according to result. CF set per above. OF undefined.
StoreGPRRegister(X86State::REG_RAX, AL, OpSize::i8Bit);
SetNZ_ZeroCV(1, AL);
SetNZ_ZeroCV(OpSize::i8Bit, AL);
SetCFDirect(NewCF);
CalculatePF(AL);
SetAFAndFixup(AF);
@ -2898,7 +2904,7 @@ void OpDispatchBuilder::AAMOp(OpcodeArgs) {
auto Res = _AddShift(OpSize::i64Bit, URemOp, UDivOp, ShiftType::LSL, 8);
StoreGPRRegister(X86State::REG_RAX, Res, OpSize::i16Bit);

SetNZ_ZeroCV(1, Res);
SetNZ_ZeroCV(OpSize::i8Bit, Res);
CalculatePF(Res);
InvalidateAF();
}
@ -2913,7 +2919,7 @@ void OpDispatchBuilder::AADOp(OpcodeArgs) {
auto Result = _And(OpSize::i64Bit, NewAL, _Constant(0xFF));
StoreGPRRegister(X86State::REG_RAX, Result, OpSize::i16Bit);

SetNZ_ZeroCV(1, Result);
SetNZ_ZeroCV(OpSize::i8Bit, Result);
CalculatePF(Result);
InvalidateAF();
}
@ -2978,14 +2984,14 @@ void OpDispatchBuilder::EnterOp(OpcodeArgs) {

if (Level > 0) {
for (uint8_t i = 1; i < Level; ++i) {
auto Offset = _Constant(i * GPRSize);
auto MemLoc = _Sub(IR::SizeToOpSize(GPRSize), OldBP, Offset);
auto Offset = _Constant(i * IR::OpSizeToSize(GPRSize));
auto MemLoc = _Sub(GPRSize, OldBP, Offset);
auto Mem = _LoadMem(GPRClass, GPRSize, MemLoc, GPRSize);
NewSP = PushValue(GPRSize, Mem);
}
NewSP = PushValue(GPRSize, temp_RBP);
}
NewSP = _Sub(IR::SizeToOpSize(GPRSize), NewSP, _Constant(AllocSpace));
NewSP = _Sub(GPRSize, NewSP, _Constant(AllocSpace));
StoreGPRRegister(X86State::REG_RSP, NewSP);
StoreGPRRegister(X86State::REG_RBP, temp_RBP);
}
@ -3186,7 +3192,7 @@ void OpDispatchBuilder::STOSOp(OpcodeArgs) {
|
||||
// Offset the pointer
|
||||
Ref TailDest = LoadGPRRegister(X86State::REG_RDI);
|
||||
StoreGPRRegister(X86State::REG_RDI, OffsetByDir(TailDest, Size));
|
||||
StoreGPRRegister(X86State::REG_RDI, OffsetByDir(TailDest, IR::OpSizeToSize(Size)));
|
||||
} else {
|
||||
// FEX doesn't support partial faulting REP instructions.
|
||||
// Converting this to a `MemSet` IR op optimizes this quite significantly in our codegen.
|
||||
@ -3255,7 +3261,7 @@ void OpDispatchBuilder::MOVSOp(OpcodeArgs) {
|
||||
// Store to memory where RDI points
|
||||
_StoreMemAutoTSO(GPRClass, Size, RDI, Src, Size);
|
||||
|
||||
auto PtrDir = LoadDir(Size);
|
||||
auto PtrDir = LoadDir(IR::OpSizeToSize(Size));
|
||||
RSI = _Add(OpSize::i64Bit, RSI, PtrDir);
|
||||
RDI = _Add(OpSize::i64Bit, RDI, PtrDir);
|
||||
|
||||
@ -3285,7 +3291,7 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
|
||||
|
||||
CalculateFlags_SUB(OpSizeFromSrc(Op), Src2, Src1);
|
||||
|
||||
auto PtrDir = LoadDir(Size);
|
||||
auto PtrDir = LoadDir(IR::OpSizeToSize(Size));
|
||||
|
||||
// Offset the pointer
|
||||
Dest_RDI = _Add(OpSize::i64Bit, Dest_RDI, PtrDir);
|
||||
@ -3342,11 +3348,11 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
|
||||
// Offset the pointer
|
||||
Dest_RDI = _Add(OpSize::i64Bit, Dest_RDI, _Constant(PtrDir * Size));
|
||||
Dest_RDI = _Add(OpSize::i64Bit, Dest_RDI, _Constant(PtrDir * IR::OpSizeToSize(Size)));
|
||||
StoreGPRRegister(X86State::REG_RDI, Dest_RDI);
|
||||
|
||||
// Offset second pointer
|
||||
Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, _Constant(PtrDir * Size));
|
||||
Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, _Constant(PtrDir * IR::OpSizeToSize(Size)));
|
||||
StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
|
||||
|
||||
// If TailCounter != 0, compare sources.
|
||||
@ -3403,7 +3409,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) {
|
||||
|
||||
// Offset the pointer
|
||||
Ref TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
|
||||
StoreGPRRegister(X86State::REG_RSI, OffsetByDir(TailDest_RSI, Size));
|
||||
StoreGPRRegister(X86State::REG_RSI, OffsetByDir(TailDest_RSI, IR::OpSizeToSize(Size)));
|
||||
} else {
|
||||
// Calculate flags early. because end of block
|
||||
CalculateDeferredFlags();
|
||||
@ -3452,7 +3458,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) {
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
|
||||
// Offset the pointer
|
||||
TailDest_RSI = _Add(OpSize::i64Bit, TailDest_RSI, _Constant(PtrDir * Size));
|
||||
TailDest_RSI = _Add(OpSize::i64Bit, TailDest_RSI, _Constant(PtrDir * IR::OpSizeToSize(Size)));
|
||||
StoreGPRRegister(X86State::REG_RSI, TailDest_RSI);
|
||||
|
||||
// Jump back to the start, we have more work to do
|
||||
@ -3487,7 +3493,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
|
||||
|
||||
// Offset the pointer
|
||||
Ref TailDest_RDI = LoadGPRRegister(X86State::REG_RDI);
|
||||
StoreGPRRegister(X86State::REG_RDI, OffsetByDir(TailDest_RDI, Size));
|
||||
StoreGPRRegister(X86State::REG_RDI, OffsetByDir(TailDest_RDI, IR::OpSizeToSize(Size)));
|
||||
} else {
|
||||
// Calculate flags early. because end of block
|
||||
CalculateDeferredFlags();
|
||||
@ -3536,7 +3542,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
|
||||
// Offset the pointer
|
||||
TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, _Constant(Dir * Size));
|
||||
TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, _Constant(Dir * IR::OpSizeToSize(Size)));
|
||||
StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
|
||||
|
||||
CalculateDeferredFlags();
|
||||
@ -3598,7 +3604,7 @@ void OpDispatchBuilder::NEGOp(OpcodeArgs) {
|
||||
|
||||
if (DestIsLockedMem(Op)) {
|
||||
Ref DestMem = MakeSegmentAddress(Op, Op->Dest);
|
||||
Ref Dest = _AtomicFetchNeg(IR::SizeToOpSize(Size), DestMem);
|
||||
Ref Dest = _AtomicFetchNeg(Size, DestMem);
|
||||
CalculateFlags_SUB(Size, ZeroConst, Dest);
|
||||
} else {
|
||||
Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
|
||||
@ -3622,7 +3628,7 @@ void OpDispatchBuilder::DIVOp(OpcodeArgs) {
|
||||
auto URemOp = _URem(OpSize::i16Bit, Src1, Divisor);
|
||||
|
||||
// AX[15:0] = concat<URem[7:0]:UDiv[7:0]>
|
||||
auto ResultAX = _Bfi(IR::SizeToOpSize(GPRSize), 8, 8, UDivOp, URemOp);
|
||||
auto ResultAX = _Bfi(GPRSize, 8, 8, UDivOp, URemOp);
|
||||
StoreGPRRegister(X86State::REG_RAX, ResultAX, OpSize::i16Bit);
|
||||
} else if (Size == OpSize::i16Bit) {
|
||||
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
|
||||
@ -3636,8 +3642,8 @@ void OpDispatchBuilder::DIVOp(OpcodeArgs) {
|
||||
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
|
||||
Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
|
||||
|
||||
Ref UDivOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LUDiv(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
Ref URemOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LURem(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
Ref UDivOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LUDiv(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
Ref URemOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LURem(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
|
||||
StoreGPRRegister(X86State::REG_RAX, UDivOp);
|
||||
StoreGPRRegister(X86State::REG_RDX, URemOp);
|
||||
@ -3674,7 +3680,7 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
|
||||
auto URemOp = _Rem(OpSize::i64Bit, Src1, Divisor);
|
||||
|
||||
// AX[15:0] = concat<URem[7:0]:UDiv[7:0]>
|
||||
auto ResultAX = _Bfi(IR::SizeToOpSize(GPRSize), 8, 8, UDivOp, URemOp);
|
||||
auto ResultAX = _Bfi(GPRSize, 8, 8, UDivOp, URemOp);
|
||||
StoreGPRRegister(X86State::REG_RAX, ResultAX, OpSize::i16Bit);
|
||||
} else if (Size == OpSize::i16Bit) {
|
||||
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
|
||||
@ -3688,8 +3694,8 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
|
||||
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
|
||||
Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
|
||||
|
||||
Ref UDivOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LDiv(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
Ref URemOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LRem(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
Ref UDivOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LDiv(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
Ref URemOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LRem(OpSize::i32Bit, Src1, Src2, Divisor));
|
||||
|
||||
StoreGPRRegister(X86State::REG_RAX, UDivOp);
|
||||
StoreGPRRegister(X86State::REG_RDX, URemOp);
|
||||
@ -3728,7 +3734,7 @@ void OpDispatchBuilder::BSFOp(OpcodeArgs) {
|
||||
// Although Intel does not guarantee that semantic, AMD does and Intel
|
||||
// hardware satisfies it. We provide the stronger AMD behaviour as
|
||||
// applications might rely on that in the wild.
|
||||
auto SelectOp = NZCVSelect(IR::SizeToOpSize(GPRSize), {COND_EQ}, Dest, Result);
|
||||
auto SelectOp = NZCVSelect(GPRSize, {COND_EQ}, Dest, Result);
|
||||
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, OpSize::iInvalid);
|
||||
}
|
||||
|
||||
@ -3746,7 +3752,7 @@ void OpDispatchBuilder::BSROp(OpcodeArgs) {
|
||||
SetZ_InvalidateNCV(OpSizeFromSrc(Op), Src);
|
||||
|
||||
// If Src was zero then the destination doesn't get modified
|
||||
auto SelectOp = NZCVSelect(IR::SizeToOpSize(GPRSize), {COND_EQ}, Dest, Result);
|
||||
auto SelectOp = NZCVSelect(GPRSize, {COND_EQ}, Dest, Result);
|
||||
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, OpSize::iInvalid);
|
||||
}
|
||||
|
||||
@ -3784,7 +3790,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
|
||||
|
||||
if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
|
||||
Src1 = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags, {.AllowUpperGarbage = true});
|
||||
Src1Lower = _Bfe(IR::SizeToOpSize(GPRSize), Size * 8, 0, Src1);
|
||||
Src1Lower = _Bfe(GPRSize, IR::OpSizeAsBits(Size), 0, Src1);
|
||||
} else {
|
||||
Src1 = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, Size, Op->Flags, {.AllowUpperGarbage = true});
|
||||
Src1Lower = Src1;
|
||||
@ -3797,7 +3803,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
|
||||
if (!Trivial) {
|
||||
if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
|
||||
// This allows us to only hit the ZEXT case on failure
|
||||
Ref RAXResult = NZCVSelect(IR::i64Bit, {COND_EQ}, Src3, Src1Lower);
|
||||
Ref RAXResult = NZCVSelect(OpSize::i64Bit, {COND_EQ}, Src3, Src1Lower);
|
||||
|
||||
// When the size is 4 we need to make sure not zext the GPR when the comparison fails
|
||||
StoreGPRRegister(X86State::REG_RAX, RAXResult);
|
||||
@ -3809,7 +3815,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
|
||||
// Op1 = RAX == Op1 ? Op2 : Op1
|
||||
// If they match then set the rm operand to the input
|
||||
// else don't set the rm operand
|
||||
Ref DestResult = Trivial ? Src2 : NZCVSelect(IR::i64Bit, CondClassType {COND_EQ}, Src2, Src1);
|
||||
Ref DestResult = Trivial ? Src2 : NZCVSelect(OpSize::i64Bit, CondClassType {COND_EQ}, Src2, Src1);
|
||||
|
||||
// Store in to GPR Dest
|
||||
if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
|
||||
@ -3837,7 +3843,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
|
||||
// if (DataSrc == Src3) { *Src1 == Src2; } Src2 = DataSrc
|
||||
// This will write to memory! Careful!
|
||||
// Third operand must be a calculated guest memory address
|
||||
Ref CASResult = _CAS(IR::SizeToOpSize(Size), Src3Lower, Src2, Src1);
|
||||
Ref CASResult = _CAS(Size, Src3Lower, Src2, Src1);
|
||||
Ref RAXResult = CASResult;
|
||||
|
||||
CalculateFlags_SUB(OpSizeFromSrc(Op), Src3Lower, CASResult);
|
||||
@ -3845,7 +3851,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
|
||||
|
||||
if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
|
||||
// This allows us to only hit the ZEXT case on failure
|
||||
RAXResult = _NZCVSelect(IR::i64Bit, {COND_EQ}, Src3, CASResult);
|
||||
RAXResult = _NZCVSelect(OpSize::i64Bit, {COND_EQ}, Src3, CASResult);
|
||||
Size = OpSize::i64Bit;
|
||||
}
|
||||
|
||||
@ -3885,10 +3891,10 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) {
|
||||
|
||||
Ref Result_Lower = _AllocateGPR(true);
|
||||
Ref Result_Upper = _AllocateGPRAfter(Result_Lower);
|
||||
_CASPair(IR::SizeToOpSize(Size), Expected_Lower, Expected_Upper, Desired_Lower, Desired_Upper, Src1, Result_Lower, Result_Upper);
|
||||
_CASPair(Size, Expected_Lower, Expected_Upper, Desired_Lower, Desired_Upper, Src1, Result_Lower, Result_Upper);
|
||||
|
||||
HandleNZCV_RMW();
|
||||
_CmpPairZ(IR::SizeToOpSize(Size), Result_Lower, Result_Upper, Expected_Lower, Expected_Upper);
|
||||
_CmpPairZ(Size, Result_Lower, Result_Upper, Expected_Lower, Expected_Upper);
|
||||
CalculateDeferredFlags();
|
||||
|
||||
auto UpdateIfNotZF = [this](auto Reg, auto Value) {
|
||||
@ -4020,7 +4026,7 @@ Ref OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefix, bool O
|
||||
Ref OpDispatchBuilder::AppendSegmentOffset(Ref Value, uint32_t Flags, uint32_t DefaultPrefix, bool Override) {
|
||||
auto Segment = GetSegment(Flags, DefaultPrefix, Override);
|
||||
if (Segment) {
|
||||
Value = _Add(IR::SizeToOpSize(std::max<uint8_t>(OpSize::i32Bit, std::max(GetOpSize(Value), GetOpSize(Segment)))), Value, Segment);
|
||||
Value = _Add(std::max(OpSize::i32Bit, std::max(GetOpSize(Value), GetOpSize(Segment))), Value, Segment);
|
||||
}
|
||||
|
||||
return Value;
|
||||
@ -4144,7 +4150,7 @@ Ref OpDispatchBuilder::LoadEffectiveAddress(AddressMode A, bool AddSegmentBase,
|
||||
|
||||
if (A.Offset) {
|
||||
Ref Offset = _Constant(A.Offset);
|
||||
Tmp = Tmp ? _Add(IR::SizeToOpSize(GPRSize), Tmp, Offset) : Offset;
|
||||
Tmp = Tmp ? _Add(GPRSize, Tmp, Offset) : Offset;
|
||||
}
|
||||
|
||||
if (A.Index) {
|
||||
@ -4167,7 +4173,7 @@ Ref OpDispatchBuilder::LoadEffectiveAddress(AddressMode A, bool AddSegmentBase,
|
||||
//
|
||||
// If the AddrSize is not the GPRSize then we need to clear the upper bits.
|
||||
if ((A.AddrSize < GPRSize) && !AllowUpperGarbage && Tmp) {
|
||||
Tmp = _Bfe(GPRSize, A.AddrSize * 8, 0, Tmp);
|
||||
Tmp = _Bfe(GPRSize, IR::OpSizeAsBits(A.AddrSize), 0, Tmp);
|
||||
}
|
||||
|
||||
if (A.Segment && AddSegmentBase) {
|
||||
@ -4177,7 +4183,7 @@ Ref OpDispatchBuilder::LoadEffectiveAddress(AddressMode A, bool AddSegmentBase,
|
||||
return Tmp ?: _Constant(0);
|
||||
}
|
||||
|
||||
AddressMode OpDispatchBuilder::SelectAddressMode(AddressMode A, bool AtomicTSO, bool Vector, unsigned AccessSize) {
|
||||
AddressMode OpDispatchBuilder::SelectAddressMode(AddressMode A, bool AtomicTSO, bool Vector, IR::OpSize AccessSize) {
|
||||
const auto GPRSize = CTX->GetGPROpSize();
|
||||
|
||||
// In the future this also needs to account for LRCPC3.
|
||||
@ -4207,9 +4213,10 @@ AddressMode OpDispatchBuilder::SelectAddressMode(AddressMode A, bool AtomicTSO,
|
||||
}
|
||||
|
||||
// Try a (possibly scaled) register index.
|
||||
if (A.AddrSize == OpSize::i64Bit && A.Base && (A.Index || A.Segment) && !A.Offset && (A.IndexScale == 1 || A.IndexScale == AccessSize)) {
|
||||
if (A.AddrSize == OpSize::i64Bit && A.Base && (A.Index || A.Segment) && !A.Offset &&
|
||||
(A.IndexScale == 1 || A.IndexScale == IR::OpSizeToSize(AccessSize))) {
|
||||
if (A.Index && A.Segment) {
|
||||
A.Base = _Add(IR::SizeToOpSize(GPRSize), A.Base, A.Segment);
|
||||
A.Base = _Add(GPRSize, A.Base, A.Segment);
|
||||
} else if (A.Segment) {
|
||||
A.Index = A.Segment;
|
||||
A.IndexScale = 1;
|
||||
@ -4231,7 +4238,7 @@ AddressMode OpDispatchBuilder::DecodeAddress(const X86Tables::DecodedOp& Op, con
|
||||
|
||||
AddressMode A {};
|
||||
A.Segment = GetSegment(Op->Flags);
|
||||
A.AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0 ? (IR::DivideOpSize(GPRSize, 2)) : GPRSize;
|
||||
A.AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0 ? (GPRSize >> 1) : GPRSize;
|
||||
A.NonTSO = AccessType == MemoryAccessType::NONTSO || AccessType == MemoryAccessType::STREAM;
|
||||
|
||||
if (Operand.IsLiteral()) {
|
||||
@ -4312,7 +4319,7 @@ Ref OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86T
|
||||
// Now extract the subregister if it was a partial load /smaller/ than SSE size
|
||||
// TODO: Instead of doing the VMov implicitly on load, hunt down all use cases that require partial loads and do it after load.
|
||||
// We don't have information here to know if the operation needs zero upper bits or can contain data.
|
||||
if (!AllowUpperGarbage && OpSize < Core::CPUState::XMM_SSE_REG_SIZE) {
|
||||
if (!AllowUpperGarbage && OpSize < OpSize::i128Bit) {
|
||||
A.Base = _VMov(OpSize, A.Base);
|
||||
}
|
||||
} else {
|
||||
@ -4345,7 +4352,7 @@ Ref OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, IR::OpSize Size, uint8_t Of
|
||||
if (AllowUpperGarbage) {
|
||||
Reg = _Lshr(OpSize, Reg, _Constant(Offset));
|
||||
} else {
|
||||
Reg = _Bfe(OpSize, Size * 8, Offset, Reg);
|
||||
Reg = _Bfe(OpSize, IR::OpSizeAsBits(Size), Offset, Reg);
|
||||
}
|
||||
}
|
||||
return Reg;
|
||||
@ -4360,7 +4367,7 @@ void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, const Ref Src, IR::OpSize
|
||||
Ref Reg = Src;
|
||||
if (Size != GPRSize || Offset != 0) {
|
||||
// Need to do an insert if not automatic size or zero offset.
|
||||
Reg = _Bfi(GPRSize, Size * 8, Offset, LoadGPRRegister(GPR), Src);
|
||||
Reg = _Bfi(GPRSize, IR::OpSizeAsBits(Size), Offset, LoadGPRRegister(GPR), Src);
|
||||
}
|
||||
|
||||
StoreRegister(GPR, false, Reg);
|
||||
@ -4408,7 +4415,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl
|
||||
LOGMAN_THROW_A_FMT(Class != IR::GPRClass, "Partial writes from GPR not allowed. Instruction: {}", Op->TableInfo->Name);
|
||||
|
||||
// XMM-size is handled in implementations.
|
||||
if (VectorSize != Core::CPUState::XMM_AVX_REG_SIZE || OpSize != Core::CPUState::XMM_SSE_REG_SIZE) {
|
||||
if (VectorSize != OpSize::i256Bit || OpSize != OpSize::i128Bit) {
|
||||
auto SrcVector = LoadXMMRegister(gprIndex);
|
||||
Result = _VInsElement(VectorSize, OpSize, 0, 0, SrcVector, Src);
|
||||
}
|
||||
@ -4443,7 +4450,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl
|
||||
|
||||
AddressMode A = DecodeAddress(Op, Operand, AccessType, false /* IsLoad */);
|
||||
|
||||
if (OpSize == 10) {
|
||||
if (OpSize == OpSize::f80Bit) {
|
||||
Ref MemStoreDst = LoadEffectiveAddress(A, true);
|
||||
|
||||
// For X87 extended doubles, split before storing
|
||||
@ -4547,7 +4554,7 @@ void OpDispatchBuilder::ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::I
|
||||
(ALUIROp == IR::IROps::OP_XOR || ALUIROp == IR::IROps::OP_OR || ALUIROp == IR::IROps::OP_ANDWITHFLAGS)) {
|
||||
|
||||
RoundedSize = ResultSize = CTX->GetGPROpSize();
|
||||
LOGMAN_THROW_A_FMT(Const < (1ull << (Size * 8)), "does not clobber");
|
||||
LOGMAN_THROW_A_FMT(Const < (1ull << IR::OpSizeAsBits(Size)), "does not clobber");
|
||||
|
||||
// For AND, we can play the same trick but we instead need the upper bits of
|
||||
// the constant to be all-1s instead of all-0s to preserve. We also can't
|
||||
@ -4559,7 +4566,7 @@ void OpDispatchBuilder::ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::I
|
||||
// adjusted constant here will inline into the arm64 and instruction, so if
|
||||
// flags are not needed, we save an instruction overall.
|
||||
if (ALUIROp == IR::IROps::OP_ANDWITHFLAGS) {
|
||||
Src = _Constant(Const | ~((1ull << (Size * 8)) - 1));
|
||||
Src = _Constant(Const | ~((1ull << IR::OpSizeAsBits(Size)) - 1));
|
||||
ALUIROp = IR::IROps::OP_AND;
|
||||
}
|
||||
}
|
||||
@ -4570,13 +4577,13 @@ void OpDispatchBuilder::ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::I
|
||||
if (DestIsLockedMem(Op)) {
|
||||
HandledLock = true;
|
||||
Ref DestMem = MakeSegmentAddress(Op, Op->Dest);
|
||||
DeriveOp(FetchOp, AtomicFetchOp, _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem));
|
||||
DeriveOp(FetchOp, AtomicFetchOp, _AtomicFetchAdd(Size, Src, DestMem));
|
||||
Dest = FetchOp;
|
||||
} else {
|
||||
Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
|
||||
}
|
||||
|
||||
const auto OpSize = IR::SizeToOpSize(RoundedSize);
|
||||
const auto OpSize = RoundedSize;
|
||||
DeriveOp(ALUOp, ALUIROp, _AndWithFlags(OpSize, Dest, Src));
|
||||
Result = ALUOp;
|
||||
|
||||
@ -4756,7 +4763,7 @@ void OpDispatchBuilder::MOVBEOp(OpcodeArgs) {
|
||||
// Rev of 16-bit value as 32-bit replaces the result in the upper 16-bits of the result.
|
||||
// bfxil the 16-bit result in to the GPR.
|
||||
Ref Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags);
|
||||
auto Result = _Bfxil(IR::SizeToOpSize(GPRSize), 16, 16, Dest, Src);
|
||||
auto Result = _Bfxil(GPRSize, 16, 16, Dest, Src);
|
||||
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, OpSize::iInvalid);
|
||||
} else {
|
||||
// 32-bit does regular zext
|
||||
|
@ -938,12 +938,12 @@ public:
|
||||
void AVX128_VectorALU(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
|
||||
void AVX128_VectorUnary(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
|
||||
void AVX128_VectorUnaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize, std::function<Ref(IR::OpSize ElementSize, Ref Src)> Helper);
|
||||
void AVX128_VectorBinaryImpl(OpcodeArgs, size_t SrcSize, IR::OpSize ElementSize,
|
||||
void AVX128_VectorBinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize,
|
||||
std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2)> Helper);
|
||||
void AVX128_VectorShiftWideImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp);
|
||||
void AVX128_VectorShiftImmImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp);
|
||||
void AVX128_VectorTrinaryImpl(OpcodeArgs, size_t SrcSize, size_t ElementSize, Ref Src3,
|
||||
std::function<Ref(size_t ElementSize, Ref Src1, Ref Src2, Ref Src3)> Helper);
|
||||
void AVX128_VectorTrinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize, Ref Src3,
|
||||
std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref Src3)> Helper);
|
||||
|
||||
enum class ShiftDirection { RIGHT, LEFT };
|
||||
void AVX128_ShiftDoubleImm(OpcodeArgs, ShiftDirection Dir);
|
||||
@ -993,7 +993,7 @@ public:
|
||||
template<IR::OpSize ElementSize>
|
||||
void AVX128_PExtr(OpcodeArgs);
|
||||
void AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed);
|
||||
template<size_t ElementSize>
|
||||
template<IR::OpSize ElementSize>
|
||||
void AVX128_MOVMSK(OpcodeArgs);
|
||||
void AVX128_MOVMSKB(OpcodeArgs);
|
||||
void AVX128_PINSRImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
|
||||
@ -1065,7 +1065,7 @@ public:
|
||||
template<IR::OpSize ElementSize>
|
||||
void AVX128_VSHUF(OpcodeArgs);
|
||||
|
||||
template<size_t ElementSize>
|
||||
template<IR::OpSize ElementSize>
|
||||
void AVX128_VPERMILImm(OpcodeArgs);
|
||||
|
||||
template<IROps IROp, IR::OpSize ElementSize>
|
||||
@ -1137,7 +1137,7 @@ public:
|
||||
void StoreResult_WithAVXInsert(VectorOpType Type, FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, Ref Value,
|
||||
IR::OpSize Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
|
||||
if (Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR >= X86State::REG_XMM_0 && Op->Dest.Data.GPR.GPR <= X86State::REG_XMM_15 &&
|
||||
GetGuestVectorLength() == Core::CPUState::XMM_AVX_REG_SIZE && Type == VectorOpType::SSE) {
|
||||
GetGuestVectorLength() == OpSize::i256Bit && Type == VectorOpType::SSE) {
|
||||
const auto gpr = Op->Dest.Data.GPR.GPR;
|
||||
const auto gprIndex = gpr - X86State::REG_XMM_0;
|
||||
auto DestVector = LoadXMMRegister(gprIndex);
|
||||
@ -1150,7 +1150,7 @@ public:
|
||||
}
|
||||
|
||||
void StoreXMMRegister_WithAVXInsert(VectorOpType Type, uint32_t XMM, Ref Value) {
|
||||
if (GetGuestVectorLength() == Core::CPUState::XMM_AVX_REG_SIZE && Type == VectorOpType::SSE) {
|
||||
if (GetGuestVectorLength() == OpSize::i256Bit && Type == VectorOpType::SSE) {
|
||||
///< SSE vector stores need to insert in the low 128-bit lane of the 256-bit register.
|
||||
auto DestVector = LoadXMMRegister(XMM);
|
||||
Value = _VInsElement(GetGuestVectorLength(), OpSize::i128Bit, 0, 0, DestVector, Value);
|
||||
@ -1233,12 +1233,14 @@ public:
|
||||
// Use stp where possible to store multiple values at a time. This accelerates AVX.
|
||||
// TODO: this is all really confusing because of backwards iteration,
|
||||
// can we peel back that hack?
|
||||
if ((Bits & NextBit) && !Partial && Size >= 4 && CacheIndexToContextOffset(Index - 1) == Offset - Size && (Offset - Size) / Size < 64) {
|
||||
const auto SizeInt = IR::OpSizeToSize(Size);
|
||||
if ((Bits & NextBit) && !Partial && Size >= OpSize::i32Bit && CacheIndexToContextOffset(Index - 1) == Offset - SizeInt &&
|
||||
(Offset - SizeInt) / SizeInt < 64) {
|
||||
LOGMAN_THROW_A_FMT(CacheIndexClass(Index - 1) == Class, "construction");
|
||||
LOGMAN_THROW_A_FMT((Offset % Size) == 0, "construction");
|
||||
LOGMAN_THROW_A_FMT((Offset % SizeInt) == 0, "construction");
|
||||
Ref ValueNext = RegCache.Value[Index - 1];
|
||||
|
||||
_StoreContextPair(Size, Class, ValueNext, Value, Offset - Size);
|
||||
_StoreContextPair(Size, Class, ValueNext, Value, Offset - SizeInt);
|
||||
Bits &= ~NextBit;
|
||||
} else {
|
||||
_StoreContext(Size, Class, Value, Offset);
|
||||
@ -1380,7 +1382,7 @@ private:
|
||||
Ref InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
|
||||
const X86Tables::DecodedOperand& Imm);
|
||||
|
||||
Ref MPSADBWOpImpl(size_t SrcSize, Ref Src1, Ref Src2, uint8_t Select);
|
||||
Ref MPSADBWOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, uint8_t Select);
|
||||
|
||||
Ref PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
|
||||
const X86Tables::DecodedOperand& Imm, bool IsAVX);
|
||||
@ -1503,7 +1505,7 @@ private:
|
||||
Ref GetRelocatedPC(const FEXCore::X86Tables::DecodedOp& Op, int64_t Offset = 0);
|
||||
|
||||
Ref LoadEffectiveAddress(AddressMode A, bool AddSegmentBase, bool AllowUpperGarbage = false);
|
||||
AddressMode SelectAddressMode(AddressMode A, bool AtomicTSO, bool Vector, unsigned AccessSize);
|
||||
AddressMode SelectAddressMode(AddressMode A, bool AtomicTSO, bool Vector, IR::OpSize AccessSize);
|
||||
|
||||
bool IsOperandMem(const X86Tables::DecodedOperand& Operand, bool Load) {
|
||||
// Literals are immediates as sources but memory addresses as destinations.
|
||||
@ -1627,24 +1629,24 @@ private:
|
||||
NZCVDirty = true;
|
||||
}
|
||||
|
||||
void SetNZ_ZeroCV(unsigned SrcSize, Ref Res, bool SetPF = false) {
|
||||
void SetNZ_ZeroCV(IR::OpSize SrcSize, Ref Res, bool SetPF = false) {
|
||||
HandleNZ00Write();
|
||||
|
||||
// x - 0 = x. NZ set according to Res. C always set. V always unset. This
|
||||
// matches what we want since we want carry inverted.
|
||||
//
|
||||
// This is currently worse for 8/16-bit, but that should be optimized. TODO
|
||||
if (SrcSize >= 4) {
|
||||
if (SrcSize >= OpSize::i32Bit) {
|
||||
if (SetPF) {
|
||||
CalculatePF(_SubWithFlags(IR::SizeToOpSize(SrcSize), Res, _Constant(0)));
|
||||
CalculatePF(_SubWithFlags(SrcSize, Res, _Constant(0)));
|
||||
} else {
|
||||
_SubNZCV(IR::SizeToOpSize(SrcSize), Res, _Constant(0));
|
||||
_SubNZCV(SrcSize, Res, _Constant(0));
|
||||
}
|
||||
|
||||
PossiblySetNZCVBits |= 1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_RAW_LOC);
|
||||
CFInverted = true;
|
||||
} else {
|
||||
_TestNZ(IR::SizeToOpSize(SrcSize), Res, Res);
|
||||
_TestNZ(SrcSize, Res, Res);
|
||||
CFInverted = false;
|
||||
|
||||
if (SetPF) {
|
||||
@ -1653,7 +1655,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
void SetNZP_ZeroCV(unsigned SrcSize, Ref Res) {
|
||||
void SetNZP_ZeroCV(IR::OpSize SrcSize, Ref Res) {
|
||||
SetNZ_ZeroCV(SrcSize, Res, true);
|
||||
}
|
||||
|
||||
@ -1705,8 +1707,8 @@ private:
|
||||
HandleNZCVWrite();
|
||||
CFInverted = true;
|
||||
|
||||
if (Size < 4) {
|
||||
_TestNZ(OpSize::i32Bit, Src, _InlineConstant((1u << (8 * Size)) - 1));
|
||||
if (Size < OpSize::i32Bit) {
|
||||
_TestNZ(OpSize::i32Bit, Src, _InlineConstant((1u << (IR::OpSizeAsBits(Size))) - 1));
|
||||
} else {
|
||||
_TestNZ(Size, Src, Src);
|
||||
}
|
||||
@ -1882,7 +1884,7 @@ private:
|
||||
LOGMAN_THROW_AA_FMT(Index < 64, "valid index");
|
||||
uint64_t Bit = (1ull << (uint64_t)Index);
|
||||
|
||||
if (Size == 16 && (RegCache.Partial & Bit)) {
|
||||
if (Size == OpSize::i128Bit && (RegCache.Partial & Bit)) {
|
||||
// We need to load the full register extend if we previously did a partial access.
|
||||
Ref Value = RegCache.Value[Index];
|
||||
Ref Full = _LoadContext(Size, RegClass, Offset);
|
||||
@ -1902,7 +1904,7 @@ private:
|
||||
RegCache.Value[Index] = _LoadContext(Size, RegClass, Offset);
|
||||
|
||||
// We may have done a partial load, this requires special handling.
|
||||
if (Size == 8) {
|
||||
if (Size == OpSize::i64Bit) {
|
||||
RegCache.Partial |= Bit;
|
||||
}
|
||||
} else if (Index == PFIndex) {
|
||||
@ -1938,12 +1940,13 @@ private:
|
||||
|
||||
// Try to load a pair into the cache
|
||||
uint64_t Bits = (3ull << (uint64_t)Index);
|
||||
if (((RegCache.Partial | RegCache.Cached) & Bits) == 0 && ((Offset / Size) < 64)) {
|
||||
const auto SizeInt = IR::OpSizeToSize(Size);
|
||||
if (((RegCache.Partial | RegCache.Cached) & Bits) == 0 && ((Offset / SizeInt) < 64)) {
|
||||
auto Values = LoadContextPair_Uncached(RegClass, Size, Offset);
|
||||
RegCache.Value[Index] = Values.Low;
|
||||
RegCache.Value[Index + 1] = Values.High;
|
||||
RegCache.Cached |= Bits;
|
||||
if (Size == 8) {
|
||||
if (Size == OpSize::i64Bit) {
|
||||
RegCache.Partial |= Bits;
|
||||
}
|
||||
return Values;
|
||||
@ -1952,7 +1955,7 @@ private:
|
||||
// Fallback on a pair of loads
|
||||
return {
|
||||
.Low = LoadRegCache(Offset, Index, RegClass, Size),
|
||||
.High = LoadRegCache(Offset + Size, Index + 1, RegClass, Size),
|
||||
.High = LoadRegCache(Offset + SizeInt, Index + 1, RegClass, Size),
|
||||
};
|
||||
}
|
||||
|
||||
@ -2427,10 +2430,11 @@ private:
|
||||
}
|
||||
|
||||
AddressMode SelectPairAddressMode(AddressMode A, IR::OpSize Size) {
|
||||
const auto SizeInt = IR::OpSizeToSize(Size);
|
||||
AddressMode Out {};
|
||||
|
||||
signed OffsetEl = A.Offset / Size;
|
||||
if ((A.Offset % Size) == 0 && OffsetEl >= -64 && OffsetEl < 64) {
|
||||
signed OffsetEl = A.Offset / SizeInt;
|
||||
if ((A.Offset % SizeInt) == 0 && OffsetEl >= -64 && OffsetEl < 64) {
|
||||
Out.Offset = A.Offset;
|
||||
A.Offset = 0;
|
||||
}
|
||||
@ -2477,6 +2481,7 @@ private:
|
||||
|
||||
void _StoreMemPairAutoTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, AddressMode A, Ref Value1, Ref Value2,
|
||||
IR::OpSize Align = IR::OpSize::i8Bit) {
|
||||
const auto SizeInt = IR::OpSizeToSize(Size);
|
||||
bool AtomicTSO = IsTSOEnabled(Class) && !A.NonTSO;
|
||||
|
||||
// Use stp if possible, otherwise fallback on two stores.
|
||||
@ -2485,7 +2490,7 @@ private:
|
||||
_StoreMemPair(Class, Size, Value1, Value2, A.Base, A.Offset);
|
||||
} else {
|
||||
_StoreMemAutoTSO(Class, Size, A, Value1, OpSize::i8Bit);
|
||||
A.Offset += Size;
|
||||
A.Offset += SizeInt;
|
||||
_StoreMemAutoTSO(Class, Size, A, Value2, OpSize::i8Bit);
|
||||
}
|
||||
}
|
||||
|
@ -74,8 +74,8 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
|
||||
{OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::AVX128_UCOMISx<OpSize::i32Bit>},
|
||||
{OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::AVX128_UCOMISx<OpSize::i64Bit>},
|
||||
|
||||
{OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::AVX128_MOVMSK<4>},
|
||||
{OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::AVX128_MOVMSK<8>},
|
||||
{OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::AVX128_MOVMSK<OpSize::i32Bit>},
|
||||
{OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::AVX128_MOVMSK<OpSize::i64Bit>},
|
||||
|
||||
{OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, OpSize::i32Bit>},
|
||||
{OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, OpSize::i64Bit>},
|
||||
@ -158,7 +158,7 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
|
||||
{OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
|
||||
{OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
|
||||
|
||||
{OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::AVX128_VPERMILImm<4>},
|
||||
{OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::AVX128_VPERMILImm<OpSize::i32Bit>},
|
||||
{OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSHUFW, false>},
|
||||
{OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSHUFW, true>},
|
||||
|
||||
@ -379,8 +379,8 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
|
||||
{OPD(3, 0b01, 0x00), 1, &OpDispatchBuilder::AVX128_VPERMQ},
|
||||
{OPD(3, 0b01, 0x01), 1, &OpDispatchBuilder::AVX128_VPERMQ},
|
||||
{OPD(3, 0b01, 0x02), 1, &OpDispatchBuilder::AVX128_VBLEND<OpSize::i32Bit>},
|
||||
{OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::AVX128_VPERMILImm<4>},
|
||||
{OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::AVX128_VPERMILImm<8>},
|
||||
{OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::AVX128_VPERMILImm<OpSize::i32Bit>},
|
||||
{OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::AVX128_VPERMILImm<OpSize::i64Bit>},
|
||||
{OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::AVX128_VPERM2},
|
||||
{OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVX128_VectorRound<OpSize::i32Bit>},
|
||||
{OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::AVX128_VectorRound<OpSize::i64Bit>},
|
||||
@ -665,7 +665,7 @@ void OpDispatchBuilder::AVX128_VectorUnary(OpcodeArgs, IROps IROp, IR::OpSize El
|
||||
|
||||
void OpDispatchBuilder::AVX128_VectorUnaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize,
|
||||
std::function<Ref(IR::OpSize ElementSize, Ref Src)> Helper) {
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
|
||||
auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
RefPair Result {};
|
||||
@ -680,9 +680,9 @@ void OpDispatchBuilder::AVX128_VectorUnaryImpl(OpcodeArgs, IR::OpSize SrcSize, I
|
||||
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VectorBinaryImpl(OpcodeArgs, size_t SrcSize, IR::OpSize ElementSize,
|
||||
void OpDispatchBuilder::AVX128_VectorBinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize,
|
||||
std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2)> Helper) {
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
|
||||
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);
|
||||
@ -698,9 +698,9 @@ void OpDispatchBuilder::AVX128_VectorBinaryImpl(OpcodeArgs, size_t SrcSize, IR::
|
||||
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VectorTrinaryImpl(OpcodeArgs, size_t SrcSize, size_t ElementSize, Ref Src3,
|
||||
std::function<Ref(size_t ElementSize, Ref Src1, Ref Src2, Ref Src3)> Helper) {
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
void OpDispatchBuilder::AVX128_VectorTrinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize, Ref Src3,
|
||||
std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref Src3)> Helper) {
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
|
||||
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);
|
||||
@ -984,13 +984,13 @@ void OpDispatchBuilder::AVX128_VBROADCAST(OpcodeArgs) {
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPUNPCKL(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return _VZip(OpSize::i128Bit, _ElementSize, Src1, Src2); });
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPUNPCKH(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return _VZip2(OpSize::i128Bit, _ElementSize, Src1, Src2); });
|
||||
}
|
||||
|
||||
@ -1039,7 +1039,7 @@ void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs) {
|
||||
Result.Low = _VSToFVectorInsert(DstSize, DstElementSize, DstElementSize, Src1.Low, Src2.Low, false, false);
|
||||
}
|
||||
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
LOGMAN_THROW_A_FMT(Is128Bit, "Programming Error: This should never occur!");
|
||||
Result.High = LoadZeroVector(OpSize::i128Bit);
|
||||
|
||||
@ -1073,33 +1073,33 @@ void OpDispatchBuilder::AVX128_CVTFPR_To_GPR(OpcodeArgs) {
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VANDN(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::i128Bit,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), OpSize::i128Bit,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return _VAndn(OpSize::i128Bit, _ElementSize, Src2, Src1); });
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPACKSS(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
return _VSQXTNPair(OpSize::i128Bit, _ElementSize, Src1, Src2);
|
||||
});
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPACKUS(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
return _VSQXTUNPair(OpSize::i128Bit, _ElementSize, Src1, Src2);
|
||||
});
|
||||
}
|
||||
|
||||
Ref OpDispatchBuilder::AVX128_PSIGNImpl(IR::OpSize ElementSize, Ref Src1, Ref Src2) {
|
||||
Ref Control = _VSQSHL(OpSize::i128Bit, ElementSize, Src2, (ElementSize * 8) - 1);
|
||||
Control = _VSRSHR(OpSize::i128Bit, ElementSize, Control, (ElementSize * 8) - 1);
|
||||
Ref Control = _VSQSHL(OpSize::i128Bit, ElementSize, Src2, IR::OpSizeAsBits(ElementSize) - 1);
|
||||
Control = _VSRSHR(OpSize::i128Bit, ElementSize, Control, IR::OpSizeAsBits(ElementSize) - 1);
|
||||
return _VMul(OpSize::i128Bit, ElementSize, Src1, Control);
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPSIGN(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return AVX128_PSIGNImpl(_ElementSize, Src1, Src2); });
|
||||
}
|
||||
|
||||
@ -1154,7 +1154,7 @@ void OpDispatchBuilder::AVX128_VFCMP(OpcodeArgs) {
|
||||
.CompType = CompType,
|
||||
};
|
||||
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this, &Capture](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, [this, &Capture](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
return VFCMPOpImpl(OpSize::i128Bit, _ElementSize, Src1, Src2, Capture.CompType);
|
||||
});
|
||||
}
|
||||
@ -1234,7 +1234,7 @@ void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs) {
|
||||
}
|
||||
|
||||
// AVX version only operates on 128-bit.
|
||||
const uint8_t NumElements = std::min<uint8_t>(GetSrcSize(Op), OpSize::i128Bit) / OverridenElementSize;
|
||||
const uint8_t NumElements = IR::NumElements(std::min(OpSizeFromSrc(Op), OpSize::i128Bit), OverridenElementSize);
|
||||
Index &= NumElements - 1;
|
||||
|
||||
if (Op->Dest.IsGPR()) {
|
||||
@ -1251,14 +1251,14 @@ void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs) {
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
|
||||
const auto GetSrc = [&] {
|
||||
if (Op->Src[0].IsGPR()) {
|
||||
return AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false).Low;
|
||||
} else {
|
||||
// For memory operands the 256-bit variant loads twice the size specified in the table.
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto LoadSize = Is256Bit ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) * 2) : SrcSize;
|
||||
|
||||
@ -1267,8 +1267,7 @@ void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize Eleme
};

auto Transform = [=, this](Ref Src) {
for (auto CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize;
CurrentElementSize = IR::MultiplyOpSize(CurrentElementSize, 2)) {
for (auto CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize; CurrentElementSize = CurrentElementSize << 1) {
if (Signed) {
Src = _VSXTL(OpSize::i128Bit, CurrentElementSize, Src);
} else {
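
The rewritten loop doubles CurrentElementSize with << 1 directly on the enum, which implies shift operators are defined for OpSize (the same goes for the ElementSize >> 1 spellings later in the patch). A hypothetical sketch of such overloads on the byte-width model from the earlier note; FEX's real definitions may differ:

#include <cstdint>

enum class OpSize : uint8_t { i8Bit = 1, i16Bit = 2, i32Bit = 4, i64Bit = 8, i128Bit = 16, i256Bit = 32 };

// Shifting the byte width doubles or halves the element size.
constexpr OpSize operator<<(OpSize Size, int Shift) {
  return static_cast<OpSize>(static_cast<uint8_t>(Size) << Shift);
}
constexpr OpSize operator>>(OpSize Size, int Shift) {
  return static_cast<OpSize>(static_cast<uint8_t>(Size) >> Shift);
}

static_assert((OpSize::i8Bit << 1) == OpSize::i16Bit);
static_assert((OpSize::i16Bit << 1) == OpSize::i32Bit);
static_assert((OpSize::i64Bit >> 1) == OpSize::i32Bit);
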
@ -1286,8 +1285,8 @@ void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize Eleme
|
||||
Result.Low = Transform(Src);
|
||||
} else {
|
||||
// 256-bit operation is a bit special. It splits the incoming source between lower and upper registers.
|
||||
size_t TotalElementCount = OpSize::i256Bit / DstElementSize;
|
||||
size_t TotalElementsToSplitSize = (TotalElementCount / 2) * ElementSize;
|
||||
size_t TotalElementCount = IR::NumElements(OpSize::i256Bit, DstElementSize);
|
||||
size_t TotalElementsToSplitSize = (TotalElementCount / 2) * IR::OpSizeToSize(ElementSize);
|
||||
|
||||
// Split the number of elements in half between lower and upper.
|
||||
Ref SrcHigh = _VDupElement(OpSize::i128Bit, IR::SizeToOpSize(TotalElementsToSplitSize), Src, 1);
|
||||
@ -1303,10 +1302,10 @@ void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize Eleme
|
||||
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
|
||||
}
|
||||
|
||||
template<size_t ElementSize>
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_MOVMSK(OpcodeArgs) {
|
||||
const auto SrcSize = GetSrcSize(Op);
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
|
||||
auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
|
||||
@ -1385,7 +1384,7 @@ void OpDispatchBuilder::AVX128_MOVMSKB(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::AVX128_PINSRImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
|
||||
const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) {
|
||||
const auto NumElements = OpSize::i128Bit / ElementSize;
|
||||
const auto NumElements = IR::NumElements(OpSize::i128Bit, ElementSize);
|
||||
const uint64_t Index = Imm.Literal() & (NumElements - 1);
|
||||
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Src1Op, Op->Flags, false);
|
||||
|
||||
@ -1419,7 +1418,7 @@ void OpDispatchBuilder::AVX128_VPINSRDQ(OpcodeArgs) {
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VariableShiftImpl(OpcodeArgs, IROps IROp) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSizeFromSrc(Op), [this, IROp](IR::OpSize ElementSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSizeFromSrc(Op), [this, IROp](IR::OpSize ElementSize, Ref Src1, Ref Src2) {
|
||||
DeriveOp(Shift, IROp, _VUShr(OpSize::i128Bit, ElementSize, Src1, Src2, true));
|
||||
return Shift;
|
||||
});
|
||||
@ -1431,7 +1430,7 @@ void OpDispatchBuilder::AVX128_ShiftDoubleImm(OpcodeArgs, ShiftDirection Dir) {
|
||||
const bool Right = Dir == ShiftDirection::RIGHT;
|
||||
|
||||
const uint64_t Shift = Op->Src[1].Literal();
|
||||
const uint64_t ExtrShift = Right ? Shift : OpSize::i128Bit - Shift;
|
||||
const uint64_t ExtrShift = Right ? Shift : IR::OpSizeToSize(OpSize::i128Bit) - Shift;
|
||||
|
||||
auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
|
||||
@ -1486,40 +1485,40 @@ void OpDispatchBuilder::AVX128_VINSERTPS(OpcodeArgs) {
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPHSUB(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
return PHSUBOpImpl(OpSize::i128Bit, Src1, Src2, _ElementSize);
|
||||
});
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VPHSUBSW(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i16Bit,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PHSUBSOpImpl(OpSize::i128Bit, Src1, Src2); });
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VADDSUBP(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
|
||||
return ADDSUBPOpImpl(OpSize::i128Bit, _ElementSize, Src1, Src2);
|
||||
});
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize, bool Signed>
|
||||
void OpDispatchBuilder::AVX128_VPMULL(OpcodeArgs) {
|
||||
static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit");
|
||||
static_assert(ElementSize == OpSize::i32Bit, "Currently only handles 32-bit -> 64-bit");
|
||||
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref {
|
||||
return PMULLOpImpl(OpSize::i128Bit, ElementSize, Signed, Src1, Src2);
|
||||
});
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VPMULHRSW(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i16Bit,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref { return PMULHRSWOpImpl(OpSize::i128Bit, Src1, Src2); });
|
||||
}
|
||||
|
||||
template<bool Signed>
|
||||
void OpDispatchBuilder::AVX128_VPMULHW(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i16Bit, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref {
|
||||
if (Signed) {
|
||||
return _VSMulH(OpSize::i128Bit, _ElementSize, Src1, Src2);
|
||||
} else {
|
||||
@ -1546,9 +1545,9 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float(OpcodeArgs) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
|
||||
const auto IsFloatSrc = SrcElementSize == 4;
|
||||
auto Is128BitSrc = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
auto Is128BitDst = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto IsFloatSrc = SrcElementSize == OpSize::i32Bit;
|
||||
auto Is128BitSrc = SrcSize == OpSize::i128Bit;
|
||||
auto Is128BitDst = DstSize == OpSize::i128Bit;
|
||||
|
||||
///< Decompose correctly.
|
||||
if (DstElementSize > SrcElementSize && !Is128BitDst) {
|
||||
@ -1630,7 +1629,7 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs) {
|
||||
auto Convert = [this](Ref Src) -> Ref {
|
||||
auto ElementSize = SrcElementSize;
|
||||
if (Narrow) {
|
||||
ElementSize = IR::DivideOpSize(ElementSize, 2);
|
||||
ElementSize = ElementSize >> 1;
|
||||
Src = _Vector_FToF(OpSize::i128Bit, ElementSize, Src, SrcElementSize);
|
||||
}
|
||||
|
||||
@ -1663,7 +1662,7 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs) {
|
||||
template<IR::OpSize SrcElementSize, bool Widen>
|
||||
void OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float(OpcodeArgs) {
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
|
||||
RefPair Src = [&] {
|
||||
if (Widen && !Op->Src[0].IsGPR()) {
|
||||
@ -1682,7 +1681,7 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float(OpcodeArgs) {
|
||||
if (Widen) {
|
||||
DeriveOp(Extended, Op, _VSXTL(OpSize::i128Bit, ElementSize, Src));
|
||||
Src = Extended;
|
||||
ElementSize = IR::MultiplyOpSize(ElementSize, 2);
|
||||
ElementSize = ElementSize << 1;
|
||||
}
|
||||
|
||||
return _Vector_SToF(OpSize::i128Bit, ElementSize, Src);
|
||||
@ -1732,23 +1731,23 @@ void OpDispatchBuilder::AVX128_VAESImc(OpcodeArgs) {
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VAESEnc(OpcodeArgs) {
|
||||
AVX128_VectorTrinaryImpl(Op, GetDstSize(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](size_t, Ref Src1, Ref Src2, Ref Src3) { return _VAESEnc(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](IR::OpSize, Ref Src1, Ref Src2, Ref Src3) { return _VAESEnc(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VAESEncLast(OpcodeArgs) {
|
||||
AVX128_VectorTrinaryImpl(Op, GetDstSize(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](size_t, Ref Src1, Ref Src2, Ref Src3) { return _VAESEncLast(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](IR::OpSize, Ref Src1, Ref Src2, Ref Src3) { return _VAESEncLast(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VAESDec(OpcodeArgs) {
|
||||
AVX128_VectorTrinaryImpl(Op, GetDstSize(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](size_t, Ref Src1, Ref Src2, Ref Src3) { return _VAESDec(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](IR::OpSize, Ref Src1, Ref Src2, Ref Src3) { return _VAESDec(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VAESDecLast(OpcodeArgs) {
|
||||
AVX128_VectorTrinaryImpl(Op, GetDstSize(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](size_t, Ref Src1, Ref Src2, Ref Src3) { return _VAESDecLast(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, LoadZeroVector(OpSize::i128Bit),
|
||||
[this](IR::OpSize, Ref Src1, Ref Src2, Ref Src3) { return _VAESDecLast(OpSize::i128Bit, Src1, Src2, Src3); });
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VAESKeyGenAssist(OpcodeArgs) {
|
||||
@ -1838,7 +1837,7 @@ template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VDPP(OpcodeArgs) {
|
||||
const uint64_t Literal = Op->Src[2].Literal();
|
||||
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this, Literal](IR::OpSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, [this, Literal](IR::OpSize, Ref Src1, Ref Src2) {
|
||||
return DPPOpImpl(OpSize::i128Bit, Src1, Src2, Literal, ElementSize);
|
||||
});
|
||||
}
|
||||
@ -1927,7 +1926,7 @@ void OpDispatchBuilder::AVX128_VSHUF(OpcodeArgs) {
|
||||
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
|
||||
}
|
||||
|
||||
template<size_t ElementSize>
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPERMILImm(OpcodeArgs) {
|
||||
const auto SrcSize = GetSrcSize(Op);
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
@ -1967,31 +1966,31 @@ void OpDispatchBuilder::AVX128_VPERMILImm(OpcodeArgs) {
|
||||
|
||||
template<IROps IROp, IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VHADDP(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, [this](IR::OpSize, Ref Src1, Ref Src2) {
|
||||
DeriveOp(Res, IROp, _VFAddP(OpSize::i128Bit, ElementSize, Src1, Src2));
|
||||
return Res;
|
||||
});
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VPHADDSW(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i16Bit,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PHADDSOpImpl(OpSize::i128Bit, Src1, Src2); });
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VPMADDUBSW(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::i128Bit,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PMADDUBSWOpImpl(OpSize::i128Bit, Src1, Src2); });
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VPMADDWD(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::i128Bit,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit,
|
||||
[this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PMADDWDOpImpl(OpSize::i128Bit, Src1, Src2); });
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VBLEND(OpcodeArgs) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
const uint64_t Selector = Op->Src[2].Literal();
|
||||
|
||||
///< High Selector shift depends on element size:
|
||||
@ -2017,19 +2016,19 @@ void OpDispatchBuilder::AVX128_VBLEND(OpcodeArgs) {
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VHSUBP(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), ElementSize,
|
||||
[this](IR::OpSize, Ref Src1, Ref Src2) { return HSUBPOpImpl(OpSize::i128Bit, ElementSize, Src1, Src2); });
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VPSHUFB(OpcodeArgs) {
|
||||
auto MaskVector = GeneratePSHUFBMask(OpSize::i128Bit);
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i8Bit, [this, MaskVector](IR::OpSize, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i8Bit, [this, MaskVector](IR::OpSize, Ref Src1, Ref Src2) {
|
||||
return PSHUFBOpImpl(OpSize::i128Bit, Src1, Src2, MaskVector);
|
||||
});
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VPSADBW(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i8Bit,
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i8Bit,
|
||||
[this](IR::OpSize, Ref Src1, Ref Src2) { return PSADBWOpImpl(OpSize::i128Bit, Src1, Src2); });
|
||||
}
|
||||
|
||||
@ -2061,7 +2060,7 @@ void OpDispatchBuilder::AVX128_VPALIGNR(OpcodeArgs) {
|
||||
const auto SanitizedDstSize = std::min(Size, OpSize::i128Bit);
|
||||
|
||||
AVX128_VectorBinaryImpl(Op, Size, SanitizedDstSize, [this, Index](IR::OpSize SanitizedDstSize, Ref Src1, Ref Src2) -> Ref {
|
||||
if (Index >= (SanitizedDstSize * 2)) {
|
||||
if (Index >= (IR::OpSizeToSize(SanitizedDstSize) * 2)) {
|
||||
// If the immediate is greater than both vectors combined then it zeroes the vector
|
||||
return LoadZeroVector(OpSize::i128Bit);
|
||||
}
|
||||
@ -2076,7 +2075,7 @@ void OpDispatchBuilder::AVX128_VPALIGNR(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::AVX128_VMASKMOVImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstSize, bool IsStore,
|
||||
const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp) {
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
|
||||
auto Mask = AVX128_LoadSource_WithOpSize(Op, MaskOp, Op->Flags, !Is128Bit);
|
||||
|
||||
@ -2098,14 +2097,14 @@ void OpDispatchBuilder::AVX128_VMASKMOVImpl(OpcodeArgs, IR::OpSize ElementSize,
|
||||
auto Address = MakeAddress(DataOp);
|
||||
|
||||
RefPair Result {};
|
||||
Result.Low = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.Low, Address, Invalid(), MEM_OFFSET_SXTX, OpSize::i8Bit);
|
||||
Result.Low = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.Low, Address, Invalid(), MEM_OFFSET_SXTX, 1);
|
||||
|
||||
if (Is128Bit) {
|
||||
Result.High = LoadZeroVector(OpSize::i128Bit);
|
||||
} else {
|
||||
///< TODO: This can be cleaner if AVX128_LoadSource_WithOpSize could return both constructed addresses.
|
||||
auto AddressHigh = _Add(OpSize::i64Bit, Address, _Constant(16));
|
||||
Result.High = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.High, AddressHigh, Invalid(), MEM_OFFSET_SXTX, OpSize::i8Bit);
|
||||
Result.High = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.High, AddressHigh, Invalid(), MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
|
||||
}
|
||||
@ -2124,7 +2123,7 @@ void OpDispatchBuilder::AVX128_VMASKMOV(OpcodeArgs) {
|
||||
void OpDispatchBuilder::AVX128_MASKMOV(OpcodeArgs) {
|
||||
///< This instruction only supports 128-bit.
|
||||
const auto Size = OpSizeFromSrc(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
|
||||
auto MaskSrc = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
|
||||
@ -2147,11 +2146,9 @@ void OpDispatchBuilder::AVX128_MASKMOV(OpcodeArgs) {
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VectorVariableBlend(OpcodeArgs) {
|
||||
const auto Size = OpSizeFromSrc(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
const auto Src3Selector = Op->Src[2].Literal();
|
||||
|
||||
constexpr auto ElementSizeBits = ElementSize * 8;
|
||||
|
||||
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);
|
||||
|
||||
@ -2163,6 +2160,7 @@ void OpDispatchBuilder::AVX128_VectorVariableBlend(OpcodeArgs) {
|
||||
}
|
||||
|
||||
auto Convert = [this](Ref Src1, Ref Src2, Ref Mask) {
|
||||
const auto ElementSizeBits = IR::OpSizeAsBits(ElementSize);
|
||||
Ref Shifted = _VSShrI(OpSize::i128Bit, ElementSize, Mask, ElementSizeBits - 1);
|
||||
return _VBSL(OpSize::i128Bit, Shifted, Src2, Src1);
|
||||
};
|
||||
@ -2248,7 +2246,7 @@ void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs) {
|
||||
Ref ZeroConst = _Constant(0);
|
||||
Ref OneConst = _Constant(1);
|
||||
|
||||
const auto ElementSizeInBits = ElementSize * 8;
|
||||
const auto ElementSizeInBits = IR::OpSizeAsBits(ElementSize);
|
||||
|
||||
{
|
||||
// Calculate ZF first.
|
||||
@ -2292,7 +2290,7 @@ void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs) {
|
||||
}
|
||||
|
||||
// As in PTest, this sets Z appropriately while zeroing the rest of NZCV.
|
||||
SetNZ_ZeroCV(32, ZF);
|
||||
SetNZ_ZeroCV(OpSize::i32Bit, ZF);
|
||||
SetCFInverted(CFInv);
|
||||
ZeroPF_AF();
|
||||
}
|
||||
@ -2339,14 +2337,14 @@ void OpDispatchBuilder::AVX128_PTest(OpcodeArgs) {
|
||||
// Set ZF according to Test1. SF will be zeroed since we do a 32-bit test on
|
||||
// the results of a 16-bit value from the UMaxV, so the 32-bit sign bit is
|
||||
// cleared even if the 16-bit scalars were negative.
|
||||
SetNZ_ZeroCV(32, Test1);
|
||||
SetNZ_ZeroCV(OpSize::i32Bit, Test1);
|
||||
SetCFInverted(Test2);
|
||||
ZeroPF_AF();
|
||||
}
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::AVX128_VPERMILReg(OpcodeArgs) {
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](size_t _ElementSize, Ref Src, Ref Indices) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src, Ref Indices) {
|
||||
return VPERMILRegOpImpl(OpSize::i128Bit, ElementSize, Src, Indices);
|
||||
});
|
||||
}
|
||||
@ -2376,7 +2374,7 @@ void OpDispatchBuilder::AVX128_VPERMD(OpcodeArgs) {
|
||||
void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) {
|
||||
const auto Selector = static_cast<uint8_t>(Op->Src[2].Literal());
|
||||
|
||||
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::iInvalid, [this, Selector](size_t _, Ref Src1, Ref Src2) {
|
||||
AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), OpSize::iInvalid, [this, Selector](IR::OpSize, Ref Src1, Ref Src2) {
|
||||
return _PCLMUL(OpSize::i128Bit, Src1, Src2, Selector & 0b1'0001);
|
||||
});
|
||||
}
|
||||
@ -2548,7 +2546,7 @@ void OpDispatchBuilder::AVX128_VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Sr
|
||||
OpDispatchBuilder::RefPair OpDispatchBuilder::AVX128_VPGatherImpl(OpSize Size, OpSize ElementLoadSize, OpSize AddrElementSize, RefPair Dest,
|
||||
RefPair Mask, RefVSIB VSIB) {
|
||||
LOGMAN_THROW_A_FMT(AddrElementSize == OpSize::i32Bit || AddrElementSize == OpSize::i64Bit, "Unknown address element size");
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
|
||||
///< BaseAddr doesn't need to exist, calculate that here.
|
||||
Ref BaseAddr = VSIB.BaseAddr;
|
||||
@ -2686,17 +2684,17 @@ OpDispatchBuilder::RefPair OpDispatchBuilder::AVX128_VPGatherQPSImpl(Ref Dest, R
template<OpSize AddrElementSize>
void OpDispatchBuilder::AVX128_VPGATHER(OpcodeArgs) {

const auto Size = GetDstSize(Op);
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
const auto Size = OpSizeFromDst(Op);
const auto Is128Bit = Size == OpSize::i128Bit;

///< Element size is determined by W flag.
const OpSize ElementLoadSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit;

// We only need the high address register if the number of data elements is more than what the low half can consume.
// But also the number of address elements is clamped by the destination size as well.
const size_t NumDataElements = Size / ElementLoadSize;
const size_t NumAddrElementBytes = std::min<size_t>(Size, (NumDataElements * AddrElementSize));
const bool NeedsHighAddrBytes = NumAddrElementBytes > OpSize::i128Bit;
const size_t NumDataElements = IR::NumElements(Size, ElementLoadSize);
const size_t NumAddrElementBytes = std::min<size_t>(IR::OpSizeToSize(Size), (NumDataElements * IR::OpSizeToSize(AddrElementSize)));
const bool NeedsHighAddrBytes = NumAddrElementBytes > IR::OpSizeToSize(OpSize::i128Bit);

auto Dest = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, !Is128Bit);
auto VSIB = AVX128_LoadVSIB(Op, Op->Src[0], Op->Flags, NeedsHighAddrBytes);
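
The element-count arithmetic above decides whether the upper address register is needed. A worked example with plain byte counts, for a VPGATHERDQ-style case (256-bit destination, 64-bit data elements, 32-bit indices); the concrete numbers are illustrative, not taken from the patch:

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t DstBytes = 32;      // OpSize::i256Bit destination
  const std::size_t DataElemBytes = 8;  // ElementLoadSize = i64Bit (AVX.W set)
  const std::size_t AddrElemBytes = 4;  // AddrElementSize = i32Bit

  const std::size_t NumDataElements = DstBytes / DataElemBytes;                                 // 4
  const std::size_t NumAddrElementBytes = std::min(DstBytes, NumDataElements * AddrElemBytes);  // 16
  const bool NeedsHighAddrBytes = NumAddrElementBytes > 16;                                     // false

  // Four dword indices fit entirely in the low 128-bit half, so only the low
  // address register is required for this form.
  std::printf("%zu data elements, %zu index bytes, high half needed: %d\n",
              NumDataElements, NumAddrElementBytes, NeedsHighAddrBytes);
  return 0;
}
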
@ -2740,7 +2738,7 @@ void OpDispatchBuilder::AVX128_VPGATHER(OpcodeArgs) {
|
||||
} else if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
|
||||
Result = AVX128_VPGatherQPSImpl(Dest.Low, Mask.Low, VSIB);
|
||||
} else {
|
||||
Result = AVX128_VPGatherImpl(SizeToOpSize(Size), ElementLoadSize, AddrElementSize, Dest, Mask, VSIB);
|
||||
Result = AVX128_VPGatherImpl(Size, ElementLoadSize, AddrElementSize, Dest, Mask, VSIB);
|
||||
}
|
||||
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
|
||||
|
||||
@ -2754,8 +2752,8 @@ void OpDispatchBuilder::AVX128_VPGATHER(OpcodeArgs) {
|
||||
void OpDispatchBuilder::AVX128_VCVTPH2PS(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto SrcSize = IR::SizeToOpSize(IR::OpSizeToSize(DstSize) / 2);
|
||||
const auto Is128BitSrc = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128BitDst = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128BitSrc = SrcSize == OpSize::i128Bit;
|
||||
const auto Is128BitDst = DstSize == OpSize::i128Bit;
|
||||
|
||||
RefPair Src {};
|
||||
if (Op->Src[0].IsGPR()) {
|
||||
@ -2783,7 +2781,7 @@ void OpDispatchBuilder::AVX128_VCVTPH2PS(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::AVX128_VCVTPS2PH(OpcodeArgs) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is128BitSrc = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128BitSrc = SrcSize == OpSize::i128Bit;
|
||||
const auto StoreSize = Op->Dest.IsGPR() ? OpSize::i128Bit : IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2);
|
||||
|
||||
const auto Imm8 = Op->Src[1].Literal();
|
||||
@ -2814,7 +2812,7 @@ void OpDispatchBuilder::AVX128_VCVTPS2PH(OpcodeArgs) {
|
||||
|
||||
// We need to eliminate upper junk if we're storing into a register with
|
||||
// a 256-bit source (VCVTPS2PH's destination for registers is an XMM).
|
||||
if (Op->Src[0].IsGPR() && SrcSize == Core::CPUState::XMM_AVX_REG_SIZE) {
|
||||
if (Op->Src[0].IsGPR() && SrcSize == OpSize::i256Bit) {
|
||||
Result = AVX128_Zext(Result.Low);
|
||||
}
|
|
||||
|
||||
void OpDispatchBuilder::VAESEncOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
|
||||
// TODO: Handle 256-bit VAESENC.
|
||||
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENC unimplemented");
|
||||
@ -343,7 +343,7 @@ void OpDispatchBuilder::AESEncLastOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VAESEncLastOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
|
||||
// TODO: Handle 256-bit VAESENCLAST.
|
||||
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENCLAST unimplemented");
|
||||
@ -364,7 +364,7 @@ void OpDispatchBuilder::AESDecOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VAESDecOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
|
||||
// TODO: Handle 256-bit VAESDEC.
|
||||
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESDEC unimplemented");
|
||||
@ -385,7 +385,7 @@ void OpDispatchBuilder::AESDecLastOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VAESDecLastOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
[[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
|
||||
// TODO: Handle 256-bit VAESDECLAST.
|
||||
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESDECLAST unimplemented");
|
||||
|
@ -139,8 +139,8 @@ Ref OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
}

void OpDispatchBuilder::CalculateOF(IR::OpSize SrcSize, Ref Res, Ref Src1, Ref Src2, bool Sub) {
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
uint64_t SignBit = (SrcSize * 8) - 1;
const auto OpSize = SrcSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
const uint64_t SignBit = IR::OpSizeAsBits(SrcSize) - 1;
Ref Anded = nullptr;

// For add, OF is set iff the sources have the same sign but the destination
@ -171,7 +171,7 @@ void OpDispatchBuilder::CalculateOF(IR::OpSize SrcSize, Ref Res, Ref Src1, Ref S
}
}

SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Anded, SrcSize * 8 - 1, true);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Anded, SignBit, true);
}

Ref OpDispatchBuilder::LoadPFRaw(bool Mask, bool Invert) {
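
The comment above is stating the usual signed-overflow rule: for an add, OF is set when both sources share a sign and the result's sign differs. One standard way to express that (not necessarily the exact expression FEX builds into Anded) is ~(Src1 ^ Src2) & (Res ^ Src1), whose top bit is the flag; for subtraction the sign of Src2 is effectively flipped first. A scalar check at 8-bit width:

#include <cstdint>
#include <cstdio>

// Returns the OF bit for an 8-bit ADD via the top bit of ~(a ^ b) & (res ^ a).
static bool AddOverflows8(int8_t a, int8_t b) {
  const uint8_t res = static_cast<uint8_t>(a) + static_cast<uint8_t>(b);
  const uint8_t anded = ~(a ^ b) & (res ^ a);
  return (anded >> 7) & 1;
}

int main() {
  std::printf("%d %d %d\n",
              AddOverflows8(100, 100),    // 1: positive + positive wrapped negative
              AddOverflows8(-100, -100),  // 1: negative + negative wrapped positive
              AddOverflows8(100, -100));  // 0: differing signs never overflow
  return 0;
}
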
@ -265,7 +265,7 @@ Ref OpDispatchBuilder::IncrementByCarry(OpSize OpSize, Ref Src) {
Ref OpDispatchBuilder::CalculateFlags_ADC(IR::OpSize SrcSize, Ref Src1, Ref Src2) {
auto Zero = _InlineConstant(0);
auto One = _InlineConstant(1);
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
auto OpSize = SrcSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
Ref Res;

CalculateAF(Src1, Src2);
@ -277,7 +277,7 @@ Ref OpDispatchBuilder::CalculateFlags_ADC(IR::OpSize SrcSize, Ref Src1, Ref Src2
CFInverted = false;
} else {
// Need to zero-extend for correct comparisons below
Src2 = _Bfe(OpSize, SrcSize * 8, 0, Src2);
Src2 = _Bfe(OpSize, IR::OpSizeAsBits(SrcSize), 0, Src2);

// Note that we do not extend Src2PlusCF, since we depend on proper
// 32-bit arithmetic to correctly handle the Src2 = 0xffff case.
@ -285,7 +285,7 @@ Ref OpDispatchBuilder::CalculateFlags_ADC(IR::OpSize SrcSize, Ref Src1, Ref Src2

// Need to zero-extend for the comparison.
Res = _Add(OpSize, Src1, Src2PlusCF);
Res = _Bfe(OpSize, SrcSize * 8, 0, Res);
Res = _Bfe(OpSize, IR::OpSizeAsBits(SrcSize), 0, Res);

// TODO: We can fold that second Bfe in (cmp uxth).
auto SelectCFInv = _Select(FEXCore::IR::COND_UGE, Res, Src2PlusCF, One, Zero);
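
The COND_UGE select above recovers the carry from a masked-width add: after zero-extending Src2 but not Src2 + CF, the carry out is exactly Res < Src2 + CF, and the Src2 = 0xffff case called out in the comment is why the +CF sum must be allowed to reach 0x10000 in the wider register. A 16-bit scalar check of that identity (illustrative, not FEX code):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Src1 = 0x1234, Src2 = 0xffff, CFin = 1;
  const uint32_t Src2PlusCF = Src2 + CFin;            // 0x10000, deliberately not masked to 16 bits
  const uint32_t Res = (Src1 + Src2PlusCF) & 0xffff;  // the 16-bit ADC result
  const bool CF = Res < Src2PlusCF;                   // equivalently !(Res >= Src2PlusCF), as in the select
  std::printf("Res=%#x CF=%d\n", Res, CF);            // Res=0x1234 CF=1
  return 0;
}
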
@ -302,7 +302,7 @@ Ref OpDispatchBuilder::CalculateFlags_ADC(IR::OpSize SrcSize, Ref Src1, Ref Src2
|
||||
Ref OpDispatchBuilder::CalculateFlags_SBB(IR::OpSize SrcSize, Ref Src1, Ref Src2) {
|
||||
auto Zero = _InlineConstant(0);
|
||||
auto One = _InlineConstant(1);
|
||||
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
auto OpSize = SrcSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
CalculateAF(Src1, Src2);
|
||||
|
||||
@ -316,13 +316,13 @@ Ref OpDispatchBuilder::CalculateFlags_SBB(IR::OpSize SrcSize, Ref Src1, Ref Src2
|
||||
CFInverted = true;
|
||||
} else {
|
||||
// Zero extend for correct comparison behaviour with Src1 = 0xffff.
|
||||
Src1 = _Bfe(OpSize, SrcSize * 8, 0, Src1);
|
||||
Src2 = _Bfe(OpSize, SrcSize * 8, 0, Src2);
|
||||
Src1 = _Bfe(OpSize, IR::OpSizeAsBits(SrcSize), 0, Src1);
|
||||
Src2 = _Bfe(OpSize, IR::OpSizeAsBits(SrcSize), 0, Src2);
|
||||
|
||||
auto Src2PlusCF = IncrementByCarry(OpSize, Src2);
|
||||
|
||||
Res = _Sub(OpSize, Src1, Src2PlusCF);
|
||||
Res = _Bfe(OpSize, SrcSize * 8, 0, Res);
|
||||
Res = _Bfe(OpSize, IR::OpSizeAsBits(SrcSize), 0, Res);
|
||||
|
||||
auto SelectCFInv = _Select(FEXCore::IR::COND_UGE, Src1, Src2PlusCF, One, Zero);
|
||||
|
||||
@ -345,9 +345,9 @@ Ref OpDispatchBuilder::CalculateFlags_SUB(IR::OpSize SrcSize, Ref Src1, Ref Src2
|
||||
|
||||
Ref Res;
|
||||
if (SrcSize >= OpSize::i32Bit) {
|
||||
Res = _SubWithFlags(IR::SizeToOpSize(SrcSize), Src1, Src2);
|
||||
Res = _SubWithFlags(SrcSize, Src1, Src2);
|
||||
} else {
|
||||
_SubNZCV(IR::SizeToOpSize(SrcSize), Src1, Src2);
|
||||
_SubNZCV(SrcSize, Src1, Src2);
|
||||
Res = _Sub(OpSize::i32Bit, Src1, Src2);
|
||||
}
|
||||
|
||||
@ -375,9 +375,9 @@ Ref OpDispatchBuilder::CalculateFlags_ADD(IR::OpSize SrcSize, Ref Src1, Ref Src2
|
||||
|
||||
Ref Res;
|
||||
if (SrcSize >= OpSize::i32Bit) {
|
||||
Res = _AddWithFlags(IR::SizeToOpSize(SrcSize), Src1, Src2);
|
||||
Res = _AddWithFlags(SrcSize, Src1, Src2);
|
||||
} else {
|
||||
_AddNZCV(IR::SizeToOpSize(SrcSize), Src1, Src2);
|
||||
_AddNZCV(SrcSize, Src1, Src2);
|
||||
Res = _Add(OpSize::i32Bit, Src1, Src2);
|
||||
}
|
||||
|
||||
@ -400,7 +400,7 @@ void OpDispatchBuilder::CalculateFlags_MUL(IR::OpSize SrcSize, Ref Res, Ref High
|
||||
|
||||
// CF and OF are set if the result of the operation can't be fit in to the destination register
|
||||
// If the value can fit then the top bits will be zero
|
||||
auto SignBit = _Sbfe(OpSize::i64Bit, 1, SrcSize * 8 - 1, Res);
|
||||
auto SignBit = _Sbfe(OpSize::i64Bit, 1, IR::OpSizeAsBits(SrcSize) - 1, Res);
|
||||
_SubNZCV(OpSize::i64Bit, High, SignBit);
|
||||
|
||||
// If High = SignBit, then sets to nZCv. Else sets to nzcV. Since SF/ZF
|
||||
@ -415,7 +415,7 @@ void OpDispatchBuilder::CalculateFlags_UMUL(Ref High) {
|
||||
InvalidatePF_AF();
|
||||
|
||||
auto Zero = _InlineConstant(0);
|
||||
OpSize Size = IR::SizeToOpSize(GetOpSize(High));
|
||||
const auto Size = GetOpSize(High);
|
||||
|
||||
// CF and OF are set if the result of the operation can't be fit in to the destination register
|
||||
// The result register will be all zero if it can't fit due to how multiplication behaves
|
||||
@ -442,7 +442,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(IR::OpSize SrcSize, Re
|
||||
return;
|
||||
}
|
||||
|
||||
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
auto OpSize = SrcSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
SetNZ_ZeroCV(SrcSize, UnmaskedRes);
|
||||
|
||||
@ -451,7 +451,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(IR::OpSize SrcSize, Re
|
||||
// Extract the last bit shifted in to CF. Shift is already masked, but for
|
||||
// 8/16-bit it might be >= SrcSizeBits, in which case CF is cleared. There's
|
||||
// nothing to do in that case since we already cleared CF above.
|
||||
auto SrcSizeBits = SrcSize * 8;
|
||||
const auto SrcSizeBits = IR::OpSizeAsBits(SrcSize);
|
||||
if (Shift < SrcSizeBits) {
|
||||
SetCFDirect(Src1, SrcSizeBits - Shift, true);
|
||||
}
|
||||
@ -464,7 +464,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(IR::OpSize SrcSize, Re
|
||||
// In the case of left shift. OF is only set from the result of <Top Source Bit> XOR <Top Result Bit>
|
||||
if (Shift == 1) {
|
||||
auto Xor = _Xor(OpSize, UnmaskedRes, Src1);
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, SrcSize * 8 - 1, true);
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, IR::OpSizeAsBits(SrcSize) - 1, true);
|
||||
} else {
|
||||
// Undefined, we choose to zero as part of SetNZ_ZeroCV
|
||||
}
|
||||
@ -515,7 +515,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightImmediate(IR::OpSize SrcSize, R
|
||||
// Only defined when Shift is 1 else undefined
|
||||
// Is set to the MSB of the original value
|
||||
if (Shift == 1) {
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Src1, SrcSize * 8 - 1, true);
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Src1, IR::OpSizeAsBits(SrcSize) - 1, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -526,7 +526,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(IR::OpSize SrcS
|
||||
return;
|
||||
}
|
||||
|
||||
const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
const auto OpSize = SrcSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
CalculateFlags_ShiftRightImmediateCommon(SrcSize, Res, Src1, Shift);
|
||||
|
||||
// OF
|
||||
@ -536,7 +536,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(IR::OpSize SrcS
|
||||
// XOR of Result and Src1
|
||||
if (Shift == 1) {
|
||||
auto val = _Xor(OpSize, Src1, Res);
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(val, SrcSize * 8 - 1, true);
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(val, IR::OpSizeAsBits(SrcSize) - 1, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -549,7 +549,7 @@ void OpDispatchBuilder::CalculateFlags_ZCNT(IR::OpSize SrcSize, Ref Result) {
// Now set CF if the Result = SrcSize * 8. Since SrcSize is a power-of-two and
// Result is <= SrcSize * 8, we equivalently check if the log2(SrcSize * 8)
// bit is set. No masking is needed because no higher bits could be set.
unsigned CarryBit = FEXCore::ilog2(SrcSize * 8u);
unsigned CarryBit = FEXCore::ilog2(IR::OpSizeAsBits(SrcSize));
SetCFDirect(Result, CarryBit);
}
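
The comment's argument is easy to sanity-check: the count never exceeds SrcSize * 8, and because that bound is a power of two, its single set bit (bit log2(SrcSize * 8)) appears in the result exactly when the result equals the bound. A small exhaustive check for 32-bit operands:

#include <cstdint>
#include <cstdio>

int main() {
  // For a 32-bit LZCNT/TZCNT the count is in [0, 32]; CF should be set only for 32.
  for (uint32_t Result = 0; Result <= 32; ++Result) {
    const bool ViaCompare = (Result == 32);
    const bool ViaBit = (Result >> 5) & 1;  // bit log2(32) = 5
    if (ViaCompare != ViaBit) {
      std::puts("mismatch");
      return 1;
    }
  }
  std::puts("bit 5 test matches the == 32 compare for every possible count");
  return 0;
}
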
@ -418,7 +418,7 @@ void OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) {
|
||||
// Always 32-bit.
|
||||
const auto ElementSize = OpSize::i32Bit;
|
||||
// Always signed
|
||||
Dest = _VSToFVectorInsert(IR::SizeToOpSize(DstSize), ElementSize, ElementSize, Dest, Src, true, false);
|
||||
Dest = _VSToFVectorInsert(DstSize, ElementSize, ElementSize, Dest, Src, true, false);
|
||||
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Dest, DstSize, OpSize::iInvalid);
|
||||
}
|
||||
@ -482,7 +482,7 @@ Ref OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSiz
|
||||
Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags);
|
||||
Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true});
|
||||
|
||||
return _VFToFScalarInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcElementSize, Src1, Src2, ZeroUpperBits);
|
||||
return _VFToFScalarInsert(DstSize, DstElementSize, SrcElementSize, Src1, Src2, ZeroUpperBits);
|
||||
}
|
||||
|
||||
template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
|
||||
@ -530,7 +530,7 @@ Ref OpDispatchBuilder::InsertScalarRoundImpl(OpcodeArgs, IR::OpSize DstSize, IR:
|
||||
Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true});
|
||||
|
||||
const auto SourceMode = TranslateRoundType(Mode);
|
||||
auto ALUOp = _VFToIScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, SourceMode, ZeroUpperBits);
|
||||
auto ALUOp = _VFToIScalarInsert(DstSize, ElementSize, Src1, Src2, SourceMode, ZeroUpperBits);
|
||||
|
||||
return ALUOp;
|
||||
}
|
||||
@ -600,7 +600,7 @@ void OpDispatchBuilder::InsertScalarFCMPOp(OpcodeArgs) {
|
||||
Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags);
|
||||
Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags, {.AllowUpperGarbage = true});
|
||||
|
||||
Ref Result = InsertScalarFCMPOpImpl(IR::SizeToOpSize(DstSize), OpSizeFromDst(Op), ElementSize, Src1, Src2, CompType, false);
|
||||
Ref Result = InsertScalarFCMPOpImpl(DstSize, OpSizeFromDst(Op), ElementSize, Src1, Src2, CompType, false);
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid);
|
||||
}
|
||||
|
||||
@ -619,7 +619,7 @@ void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs) {
|
||||
Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags);
|
||||
Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags, {.AllowUpperGarbage = true});
|
||||
|
||||
Ref Result = InsertScalarFCMPOpImpl(IR::SizeToOpSize(DstSize), OpSizeFromDst(Op), ElementSize, Src1, Src2, CompType, true);
|
||||
Ref Result = InsertScalarFCMPOpImpl(DstSize, OpSizeFromDst(Op), ElementSize, Src1, Src2, CompType, true);
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid);
|
||||
}
|
||||
|
||||
@ -741,10 +741,10 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
for (unsigned i = 0; i < NumElements; ++i) {
|
||||
// Extract the top bit of the element
|
||||
Ref Tmp = _VExtractToGPR(Size, ElementSize, Src, i);
|
||||
Tmp = _Bfe(IR::SizeToOpSize(ElementSize), 1, ElementSize * 8 - 1, Tmp);
|
||||
Tmp = _Bfe(ElementSize, 1, IR::OpSizeAsBits(ElementSize) - 1, Tmp);
|
||||
|
||||
// Shift it to the correct location
|
||||
Tmp = _Lshl(IR::SizeToOpSize(ElementSize), Tmp, _Constant(i));
|
||||
Tmp = _Lshl(ElementSize, Tmp, _Constant(i));
|
||||
|
||||
// Or it with the current value
|
||||
CurrentVal = _Or(OpSize::i64Bit, CurrentVal, Tmp);
|
||||
@ -755,7 +755,7 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
|
||||
void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = SrcSize == OpSize::i256Bit;
|
||||
const auto ExtractSize = Is256Bit ? OpSize::i32Bit : OpSize::i16Bit;
|
||||
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -767,7 +767,7 @@ void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) {
|
||||
// Since we also handle the MM MOVMSKB here too,
|
||||
// we need to clamp the lower bound.
|
||||
const auto VAdd1Size = std::max(SrcSize, OpSize::i128Bit);
|
||||
const auto VAdd2Size = std::max(IR::DivideOpSize(SrcSize, 2), OpSize::i64Bit);
|
||||
const auto VAdd2Size = std::max(SrcSize >> 1, OpSize::i64Bit);
|
||||
|
||||
auto VAdd1 = _VAddP(VAdd1Size, OpSize::i8Bit, VAnd, VAnd);
|
||||
auto VAdd2 = _VAddP(VAdd2Size, OpSize::i8Bit, VAdd1, VAdd1);
|
||||
@ -790,7 +790,7 @@ void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
|
||||
void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -819,8 +819,7 @@ void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
|
||||
void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
|
||||
@ -852,7 +851,7 @@ Ref OpDispatchBuilder::GeneratePSHUFBMask(IR::OpSize SrcSize) {
|
||||
}
|
||||
|
||||
Ref OpDispatchBuilder::PSHUFBOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, Ref MaskVector) {
|
||||
const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = SrcSize == OpSize::i256Bit;
|
||||
|
||||
// We perform the 256-bit version as two 128-bit operations due to
|
||||
// the lane splitting behavior, so cap the maximum size at 16.
|
||||
@ -1173,7 +1172,7 @@ void OpDispatchBuilder::PSHUFDOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs, IR::OpSize ElementSize, bool Low) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = SrcSize == OpSize::i256Bit;
|
||||
auto Shuffle = Op->Src[1].Literal();
|
||||
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -1195,7 +1194,7 @@ void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs, IR::OpSize ElementSize, bool Low)
|
||||
if (Is256Bit) {
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
const auto Index = Shuffle & 0b11;
|
||||
const auto UpperLaneOffset = Core::CPUState::XMM_SSE_REG_SIZE / ElementSize;
|
||||
const auto UpperLaneOffset = IR::NumElements(OpSize::i128Bit, ElementSize);
|
||||
|
||||
const auto LowDstIndex = BaseElement + i;
|
||||
const auto LowSrcIndex = BaseElement + Index;
|
||||
@ -1224,10 +1223,10 @@ Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize Ele
|
||||
// Since 256-bit variants and up don't lane cross, we can construct
|
||||
// everything in terms of the 128-variant, as each lane is essentially
|
||||
// its own 128-bit segment.
|
||||
const uint8_t NumElements = Core::CPUState::XMM_SSE_REG_SIZE / ElementSize;
|
||||
const uint8_t NumElements = IR::NumElements(OpSize::i128Bit, ElementSize);
|
||||
const uint8_t HalfNumElements = NumElements >> 1;
|
||||
|
||||
const bool Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const bool Is256Bit = DstSize == OpSize::i256Bit;
|
||||
|
||||
std::array<Ref, 4> Srcs {};
|
||||
for (size_t i = 0; i < HalfNumElements; ++i) {
|
||||
@ -1248,7 +1247,7 @@ Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize Ele
|
||||
// AVX differs the behavior of VSHUFPD and VSHUFPS.
|
||||
// The same immediate bits are used for both lanes with VSHUFPS,
|
||||
// but VSHUFPD uses different immediate bits for each lane.
|
||||
const auto SrcIndex2 = ElementSize == 4 ? SrcIndex1 : ((Shuffle >> 2) & SelectionMask);
|
||||
const auto SrcIndex2 = ElementSize == OpSize::i32Bit ? SrcIndex1 : ((Shuffle >> 2) & SelectionMask);
|
||||
|
||||
Ref Insert = _VInsElement(DstSize, ElementSize, Element, SrcIndex1, Dest, Srcs[Element]);
|
||||
Dest = _VInsElement(DstSize, ElementSize, Element + NumElements, SrcIndex2 + NumElements, Insert, Srcs[Element]);
|
||||
@ -1442,7 +1441,7 @@ void OpDispatchBuilder::VANDNOp(OpcodeArgs) {
|
||||
template<IROps IROp, IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::VHADDPOp(OpcodeArgs) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = SrcSize == OpSize::i256Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -1485,7 +1484,7 @@ void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
Ref OpDispatchBuilder::PINSROpImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
|
||||
const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) {
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
const auto NumElements = Size / ElementSize;
|
||||
const auto NumElements = IR::NumElements(Size, ElementSize);
|
||||
const uint64_t Index = Imm.Literal() & (NumElements - 1);
|
||||
Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, Size, Op->Flags);
|
||||
|
||||
@ -1608,7 +1607,7 @@ void OpDispatchBuilder::PExtrOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
}
|
||||
|
||||
// AVX version only operates on 128-bit.
|
||||
const uint8_t NumElements = std::min<uint8_t>(GetSrcSize(Op), OpSize::i128Bit) / OverridenElementSize;
|
||||
const uint8_t NumElements = IR::NumElements(std::min(OpSizeFromSrc(Op), OpSize::i128Bit), OverridenElementSize);
|
||||
Index &= NumElements - 1;
|
||||
|
||||
if (Op->Dest.IsGPR()) {
|
||||
@ -1649,8 +1648,8 @@ void OpDispatchBuilder::VEXTRACT128Op(OpcodeArgs) {
|
||||
Ref OpDispatchBuilder::PSIGNImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src1, Ref Src2) {
|
||||
const auto Size = OpSizeFromSrc(Op);
|
||||
|
||||
Ref Control = _VSQSHL(Size, ElementSize, Src2, (ElementSize * 8) - 1);
|
||||
Control = _VSRSHR(Size, ElementSize, Control, (ElementSize * 8) - 1);
|
||||
Ref Control = _VSQSHL(Size, ElementSize, Src2, IR::OpSizeAsBits(ElementSize) - 1);
|
||||
Control = _VSRSHR(Size, ElementSize, Control, IR::OpSizeAsBits(ElementSize) - 1);
|
||||
return _VMul(Size, ElementSize, Src1, Control);
|
||||
}
|
||||
|
||||
@ -1725,7 +1724,7 @@ void OpDispatchBuilder::PSRLI(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
|
||||
void OpDispatchBuilder::VPSRLIOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto Size = OpSizeFromSrc(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
const uint64_t ShiftConstant = Op->Src[1].Literal();
|
||||
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -1848,7 +1847,7 @@ void OpDispatchBuilder::PSRLDQ(OpcodeArgs) {
|
||||
Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
|
||||
Ref Result = LoadZeroVector(Size);
|
||||
|
||||
if (Shift < Size) {
|
||||
if (Shift < IR::OpSizeToSize(Size)) {
|
||||
Result = _VExtr(Size, OpSize::i8Bit, Result, Dest, Shift);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
|
||||
@ -1856,7 +1855,7 @@ void OpDispatchBuilder::PSRLDQ(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
const uint64_t Shift = Op->Src[1].Literal();
|
||||
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -1872,7 +1871,7 @@ void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) {
|
||||
Result = LoadZeroVector(DstSize);
|
||||
|
||||
if (Is128Bit) {
|
||||
if (Shift < DstSize) {
|
||||
if (Shift < IR::OpSizeToSize(DstSize)) {
|
||||
Result = _VExtr(DstSize, OpSize::i8Bit, Result, Src, Shift);
|
||||
}
|
||||
} else {
|
||||
@ -1899,8 +1898,8 @@ void OpDispatchBuilder::PSLLDQ(OpcodeArgs) {
|
||||
|
||||
Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
|
||||
Ref Result = LoadZeroVector(Size);
|
||||
if (Shift < Size) {
|
||||
Result = _VExtr(Size, OpSize::i8Bit, Dest, Result, Size - Shift);
|
||||
if (Shift < IR::OpSizeToSize(Size)) {
|
||||
Result = _VExtr(Size, OpSize::i8Bit, Dest, Result, IR::OpSizeToSize(Size) - Shift);
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
|
||||
@ -1908,7 +1907,8 @@ void OpDispatchBuilder::PSLLDQ(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto DstSizeInt = IR::OpSizeToSize(DstSize);
|
||||
const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
const uint64_t Shift = Op->Src[1].Literal();
|
||||
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -1922,13 +1922,13 @@ void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) {
|
||||
} else {
|
||||
Result = LoadZeroVector(DstSize);
|
||||
if (Is128Bit) {
|
||||
if (Shift < DstSize) {
|
||||
Result = _VExtr(DstSize, OpSize::i8Bit, Src, Result, DstSize - Shift);
|
||||
if (Shift < DstSizeInt) {
|
||||
Result = _VExtr(DstSize, OpSize::i8Bit, Src, Result, DstSizeInt - Shift);
|
||||
}
|
||||
} else {
|
||||
if (Shift < Core::CPUState::XMM_SSE_REG_SIZE) {
|
||||
Ref ResultBottom = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Result, 16 - Shift);
|
||||
Ref ResultTop = _VExtr(DstSize, OpSize::i8Bit, Src, Result, DstSize - Shift);
|
||||
Ref ResultTop = _VExtr(DstSize, OpSize::i8Bit, Src, Result, DstSizeInt - Shift);
|
||||
|
||||
Result = _VInsElement(DstSize, OpSize::i128Bit, 1, 0, ResultBottom, ResultTop);
|
||||
}
|
||||
@ -1954,7 +1954,7 @@ void OpDispatchBuilder::PSRAIOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
void OpDispatchBuilder::VPSRAIOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const uint64_t Shift = Op->Src[1].Literal();
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Result = Src;
|
||||
@ -2008,7 +2008,7 @@ void OpDispatchBuilder::MOVDDUPOp(OpcodeArgs) {
|
||||
void OpDispatchBuilder::VMOVDDUPOp(OpcodeArgs) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto IsSrcGPR = Op->Src[0].IsGPR();
|
||||
const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = SrcSize == OpSize::i256Bit;
|
||||
const auto MemSize = Is256Bit ? OpSize::i256Bit : OpSize::i64Bit;
|
||||
|
||||
Ref Src = IsSrcGPR ? LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags) :
|
||||
@ -2112,7 +2112,7 @@ Ref OpDispatchBuilder::Vector_CVT_Int_To_FloatImpl(OpcodeArgs, IR::OpSize SrcEle
|
||||
auto ElementSize = SrcElementSize;
|
||||
if (Widen) {
|
||||
Src = _VSXTL(Size, ElementSize, Src);
|
||||
ElementSize = IR::MultiplyOpSize(ElementSize, 2);
|
||||
ElementSize = ElementSize << 1;
|
||||
}
|
||||
|
||||
return _Vector_SToF(Size, ElementSize, Src);
|
||||
@ -2143,8 +2143,8 @@ Ref OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, IR::OpSize SrcEle
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
|
||||
if (Narrow) {
|
||||
Src = _Vector_FToF(DstSize, IR::DivideOpSize(SrcElementSize, 2), Src, SrcElementSize);
|
||||
ElementSize = IR::DivideOpSize(ElementSize, 2);
|
||||
Src = _Vector_FToF(DstSize, SrcElementSize >> 1, Src, SrcElementSize);
|
||||
ElementSize = ElementSize >> 1;
|
||||
}
|
||||
|
||||
if (HostRoundingMode) {
|
||||
@ -2236,17 +2236,17 @@ void OpDispatchBuilder::Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElem
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
|
||||
const auto IsFloatSrc = SrcElementSize == OpSize::i32Bit;
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
|
||||
const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2) : SrcSize;
|
||||
const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? (SrcSize >> 1) : SrcSize;
|
||||
|
||||
Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags);
|
||||
|
||||
Ref Result {};
|
||||
if (DstElementSize > SrcElementSize) {
|
||||
Result = _Vector_FToF(SrcSize, IR::MultiplyOpSize(SrcElementSize, 2), Src, SrcElementSize);
|
||||
Result = _Vector_FToF(SrcSize, SrcElementSize << 1, Src, SrcElementSize);
|
||||
} else {
|
||||
Result = _Vector_FToF(SrcSize, IR::DivideOpSize(SrcElementSize, 2), Src, SrcElementSize);
|
||||
Result = _Vector_FToF(SrcSize, SrcElementSize >> 1, Src, SrcElementSize);
|
||||
}
|
||||
|
||||
if (IsAVX) {
|
||||
@ -2269,7 +2269,7 @@ void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
|
||||
Src = _VSXTL(DstSize, ElementSize, Src);
|
||||
ElementSize = IR::MultiplyOpSize(ElementSize, 2);
|
||||
ElementSize = ElementSize << 1;
|
||||
|
||||
// Always signed
|
||||
Src = _Vector_SToF(DstSize, ElementSize, Src);
|
||||
@ -2294,8 +2294,8 @@ void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) {
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
|
||||
if (Narrow) {
|
||||
Src = _Vector_FToF(Size, IR::DivideOpSize(SrcElementSize, 2), Src, SrcElementSize);
|
||||
ElementSize = IR::DivideOpSize(ElementSize, 2);
|
||||
Src = _Vector_FToF(Size, SrcElementSize >> 1, Src, SrcElementSize);
|
||||
ElementSize = ElementSize >> 1;
|
||||
}
|
||||
|
||||
if constexpr (HostRoundingMode) {
|
||||
@ -2816,7 +2816,7 @@ Ref OpDispatchBuilder::PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto SanitizedDstSize = std::min(DstSize, OpSize::i128Bit);
|
||||
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
const auto Index = Imm.Literal();
|
||||
|
||||
Ref Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags);
|
||||
@ -2830,7 +2830,7 @@ Ref OpDispatchBuilder::PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand
|
||||
}
|
||||
Ref Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags);
|
||||
|
||||
if (Index >= (SanitizedDstSize * 2)) {
|
||||
if (Index >= (IR::OpSizeToSize(SanitizedDstSize) * 2)) {
|
||||
// If the immediate is greater than both vectors combined then it zeroes the vector
|
||||
return LoadZeroVector(DstSize);
|
||||
}
|
||||
@ -2891,7 +2891,7 @@ template void OpDispatchBuilder::PACKUSOp<OpSize::i32Bit>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::VPACKUSOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -2919,7 +2919,7 @@ template void OpDispatchBuilder::PACKSSOp<OpSize::i32Bit>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::VPACKSSOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -2954,7 +2954,7 @@ Ref OpDispatchBuilder::PMULLOpImpl(OpSize Size, IR::OpSize ElementSize, bool Sig
|
||||
|
||||
template<IR::OpSize ElementSize, bool Signed>
|
||||
void OpDispatchBuilder::PMULLOp(OpcodeArgs) {
|
||||
static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit");
|
||||
static_assert(ElementSize == OpSize::i32Bit, "Currently only handles 32-bit -> 64-bit");
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -2968,7 +2968,7 @@ template void OpDispatchBuilder::PMULLOp<OpSize::i32Bit, true>(OpcodeArgs);
|
||||
|
||||
template<IR::OpSize ElementSize, bool Signed>
|
||||
void OpDispatchBuilder::VPMULLOp(OpcodeArgs) {
|
||||
static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit");
|
||||
static_assert(ElementSize == OpSize::i32Bit, "Currently only handles 32-bit -> 64-bit");
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -3124,15 +3124,15 @@ void OpDispatchBuilder::PMULHRWOp(OpcodeArgs) {

// Implementation is more efficient for 8byte registers
// Multiplies 4 16bit values in to 4 32bit values
Res = _VSMull(IR::MultiplyOpSize(Size, 2), OpSize::i16Bit, Dest, Src);
Res = _VSMull(Size << 1, OpSize::i16Bit, Dest, Src);

// Load 0x0000_8000 in to each 32-bit element.
Ref VConstant = _VectorImm(OpSize::i128Bit, OpSize::i32Bit, 0x80, 8);

Res = _VAdd(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, VConstant);
Res = _VAdd(Size << 1, OpSize::i32Bit, Res, VConstant);

// Now shift and narrow to convert 32-bit values to 16bit, storing the top 16bits
Res = _VUShrNI(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, 16);
Res = _VUShrNI(Size << 1, OpSize::i32Bit, Res, 16);

StoreResult(FPRClass, Op, Res, OpSize::iInvalid);
}
|
||||
@ -3177,7 +3177,7 @@ Ref OpDispatchBuilder::PMADDWDOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) {
|
||||
|
||||
if (Size == OpSize::i64Bit) {
|
||||
// MMX implementation can be slightly more optimal
|
||||
Size = IR::DivideOpSize(Size, 2);
|
||||
Size = Size >> 1;
|
||||
auto MullResult = _VSMull(Size, OpSize::i16Bit, Src1, Src2);
|
||||
return _VAddP(Size, OpSize::i32Bit, MullResult, MullResult);
|
||||
}
|
||||
@ -3211,7 +3211,7 @@ void OpDispatchBuilder::VPMADDWDOp(OpcodeArgs) {
|
||||
|
||||
Ref OpDispatchBuilder::PMADDUBSWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) {
|
||||
if (Size == OpSize::i64Bit) {
|
||||
const auto MultSize = IR::MultiplyOpSize(Size, 2);
|
||||
const auto MultSize = Size << 1;
|
||||
// 64bit is more efficient
|
||||
|
||||
// Src1 is unsigned
|
||||
@ -3314,11 +3314,11 @@ Ref OpDispatchBuilder::PMULHRSWOpImpl(OpSize Size, Ref Src1, Ref Src2) {
|
||||
Ref Res {};
|
||||
if (Size == OpSize::i64Bit) {
|
||||
// Implementation is more efficient for 8byte registers
|
||||
Res = _VSMull(IR::MultiplyOpSize(Size, 2), OpSize::i16Bit, Src1, Src2);
|
||||
Res = _VSShrI(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, 14);
|
||||
auto OneVector = _VectorImm(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, 1);
|
||||
Res = _VAdd(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, OneVector);
|
||||
return _VUShrNI(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, 1);
|
||||
Res = _VSMull(Size << 1, OpSize::i16Bit, Src1, Src2);
|
||||
Res = _VSShrI(Size << 1, OpSize::i32Bit, Res, 14);
|
||||
auto OneVector = _VectorImm(Size << 1, OpSize::i32Bit, 1);
|
||||
Res = _VAdd(Size << 1, OpSize::i32Bit, Res, OneVector);
|
||||
return _VUShrNI(Size << 1, OpSize::i32Bit, Res, 1);
|
||||
} else {
|
||||
// 128-bit and 256-bit are less efficient
|
||||
Ref ResultLow;
|
||||
@ -3375,7 +3375,7 @@ template void OpDispatchBuilder::HSUBP<OpSize::i64Bit>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::VHSUBPOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -3409,7 +3409,7 @@ template void OpDispatchBuilder::PHSUB<OpSize::i32Bit>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::VPHSUBOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -3441,7 +3441,7 @@ void OpDispatchBuilder::PHADDS(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPHADDSWOp(OpcodeArgs) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = SrcSize == OpSize::i256Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -3476,7 +3476,7 @@ void OpDispatchBuilder::PHSUBS(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPHSUBSWOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -3497,13 +3497,13 @@ Ref OpDispatchBuilder::PSADBWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) {
|
||||
// but it actually operates in more than 8bit space
|
||||
// This can be seen with `abs(0 - 0xFF)` returning a different result depending
|
||||
// on bit length
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
|
||||
if (Size == OpSize::i64Bit) {
|
||||
auto AbsResult = _VUABDL(IR::MultiplyOpSize(Size, 2), OpSize::i8Bit, Src1, Src2);
|
||||
auto AbsResult = _VUABDL(Size << 1, OpSize::i8Bit, Src1, Src2);
|
||||
|
||||
// Now vector-wide add the results for each
|
||||
return _VAddV(IR::MultiplyOpSize(Size, 2), OpSize::i16Bit, AbsResult);
|
||||
return _VAddV(Size << 1, OpSize::i16Bit, AbsResult);
|
||||
}
|
||||
|
||||
auto AbsResult_Low = _VUABDL(Size, OpSize::i8Bit, Src1, Src2);
|
||||
@ -3558,7 +3558,7 @@ Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, IR::OpSize ElementSi
|
||||
return LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags);
|
||||
} else {
|
||||
// For memory operands the 256-bit variant loads twice the size specified in the table.
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto LoadSize = Is256Bit ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) * 2) : SrcSize;
|
||||
|
||||
@ -3569,8 +3569,7 @@ Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, IR::OpSize ElementSi
|
||||
Ref Src = GetSrc();
|
||||
Ref Result {Src};
|
||||
|
||||
for (auto CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize;
|
||||
CurrentElementSize = IR::MultiplyOpSize(CurrentElementSize, 2)) {
|
||||
for (auto CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize; CurrentElementSize = CurrentElementSize << 1) {
|
||||
if (Signed) {
|
||||
Result = _VSXTL(DstSize, CurrentElementSize, Result);
|
||||
} else {
|
||||
@ -3901,7 +3900,7 @@ void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize)
|
||||
//
|
||||
// To emulate this on AArch64
|
||||
// Arithmetic shift right by the element size, then use BSL to select the registers
|
||||
Mask = _VSShrI(Size, ElementSize, Mask, (ElementSize * 8) - 1);
|
||||
Mask = _VSShrI(Size, ElementSize, Mask, IR::OpSizeAsBits(ElementSize) - 1);
|
||||
|
||||
auto Result = _VBSL(Size, Mask, Src, Dest);
|
||||
|
||||
@ -3910,7 +3909,7 @@ void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize)
|
||||
|
||||
void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto ElementSizeBits = ElementSize * 8;
|
||||
const auto ElementSizeBits = IR::OpSizeAsBits(ElementSize);
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -3947,7 +3946,7 @@ void OpDispatchBuilder::PTestOpImpl(OpSize Size, Ref Dest, Ref Src) {
|
||||
// Set ZF according to Test1. SF will be zeroed since we do a 32-bit test on
|
||||
// the results of a 16-bit value from the UMaxV, so the 32-bit sign bit is
|
||||
// cleared even if the 16-bit scalars were negative.
|
||||
SetNZ_ZeroCV(32, Test1);
|
||||
SetNZ_ZeroCV(OpSize::i32Bit, Test1);
|
||||
SetCFInverted(Test2);
|
||||
ZeroPF_AF();
|
||||
}
|
||||
@ -3962,7 +3961,7 @@ void OpDispatchBuilder::PTestOp(OpcodeArgs) {
|
||||
void OpDispatchBuilder::VTESTOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref Src1, Ref Src2) {
|
||||
InvalidateDeferredFlags();
|
||||
|
||||
const auto ElementSizeInBits = ElementSize * 8;
|
||||
const auto ElementSizeInBits = IR::OpSizeAsBits(ElementSize);
|
||||
const auto MaskConstant = uint64_t {1} << (ElementSizeInBits - 1);
|
||||
|
||||
Ref Mask = _VDupFromGPR(SrcSize, ElementSize, _Constant(MaskConstant));
|
||||
@ -3985,7 +3984,7 @@ void OpDispatchBuilder::VTESTOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref
|
||||
Ref CFInv = _Select(IR::COND_NEQ, AndNotGPR, ZeroConst, OneConst, ZeroConst);
|
||||
|
||||
// As in PTest, this sets Z appropriately while zeroing the rest of NZCV.
|
||||
SetNZ_ZeroCV(32, AndGPR);
|
||||
SetNZ_ZeroCV(OpSize::i32Bit, AndGPR);
|
||||
SetCFInverted(CFInv);
|
||||
ZeroPF_AF();
|
||||
}
|
||||
@ -4083,7 +4082,7 @@ Ref OpDispatchBuilder::DPPOpImpl(IR::OpSize DstSize, Ref Src1, Ref Src2, uint8_t
|
||||
|
||||
// Now using the destination mask we choose where the result ends up
|
||||
// It can duplicate and zero results
|
||||
if (ElementSize == 8) {
|
||||
if (ElementSize == OpSize::i64Bit) {
|
||||
switch (DstMask) {
|
||||
case 0b01:
|
||||
// Dest[63:0] = Result
|
||||
@ -4105,7 +4104,7 @@ Ref OpDispatchBuilder::DPPOpImpl(IR::OpSize DstSize, Ref Src1, Ref Src2, uint8_t
|
||||
auto BadPath = [&]() {
|
||||
Ref Result = ZeroVec;
|
||||
|
||||
for (size_t i = 0; i < (DstSize / ElementSize); ++i) {
|
||||
for (size_t i = 0; i < IR::NumElements(DstSize, ElementSize); ++i) {
|
||||
const auto Bit = 1U << (i % 4);
|
||||
|
||||
if ((DstMask & Bit) != 0) {
|
||||
@ -4127,13 +4126,13 @@ Ref OpDispatchBuilder::DPPOpImpl(IR::OpSize DstSize, Ref Src1, Ref Src2, uint8_t
|
||||
// Dest[63:32] = Result
|
||||
// Dest[95:64] = Zero
|
||||
// Dest[127:96] = Zero
|
||||
return _VZip(IR::DivideOpSize(DstSize, 2), ElementSize, ZeroVec, Temp);
|
||||
return _VZip(DstSize >> 1, ElementSize, ZeroVec, Temp);
|
||||
case 0b0011:
|
||||
// Dest[31:0] = Result
|
||||
// Dest[63:32] = Result
|
||||
// Dest[95:64] = Zero
|
||||
// Dest[127:96] = Zero
|
||||
return _VDupElement(IR::DivideOpSize(DstSize, 2), ElementSize, Temp, 0);
|
||||
return _VDupElement(DstSize >> 1, ElementSize, Temp, 0);
|
||||
case 0b0100:
|
||||
// Dest[31:0] = Zero
|
||||
// Dest[63:32] = Zero
|
||||
@ -4251,7 +4250,7 @@ Ref OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand&
|
||||
Ref Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V);
|
||||
|
||||
// Now we zero out elements based on src mask
|
||||
for (size_t i = 0; i < (DstSize / ElementSize); ++i) {
|
||||
for (size_t i = 0; i < IR::NumElements(DstSize, ElementSize); ++i) {
|
||||
const auto Bit = 1U << (i % 4);
|
||||
|
||||
if ((SrcMask & Bit) == 0) {
|
||||
@ -4272,7 +4271,7 @@ Ref OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand&
|
||||
// It can duplicate and zero results
|
||||
Ref Result = ZeroVec;
|
||||
|
||||
for (size_t i = 0; i < (DstSize / ElementSize); ++i) {
|
||||
for (size_t i = 0; i < IR::NumElements(DstSize, ElementSize); ++i) {
|
||||
const auto Bit = 1U << (i % 4);
|
||||
|
||||
if ((DstMask & Bit) != 0) {
|
||||
@ -4285,17 +4284,17 @@ Ref OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand&
|
||||
|
||||
template<IR::OpSize ElementSize>
|
||||
void OpDispatchBuilder::VDPPOp(OpcodeArgs) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
|
||||
Ref Result {};
|
||||
if (ElementSize == 4 && DstSize == Core::CPUState::XMM_AVX_REG_SIZE) {
|
||||
if (ElementSize == OpSize::i32Bit && DstSize == OpSize::i256Bit) {
|
||||
// 256-bit DPPS isn't handled by the 128-bit solution.
|
||||
Result = VDPPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]);
|
||||
} else {
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
|
||||
Result = DPPOpImpl(OpSizeFromDst(Op), Src1, Src2, Op->Src[2].Literal(), ElementSize);
|
||||
Result = DPPOpImpl(DstSize, Src1, Src2, Op->Src[2].Literal(), ElementSize);
|
||||
}
|
||||
|
||||
// We don't need to emit a _VMov to clear the upper lane, since DPPOpImpl uses a zero vector
|
||||
@ -4306,7 +4305,7 @@ void OpDispatchBuilder::VDPPOp(OpcodeArgs) {
|
||||
template void OpDispatchBuilder::VDPPOp<OpSize::i32Bit>(OpcodeArgs);
|
||||
template void OpDispatchBuilder::VDPPOp<OpSize::i64Bit>(OpcodeArgs);
|
||||
|
||||
Ref OpDispatchBuilder::MPSADBWOpImpl(size_t SrcSize, Ref Src1, Ref Src2, uint8_t Select) {
|
||||
Ref OpDispatchBuilder::MPSADBWOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, uint8_t Select) {
|
||||
const auto LaneHelper = [&, this](uint32_t Selector_Src1, uint32_t Selector_Src2, Ref Src1, Ref Src2) {
|
||||
// Src2 will grab a 32bit element and duplicate it across the 128bits
|
||||
Ref DupSrc = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, Src2, Selector_Src2);
|
||||
@ -4373,7 +4372,7 @@ Ref OpDispatchBuilder::MPSADBWOpImpl(size_t SrcSize, Ref Src1, Ref Src2, uint8_t
|
||||
return _VAddP(OpSize::i128Bit, OpSize::i16Bit, TmpTranspose1, TmpTranspose2);
|
||||
};
|
||||
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = SrcSize == OpSize::i128Bit;
|
||||
|
||||
// Src1 needs to be in byte offset
|
||||
const uint8_t Select_Src1_Low = ((Select & 0b100) >> 2) * 32 / 8;
|
||||
@ -4395,7 +4394,7 @@ Ref OpDispatchBuilder::MPSADBWOpImpl(size_t SrcSize, Ref Src1, Ref Src2, uint8_t
|
||||
|
||||
void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) {
|
||||
const uint8_t Select = Op->Src[1].Literal();
|
||||
const uint8_t SrcSize = GetSrcSize(Op);
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
|
||||
@ -4405,7 +4404,7 @@ void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VMPSADBWOp(OpcodeArgs) {
|
||||
const uint8_t Select = Op->Src[2].Literal();
|
||||
const uint8_t SrcSize = GetSrcSize(Op);
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
|
||||
@ -4463,7 +4462,7 @@ void OpDispatchBuilder::VCVTPS2PHOp(OpcodeArgs) {
|
||||
|
||||
// We need to eliminate upper junk if we're storing into a register with
|
||||
// a 256-bit source (VCVTPS2PH's destination for registers is an XMM).
|
||||
if (Op->Src[0].IsGPR() && SrcSize == Core::CPUState::XMM_AVX_REG_SIZE) {
|
||||
if (Op->Src[0].IsGPR() && SrcSize == OpSize::i256Bit) {
|
||||
Result = _VMov(OpSize::i128Bit, Result);
|
||||
}
|
||||
|
||||
@ -4617,7 +4616,7 @@ Ref OpDispatchBuilder::VBLENDOpImpl(IR::OpSize VecSize, IR::OpSize ElementSize,
|
||||
|
||||
void OpDispatchBuilder::VBLENDPDOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
const auto Selector = Op->Src[2].Literal();
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -4642,7 +4641,7 @@ void OpDispatchBuilder::VBLENDPDOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
const auto Selector = Op->Src[2].Literal();
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -4686,7 +4685,7 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = DstSize == OpSize::i128Bit;
|
||||
const auto Selector = Op->Src[2].Literal();
|
||||
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -4718,7 +4717,7 @@ void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VZEROOp(OpcodeArgs) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto IsVZEROALL = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto IsVZEROALL = DstSize == OpSize::i256Bit;
|
||||
const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U;
|
||||
|
||||
if (IsVZEROALL) {
|
||||
@ -4743,7 +4742,7 @@ void OpDispatchBuilder::VZEROOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs, IR::OpSize ElementSize) {
|
||||
const auto DstSize = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
const auto Selector = Op->Src[1].Literal() & 0xFF;
|
||||
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
@ -4780,7 +4779,7 @@ Ref OpDispatchBuilder::VPERMILRegOpImpl(OpSize DstSize, IR::OpSize ElementSize,
|
||||
// The only difference here is that we need to add 16 to the upper lane
|
||||
// before doing the final addition to build up the indices for TBL.
|
||||
|
||||
const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = DstSize == OpSize::i256Bit;
|
||||
auto IsPD = ElementSize == OpSize::i64Bit;
|
||||
|
||||
if (IsPD) {
|
||||
@ -4856,7 +4855,7 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask
|
||||
// While the control bit immediate for the instruction itself is only ever 8 bits
|
||||
// in size, we use it as a 16-bit value so that we can use the 8th bit to signify
|
||||
// whether or not RAX and RDX should be interpreted as a 64-bit value.
|
||||
const auto SrcSize = GetSrcSize(Op);
|
||||
const auto SrcSize = OpSizeFromSrc(Op);
|
||||
const auto Is64Bit = SrcSize == OpSize::i64Bit;
|
||||
const auto NewControl = uint16_t(Control | (uint16_t(Is64Bit) << 8));
|
||||
|
||||
@ -4935,7 +4934,7 @@ void OpDispatchBuilder::VPCMPISTRMOp(OpcodeArgs) {
|
||||
|
||||
void OpDispatchBuilder::VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = Size == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = Size == OpSize::i256Bit;
|
||||
|
||||
const OpSize ElementSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
@ -4964,7 +4963,7 @@ void OpDispatchBuilder::VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Sr
|
||||
|
||||
void OpDispatchBuilder::VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
const auto Is256Bit = Size == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto Is256Bit = Size == OpSize::i256Bit;
|
||||
|
||||
const OpSize ElementSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
@ -5024,20 +5023,20 @@ void OpDispatchBuilder::VPGATHER(OpcodeArgs) {
|
||||
LOGMAN_THROW_A_FMT(AddrElementSize == OpSize::i32Bit || AddrElementSize == OpSize::i64Bit, "Unknown address element size");
|
||||
|
||||
const auto Size = OpSizeFromDst(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
const auto Is128Bit = Size == OpSize::i128Bit;
|
||||
|
||||
///< Element size is determined by W flag.
|
||||
const OpSize ElementLoadSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
// We only need the high address register if the number of data elements is more than what the low half can consume.
|
||||
// But also the number of address elements is clamped by the destination size as well.
|
||||
const size_t NumDataElements = Size / ElementLoadSize;
|
||||
const size_t NumAddrElementBytes = std::min<size_t>(Size, (NumDataElements * AddrElementSize));
|
||||
const bool Needs128BitHighAddrBytes = NumAddrElementBytes > OpSize::i128Bit;
|
||||
const size_t NumDataElements = IR::NumElements(Size, ElementLoadSize);
|
||||
const size_t NumAddrElementBytes = std::min<size_t>(IR::OpSizeToSize(Size), (NumDataElements * IR::OpSizeToSize(AddrElementSize)));
|
||||
const bool Needs128BitHighAddrBytes = NumAddrElementBytes > IR::OpSizeToSize(OpSize::i128Bit);
|
||||
|
||||
auto VSIB = LoadVSIB(Op, Op->Src[0], Op->Flags);
|
||||
|
||||
const bool SupportsSVELoad = (VSIB.Scale == 1 || VSIB.Scale == AddrElementSize) && (AddrElementSize == ElementLoadSize);
|
||||
const bool SupportsSVELoad = (VSIB.Scale == 1 || VSIB.Scale == IR::OpSizeToSize(AddrElementSize)) && (AddrElementSize == ElementLoadSize);
|
||||
|
||||
Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
|
||||
Ref Mask = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -5067,7 +5066,7 @@ void OpDispatchBuilder::VPGATHER(OpcodeArgs) {
|
||||
}
|
||||
}
|
||||
|
||||
auto Result128 = AVX128_VPGatherImpl(SizeToOpSize(Size), ElementLoadSize, AddrElementSize, Dest128, Mask128, VSIB128);
|
||||
auto Result128 = AVX128_VPGatherImpl(Size, ElementLoadSize, AddrElementSize, Dest128, Mask128, VSIB128);
|
||||
// The registers are current split, need to merge them.
|
||||
Result = _VInsElement(OpSize::i256Bit, OpSize::i128Bit, 1, 0, Result128.Low, Result128.High);
|
||||
} else {
|
||||
|
@ -103,7 +103,7 @@ void OpDispatchBuilder::FILD(OpcodeArgs) {
|
||||
|
||||
// Sign extend to 64bits
|
||||
if (ReadWidth != OpSize::i64Bit) {
|
||||
Data = _Sbfe(OpSize::i64Bit, ReadWidth * 8, 0, Data);
|
||||
Data = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(ReadWidth), 0, Data);
|
||||
}
|
||||
|
||||
// We're about to clobber flags to grab the sign, so save NZCV.
|
||||
@ -351,33 +351,33 @@ void OpDispatchBuilder::X87FNSTENV(OpcodeArgs) {
|
||||
_StoreMem(GPRClass, Size, Mem, FCW, Size);
|
||||
}
|
||||
|
||||
{ _StoreMem(GPRClass, Size, ReconstructFSW_Helper(), Mem, _Constant(Size * 1), Size, MEM_OFFSET_SXTX, OpSize::i8Bit); }
|
||||
{ _StoreMem(GPRClass, Size, ReconstructFSW_Helper(), Mem, _Constant(IR::OpSizeToSize(Size) * 1), Size, MEM_OFFSET_SXTX, 1); }
|
||||
|
||||
auto ZeroConst = _Constant(0);
|
||||
|
||||
{
|
||||
// FTW
|
||||
_StoreMem(GPRClass, Size, GetX87FTW_Helper(), Mem, _Constant(Size * 2), Size, MEM_OFFSET_SXTX, OpSize::i8Bit);
|
||||
_StoreMem(GPRClass, Size, GetX87FTW_Helper(), Mem, _Constant(IR::OpSizeToSize(Size) * 2), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Instruction Offset
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 3), Size, MEM_OFFSET_SXTX, OpSize::i8Bit);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 3), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Instruction CS selector (+ Opcode)
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 4), Size, MEM_OFFSET_SXTX, OpSize::i8Bit);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 4), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Data pointer offset
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 5), Size, MEM_OFFSET_SXTX, OpSize::i8Bit);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 5), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Data pointer selector
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 6), Size, MEM_OFFSET_SXTX, OpSize::i8Bit);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 6), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -407,13 +407,13 @@ void OpDispatchBuilder::X87LDENV(OpcodeArgs) {
|
||||
auto NewFCW = _LoadMem(GPRClass, OpSize::i16Bit, Mem, OpSize::i16Bit);
|
||||
_StoreContext(OpSize::i16Bit, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
|
||||
|
||||
Ref MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1));
|
||||
Ref MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(IR::OpSizeToSize(Size) * 1));
|
||||
auto NewFSW = _LoadMem(GPRClass, Size, MemLocation, Size);
|
||||
ReconstructX87StateFromFSW_Helper(NewFSW);
|
||||
|
||||
{
|
||||
// FTW
|
||||
Ref MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2));
|
||||
Ref MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(IR::OpSizeToSize(Size) * 2));
|
||||
SetX87FTW(_LoadMem(GPRClass, Size, MemLocation, Size));
|
||||
}
|
||||
}
|
||||
@ -447,58 +447,58 @@ void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) {
|
||||
_StoreMem(GPRClass, Size, Mem, FCW, Size);
|
||||
}
|
||||
|
||||
{ _StoreMem(GPRClass, Size, ReconstructFSW_Helper(), Mem, _Constant(Size * 1), Size, MEM_OFFSET_SXTX, 1); }
|
||||
{ _StoreMem(GPRClass, Size, ReconstructFSW_Helper(), Mem, _Constant(IR::OpSizeToSize(Size) * 1), Size, MEM_OFFSET_SXTX, 1); }
|
||||
|
||||
auto ZeroConst = _Constant(0);
|
||||
|
||||
{
|
||||
// FTW
|
||||
_StoreMem(GPRClass, Size, GetX87FTW_Helper(), Mem, _Constant(Size * 2), Size, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(GPRClass, Size, GetX87FTW_Helper(), Mem, _Constant(IR::OpSizeToSize(Size) * 2), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Instruction Offset
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 3), Size, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 3), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Instruction CS selector (+ Opcode)
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 4), Size, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 4), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Data pointer offset
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 5), Size, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 5), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
{
|
||||
// Data pointer selector
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 6), Size, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(IR::OpSizeToSize(Size) * 6), Size, MEM_OFFSET_SXTX, 1);
|
||||
}
|
||||
|
||||
auto OneConst = _Constant(1);
|
||||
auto SevenConst = _Constant(7);
|
||||
const auto LoadSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit;
|
||||
for (int i = 0; i < 7; ++i) {
|
||||
Ref data = _LoadContextIndexed(Top, LoadSize, MMBaseOffset(), OpSize::i128Bit, FPRClass);
|
||||
Ref data = _LoadContextIndexed(Top, LoadSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit), FPRClass);
|
||||
if (ReducedPrecisionMode) {
|
||||
data = _F80CVTTo(data, OpSize::i64Bit);
|
||||
}
|
||||
_StoreMem(FPRClass, OpSize::i128Bit, data, Mem, _Constant((Size * 7) + (10 * i)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(FPRClass, OpSize::i128Bit, data, Mem, _Constant((IR::OpSizeToSize(Size) * 7) + (10 * i)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
Top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, Top, OneConst), SevenConst);
|
||||
}
|
||||
|
||||
// The final st(7) needs a bit of special handling here
|
||||
Ref data = _LoadContextIndexed(Top, LoadSize, MMBaseOffset(), OpSize::i128Bit, FPRClass);
|
||||
Ref data = _LoadContextIndexed(Top, LoadSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit), FPRClass);
|
||||
if (ReducedPrecisionMode) {
|
||||
data = _F80CVTTo(data, OpSize::i64Bit);
|
||||
}
|
||||
// ST7 broken in to two parts
|
||||
// Lower 64bits [63:0]
|
||||
// upper 16 bits [79:64]
|
||||
_StoreMem(FPRClass, OpSize::i64Bit, data, Mem, _Constant((Size * 7) + (7 * 10)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(FPRClass, OpSize::i64Bit, data, Mem, _Constant((IR::OpSizeToSize(Size) * 7) + (7 * 10)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
auto topBytes = _VDupElement(OpSize::i128Bit, OpSize::i16Bit, data, 4);
|
||||
_StoreMem(FPRClass, OpSize::i16Bit, topBytes, Mem, _Constant((Size * 7) + (7 * 10) + 8), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMem(FPRClass, OpSize::i16Bit, topBytes, Mem, _Constant((IR::OpSizeToSize(Size) * 7) + (7 * 10) + 8), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
|
||||
// reset to default
|
||||
FNINIT(Op);
|
||||
@ -522,11 +522,11 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) {
|
||||
_SetRoundingMode(roundingMode, false, roundingMode);
|
||||
}
|
||||
|
||||
auto NewFSW = _LoadMem(GPRClass, Size, Mem, _Constant(Size * 1), Size, MEM_OFFSET_SXTX, 1);
|
||||
auto NewFSW = _LoadMem(GPRClass, Size, Mem, _Constant(IR::OpSizeToSize(Size) * 1), Size, MEM_OFFSET_SXTX, 1);
|
||||
Ref Top = ReconstructX87StateFromFSW_Helper(NewFSW);
|
||||
{
|
||||
// FTW
|
||||
SetX87FTW(_LoadMem(GPRClass, Size, Mem, _Constant(Size * 2), Size, MEM_OFFSET_SXTX, 1));
|
||||
SetX87FTW(_LoadMem(GPRClass, Size, Mem, _Constant(IR::OpSizeToSize(Size) * 2), Size, MEM_OFFSET_SXTX, 1));
|
||||
}
|
||||
|
||||
auto OneConst = _Constant(1);
|
||||
@ -538,14 +538,14 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) {
|
||||
Mask = _VInsGPR(OpSize::i128Bit, OpSize::i64Bit, 1, Mask, high);
|
||||
const auto StoreSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit;
|
||||
for (int i = 0; i < 7; ++i) {
|
||||
Ref Reg = _LoadMem(FPRClass, OpSize::i128Bit, Mem, _Constant((Size * 7) + (10 * i)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
Ref Reg = _LoadMem(FPRClass, OpSize::i128Bit, Mem, _Constant((IR::OpSizeToSize(Size) * 7) + (10 * i)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
// Mask off the top bits
|
||||
Reg = _VAnd(OpSize::i128Bit, OpSize::i128Bit, Reg, Mask);
|
||||
if (ReducedPrecisionMode) {
|
||||
// Convert to double precision
|
||||
Reg = _F80CVT(OpSize::i64Bit, Reg);
|
||||
}
|
||||
_StoreContextIndexed(Reg, Top, StoreSize, MMBaseOffset(), OpSize::i128Bit, FPRClass);
|
||||
_StoreContextIndexed(Reg, Top, StoreSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit), FPRClass);
|
||||
|
||||
Top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, Top, OneConst), SevenConst);
|
||||
}
|
||||
@ -554,13 +554,14 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) {
|
||||
// ST7 broken in to two parts
|
||||
// Lower 64bits [63:0]
|
||||
// upper 16 bits [79:64]
|
||||
Ref Reg = _LoadMem(FPRClass, OpSize::i64Bit, Mem, _Constant((Size * 7) + (10 * 7)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
Ref RegHigh = _LoadMem(FPRClass, OpSize::i16Bit, Mem, _Constant((Size * 7) + (10 * 7) + 8), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
Ref Reg = _LoadMem(FPRClass, OpSize::i64Bit, Mem, _Constant((IR::OpSizeToSize(Size) * 7) + (10 * 7)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
Ref RegHigh =
|
||||
_LoadMem(FPRClass, OpSize::i16Bit, Mem, _Constant((IR::OpSizeToSize(Size) * 7) + (10 * 7) + 8), OpSize::i8Bit, MEM_OFFSET_SXTX, 1);
|
||||
Reg = _VInsElement(OpSize::i128Bit, OpSize::i16Bit, 4, 0, Reg, RegHigh);
|
||||
if (ReducedPrecisionMode) {
|
||||
Reg = _F80CVT(OpSize::i64Bit, Reg); // Convert to double precision
|
||||
}
|
||||
_StoreContextIndexed(Reg, Top, StoreSize, MMBaseOffset(), OpSize::i128Bit, FPRClass);
|
||||
_StoreContextIndexed(Reg, Top, StoreSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit), FPRClass);
|
||||
}
|
||||
|
||||
// Load / Store Control Word
|
||||
|
@ -36,12 +36,12 @@ void OpDispatchBuilder::X87LDENVF64(OpcodeArgs) {
|
||||
_SetRoundingMode(roundingMode, false, roundingMode);
|
||||
_StoreContext(OpSize::i16Bit, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
|
||||
|
||||
auto NewFSW = _LoadMem(GPRClass, Size, Mem, _Constant(Size * 1), Size, MEM_OFFSET_SXTX, 1);
|
||||
auto NewFSW = _LoadMem(GPRClass, Size, Mem, _Constant(IR::OpSizeToSize(Size)), Size, MEM_OFFSET_SXTX, 1);
|
||||
ReconstructX87StateFromFSW_Helper(NewFSW);
|
||||
|
||||
{
|
||||
// FTW
|
||||
SetX87FTW(_LoadMem(GPRClass, Size, Mem, _Constant(Size * 2), Size, MEM_OFFSET_SXTX, 1));
|
||||
SetX87FTW(_LoadMem(GPRClass, Size, Mem, _Constant(IR::OpSizeToSize(Size) * 2), Size, MEM_OFFSET_SXTX, 1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,7 +97,7 @@ void OpDispatchBuilder::FILDF64(OpcodeArgs) {
|
||||
// Read from memory
|
||||
Ref Data = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], ReadWidth, Op->Flags);
|
||||
if (ReadWidth == OpSize::i16Bit) {
|
||||
Data = _Sbfe(OpSize::i64Bit, ReadWidth * 8, 0, Data);
|
||||
Data = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(ReadWidth), 0, Data);
|
||||
}
|
||||
auto ConvertedData = _Float_FromGPR_S(OpSize::i64Bit, ReadWidth == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, Data);
|
||||
_PushStack(ConvertedData, Data, ReadWidth, false);
|
||||
@ -117,9 +117,9 @@ void OpDispatchBuilder::FISTF64(OpcodeArgs, bool Truncate) {
|
||||
|
||||
Ref data = _ReadStackValue(0);
|
||||
if (Truncate) {
|
||||
data = _Float_ToGPR_ZS(Size == 4 ? OpSize::i32Bit : OpSize::i64Bit, OpSize::i64Bit, data);
|
||||
data = _Float_ToGPR_ZS(Size == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, OpSize::i64Bit, data);
|
||||
} else {
|
||||
data = _Float_ToGPR_S(Size == 4 ? OpSize::i32Bit : OpSize::i64Bit, OpSize::i64Bit, data);
|
||||
data = _Float_ToGPR_S(Size == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, OpSize::i64Bit, data);
|
||||
}
|
||||
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, data, Size, OpSize::i8Bit);
|
||||
|
||||
@ -339,7 +339,7 @@ void OpDispatchBuilder::FCOMIF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpD
|
||||
if (Width == OpSize::i16Bit) {
|
||||
arg = _Sbfe(OpSize::i64Bit, 16, 0, arg);
|
||||
}
|
||||
b = _Float_FromGPR_S(OpSize::i64Bit, Width == 64 ? OpSize::i64Bit : OpSize::i32Bit, arg);
|
||||
b = _Float_FromGPR_S(OpSize::i64Bit, Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, arg);
|
||||
} else if (Width == OpSize::i32Bit) {
|
||||
arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
b = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, arg);
|
||||
|
@ -548,7 +548,7 @@ protected:

// This must directly match bytes to the named opsize.
// Implicit sized IR operations does math to get between sizes.
enum OpSize : uint8_t {
enum class OpSize : uint8_t {
iUnsized = 0,
i8Bit = 1,
i16Bit = 2,
@ -615,14 +615,18 @@ static inline uint16_t OpSizeAsBits(IR::OpSize Size) {
return IR::OpSizeToSize(Size) * 8u;
}

static inline OpSize MultiplyOpSize(IR::OpSize Size, uint8_t Multiplier) {
template<typename T>
requires (std::is_integral_v<T>)
static inline OpSize operator<<(IR::OpSize Size, T Shift) {
LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
return IR::SizeToOpSize(IR::OpSizeToSize(Size) * Multiplier);
return IR::SizeToOpSize(IR::OpSizeToSize(Size) << Shift);
}

static inline OpSize DivideOpSize(IR::OpSize Size, uint8_t Divisor) {
template<typename T>
requires (std::is_integral_v<T>)
static inline OpSize operator>>(IR::OpSize Size, T Shift) {
LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
return IR::SizeToOpSize(IR::OpSizeToSize(Size) / Divisor);
return IR::SizeToOpSize(IR::OpSizeToSize(Size) >> Shift);
}

static inline OpSize operator/(IR::OpSize Size, IR::OpSize Divisor) {
@ -630,7 +634,9 @@ static inline OpSize operator/(IR::OpSize Size, IR::OpSize Divisor) {
return IR::SizeToOpSize(IR::OpSizeToSize(Size) / IR::OpSizeToSize(Divisor));
}

static inline OpSize operator/(IR::OpSize Size, uint8_t Divisor) {
template<typename T>
requires (std::is_integral_v<T>)
static inline OpSize operator/(IR::OpSize Size, T Divisor) {
LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
return IR::SizeToOpSize(IR::OpSizeToSize(Size) / Divisor);
}
||||
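A minimal usage sketch of the new OpSize shift operators (illustrative only, not part of the commit; it assumes the OpSizeToSize/SizeToOpSize helpers shown above round-trip the enum through its byte count):

void OpSizeShiftExample() {
  const auto Doubled = FEXCore::IR::OpSize::i32Bit << 1;  // 4 bytes * 2 -> OpSize::i64Bit
  const auto Halved = FEXCore::IR::OpSize::i128Bit >> 1;  // 16 bytes / 2 -> OpSize::i64Bit
  const auto Bits = FEXCore::IR::OpSizeAsBits(Doubled);   // 8 bytes -> 64 bits
  (void)Halved;
  (void)Bits;
}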
|
@ -736,7 +736,7 @@
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "RegisterSize",
|
||||
"EmitValidation": [
|
||||
"Offset % RegisterSize == 0",
|
||||
"Offset % IR::OpSizeToSize(RegisterSize) == 0",
|
||||
"RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i256Bit"
|
||||
]
|
||||
},
|
||||
@ -748,7 +748,7 @@
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "RegisterSize",
|
||||
"EmitValidation": [
|
||||
"Offset % RegisterSize == 0",
|
||||
"Offset % IR::OpSizeToSize(RegisterSize) == 0",
|
||||
"RegisterSize == FEXCore::IR::OpSize::i128Bit"
|
||||
]
|
||||
},
|
||||
@ -760,7 +760,7 @@
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "RegisterSize",
|
||||
"EmitValidation": [
|
||||
"Offset % RegisterSize == 0",
|
||||
"Offset % IR::OpSizeToSize(RegisterSize) == 0",
|
||||
"RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i256Bit"
|
||||
]
|
||||
}
|
||||
@ -2017,7 +2017,7 @@
|
||||
"TiedSource": 0,
|
||||
"Desc": "Unsigned shifts right each element and then narrows to the next lower element size",
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
|
||||
"FPR = VUShrNI2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$BitShift": {
|
||||
@ -2026,73 +2026,73 @@
|
||||
"Inserts results in to the high elements of the first argument"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
"FPR = VSXTL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
|
||||
"Desc": "Sign extends elements from the source element size to the next size up",
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VSXTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
|
||||
"Desc": ["Sign extends elements from the source element size to the next size up",
|
||||
"Source elements come from the upper half of the register"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VSSHLL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
|
||||
"Desc": "Sign extends elements from the source element size to the next size up",
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VSSHLL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
|
||||
"Desc": ["Sign extends elements from the source element size to the next size up",
|
||||
"Source elements come from the upper half of the register"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VUXTL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
|
||||
"Desc": "Zero extends elements from the source element size to the next size up",
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VUXTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
|
||||
"Desc": ["Zero extends elements from the source element size to the next size up",
|
||||
"Source elements come from the upper half of the register"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VSQXTN OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
|
||||
"TiedSource": 0,
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
"FPR = VSQXTN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
|
||||
"TiedSource": 0,
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
"FPR = VSQXTNPair OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
|
||||
"Desc": ["Does both VSQXTN and VSQXTN2 in a combined operation."
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
"FPR = VSQXTUN OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
"FPR = VSQXTUN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
"FPR = VSQXTUNPair OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
|
||||
"Desc": ["Does both VSQXTUN and VSQXTUN2 in a combined operation."
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)"
|
||||
},
|
||||
"FPR = VSRSHR OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
|
||||
"Desc": ["Signed rounding shift right by immediate",
|
||||
@ -2271,24 +2271,24 @@
|
||||
},
|
||||
"FPR = VUMull OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VSMull OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
|
||||
"Desc": [ "Does a signed integer multiply with extend.",
|
||||
"ElementSize is the source size"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VUMull2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
|
||||
"Desc": "Multiplies the high elements with size extension",
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VSMull2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
|
||||
"Desc": "Multiplies the high elements with size extension",
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VUMulH OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
|
||||
"Desc": "Wide unsigned multiply returning the high results",
|
||||
@ -2305,14 +2305,14 @@
|
||||
"Desc": ["Unsigned Absolute Difference Long"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VUABDL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
|
||||
"Desc": ["Unsigned Absolute Difference Long",
|
||||
"Using the high elements of the source vectors"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))"
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)"
|
||||
},
|
||||
"FPR = VUShl OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
|
||||
"TiedSource": 0,
|
||||
@ -2580,7 +2580,7 @@
|
||||
"Selecting from the high half of the register."
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::MultiplyOpSize(ElementSize, 2))",
|
||||
"NumElements": "RegisterSize / (ElementSize << 1)",
|
||||
"EmitValidation": [
|
||||
"RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\""
|
||||
]
|
||||
@ -2594,7 +2594,7 @@
|
||||
"F64->F32, F32->F16"
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / (IR::DivideOpSize(ElementSize, 2))",
|
||||
"NumElements": "RegisterSize / (ElementSize >> 1)",
|
||||
"EmitValidation": [
|
||||
"RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\""
|
||||
]
|
||||
|
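As a worked example of the element-count expressions above (illustrative only): with RegisterSize = OpSize::i128Bit (16 bytes) and ElementSize = OpSize::i16Bit (2 bytes), ElementSize << 1 is 4 bytes, so RegisterSize / (ElementSize << 1) gives 16 / 4 = 4 widened elements, matching the old IR::MultiplyOpSize(ElementSize, 2) form.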
@ -112,17 +112,17 @@ static void PrintArg(fextl::stringstream* out, const IRListView* IR, OrderedNode
|
||||
}
|
||||
|
||||
if (GetHasDest(IROp->Op)) {
|
||||
uint32_t ElementSize = IROp->ElementSize;
|
||||
uint32_t NumElements = IROp->Size;
|
||||
if (!IROp->ElementSize) {
|
||||
auto ElementSize = IROp->ElementSize;
|
||||
uint32_t NumElements = 0;
|
||||
if (IROp->ElementSize == OpSize::iUnsized) {
|
||||
ElementSize = IROp->Size;
|
||||
}
|
||||
|
||||
if (ElementSize) {
|
||||
NumElements /= ElementSize;
|
||||
if (ElementSize != OpSize::iUnsized) {
|
||||
NumElements = IR::NumElements(IROp->Size, ElementSize);
|
||||
}
|
||||
|
||||
*out << " i" << std::dec << (ElementSize * 8);
|
||||
*out << " i" << std::dec << IR::OpSizeAsBits(ElementSize);
|
||||
|
||||
if (NumElements > 1) {
|
||||
*out << "v" << std::dec << NumElements;
|
||||
@ -296,11 +296,11 @@ void Dump(fextl::stringstream* out, const IRListView* IR, IR::RegisterAllocation
|
||||
|
||||
auto ElementSize = IROp->ElementSize;
|
||||
uint8_t NumElements = 0;
|
||||
if (!IROp->ElementSize) {
|
||||
if (IROp->ElementSize != OpSize::iUnsized) {
|
||||
ElementSize = IROp->Size;
|
||||
}
|
||||
|
||||
if (ElementSize) {
|
||||
if (ElementSize != OpSize::iUnsized) {
|
||||
NumElements = IR::NumElements(IROp->Size, ElementSize);
|
||||
}
|
||||
|
||||
@ -324,7 +324,7 @@ void Dump(fextl::stringstream* out, const IRListView* IR, IR::RegisterAllocation
|
||||
}
|
||||
}
|
||||
|
||||
*out << " i" << std::dec << (ElementSize * 8);
|
||||
*out << " i" << std::dec << IR::OpSizeAsBits(ElementSize);
|
||||
|
||||
if (NumElements > 1) {
|
||||
*out << "v" << std::dec << NumElements;
|
||||
@ -334,16 +334,16 @@ void Dump(fextl::stringstream* out, const IRListView* IR, IR::RegisterAllocation
|
||||
} else {
|
||||
|
||||
auto ElementSize = IROp->ElementSize;
|
||||
if (!IROp->ElementSize) {
|
||||
if (IROp->ElementSize == OpSize::iUnsized) {
|
||||
ElementSize = IROp->Size;
|
||||
}
|
||||
uint32_t NumElements = 0;
|
||||
if (ElementSize) {
|
||||
if (ElementSize != OpSize::iUnsized) {
|
||||
NumElements = IR::NumElements(IROp->Size, ElementSize);
|
||||
}
|
||||
|
||||
*out << "(%" << std::dec << ID << ' ';
|
||||
*out << 'i' << std::dec << (ElementSize * 8);
|
||||
*out << 'i' << std::dec << IR::OpSizeAsBits(ElementSize);
|
||||
if (NumElements > 1) {
|
||||
*out << 'v' << std::dec << NumElements;
|
||||
}
|
||||
|
@ -71,19 +71,18 @@ public:
|
||||
return _Jump(InvalidNode);
|
||||
}
|
||||
IRPair<IROp_CondJump> _CondJump(Ref ssa0, CondClassType cond = {COND_NEQ}) {
|
||||
return _CondJump(ssa0, _Constant(0), InvalidNode, InvalidNode, cond, IR::SizeToOpSize(GetOpSize(ssa0)));
|
||||
return _CondJump(ssa0, _Constant(0), InvalidNode, InvalidNode, cond, GetOpSize(ssa0));
|
||||
}
|
||||
IRPair<IROp_CondJump> _CondJump(Ref ssa0, Ref ssa1, Ref ssa2, CondClassType cond = {COND_NEQ}) {
|
||||
return _CondJump(ssa0, _Constant(0), ssa1, ssa2, cond, IR::SizeToOpSize(GetOpSize(ssa0)));
|
||||
return _CondJump(ssa0, _Constant(0), ssa1, ssa2, cond, GetOpSize(ssa0));
|
||||
}
|
||||
// TODO: Work to remove this implicit sized Select implementation.
|
||||
IRPair<IROp_Select> _Select(uint8_t Cond, Ref ssa0, Ref ssa1, Ref ssa2, Ref ssa3, uint8_t CompareSize = 0) {
|
||||
if (CompareSize == 0) {
|
||||
CompareSize = std::max<uint8_t>(4, std::max<uint8_t>(GetOpSize(ssa0), GetOpSize(ssa1)));
|
||||
IRPair<IROp_Select> _Select(uint8_t Cond, Ref ssa0, Ref ssa1, Ref ssa2, Ref ssa3, IR::OpSize CompareSize = OpSize::iUnsized) {
|
||||
if (CompareSize == OpSize::iUnsized) {
|
||||
CompareSize = std::max(OpSize::i32Bit, std::max(GetOpSize(ssa0), GetOpSize(ssa1)));
|
||||
}
|
||||
|
||||
return _Select(IR::SizeToOpSize(std::max<uint8_t>(4, std::max<uint8_t>(GetOpSize(ssa2), GetOpSize(ssa3)))),
|
||||
IR::SizeToOpSize(CompareSize), CondClassType {Cond}, ssa0, ssa1, ssa2, ssa3);
|
||||
return _Select(std::max(OpSize::i32Bit, std::max(GetOpSize(ssa2), GetOpSize(ssa3))), CompareSize, CondClassType {Cond}, ssa0, ssa1, ssa2, ssa3);
|
||||
}
|
||||
IRPair<IROp_LoadMem> _LoadMem(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref ssa0, IR::OpSize Align = OpSize::i8Bit) {
|
||||
return _LoadMem(Class, Size, ssa0, Invalid(), Align, MEM_OFFSET_SXTX, 1);
|
||||
|
@ -29,7 +29,7 @@ $end_info$
|
||||
namespace FEXCore::IR {

uint64_t getMask(IROp_Header* Op) {
uint64_t NumBits = Op->Size * 8;
uint64_t NumBits = IR::OpSizeAsBits(Op->Size);
return (~0ULL) >> (64 - NumBits);
}
||||
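A quick worked example of the mask math above (illustrative only): for a 32-bit operation, OpSizeAsBits yields 32, so getMask returns (~0ULL) >> (64 - 32) == 0x0000'0000'FFFF'FFFF; for a 64-bit operation the shift amount is zero and the mask is all ones.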
|
||||
@ -91,7 +91,7 @@ private:
|
||||
// We don't allow 8/16-bit operations to have constants, since no
|
||||
// constant would be in bounds after the JIT's 24/16 shift.
|
||||
auto Filter = [&IROp](uint64_t X) {
|
||||
return ARMEmitter::IsImmAddSub(X) && IROp->Size >= 4;
|
||||
return ARMEmitter::IsImmAddSub(X) && IROp->Size >= OpSize::i32Bit;
|
||||
};
|
||||
|
||||
return InlineIf(IREmit, CurrentIR, CodeNode, IROp, Index, Filter);
|
||||
@ -112,7 +112,7 @@ private:
|
||||
IsSIMM9 &= (SupportsTSOImm9 || !TSO);
|
||||
|
||||
// Extended offsets for regular loadstore only.
|
||||
bool IsExtended = (Imm & (IROp->Size - 1)) == 0 && Imm / IROp->Size <= 4095;
|
||||
bool IsExtended = (Imm & (IR::OpSizeToSize(IROp->Size) - 1)) == 0 && Imm / IR::OpSizeToSize(IROp->Size) <= 4095;
|
||||
IsExtended &= !TSO;
|
||||
|
||||
if (IsSIMM9 || IsExtended) {
|
||||
@ -204,7 +204,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
/* IsImmAddSub assumes the constants are sign-extended, take care of that
|
||||
* here so we get the optimization for 32-bit adds too.
|
||||
*/
|
||||
if (Op->Header.Size == 4) {
|
||||
if (Op->Header.Size == OpSize::i32Bit) {
|
||||
Constant1 = (int64_t)(int32_t)Constant1;
|
||||
Constant2 = (int64_t)(int32_t)Constant2;
|
||||
}
|
||||
@ -290,12 +290,12 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
}
|
||||
|
||||
if (!Replaced) {
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IROp->Size * 8); });
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IR::OpSizeAsBits(IROp->Size)); });
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OP_OR: {
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IROp->Size * 8); });
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IR::OpSizeAsBits(IROp->Size)); });
|
||||
break;
|
||||
}
|
||||
case OP_XOR: {
|
||||
@ -325,7 +325,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
}
|
||||
|
||||
if (!Replaced) {
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IROp->Size * 8); });
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IR::OpSizeAsBits(IROp->Size)); });
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -333,7 +333,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
case OP_ANDWITHFLAGS:
|
||||
case OP_ANDN:
|
||||
case OP_TESTNZ: {
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IROp->Size * 8); });
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, [&IROp](uint64_t X) { return IsImmLogical(X, IR::OpSizeAsBits(IROp->Size)); });
|
||||
break;
|
||||
}
|
||||
case OP_NEG: {
|
||||
@ -356,7 +356,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
|
||||
if (IREmit->IsValueConstant(IROp->Args[0], &Constant1) && IREmit->IsValueConstant(IROp->Args[1], &Constant2)) {
|
||||
// Shifts mask the shift amount by 63 or 31 depending on operating size;
|
||||
uint64_t ShiftMask = IROp->Size == 8 ? 63 : 31;
|
||||
uint64_t ShiftMask = IROp->Size == OpSize::i64Bit ? 63 : 31;
|
||||
uint64_t NewConstant = (Constant1 << (Constant2 & ShiftMask)) & getMask(IROp);
|
||||
IREmit->ReplaceWithConstant(CodeNode, NewConstant);
|
||||
} else if (IREmit->IsValueConstant(IROp->Args[1], &Constant2) && Constant2 == 0) {
|
||||
@ -384,7 +384,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
auto Op = IROp->C<IR::IROp_Bfe>();
|
||||
uint64_t Constant;
|
||||
|
||||
if (IROp->Size <= 8 && IREmit->IsValueConstant(Op->Src, &Constant)) {
|
||||
if (IROp->Size <= OpSize::i64Bit && IREmit->IsValueConstant(Op->Src, &Constant)) {
|
||||
uint64_t SourceMask = Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1);
|
||||
SourceMask <<= Op->lsb;
|
||||
|
||||
@ -400,7 +400,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
if (IREmit->IsValueConstant(Op->Src, &Constant)) {
|
||||
// SBFE of a constant can be converted to a constant.
|
||||
uint64_t SourceMask = Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1);
|
||||
uint64_t DestSizeInBits = IROp->Size * 8;
|
||||
uint64_t DestSizeInBits = IR::OpSizeAsBits(IROp->Size);
|
||||
uint64_t DestMask = DestSizeInBits == 64 ? ~0ULL : ((1ULL << DestSizeInBits) - 1);
|
||||
SourceMask <<= Op->lsb;
|
||||
|
||||
@ -424,11 +424,11 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
uint64_t NewConstant = SourceMask << Op->lsb;
|
||||
|
||||
if (ConstantSrc & 1) {
|
||||
auto orr = IREmit->_Or(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(IROp->Args[0]), IREmit->_Constant(NewConstant));
|
||||
auto orr = IREmit->_Or(IROp->Size, CurrentIR.GetNode(IROp->Args[0]), IREmit->_Constant(NewConstant));
|
||||
IREmit->ReplaceAllUsesWith(CodeNode, orr);
|
||||
} else {
|
||||
// We are wanting to clear the bitfield.
|
||||
auto andn = IREmit->_Andn(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(IROp->Args[0]), IREmit->_Constant(NewConstant));
|
||||
auto andn = IREmit->_Andn(IROp->Size, CurrentIR.GetNode(IROp->Args[0]), IREmit->_Constant(NewConstant));
|
||||
IREmit->ReplaceAllUsesWith(CodeNode, andn);
|
||||
}
|
||||
}
|
||||
@ -596,7 +596,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
case OP_SELECT: {
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 1, ARMEmitter::IsImmAddSub);
|
||||
|
||||
uint64_t AllOnes = IROp->Size == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
uint64_t AllOnes = IROp->Size == OpSize::i64Bit ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
|
||||
uint64_t Constant2 {};
|
||||
uint64_t Constant3 {};
|
||||
@ -614,7 +614,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
// We always allow source 1 to be zero, but source 0 can only be a
|
||||
// special 1/~0 constant if source 1 is 0.
|
||||
if (InlineIfZero(IREmit, CurrentIR, CodeNode, IROp, 1)) {
|
||||
uint64_t AllOnes = IROp->Size == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
uint64_t AllOnes = IROp->Size == OpSize::i64Bit ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;
|
||||
InlineIf(IREmit, CurrentIR, CodeNode, IROp, 0, [&AllOnes](uint64_t X) { return X == 1 || X == AllOnes; });
|
||||
}
|
||||
break;
|
||||
@ -632,7 +632,7 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
|
||||
auto EO = NewRIP->C<IR::IROp_EntrypointOffset>();
|
||||
IREmit->SetWriteCursor(CurrentIR.GetNode(Op->NewRIP));
|
||||
|
||||
IREmit->ReplaceNodeArgument(CodeNode, 0, IREmit->_InlineEntrypointOffset(IR::SizeToOpSize(EO->Header.Size), EO->Offset));
|
||||
IREmit->ReplaceNodeArgument(CodeNode, 0, IREmit->_InlineEntrypointOffset(EO->Header.Size, EO->Offset));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|

@ -79,12 +79,12 @@ void IRValidation::Run(IREmitter* IREmit) {

for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
const auto ID = CurrentIR.GetID(CodeNode);
const uint8_t OpSize = IROp->Size;
const auto OpSize = IROp->Size;

if (GetHasDest(IROp->Op)) {
HadError |= OpSize == 0;
HadError |= OpSize == IR::OpSize::iInvalid;
// Does the op have a destination of size 0?
if (OpSize == 0) {
if (OpSize == IR::OpSize::iInvalid) {
Errors << "%" << ID << ": Had destination but with no size" << std::endl;
}
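With OpSize as an enum class, "no size" is spelled IR::OpSize::iInvalid rather than the magic number 0, which keeps the validator's intent explicit and stops accidental integer comparisons from compiling. A simplified sketch of that kind of check; the stream plumbing and function shape are invented for illustration and are not the actual IRValidation code:

#include <cstdint>
#include <iostream>
#include <sstream>

enum class OpSize : uint8_t { iInvalid = 0, i32Bit = 4, i64Bit = 8 }; // illustrative values

// Simplified validation step: an op that defines a destination must carry a
// real size. Only the sentinel idea matches the hunk above.
bool ValidateDestSize(uint32_t ID, bool HasDest, OpSize Size, std::ostream& Errors) {
  if (HasDest && Size == OpSize::iInvalid) {
    Errors << "%" << ID << ": Had destination but with no size" << std::endl;
    return false;
  }
  return true;
}

int main() {
  std::ostringstream Errors;
  if (!ValidateDestSize(5, /*HasDest=*/true, OpSize::iInvalid, Errors)) {
    std::cout << Errors.str();
  }
  return 0;
}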

@ -521,7 +521,7 @@ void DeadFlagCalculationEliminination::FoldBranch(IREmitter* IREmit, IRListView&
// Pattern match a branch fed by a compare. We could also handle bit tests
// here, but tbz/tbnz has a limited offset range which we don't have a way to
// deal with yet. Let's hope that's not a big deal.
if (!(Op->Cond == COND_NEQ || Op->Cond == COND_EQ) || (Prev->Size < 4)) {
if (!(Op->Cond == COND_NEQ || Op->Cond == COND_EQ) || (Prev->Size < OpSize::i32Bit)) {
return;
}
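Comparisons such as Prev->Size < OpSize::i32Bit keep working after the conversion because C++ defines the relational operators between two values of the same scoped enumeration and compares their underlying values; with byte-count enumerators, "less than i32Bit" still reads as "narrower than 32 bits". A tiny sketch, again with assumed enumerator values:

#include <cstdint>

enum class OpSize : uint8_t { i8Bit = 1, i16Bit = 2, i32Bit = 4, i64Bit = 8 }; // illustrative values

// Built-in relational operators on a scoped enum compare underlying values,
// so size ordering survives the move away from raw integers.
static_assert(OpSize::i16Bit < OpSize::i32Bit);
static_assert(!(OpSize::i64Bit < OpSize::i32Bit));

int main() {
  return 0;
}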

@ -534,7 +534,7 @@ void DeadFlagCalculationEliminination::FoldBranch(IREmitter* IREmit, IRListView&
IREmit->ReplaceNodeArgument(CodeNode, 0, CurrentIR.GetNode(Prev->Args[0]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CurrentIR.GetNode(Prev->Args[1]));
Op->FromNZCV = false;
Op->CompareSize = IR::SizeToOpSize(Prev->Size);
Op->CompareSize = Prev->Size;
} else {
return;
}

@ -612,7 +612,7 @@ bool DeadFlagCalculationEliminination::ProcessBlock(IREmitter* IREmit, IRListVie
// this flag is outside of the if, since the TestNZ might result from
// optimizing AndWithFlags, and we need to converge locally in a single
// iteration.
if (IROp->Op == OP_TESTNZ && IROp->Size < 4 && !(FlagsRead & (FLAG_N | FLAG_C))) {
if (IROp->Op == OP_TESTNZ && IROp->Size < OpSize::i32Bit && !(FlagsRead & (FLAG_N | FLAG_C))) {
IROp->Op = OP_TESTZ;
}

@ -582,7 +582,7 @@ void ConstrainedRAPass::Run(IREmitter* IREmit_) {

if (Reg.Class == FPRFixedClass) {
IROp_Header* Header = IR->GetOp<IROp_Header>(Old);
Copy = IREmit->_VMov(IR::SizeToOpSize(Header->Size), Map(Old));
Copy = IREmit->_VMov(Header->Size, Map(Old));
} else {
Copy = IREmit->_Copy(Map(Old));
}

@ -731,7 +731,7 @@ void X87StackOptimization::Run(IREmitter* Emit) {
} else {
auto* SourceNode = CurrentIR.GetNode(Op->X80Src);
auto* OriginalNode = CurrentIR.GetNode(Op->OriginalValue);
StackData.push(StackMemberInfo {SourceNode, OriginalNode, SizeToOpSize(Op->LoadSize), Op->Float});
StackData.push(StackMemberInfo {SourceNode, OriginalNode, Op->LoadSize, Op->Float});
}
break;
}

@ -793,7 +793,7 @@ void X87StackOptimization::Run(IREmitter* Emit) {
// or similar. As long as the source size and dest size are one and the same.
// This will avoid any conversions between source and stack element size and conversion back.
if (!SlowPath && Value->Source && Value->Source->first == Op->StoreSize && Value->InterpretAsFloat) {
IREmit->_StoreMem(Value->InterpretAsFloat ? FPRClass : GPRClass, IR::SizeToOpSize(Op->StoreSize), AddrNode, Value->Source->second);
IREmit->_StoreMem(Value->InterpretAsFloat ? FPRClass : GPRClass, Op->StoreSize, AddrNode, Value->Source->second);
} else {
if (ReducedPrecisionMode) {
switch (Op->StoreSize) {
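The fast path in this hunk stores the original source value directly when its recorded size matches the store size and it is already interpreted as a float, skipping the round-trip through the 80-bit stack representation. A hedged sketch of that decision on a simplified record; the types and fields here are stand-ins, not FEX's StackMemberInfo:

#include <cstdint>
#include <optional>
#include <utility>

enum class OpSize : uint8_t { i32Bit = 4, i64Bit = 8 }; // illustrative values

struct Value; // stands in for an IR node

struct StackEntry {
  // Original (pre-conversion) source and the size it was loaded with, if known.
  std::optional<std::pair<OpSize, Value*>> Source;
  bool InterpretAsFloat = false;
};

// Decide whether a store of StoreSize can bypass the converted stack value and
// write the original source directly, mirroring the fast path in the hunk above.
Value* PickStoreSource(const StackEntry& Entry, OpSize StoreSize, bool SlowPath, Value* ConvertedStackValue) {
  if (!SlowPath && Entry.Source && Entry.Source->first == StoreSize && Entry.InterpretAsFloat) {
    return Entry.Source->second; // no conversion round-trip needed
  }
  return ConvertedStackValue; // fall back to the converted stack element
}

int main() {
  StackEntry E {};
  E.InterpretAsFloat = true;
  E.Source.emplace(OpSize::i64Bit, nullptr);
  return PickStoreSource(E, OpSize::i64Bit, /*SlowPath=*/false, nullptr) == nullptr ? 0 : 1;
}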

@ -826,7 +826,7 @@ void X87StackOptimization::Run(IREmitter* Emit) {
auto DestAddr = IREmit->_Add(OpSize::i64Bit, AddrNode, GetConstant(8));
IREmit->_StoreMem(GPRClass, OpSize::i16Bit, DestAddr, Upper, OpSize::i64Bit);
} else {
IREmit->_StoreMem(FPRClass, IR::SizeToOpSize(Op->StoreSize), AddrNode, StackNode);
IREmit->_StoreMem(FPRClass, Op->StoreSize, AddrNode, StackNode);
}
}
}