first commit

This commit is contained in:
MITSUNARI Shigeo 2010-04-16 10:33:04 +09:00
commit cbb4ca2178
45 changed files with 11548 additions and 0 deletions

47
COPYRIGHT Normal file
View File

@ -0,0 +1,47 @@
Copyright (c) 2007-2009 MITSUNARI Shigeo
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the copyright owner nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
す場合に限り、再頒布および使用が許可されます。
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
を含めること。
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
権表示、本条件一覧、および下記免責条項を含めること。
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
一切責任を負わないものとします。

17
Makefile Normal file
View File

@ -0,0 +1,17 @@
# Installation prefix; override on the command line, e.g. `make install PREFIX=/opt`.
PREFIX=/usr/local
# Directory that receives the xbyak headers.
INSTALL_DIR=$(PREFIX)/include/xbyak

# These targets are commands, not files; keep them runnable even if a file
# with the same name appears in this directory.
.PHONY: all clean install uninstall

# Build the sample programs. $(MAKE) (instead of a literal `make`) passes
# command-line flags and the jobserver down to the sub-make.
all:
	$(MAKE) -C sample

clean:
	$(MAKE) -C sample clean

# Copy the headers, preserving timestamps and permissions.
install:
	mkdir -p $(INSTALL_DIR)
	cp -pR xbyak/*.h $(INSTALL_DIR)

# Interactively remove the installed headers, then the (now empty) directory.
uninstall:
	rm -i $(INSTALL_DIR)/*.h
	rmdir $(INSTALL_DIR)

17
gen/b2hex.cpp Normal file
View File

@ -0,0 +1,17 @@
#include <stdio.h>
// Prints an anonymous enum to stdout that defines B00000000 .. B11111111,
// one enumerator per byte value, each named after its 8-bit binary spelling.
// NOTE(review): presumably this is how xbyak_bin2hex.h is produced - confirm.
int main()
{
	puts("enum {");
	for (int value = 0; value < 256; value++) {
		// Spell the byte as eight '0'/'1' characters, most significant bit first.
		char bits[9];
		for (int pos = 0; pos < 8; pos++) {
			bits[pos] = ((value >> (7 - pos)) & 1) ? '1' : '0';
		}
		bits[8] = '\0';
		// Every enumerator except the last one is followed by a comma.
		printf(" B%s= %d%s\n", bits, value, value < 255 ? "," : "");
	}
	puts("};");
	return 0;
}

874
gen/gen_code.cpp Normal file
View File

@ -0,0 +1,874 @@
#define XBYAK_DONT_READ_LIST
#include <stdio.h>
#include <string.h>
#include "xbyak.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
using namespace Xbyak;
/*
	Print the generated mnemonic member functions to stdout.

	Every section below owns a small table of (opcode, name, ...) rows and
	prints one or more C++ member-function definitions per row. Each printed
	function simply forwards to an op*() helper (opMMX, opGen, opModM, ...)
	that is expected to exist where the generated text is included.
	The output is plain text; nothing is returned.
*/
void put()
{
	// "no prefix" marker; it does not fit in a byte, so prefix columns that
	// may hold it are declared int rather than uint8.
	const int NO = CodeGenerator::NONE;
	{
		// getVersionString(): VERSION is decoded as major = v >> 12,
		// minor = bits 4..11 (two hex digits), and an optional trailing hex
		// digit taken from the low nibble when it is non-zero.
		char buf[16];
		unsigned int v = VERSION;
		if (v & 0xF) {
			sprintf(buf, "%u.%02X%x", v >> 12, (v >> 4) & 0xFF, v & 0xF);
		} else {
			sprintf(buf, "%u.%02X", v >> 12, (v >> 4) & 0xFF);
		}
		printf("const char *getVersionString() const { return \"%s\"; }\n", buf);
	}
	// Bit flags selecting which element-size suffixes (b/w/d/q) a row supports.
	const int B = 1 << 0;
	const int W = 1 << 1;
	const int D = 1 << 2;
	const int Q = 1 << 3;
	{
		// name(mmx, op) forwarding to opMMX with a fixed opcode
		const struct Tbl {
			uint8 code;
			const char *name;
		} tbl[] = {
			// MMX
			{ B01101011, "packssdw" },
			{ B01100011, "packsswb" },
			{ B01100111, "packuswb" },
			{ B11011011, "pand" },
			{ B11011111, "pandn" },
			{ B11110101, "pmaddwd" },
			{ B11100100, "pmulhuw" },
			{ B11100101, "pmulhw" },
			{ B11010101, "pmullw" },
			{ B11101011, "por" },
			{ B01101000, "punpckhbw" },
			{ B01101001, "punpckhwd" },
			{ B01101010, "punpckhdq" },
			{ B01100000, "punpcklbw" },
			{ B01100001, "punpcklwd" },
			{ B01100010, "punpckldq" },
			{ B11101111, "pxor" },
			// MMX2
			{ B11100000, "pavgb" },
			{ B11100011, "pavgw" },
			{ B11101110, "pmaxsw" },
			{ B11011110, "pmaxub" },
			{ B11101010, "pminsw" },
			{ B11011010, "pminub" },
			{ B11110110, "psadbw" },
			//
			{ B11010100, "paddq" },
			{ B11110100, "pmuludq" },
			{ B11111011, "psubq" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n"
				, p->name, p->code);
		}
	}
	{
		// name{b,w,d,q}(mmx, op): one function per size enabled in `mode`;
		// the size index j is OR-ed into the base opcode.
		const struct Tbl {
			uint8 code;
			int mode;
			const char *name;
		} tbl[] = {
			{ B11111100, B|W|D, "padd" },
			{ B11101100, B|W  , "padds" },
			{ B11011100, B|W  , "paddus" },
			{ B01110100, B|W|D, "pcmpeq" },
			{ B01100100, B|W|D, "pcmpgt" },
			{ B11110000, W|D|Q, "psll" },
			{ B11100000, W|D  , "psra" },
			{ B11010000, W|D|Q, "psrl" },
			{ B11111000, B|W|D, "psub" },
			{ B11101000, B|W  , "psubs" },
			{ B11011000, B|W  , "psubus" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			static const char modTbl[][4] = {
				"b", "w", "d", "q"
			};
			for (int j = 0; j < 4; j++) {
				// B(0), W(1), D(2), Q(3)
				if (!(p->mode & (1 << j))) continue;
				printf("void %s%s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n"
					, p->name, modTbl[j]
					, p->code | j
				);
			}
		}
	}
	{
		// shift-by-immediate forms: name{w,d,q}(mmx, imm8) forwarding to
		// opMMX_IMM with (opcode | size index, ext)
		const struct Tbl {
			uint8 code;
			int ext;
			int mode;
			const char *name;
		} tbl[] = {
			{ B01110000, 6, W|D|Q, "psll" },
			{ B01110000, 4, W|D  , "psra" },
			{ B01110000, 2, W|D|Q, "psrl" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			static const char modTbl[][4] = {
				"b", "w", "d", "q"
			};
			for (int j = 0; j < 4; j++) {
				// B(0), W(1), D(2), Q(3)
				if (!(p->mode & (1 << j))) continue;
				printf("void %s%s(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x%02X, %d); }\n"
					, p->name, modTbl[j]
					, p->code | j
					, p->ext
				);
			}
		}
		// the two whole-register byte shifts take an Xmm only
		printf("void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", B01110011, 7);
		printf("void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", B01110011, 3);
	}
	{
		// pshuf* family: same opcode, distinguished by the prefix byte
		const struct Tbl {
			uint8 code;
			uint8 pref;
			const char *name;
		} tbl[] = {
			{ B01110000, 0, "pshufw" },
			{ B01110000, 0xF2, "pshuflw" },
			{ B01110000, 0xF3, "pshufhw" },
			{ B01110000, 0x66, "pshufd" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n" , p->name, p->code, p->pref);
		}
	}
	{
		// xmm moves: a load form (xmm, op) and a store form (addr, xmm);
		// the store form writes the prefix byte itself when there is one.
		const struct MmxTbl6 {
			uint8 code; // for (reg, reg/[mem])
			uint8 code2; // for ([mem], reg)
			int pref;
			const char *name;
		} mmxTbl6[] = {
			{ B01101111, B01111111, 0x66, "movdqa" },
			{ B01101111, B01111111, 0xF3, "movdqu" },
			// SSE2
			{ B00101000, B00101001, NO, "movaps" },
			{ B00010000, B00010001, 0xF3, "movss" },
			{ B00010000, B00010001, NO, "movups" },
			{ B00101000, B00101001, 0x66, "movapd" },
			{ B00010000, B00010001, 0xF2, "movsd" },
			{ B00010000, B00010001, 0x66, "movupd" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) {
			const MmxTbl6 *p = &mmxTbl6[i];
			printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n" , p->name, p->code, p->pref);
			printf("void %s(const Address& addr, const Xmm& xmm) { ", p->name);
			if (p->pref != NO) printf("db(0x%02X); ", p->pref);
			printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n" , p->code2);
		}
	}
	{
		// name{ps,ss,pd,sd}(xmm, op[, imm8]): bit j of `mode` enables
		// suffix sufTbl[j], whose `code` is passed as the prefix argument.
		enum {
			PS = 1 << 0,
			SS = 1 << 1,
			PD = 1 << 2,
			SD = 1 << 3
		};
		const struct {
			int code;
			const char *name;
		} sufTbl[] = {
			{ NO, "ps" },
			{ 0xF3, "ss" },
			{ 0x66, "pd" },
			{ 0xF2, "sd" },
		};
		const struct Tbl {
			uint8 code;
			int mode;
			const char *name;
			bool hasImm; // rows that omit it are value-initialized to false
		} tbl[] = {
			{ B01011000, PS|SS|PD|SD, "add" },
			{ B01010101, PS|PD      , "andn" },
			{ B01010100, PS|PD      , "and" },
			{ B11000010, PS|SS|PD|SD, "cmp", true },
			{ B01011110, PS|SS|PD|SD, "div" },
			{ B01011111, PS|SS|PD|SD, "max" },
			{ B01011101, PS|SS|PD|SD, "min" },
			{ B01011001, PS|SS|PD|SD, "mul" },
			{ B01010110, PS|PD      , "or" },
			{ B01010011, PS|SS      , "rcp" },
			{ B01010010, PS|SS      , "rsqrt" },
			{ B11000110, PS|PD      , "shuf", true },
			{ B01010001, PS|SS|PD|SD, "sqrt" },
			{ B01011100, PS|SS|PD|SD, "sub" },
			{ B00010101, PS|PD      , "unpckh" },
			{ B00010100, PS|PD      , "unpckl" },
			{ B01010111, PS|PD      , "xor" },
			//
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
				if (!(p->mode & (1 << j))) continue;
				// The opcode is printed zero-padded (%02X) like everywhere else;
				// a space-padded width would produce "0x 5" for values < 0x10.
				if (p->hasImm) {
					// don't change uint8 to int because NO is not in byte
					printf("void %s%s(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code);
				} else {
					printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code);
				}
			}
		}
	}
	{
		// (XMM, XMM)
		const struct Tbl {
			uint8 code;
			uint8 pref; // 0 means "emit no prefix byte"
			const char *name;
		} tbl[] = {
			{ B11110111, 0x66, "maskmovdqu" },
			{ B00010010, 0   , "movhlps" },
			{ B00010110, 0   , "movlhps" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Xmm& reg1, const Xmm& reg2) { ", p->name);
			if (p->pref) printf("db(0x%02X); ", p->pref);
			printf(" opModR(reg1, reg2, 0x0F, 0x%02X); }\n", p->code);
		}
	}
	{
		// (XMM, XMM|MEM)
		const struct Tbl {
			uint8 code;
			int pref;
			const char *name;
		} tbl[] = {
			{ B01101101, 0x66, "punpckhqdq" },
			{ B01101100, 0x66, "punpcklqdq" },
			{ B00101111, NO  , "comiss" },
			{ B00101110, NO  , "ucomiss" },
			{ B00101111, 0x66, "comisd" },
			{ B00101110, 0x66, "ucomisd" },
			{ B01011010, 0x66, "cvtpd2ps" },
			{ B01011010, NO  , "cvtps2pd" },
			{ B01011010, 0xF2, "cvtsd2ss" },
			{ B01011010, 0xF3, "cvtss2sd" },
			{ B11100110, 0xF2, "cvtpd2dq" },
			{ B11100110, 0x66, "cvttpd2dq" },
			{ B11100110, 0xF3, "cvtdq2pd" },
			{ B01011011, 0x66, "cvtps2dq" },
			{ B01011011, 0xF3, "cvttps2dq" },
			{ B01011011, NO  , "cvtdq2ps" },
			// SSE3
			{ B11010000, 0x66, "addsubpd" },
			{ B11010000, 0xF2, "addsubps" },
			{ B01111100, 0x66, "haddpd" },
			{ B01111100, 0xF2, "haddps" },
			{ B01111101, 0x66, "hsubpd" },
			{ B01111101, 0xF2, "hsubps" },
			{ B00010010, 0xF2, "movddup" },
			{ B00010110, 0xF3, "movshdup" },
			{ B00010010, 0xF3, "movsldup" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code, p->pref);
		}
	}
	{
		// special type: `cond` names the operand-combination predicate that
		// is passed through to opGen in the generated code
		const struct Tbl {
			uint8 code;
			int pref;
			const char *name;
			const char *cond;
		} tbl[] = {
			{ B00101010, NO  , "cvtpi2ps", "isXMM_MMXorMEM" },
			{ B00101101, NO  , "cvtps2pi", "isMMX_XMMorMEM" },
			{ B00101010, 0xF3, "cvtsi2ss", "isXMM_REG32orMEM" },
			{ B00101101, 0xF3, "cvtss2si", "isREG32_XMMorMEM" },
			{ B00101100, NO  , "cvttps2pi", "isMMX_XMMorMEM" },
			{ B00101100, 0xF3, "cvttss2si", "isREG32_XMMorMEM" },
			{ B00101010, 0x66, "cvtpi2pd", "isXMM_MMXorMEM" },
			{ B00101101, 0x66, "cvtpd2pi", "isMMX_XMMorMEM" },
			{ B00101010, 0xF2, "cvtsi2sd", "isXMM_REG32orMEM" },
			{ B00101101, 0xF2, "cvtsd2si", "isREG32_XMMorMEM" },
			{ B00101100, 0x66, "cvttpd2pi", "isMMX_XMMorMEM" },
			{ B00101100, 0xF2, "cvttsd2si", "isREG32_XMMorMEM" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& reg, const Operand& op) { opGen(reg, op, 0x%02X, 0x%02X, %s); }\n", p->name, p->code, p->pref, p->cond);
		}
	}
	{
		// prefetch: `ext` is passed as the register index of a Reg32
		const struct Tbl {
			int ext;
			const char *name;
		} tbl[] = {
			{ 1, "t0" },
			{ 2, "t1" },
			{ 3, "t2" },
			{ 0, "nta" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void prefetch%s(const Address& addr) { opModM(addr, Reg32(%d), 0x0F, B00011000); }\n", p->name, p->ext);
		}
	}
	{
		// movh/movl ps/pd, forwarded to opMovXMM
		const struct Tbl {
			uint8 code;
			int pref;
			const char *name;
		} tbl[] = {
			{ B00010110, NO,   "movhps" },
			{ B00010010, NO,   "movlps" },
			{ B00010110, 0x66, "movhpd" },
			{ B00010010, 0x66, "movlpd" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref);
		}
	}
	{
		// cmov: every condition suffix yields three functions
		// (cmov<cc>, j<cc>, set<cc>); aliases share the same `ext` value.
		const struct Tbl {
			uint8 ext;
			const char *name;
		} tbl[] = {
			{ 0, "o" },
			{ 1, "no" },
			{ 2, "b" },
			{ 2, "nae" },
			{ 3, "nb" },
			{ 3, "ae" },
			{ 4, "e" },
			{ 4, "z" },
			{ 5, "ne" },
			{ 5, "nz" },
			{ 6, "be" },
			{ 6, "na" },
			{ 7, "nbe" },
			{ 7, "a" },
			{ 8, "s" },
			{ 9, "ns" },
			{ 10, "p" },
			{ 10, "pe" },
			{ 11, "np" },
			{ 11, "po" },
			{ 12, "l" },
			{ 12, "nge" },
			{ 13, "nl" },
			{ 13, "ge" },
			{ 14, "le" },
			{ 14, "ng" },
			{ 15, "nle" },
			{ 15, "g" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void cmov%s(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | %d); }\n", p->name, p->ext);
			printf("void j%s(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }\n", p->name, p->ext | B01110000, p->ext | B10000000, 0x0F);
			printf("void set%s(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | %d); }\n", p->name, p->ext);
		}
	}
	////////////////////////////////////////////////////////////////
	{
		// Operand-less instructions: name() { db(code1); [db(code2);] [db(code3);] }
		// A zero code2/code3 means "no such byte". The rows named "@@@" are
		// markers, not instructions: the output starts with "#ifdef XBYAK64",
		// the first marker prints "#else" and the second prints "#endif".
		const struct Tbl {
			const char *name;
			uint8 code1;
			uint8 code2;
			uint8 code3;
		} tbl[] = {
			// only 64-bit mode(from)
			{ "cdqe", B01001000, B10011000 },
			{ "@@@" }, /// here
			// only 32-bit mode(from)
			{ "aaa", B00110111 },
			{ "aad", B11010101, B00001010 },
			{ "aam", B11010100, B00001010 },
			{ "aas", B00111111 },
			{ "daa", B00100111 },
			{ "das", B00101111 },
			{ "popad", B01100001 },
			{ "popfd", B10011101 },
			{ "pusha", B01100000 },
			{ "pushad", B01100000 },
			{ "pushfd", B10011100 },
			{ "popa", B01100001 },
			{ "@@@" }, /// here
			{ "cbw", 0x66, B10011000 },
			{ "cdq", B10011001 },
			{ "clc", B11111000 },
			{ "cld", B11111100 },
			{ "cli", B11111010 },
			{ "cmc", B11110101 },
			{ "cpuid", 0x0F, B10100010 },
			{ "cwd", 0x66, B10011001 },
			{ "cwde", B10011000 },
			{ "lahf", B10011111 },
			{ "lock", B11110000 },
			{ "nop", B10010000 },
			{ "sahf", B10011110 },
			{ "stc", B11111001 },
			{ "std", B11111101 },
			{ "sti", B11111011 },
			{ "emms", 0x0F, B01110111 },
			{ "pause", 0xF3, B10010000 },
			{ "sfence", 0x0F, B10101110, B11111000 },
			{ "lfence", 0x0F, B10101110, B11101000 },
			{ "mfence", 0x0F, B10101110, B11110000 },
			{ "monitor", 0x0F, B00000001, B11001000 },
			{ "mwait", 0x0F, B00000001, B11001001 },
			{ "rdmsr", 0x0F, B00110010 },
			{ "rdpmc", 0x0F, B00110011 },
			{ "rdtsc", 0x0F, B00110001 },
			{ "wait", B10011011 },
			{ "wbinvd", 0x0F, B00001001 },
			{ "wrmsr", 0x0F, B00110000 },
			{ "xlatb", 0xD7 },
			{ "popf", B10011101 },
			{ "pushf", B10011100 },
			// FPU
			{ "f2xm1", 0xD9, 0xF0 },
			{ "fabs", 0xD9, 0xE1 },
			{ "faddp", 0xDE, 0xC1 },
			{ "fchs", 0xD9, 0xE0 },
			{ "fcom", 0xD8, 0xD1 },
			{ "fcomp", 0xD8, 0xD9 },
			{ "fcompp", 0xDE, 0xD9 },
			{ "fcos", 0xD9, 0xFF },
			{ "fdecstp", 0xD9, 0xF6 },
			{ "fdivp", 0xDE, 0xF9 },
			{ "fdivrp", 0xDE, 0xF1 },
			{ "fincstp", 0xD9, 0xF7 },
			{ "fld1", 0xD9, 0xE8 },
			{ "fldl2t", 0xD9, 0xE9 },
			{ "fldl2e", 0xD9, 0xEA },
			{ "fldpi", 0xD9, 0xEB },
			{ "fldlg2", 0xD9, 0xEC },
			{ "fldln2", 0xD9, 0xED },
			{ "fldz", 0xD9, 0xEE },
			{ "fmulp", 0xDE, 0xC9 },
			{ "fnop", 0xD9, 0xD0 },
			{ "fpatan", 0xD9, 0xF3 },
			{ "fprem", 0xD9, 0xF8 },
			{ "fprem1", 0xD9, 0xF5 },
			{ "fptan", 0xD9, 0xF2 },
			{ "frndint", 0xD9, 0xFC },
			{ "fscale", 0xD9, 0xFD },
			{ "fsin", 0xD9, 0xFE },
			{ "fsincos", 0xD9, 0xFB },
			{ "fsqrt", 0xD9, 0xFA },
			{ "fsubp", 0xDE, 0xE9 },
			{ "fsubrp", 0xDE, 0xE1 },
			{ "ftst", 0xD9, 0xE4 },
			{ "fucom", 0xDD, 0xE1 },
			{ "fucomp", 0xDD, 0xE9 },
			{ "fucompp", 0xDA, 0xE9 },
			{ "fxam", 0xD9, 0xE5 },
			{ "fxch", 0xD9, 0xC9 },
			{ "fxtract", 0xD9, 0xF4 },
			{ "fyl2x", 0xD9, 0xF1 },
			{ "fyl2xp1", 0xD9, 0xF9 },
		};
		printf("#ifdef XBYAK64\n");
		bool inOnly64Bit = true;
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			if (strcmp(p->name, "@@@") == 0) {
				if (inOnly64Bit) {
					printf("#else\n");
					inOnly64Bit = false;
				} else {
					printf("#endif\n");
				}
				continue;
			}
			printf("void %s() { db(0x%02X); ", p->name, p->code1);
			if (p->code2) printf("db(0x%02X); ", p->code2);
			if (p->code3) printf("db(0x%02X); ", p->code3);
			printf("}\n");
		}
	}
	{
		// two-operand ALU instructions: an (op1, op2) overload via opRM_RM
		// and an (op, imm) overload via opRM_I
		const struct Tbl {
			uint8 code; // (reg, reg)
			uint8 ext; // (reg, imm)
			const char *name;
		} tbl[] = {
			{ B00010000, 2, "adc" },
			{ B00000000, 0, "add" },
			{ B00100000, 4, "and" },
			{ B00111000, 7, "cmp" },
			{ B00001000, 1, "or" },
			{ B00011000, 3, "sbb" },
			{ B00101000, 5, "sub" },
			{ B00110000, 6, "xor" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code);
			printf("void %s(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
		}
	}
	{
		// inc / dec
		const struct Tbl {
			uint8 code;
			uint8 ext;
			const char *name;
		} tbl[] = {
			{ B01001000, 1, "dec" },
			{ B01000000, 0, "inc" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op) { opIncDec(op, 0x%02X, %d); }\n", p->name, p->code, p->ext);
		}
	}
	{
		// one-operand group sharing a single opcode, selected by `ext`
		const struct Tbl {
			uint8 code;
			uint8 ext;
			const char *name;
		} tbl[] = {
			{ B11110110, 6, "div" },
			{ B11110110, 7, "idiv" },
			{ B11110110, 5, "imul" },
			{ B11110110, 4, "mul" },
			{ B11110110, 3, "neg" },
			{ B11110110, 2, "not" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op) { opR_ModM(op, 0, 3, %d, 0x%02X); }\n", p->name, p->ext, p->code);
		}
	}
	{
		// shifts and rotates: an immediate-count overload and a Reg8-count overload
		const struct Tbl {
			const char *name;
			uint8 ext;
		} tbl[] = {
			{ "rcl", 2 },
			{ "rcr", 3 },
			{ "rol", 0 },
			{ "ror", 1 },
			{ "sar", 7 },
			{ "shl", 4 },
			{ "shr", 5 },
			{ "sal", 4 },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op, int imm) { opShift(op, imm, %d); }\n", p->name, p->ext);
			printf("void %s(const Operand& op, const Reg8& cl) { opShift(op, cl, %d); }\n", p->name, p->ext);
		}
	}
	{
		// double-precision shifts: immediate-count and Reg8-count overloads
		const struct Tbl {
			const char *name;
			uint8 code;
		} tbl[] = {
			{ "shld", B10100100 },
			{ "shrd", B10101100 },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code);
			printf("void %s(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0x%02X, &cl); }\n", p->name, p->code);
		}
	}
	{
		// bit scans
		const struct Tbl {
			const char *name;
			uint8 code;
		} tbl[] = {
			{ "bsf", B10111100 },
			{ "bsr", B10111101 },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x%02X); }\n", p->name, p->code);
		}
	}
	// SSSE3
	{
		const struct Tbl {
			uint8 code;
			const char *name;
		} tbl[] = {
			{ 0x00, "pshufb" },
			{ 0x01, "phaddw" },
			{ 0x02, "phaddd" },
			{ 0x03, "phaddsw" },
			{ 0x04, "pmaddubsw" },
			{ 0x05, "phsubw" },
			{ 0x06, "phsubd" },
			{ 0x07, "phsubsw" },
			{ 0x08, "psignb" },
			{ 0x09, "psignw" },
			{ 0x0a, "psignd" },
			{ 0x0b, "pmulhrsw" },
			{ 0x1c, "pabsb" },
			{ 0x1d, "pabsw" },
			{ 0x1e, "pabsd" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			// NO fills the imm8 slot; the trailing 0x38 is written literally in the format string
			printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, %d, 0x38); }\n", p->name, p->code, NO);
		}
		printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a); }\n");
	}
	// SSE4
	{
		// (xmm, op) forms: NO fills the imm8 slot, 0x38 is the trailing argument
		const struct Tbl {
			uint8 code;
			const char *name;
		} tbl[] = {
			// SSE4.1
			{ 0x15, "blendvpd" },
			{ 0x14, "blendvps" },
			{ 0x2B, "packusdw" },
			{ 0x10, "pblendvb" },
			{ 0x29, "pcmpeqq" },
			{ 0x17, "ptest" },
			{ 0x20, "pmovsxbw" },
			{ 0x21, "pmovsxbd" },
			{ 0x22, "pmovsxbq" },
			{ 0x23, "pmovsxwd" },
			{ 0x24, "pmovsxwq" },
			{ 0x25, "pmovsxdq" },
			{ 0x30, "pmovzxbw" },
			{ 0x31, "pmovzxbd" },
			{ 0x32, "pmovzxbq" },
			{ 0x33, "pmovzxwd" },
			{ 0x34, "pmovzxwq" },
			{ 0x35, "pmovzxdq" },
			{ 0x38, "pminsb" },
			{ 0x39, "pminsd" },
			{ 0x3A, "pminuw" },
			{ 0x3B, "pminud" },
			{ 0x3C, "pmaxsb" },
			{ 0x3D, "pmaxsd" },
			{ 0x3E, "pmaxuw" },
			{ 0x3F, "pmaxud" },
			{ 0x28, "pmuldq" },
			{ 0x40, "pmulld" },
			{ 0x41, "phminposuw"},
			// SSE4.2
			{ 0x37, "pcmpgtq" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, %d, 0x38); }\n", p->name, p->code, NO);
		}
	}
	{
		// (xmm, op, imm) forms with 0x3A as the trailing argument
		const struct Tbl {
			uint8 code;
			const char *name;
		} tbl[] = {
			// SSE4.1
			{ 0x0D, "blendpd" },
			{ 0x0C, "blendps" },
			{ 0x41, "dppd" },
			{ 0x40, "dpps" },
			{ 0x42, "mpsadbw" },
			{ 0x0E, "pblendw" },
			{ 0x08, "roundps" },
			{ 0x09, "roundpd" },
			{ 0x0A, "roundss" },
			{ 0x0B, "roundsd" },
			// SSE4.2
			{ 0x60, "pcmpestrm" },
			{ 0x61, "pcmpestri" },
			{ 0x62, "pcmpistrm" },
			{ 0x63, "pcmpistri" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }\n", p->name, p->code);
		}
	}
	{
		// memory-only instructions: `ext` is passed as the index of a Reg32
		const struct Tbl {
			uint8 code;
			uint8 ext;
			const char *name;
		} tbl[] = {
			{ B10101110, 2, "ldmxcsr" },
			{ B10101110, 3, "stmxcsr" },
			{ B10101110, 7, "clflush" }, // 0x80 is bug of nasm ?
//			{ B10000000, 7, "clflush" }, // 0x80 is bug of nasm ?
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Address& addr) { opModM(addr, Reg32(%d), 0x0F, 0x%02X); }\n", p->name, p->ext, p->code);
		}
	}
	{
		// non-temporal stores
		const struct Tbl {
			uint8 code;
			const char *name;
		} tbl[] = {
			{ B00101011, "movntpd" },
			{ B11100111, "movntdq" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			// cast xmm register to 16bit register to put 0x66
			printf("void %s(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x%02X); }\n", p->name, p->code);
		}
	}
	{
		// movsx / movzx
		const struct Tbl {
			uint8 code;
			const char *name;
		} tbl[] = {
			{ B10111110, "movsx" },
			{ B10110110, "movzx" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code);
		}
	}
	{
		// FPU instructions with a memory operand: one opcode column per
		// operand size (m16/m32/m64) plus ext; m64ext defaults to 0 for
		// rows that do not list it.
		const struct Tbl {
			uint8 m16;
			uint8 m32;
			uint8 m64;
			uint8 ext;
			const char *name;
			uint8 m64ext;
		} tbl[] = {
			{ 0x00, 0xD8, 0xDC, 0, "fadd" },
			{ 0xDE, 0xDA, 0x00, 0, "fiadd" },
			{ 0x00, 0xD8, 0xDC, 2, "fcom" },
			{ 0x00, 0xD8, 0xDC, 3, "fcomp" },
			{ 0x00, 0xD8, 0xDC, 6, "fdiv" },
			{ 0xDE, 0xDA, 0x00, 6, "fidiv" },
			{ 0x00, 0xD8, 0xDC, 7, "fdivr" },
			{ 0xDE, 0xDA, 0x00, 7, "fidivr" },
			{ 0xDE, 0xDA, 0x00, 2, "ficom" },
			{ 0xDE, 0xDA, 0x00, 3, "ficomp" },
			{ 0xDF, 0xDB, 0xDF, 0, "fild", 5 },
			{ 0xDF, 0xDB, 0x00, 2, "fist" },
			{ 0xDF, 0xDB, 0xDF, 3, "fistp", 7 },
			{ 0xDF, 0xDB, 0xDD, 1, "fisttp" },
			{ 0x00, 0xD9, 0xDD, 0, "fld" },
			{ 0x00, 0xD8, 0xDC, 1, "fmul" },
			{ 0xDE, 0xDA, 0x00, 1, "fimul" },
			{ 0x00, 0xD9, 0xDD, 2, "fst" },
			{ 0x00, 0xD9, 0xDD, 3, "fstp" },
			{ 0x00, 0xD8, 0xDC, 4, "fsub" },
			{ 0xDE, 0xDA, 0x00, 4, "fisub" },
			{ 0x00, 0xD8, 0xDC, 5, "fsubr" },
			{ 0xDE, 0xDA, 0x00, 5, "fisubr" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Address& addr) { opFpuMem(addr, 0x%02X, 0x%02X, 0x%02X, %d, %d); }\n", p->name, p->m16, p->m32, p->m64, p->ext, p->m64ext);
		}
	}
	{
		// FPU instructions taking two Fpu registers; both 16-bit codes are
		// handed to opFpuFpu unchanged
		const struct Tbl {
			uint32 code1;
			uint32 code2;
			const char *name;
		} tbl[] = {
			{ 0xD8C0, 0xDCC0, "fadd" },
			{ 0x0000, 0xDEC0, "faddp" },
			{ 0xDAC0, 0x00C0, "fcmovb" },
			{ 0xDAC8, 0x00C8, "fcmove" },
			{ 0xDAD0, 0x00D0, "fcmovbe" },
			{ 0xDAD8, 0x00D8, "fcmovu" },
			{ 0xDBC0, 0x00C0, "fcmovnb" },
			{ 0xDBC8, 0x00C8, "fcmovne" },
			{ 0xDBD0, 0x00D0, "fcmovnbe" },
			{ 0xDBD8, 0x00D8, "fcmovnu" },
			{ 0xDBF0, 0x00F0, "fcomi" },
			{ 0xDFF0, 0x00F0, "fcomip" },
			{ 0xDBE8, 0x00E8, "fucomi" },
			{ 0xDFE8, 0x00E8, "fucomip" },
			{ 0xD8F0, 0xDCF8, "fdiv" },
			{ 0x0000, 0xDEF8, "fdivp" },
			{ 0xD8F8, 0xDCF0, "fdivr" },
			{ 0x0000, 0xDEF0, "fdivrp" },
			{ 0xD8C8, 0xDCC8, "fmul" },
			{ 0x0000, 0xDEC8, "fmulp" },
			{ 0xD8E0, 0xDCE8, "fsub" },
			{ 0x0000, 0xDEE8, "fsubp" },
			{ 0xD8E8, 0xDCE0, "fsubr" },
			{ 0x0000, 0xDEE0, "fsubrp" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2);
		}
	}
	{
		// FPU instructions taking a single Fpu register
		const struct Tbl {
			uint8 code1;
			uint8 code2;
			const char *name;
		} tbl[] = {
			{ 0xD8, 0xD0, "fcom" },
			{ 0xD8, 0xD8, "fcomp" },
			{ 0xDD, 0xC0, "ffree" },
			{ 0xD9, 0xC0, "fld" },
			{ 0xDD, 0xD0, "fst" },
			{ 0xDD, 0xD8, "fstp" },
			{ 0xDD, 0xE0, "fucom" },
			{ 0xDD, 0xE8, "fucomp" },
			{ 0xD9, 0xC8, "fxch" },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Fpu& reg) { opFpu(reg, 0x%02X, 0x%02X); }\n", p->name, p->code1, p->code2);
		}
	}
}
// Entry point: write every generated mnemonic definition to stdout.
// Falling off the end of main() is equivalent to `return 0;`.
int main()
{
	put();
}

7
gen/update.bat Normal file
View File

@ -0,0 +1,7 @@
rem Build gen_code.cpp with the MSVC command-line compiler, then run it and
rem redirect its output into ..\xbyak\xbyak_mnemonic.h.
rem set STL_DIR=c:/s/STLport
rem set OPT=-GX -I%STL_DIR%/stlport /link /libpath:%STL_DIR%/lib
rem set OPT=-GX -I../xbyak
rem don't add /Ox
set OPT=/EHsc -I../xbyak
cl gen_code.cpp %OPT%
rem Stop if the build failed: the redirection below truncates the header
rem before gen_code even starts, and a stale gen_code.exe could be picked up.
if errorlevel 1 exit /b 1
gen_code > ..\\xbyak\\xbyak_mnemonic.h

240
readme.txt Normal file
View File

@ -0,0 +1,240 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.23
-----------------------------------------------------------------------------
◎概要
これはx86, x64(AMD64, x86-64)のマシン語命令を生成するC++のクラスライブラリです.
プログラム実行時に動的にアセンブルすることが可能です.
-----------------------------------------------------------------------------
◎特徴
・ヘッダファイルオンリー
xbyak.hをインクルードするだけですぐ利用することができます
C++の枠組み内で閉じているため,外部アセンブラは不要です.
32bit/64bit両対応です
対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)
・Windows Xp(32bit, 64bit), Vista/Linux(32bit, 64bit)/Intel Mac対応
Windows Xp上ではVC2005 Express Ed., VC2008
Windows Vista
Linux (kernel 2.4.32)上ではgcc 4.3.0, CentOS 5.1上ではgcc 4.1.2
Intel Mac
などで動作確認をしています.
※ gccではand, or, xorなどを演算子として解釈してしまうため
-fno-operator-namesオプションを追加してコンパイルしてください
-----------------------------------------------------------------------------
◎準備
xbyak.h
xbyak_bin2hex.h
xbyak_mnemonic.h
これらを同一のパスに入れてインクルードパスに追加してください.
Linuxではmake installで/usr/local/include/xbyakにコピーされます
-----------------------------------------------------------------------------
◎文法
Xbyak::CodeGenerator クラスを継承し,そのクラスメソッド内でx86, x64アセンブラを
記述します.そのメソッドを呼び出した後,getCode()メソッドを呼び出し,その戻
り値を自分が使いたい関数ポインタに変換して利用します.アセンブルエラーは例外
により通知されます(cf. main.cpp).
・基本的にnasmの命令で括弧をつければよいです
mov eax, ebx --> mov(eax, ebx);
inc ecx --> inc(ecx);
ret --> ret();
・アドレッシング
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
という形で指定します.サイズを指定する必要がない限りptrを使えばよいです.
セレクタはサポートしていません.
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
test byte [esp], 4 --> test (byte [esp], 4);
(注意) dword, word, byteはクラス変数です.従って,たとえばunsigned intの
つもりでdwordをtypedefしないでください.
・ラベル
L(文字列);
で定義します.ジャンプするときはその文字列を指定します.後方参照も可能ですが,
相対アドレスが8ビットに収まらない場合はT_NEARをつけないと実行時に例外が発生
します.
・hasUndefinedLabel()を呼び出して真ならジャンプ先が存在しないことを示します.
コードを見直してください.
L("L1");
jmp ("L1");
jmp ("L2");
...
少しの命令の場合.
...
L("L2");
jmp ("L3", T_NEAR);
...
沢山の命令がある場合
...
L("L3");
<応用編>
1. MASMライクな@@, @f, @bをサポート
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
2. ラベルの局所化
ピリオドで始まるラベルをinLocalLabel(), outLocalLabel()で挟むことで局所化できます.
void func1()
{
inLocalLabel();
L(".lp"); // <A>
...
jmp(".lp"); // jmp to <A>
outLocalLabel();
}
void func2()
{
L(".lp"); // <B>
func1();
jmp(".lp"); // jmp to <B>
}
上記サンプルではinLocalLabel(), outLocalLabel()が無いと,
".lp"ラベルの二重定義エラーになります.
・Xbyak::CodeGenerator()コンストラクタインタフェース
@param maxSize [in] コード生成最大サイズ(デフォルト2048byte)
@param userPtr [in] ユーザ指定メモリ
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0);
デフォルトコードサイズは2048(=DEFAULT_MAX_CODE_SIZE)バイトです.
それより大きなコードを生成する場合はCodeGenerator()のコンストラクタに指定してください.
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
# 動的にしたほうがよいのだが実行属性の管理が面倒でやってません….
またユーザ指定メモリをコード生成最大サイズと共に指定するとCodeGeneratorは
指定されたメモリ上にバイト列を生成します.
サポート関数として指定されたアドレスの実行属性を変更するCodeArray::protect()と
与えられたポインタからアライメントされたポインタを取得するCodeArray::getAlignedAddress()
も用意しました.詳細はsample/test0.cppのuse memory allocated by userを参考に
してください.
/**
change exec permission of memory
@param addr [in] buffer address
@param size [in] buffer size
@param canExec [in] true(enable to exec), false(disable to exec)
@return true(success), false(failure)
*/
bool CodeArray::protect(const void *addr, size_t size, bool canExec);
/**
get aligned memory pointer
*/
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
その他詳細は各種サンプルを参照してください.
-----------------------------------------------------------------------------
◎マクロ
32bit環境上でコンパイルするとXBYAK32が,64bit環境上でコンパイルするとXBYAK64が
定義されます.さらに64bit環境上ではWindowsならXBYAK64_WIN,gcc上ではXBYAK64_GCC
も定義されます.
-----------------------------------------------------------------------------
◎使用例
test0.cpp ; 簡単な例(x86, x64)
quantize.cpp ; 割り算のJITアセンブルによる量子化の高速化(x86)
calc.cpp ; 与えられた多項式をアセンブルして実行(x86, x64)
boost(http://www.boost.org/)が必要
bf.cpp ; JIT Brainfuck(x86, x64)
-----------------------------------------------------------------------------
◎注意
MMX/SSE命令はほぼ全て実装されていますが,3D Now!命令や,一部の特殊な
命令は現時点では実装されていません.FPUの80bit浮動小数はサポートしていません.
何かご要望があればご連絡ください.
-----------------------------------------------------------------------------
◎ライセンス
修正された新しいBSDライセンスに従います
http://www.opensource.jp/licenses/bsd-license.html
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
いただきました.
-----------------------------------------------------------------------------
◎履歴
2010/04/15 ver 2.23 fix align() and xbyak_util.h for Mac
2010/02/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
2009/12/09 ver 2.21 support cygwin(gcc 4.3.2)
2009/11/28 ver 2.20 FPUの一部命令サポート
2009/06/25 ver 2.11 64bitモードでの mov(qword[rax], imm); 修正(thanks to Martinさん)
2009/03/10 ver 2.10 jmp/call reg64の冗長なREG.W削除
2009/02/24 ver 2.09 movq reg64, mmx/xmm; movq mmx/xmm, reg64追加
2009/02/13 ver 2.08 movd(xmm7, dword[eax])が0x66を落とすバグ修正(thanks to Gabestさん)
2008/12/30 ver 2.07 call()の相対アドレスが8bit以下のときのバグ修正(thanks to katoさん)
2008/09/18 ver 2.06 @@, @f, @bとラベルの局所化機能追加(thanks to nobu-qさん)
2008/09/18 ver 2.05 ptr [rip + 32bit offset]サポート(thanks to 団子厨(Dango-Chu)さん)
2008/06/03 ver 2.04 align()のポカミス修正mov(ptr[eax],1);などをエラーに
2008/06/02 ver 2.03 ユーザ定義メモリインタフェースサポート
2008/05/26 ver 2.02 protect()(on Linux)で不正な設定になることがあるのを修正(thanks to sinichiro_hさん)
2008/04/30 ver 2.01 cmpxchg16b, cdqe追加
2008/04/29 ver 2.00 x86/x64-64版公開
2008/04/25 ver 1.90 x86版β公開
2008/04/18 ver 1.12 コード整理
2008/04/14 ver 1.11 コード整理
2008/03/12 ver 1.10 bsf/bsr追加(忘れていた)
2008/02/14 ver 1.09 sub eax, 1234が16bitモードで出力されていたのを修正(thanks to Robertさん)
2007/11/05 ver 1.08 lock, xadd, xchg追加
2007/11/02 ver 1.07 SSSE3/SSE4対応(thanks to 団子厨(Dango-Chu)さん)
2007/09/25 ver 1.06 call((int)関数ポインタ); jmp((int)関数ポインタ);のサポート
2007/08/04 ver 1.05 細かい修正
2007/02/04 後方へのジャンプでT_NEARをつけないときに8bit相対アドレスに入らない
場合に例外が発生しないバグの修正
2007/01/21 [disp]の形のアドレス生成のバグ修正
mov (eax|ax|al, [disp]); mov([disp], eax|ax|al);の短い表現選択
2007/01/17 webページ作成
2007/01/04 公開開始
-----------------------------------------------------------------------------
◎著作権者
光成滋生(MITSUNARI Shigeo, herumi at nifty dot com)
---
$Revision: 1.54 $
$Date: 2010/04/15 06:44:46 $

184
readme_e.txt Normal file
View File

@ -0,0 +1,184 @@
Xbyak 2.23 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
-----------------------------------------------------------------------------
<Abstract>
This is a header-only library that lets a C++ program assemble x86(IA32) and
x64(AMD64, x86-64) mnemonics dynamically, at run time.
-----------------------------------------------------------------------------
<Feature>
header file only
you can use Xbyak's functions at once if xbyak.h is included.
MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(partial) are available.
Windows Xp(32bit, 64bit), Vista, Linux(32bit, 64bit), Intel Mac ready
support Visual Studio C++ 2005 Express Ed., VC2008 Pro,
mingw 3.4.2, icc 7.2, gcc 4.1.1, and so on.
Note: "-fno-operator-names" option is required on gcc to avoid analyzing
"and", "or", etc. as operators.
-----------------------------------------------------------------------------
<Install>
The following files are necessary. Please add the path to your compile
directories.
xbyak.h
xbyak_bin2hex.h
xbyak_mnemonic.h
Linux:
>make install
These files are copied into /usr/local/include/xbyak .
-----------------------------------------------------------------------------
<Syntax>
Derive a class from Xbyak::CodeGenerator, write the assembly code in one of its
methods, call that method, then call getCode() and cast the returned pointer to
the function pointer type you need.
NASM Xbyak
mov eax, ebx --> mov(eax, ebx);
inc ecx --> inc(ecx);
ret --> ret();
Addressing
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
NASM Xbyak
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
test byte [esp], 4 --> test (byte [esp], 4);
NB. dword, word and byte are class members, so do not define names such as
dword yourself (for example, as a typedef for unsigned int).
Label
L("L1");
jmp ("L1");
jmp ("L2");
...
a few mnemonics(8-bit displacement jmp)
...
L("L2");
jmp ("L3", T_NEAR);
...
a lot of mnemonics(32-bit displacement jmp)
...
L("L3");
Call hasUndefinedLabel() to verify your code has no undefined label.
1. support @@, @f, @b like MASM
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
2. localization of label by calling inLocalLabel(), outLocalLabel()
void func1()
{
inLocalLabel();
L(".lp"); // <A>
...
jmp(".lp"); // jmp to <A>
outLocalLabel();
}
void func2()
{
L(".lp"); // <B>
func1();
jmp(".lp"); // jmp to <B>
}
Code size
The default max code size is 2048 bytes. Please set it in constructor of
CodeGenerator() if you want to use large size.
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
See main.cpp
-----------------------------------------------------------------------------
<Macro>
XBYAK32 is defined on 32bit.
XBYAK64 is defined on 64bit.
XBYAK64_WIN is defined on 64bit Windows
XBYAK64_GCC is defined on 64bit gcc
-----------------------------------------------------------------------------
<Sample>
test0.cpp ; tiny sample of Xbyak(x86, x64)
quantize.cpp ; JIT optimized quantization by fast division(x86 only)
calc.cpp ; assemble and evaluate a given polynomial(x86, x64)
bf.cpp ; JIT brainfuck(x86, x64)
-----------------------------------------------------------------------------
<Remark>
The current version does not support 3D Now!, 80bit FPU load/store
and some special mnemonics.
Please mail to me if necessary.
-----------------------------------------------------------------------------
<License>
modified new BSD License
http://www.opensource.org/licenses/bsd-license.php
-----------------------------------------------------------------------------
<History>
2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
2009/Nov/28 support a part of FPU
2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
2008/Dec/30 fix call in short relative address(thanks to kato san)
2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
2008/Sep/18 support ptr [rip + 32bit offset] (thanks to Dango-Chu san)
2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
2008/Jun/02 support memory interface allocated by user
2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
2008/Apr/30 add cmpxchg16b, cdqe
2008/Apr/29 support x86
2008/Apr/14 code refactoring
2008/Mar/12 add bsr/bsf
2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
2007/Nov/5 support lock, xadd, xchg
2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
2007/Feb/4 fix the bug that no exception was thrown when the offset of a
backward jmp without T_NEAR did not fit in an 8-bit relative address.
2007/Jan/21 fix the bug to create address like [disp]
select smaller representation for mov (eax|ax|al, [disp])
2007/Jan/4 first version
-----------------------------------------------------------------------------
<Author>
MITSUNARI Shigeo(herumi at nifty dot com)
---
$Revision: 1.42 $
$Date: 2010/04/15 06:44:46 $

76
sample/Makefile Normal file
View File

@ -0,0 +1,76 @@
# Build the Xbyak samples with g++.
# The 32-bit samples are always built; the 64-bit ones are added when
# `uname -m` is x86_64 or `uname -s` is Darwin.
TARGET = test quantize bf toyvm test_util memfunc
XBYAK_INC=../xbyak/xbyak.h
# BOOST_EXIT is "1" when the preprocessor finds <boost/spirit/core.hpp>;
# calc/calc64 are built only in that case.
BOOST_EXIT=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
BIT=32
ifeq ($(shell uname -m),x86_64)
BIT=64
endif
ifeq ($(shell uname -s),Darwin)
BIT=64
endif
ifeq ($(BIT),64)
TARGET += test64 bf64 memfunc64 test_util64
ifeq ($(BOOST_EXIT),1)
TARGET += calc64
endif
endif
ifeq ($(BOOST_EXIT),1)
TARGET += calc
endif
# all and clean do not create files of those names
.PHONY: all clean
all: $(TARGET)
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
# -fno-operator-names keeps gcc from treating "and", "or", etc. as operators
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ $(CFLAGS_WARN)
test:
	g++ $(CFLAGS) test0.cpp -o $@ -m32
quantize:
	g++ $(CFLAGS) quantize.cpp -o $@ -m32
calc:
	g++ $(CFLAGS) calc.cpp -o $@ -m32
calc64:
	g++ $(CFLAGS) calc.cpp -o $@ -m64
bf:
	g++ $(CFLAGS) bf.cpp -o $@ -m32
bf64:
	g++ $(CFLAGS) bf.cpp -o $@ -m64
memfunc:
	g++ $(CFLAGS) memfunc.cpp -o $@ -m32
memfunc64:
	g++ $(CFLAGS) memfunc.cpp -o $@ -m64
toyvm:
	g++ $(CFLAGS) toyvm.cpp -o $@ -m32
test64:
	g++ $(CFLAGS) test0.cpp -o $@ -m64
test_util:
	g++ $(CFLAGS) test_util.cpp -o $@ -m32
test_util64:
	g++ $(CFLAGS) test_util.cpp -o $@ -m64
clean:
	rm -rf *.o $(TARGET)
# prerequisites ; every sample is rebuilt when its source or xbyak.h changes
test : test0.cpp $(XBYAK_INC)
test64: test0.cpp $(XBYAK_INC)
quantize : quantize.cpp $(XBYAK_INC)
calc : calc.cpp $(XBYAK_INC)
calc64 : calc.cpp $(XBYAK_INC)
bf : bf.cpp $(XBYAK_INC)
bf64 : bf.cpp $(XBYAK_INC)
memfunc : memfunc.cpp $(XBYAK_INC)
memfunc64 : memfunc.cpp $(XBYAK_INC)
toyvm : toyvm.cpp $(XBYAK_INC)
test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
test_util64 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h

205
sample/bf.cpp Normal file
View File

@ -0,0 +1,205 @@
#include "xbyak/xbyak.h"
#include <stdio.h>
#include <stdlib.h>
#include <stack>
#include <fstream>
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#define snprintf _snprintf
#endif
class Brainfuck : public Xbyak::CodeGenerator {
private:
enum Direction { B, F };
const char *toStr(int labelNo, Direction dir)
{
static char num[64];
snprintf(num, sizeof(num), "%c%d", dir == B ? 'B' : 'F', labelNo);
return num;
}
public:
int getContinuousChar(std::istream& is, char c)
{
int count = 1;
char p;
while (is >> p) {
if (p != c) break;
count++;
}
is.unget();
return count;
}
Brainfuck(std::istream& is) : CodeGenerator(10000)
{
// void (*)(void* putchar, void* getchar, int *stack)
using namespace Xbyak;
#ifdef XBYAK32
#if defined(_MSC_VER) && (_MSC_VER <= 1200) // for VC6
const Reg32 pPutchar(esi);
const Reg32 pGetchar(edi);
const Reg32 stack(ebp);
#else
const Reg32& pPutchar(esi);
const Reg32& pGetchar(edi);
const Reg32& stack(ebp);
#endif
const Address cur = dword [stack];
push(ebp); // stack
push(esi);
push(edi);
const int P_ = 4 * 3;
mov(pPutchar, ptr[esp + P_ + 4]); // putchar
mov(pGetchar, ptr[esp + P_ + 8]); // getchar
mov(stack, ptr[esp + P_ + 12]); // stack
#elif defined(XBYAK64_WIN)
const Reg64& pPutchar(rsi);
const Reg64& pGetchar(rdi);
const Reg64& stack(rbp); // stack
const Address cur = dword [stack];
push(rsi);
push(rdi);
push(rbp);
mov(pPutchar, rcx); // putchar
mov(pGetchar, rdx); // getchar
mov(stack, r8); // stack
#else
const Reg64& pPutchar(rbx);
const Reg64& pGetchar(rbp);
const Reg64& stack(r12); // stack
const Address cur = dword [stack];
push(rbx);
push(rbp);
push(r12);
mov(pPutchar, rdi); // putchar
mov(pGetchar, rsi); // getchar
mov(stack, rdx); // stack
#endif
int labelNo = 0;
std::stack<int> keepLabelNo;
char c;
while (is >> c) {
switch (c) {
case '+':
case '-':
{
int count = getContinuousChar(is, c);
if (count == 1) {
c == '+' ? inc(cur) : dec(cur);
} else {
add(cur, (c == '+' ? count : -count));
}
}
break;
case '>':
case '<':
{
int count = getContinuousChar(is, c);
add(stack, 4 * (c == '>' ? count : -count));
}
break;
case '.':
#ifdef XBYAK32
push(cur);
call(pPutchar);
pop(eax);
#elif defined(XBYAK64_WIN)
mov(rcx, cur);
sub(rsp, 32);
call(pPutchar);
add(rsp, 32);
#else
mov(rdi, cur);
call(pPutchar);
#endif
break;
case ',':
#if defined(XBYAK32) || defined(XBYAK64_GCC)
call(pGetchar);
mov(cur, eax);
#elif defined(XBYAK64_WIN)
sub(rsp, 32);
call(pGetchar);
add(rsp, 32);
mov(cur, rax);
#endif
break;
case '[':
L(toStr(labelNo, B));
mov(eax, cur);
test(eax, eax);
jz(toStr(labelNo, F), T_NEAR);
keepLabelNo.push(labelNo++);
break;
case ']':
{
int no = keepLabelNo.top(); keepLabelNo.pop();
jmp(toStr(no, B));
L(toStr(no, F));
}
break;
default:
break;
}
}
#ifdef XBYAK32
pop(edi);
pop(esi);
pop(ebp);
#elif defined(XBYAK64_WIN)
pop(rbp);
pop(rdi);
pop(rsi);
#else
pop(r12);
pop(rbp);
pop(rbx);
#endif
ret();
}
};
/*
	write a C source file to stdout which contains the generated code as
	a byte array and a main function calling it
	@param code [in] top of the generated code
	@param size [in] size of the generated code in bytes
	@note main is declared as "int main()" because a declaration without
	      a return type is rejected by C99 and C++ compilers
*/
void dump(const Xbyak::uint8 *code, size_t size)
{
	puts("#include <stdio.h>\nstatic int stack[32768];\nstatic const unsigned char code[] = {");
	for (size_t i = 0; i < size; i++) {
		printf("0x%02x,", code[i]);
		// 16 bytes per line
		if ((i % 16) == 15) putchar('\n');
	}
	puts("\n};");
#ifdef __linux__
	puts("#include <unistd.h>");
	puts("#include <sys/mman.h>");
#endif
	puts("int main()\n{");
#ifdef __linux__
	// the emitted program makes the array executable before calling it
	puts("\tlong pageSize = sysconf(_SC_PAGESIZE) - 1;");
	puts("\tmprotect((void*)code, (sizeof(code) + pageSize) & ~pageSize, PROT_READ | PROT_EXEC);");
#endif
	puts(
		"\t((void (*)(void*, void*, int *))code)((void*)putchar, (void*)getchar, stack);\n"
		"}"
	);
}
/*
	bf filename.bf [0|1]
	mode 0(default) : compile the program and run it
	mode 1 : compile the program and write it to stdout as a C source file
	@note messages are written to stderr because stdout holds the C source
	      in mode 1
*/
int main(int argc, char *argv[])
{
#ifdef XBYAK32
	fprintf(stderr, "32bit mode\n");
#else
	fprintf(stderr, "64bit mode\n");
#endif
	if (argc == 1) {
		fprintf(stderr, "bf filename.bf [0|1]\n");
		return 1;
	}
	std::ifstream ifs(argv[1]);
	if (!ifs) {
		// otherwise an empty program would be compiled silently
		fprintf(stderr, "can't open %s\n", argv[1]);
		return 1;
	}
	int mode = argc == 3 ? atoi(argv[2]) : 0;
	Brainfuck bf(ifs);
	if (mode == 0) {
		static int stack[32768];
		((void (*)(void*, void*, int *))bf.getCode())((void*)putchar, (void*)getchar, stack);
	} else {
		dump(bf.getCode(), bf.getSize());
	}
	return 0;
}

427
sample/bf.vcproj Normal file
View File

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="bf"
ProjectGUID="{654BD79B-59D3-4B10-BBAA-158BAB272828}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/bf.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/bf.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/bf.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/bf.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/bf.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/bf.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/bf.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/bf.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="bf.cpp"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

226
sample/calc.cpp Normal file
View File

@ -0,0 +1,226 @@
/*
@author herumi
@date $Date: 2010/04/15 06:52:07 $
tiny calculator 2
This program generates a function to calc the value of
polynomial given by user in run-time.
uses boost::spirit
*/
#include <stdio.h>
#include <sstream>
#include <map>
#include "xbyak/xbyak.h"
#ifdef _WIN32
#pragma warning(disable : 4127) // for boost(constant condition)
#pragma warning(disable : 4512) // for boost
#endif
//#include <boost/spirit/iterator/file_iterator.hpp>
//#include <boost/spirit/core.hpp>
#include <boost/spirit/include/classic_file_iterator.hpp>
#include <boost/spirit/include/classic_core.hpp>
#include <boost/bind.hpp>
/*
	errors thrown by FuncGen ; main prints the value as "ERR:<value>"
*/
enum Error {
	UNDEFINED_VARIABLE = 1, // a name which is not in the variable table is used
	TOO_MANY_CONSTANTS, // more than MAX_CONST_NUM constants are used
	TOO_MANY_REGISTERS // the expression needs more than xmm0, ..., xmm7
};
/*
	JIT assemble of given polynomial for VC or gcc

	The expression is evaluated on a stack of xmm registers ;
	regIdx_ is the index of the register at the top of the stack (-1 if empty).
	genPush/genVal push a value, genAdd/genSub/genMul/genDiv replace the two
	values at the top by the result.
*/
class FuncGen : public Xbyak::CodeGenerator {
public:
	typedef std::map<std::string, int> Map;
private:
	enum {
		MAX_CONST_NUM = 32
	};
	double constTbl_[MAX_CONST_NUM]; // constants in the expression ; read by the generated code
	size_t constTblPos_; // number of used elements of constTbl_
	int regIdx_;
	Map varMap_; // map var name to index
	const Xbyak::Reg32e& valTbl_; // register which holds the pointer to the input variables
	const Xbyak::Reg32e& tbl_; // register which holds the address of constTbl_
public:
	/*
		@param y [out] the value of f(var)
		@param var [in] table of input variables
		func(double *y, const double var[]);
		@note func does not return double to avoid difference of compiler
		@note the signature above is for 32bit ; main calls the code as
		      double func(const double var[]) on 64bit
	*/
	FuncGen(const std::vector<std::string>& varTbl)
		: constTblPos_(0)
		, regIdx_(-1)
#ifdef XBYAK32
		, valTbl_(eax)
		, tbl_(edx)
#elif defined(XBYAK64_WIN)
		, valTbl_(rcx)
		, tbl_(rdx)
#else
		, valTbl_(rdi)
		, tbl_(rsi)
#endif
	{
#ifdef XBYAK32
		mov(valTbl_, ptr[esp+8]); // eax == varTbl
		mov(tbl_, (size_t)constTbl_);
#else
#ifdef XBYAK64_WIN
		movaps(ptr [rsp + 8], xm6); // save xm6, xm7
		movaps(ptr [rsp + 8 + 16], xm7);
#endif
		mov(tbl_, (size_t)constTbl_);
#endif
		for (int i = 0, n = static_cast<int>(varTbl.size()); i < n; i++) {
			varMap_[varTbl[i]] = i;
		}
	}
	/*
		push the constant n
		use edx
		@note throw TOO_MANY_CONSTANTS or TOO_MANY_REGISTERS ;
		      a bare "throw;" without an active exception would call
		      std::terminate() instead of reaching the handlers in main
	*/
	void genPush(double n)
	{
		if (constTblPos_ >= MAX_CONST_NUM) throw TOO_MANY_CONSTANTS;
		constTbl_[constTblPos_] = n;
		if (regIdx_ == 7) throw TOO_MANY_REGISTERS;
		movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + constTblPos_ * sizeof(double)]);
		constTblPos_++;
	}
	/*
		push the value of the variable whose name is [begin, end)
		use eax
		@note throw UNDEFINED_VARIABLE or TOO_MANY_REGISTERS
	*/
	void genVal(const char *begin, const char *end)
	{
		std::string var(begin, end);
		if (varMap_.find(var) == varMap_.end()) throw UNDEFINED_VARIABLE;
		if (regIdx_ == 7) throw TOO_MANY_REGISTERS;
		movsd(Xbyak::Xmm(++regIdx_), ptr[valTbl_ + varMap_[var] * sizeof(double)]);
	}
	// the following four functions ignore their arguments (the matched text)
	void genAdd(const char*, const char*)
	{
		addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
	}
	void genSub(const char*, const char*)
	{
		subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
	}
	void genMul(const char*, const char*)
	{
		mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
	}
	void genDiv(const char*, const char*)
	{
		divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
	}
	/*
		generate the epilogue ; call this after the whole expression is parsed
	*/
	void complete()
	{
#ifdef XBYAK32
		mov(eax, ptr [esp + 4]); // eax = y (the first argument)
		movsd(ptr [eax], xm0);
#else
#ifdef XBYAK64_WIN
		movaps(xm6, ptr [rsp + 8]); // restore xm6, xm7
		movaps(xm7, ptr [rsp + 8 + 16]);
#endif
#endif
		ret();
	}
};
/*
	grammar of the expression ; the semantic actions call the code
	generation functions of the FuncGen passed to the constructor

	poly0 = poly1 (('+' poly1) | ('-' poly1))*
	poly1 = poly2 (('*' poly2) | ('/' poly2))*
	poly2 = real number | var | '(' poly0 ')'
	var   = one or more alphabets
*/
struct Grammar : public boost::spirit::classic::grammar<Grammar> {
FuncGen& f_;
Grammar(FuncGen& f) : f_(f) { }
template<typename ScannerT>
struct definition {
boost::spirit::classic::rule<ScannerT> poly0, poly1, poly2, var;
definition(const Grammar& self)
{
using namespace boost;
using namespace boost::spirit::classic;
// genAdd/genSub is called after the right operand is parsed
poly0 = poly1 >> *(('+' >> poly1)[bind(&FuncGen::genAdd, ref(self.f_), _1, _2)]
| ('-' >> poly1)[bind(&FuncGen::genSub, ref(self.f_), _1, _2)]);
// genMul/genDiv is called after the right operand is parsed
poly1 = poly2 >> *(('*' >> poly2)[bind(&FuncGen::genMul, ref(self.f_), _1, _2)]
| ('/' >> poly2)[bind(&FuncGen::genDiv, ref(self.f_), _1, _2)]);
// genVal receives the range of the matched name
var = (+alpha_p)[bind(&FuncGen::genVal, ref(self.f_), _1, _2)];
// genPush receives the parsed number
poly2 = real_p[bind(&FuncGen::genPush, ref(self.f_), _1)]
| var
| '(' >> poly0 >> ')';
}
// the start rule is poly0
const boost::spirit::classic::rule<ScannerT>& start() const { return poly0; }
};
};
/*
	print the elements of x to stdout in "%f" format separated by ", "
	@param x [in] values to print
	@note nothing is printed if x is empty (x[0] must not be read then)
*/
void put(const std::vector<double>& x)
{
	for (size_t i = 0, n = x.size(); i < n; i++) {
		if (i > 0) printf(", ");
		printf("%f", x[i]);
	}
}
/*
	calc "var1 var2 ..." "function of var"
	generate the code of the function given by argv[2] whose variables are
	listed in argv[1] and print the value for ten sets of random inputs
	@return 1 if the arguments are missing or the function can't be parsed
*/
int main(int argc, char *argv[])
{
	if (argc <= 2) {
		fprintf(stderr, "calc \"var1 var2 ...\" \"function of var\"\n");
		fprintf(stderr, "eg. calc x \"x*x\"\n");
		fprintf(stderr, "eg. calc \"x y z\" \"x*x + y - z\"\n");
		return 1;
	}
	const char *poly = argv[2];
	try {
		std::vector<std::string> varTbl;
		// get varTbl from argv[1]
		{
			std::istringstream is(argv[1]);
			int i = 0;
			printf("varTbl = { ");
			while (is) {
				std::string var;
				is >> var;
				if (var.empty()) break;
				printf("%s:%d, ", var.c_str(), i);
				varTbl.push_back(var);
				i++;
			}
			printf("}\n");
		}
		FuncGen funcGen(varTbl);
		Grammar calc(funcGen);
		boost::spirit::classic::parse_info<> r = parse(poly, calc, boost::spirit::classic::space_p);
		if (!r.full) {
			printf("err poly=%s\n", poly);
			return 1;
		}
		funcGen.complete();
		std::vector<double> valTbl;
		valTbl.resize(varTbl.size());
		// valTbl is empty if argv[1] has no variable ; &valTbl[0] is invalid then
		const double *in = valTbl.empty() ? 0 : &valTbl[0];
#ifdef XBYAK32
		puts("32bit mode");
		void (*func)(double *ret, const double *valTbl) = (void (*)(double *, const double*))funcGen.getCode();
#else
		puts("64bit mode");
		double (*func)(const double *valTbl) = (double (*)(const double*))funcGen.getCode();
#endif
		for (int i = 0; i < 10; i++) {
			for (size_t j = 0, n = valTbl.size(); j < n; j++) {
				valTbl[j] = rand() % 7;
			}
			double y;
#ifdef XBYAK32
			func(&y, in);
#else
			y = func(in);
#endif
			printf("f("); put(valTbl); printf(")=%f\n", y);
		}
	} catch (Xbyak::Error err) {
		printf("ERR:%s(%d)\n", Xbyak::ConvertErrorToString(err), err);
	} catch (Error err) {
		printf("ERR:%d\n", err);
	} catch (...) {
		printf("unknown error\n");
	}
	return 0;
}

423
sample/calc.vcproj Normal file
View File

@ -0,0 +1,423 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="calc"
ProjectGUID="{5FDDFAA6-B947-491D-A17E-BBD863846579}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/calc.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/calc.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/calc.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/calc.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/calc.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/calc.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/calc.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/calc.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="calc.cpp"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

5
sample/echo.bf Normal file
View File

@ -0,0 +1,5 @@
>>++++++++[->++++++++<]>>>>+++++++++[->++++++++++<]>[<<,[->+<<+<<+>>>]<<<[
->>>+<<<]>>>>>[->+>>+<<<]>[<<[->+>>+<<<]>>>[-<<<+>>>]<<[[-]<->]>-]>>[-<<<+
>>>]<<<<<<<[-<+<<+>>>]<[>>[-<+<<+>>>]<<<[->>>+<<<]>>[[-]>-<]<-]<<[->>>+<<<
]>>>>><[[-]>++++++++++++++++++++++++++++++++>[[-]<------------------------
-------->]<<]>>[-]<.>>]

19
sample/fizzbuzz.bf Normal file
View File

@ -0,0 +1,19 @@
++++++[->++++>>+>+>-<<<<<]>
[<++++>>+++>++++>>+++>+++++>+++++>>>>>>++>>++<<<<<<<<<<<<<<-]
<++++>+++>-->+++>->>--->++>>>+++++[->++>++<<]<<<<<<<<<<
[->
-[>>>>>>>]>[<+++>.>.>>>>..>>>+<]<<<<<
-[>>>>]>[<+++++>.>.>..>>>+<]>>>>
+<-[<<<]<[
[-<<+>>]>>>+>+<<<<<<[->>+>+>-<<<<]<
]>>
[[-]<]>[
>>>[>.<<.<<<]<[.<<<<]>
]
>.<<<<<<<<<<<
]

3
sample/hello.bf Normal file
View File

@ -0,0 +1,3 @@
>+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++
++>-]<.>+++++++++++[<+++++>-]<.>++++++++[<+++>-]<.+++.------.--------.[-]>
++++++++[<++++>-]<+.[-]++++++++++.

110
sample/memfunc.cpp Normal file
View File

@ -0,0 +1,110 @@
#include <stdio.h>
#include <stdlib.h>
#include <xbyak/xbyak.h>
/*
	sample class ; the code generated in this file is called as A::func
	and reads the two int members through the object pointer
*/
struct A {
	int x_;
	int y_;
	A()
		: x_(3)
		, y_(5)
	{
	}
	// return the sum of the two members and the five arguments
	int func(int a, int b, int c, int d, int e) const
	{
		int sum = x_ + y_;
		sum += a;
		sum += b;
		sum += c;
		sum += d;
		sum += e;
		return sum;
	}
};
/*
	generate a function which main calls through a pointer to member
	function of A with five int arguments.
	The generated code adds the two ints at [self] and [self + 4] and
	the five arguments a, ..., e in eax and returns.
	The location of self and the arguments is selected by
	XBYAK32/XBYAK64_WIN/XBYAK64_GCC and _WIN32.
*/
struct Code : public Xbyak::CodeGenerator {
Code()
{
using namespace Xbyak;
// operand of ret() ; stays 0 except for 32bit Windows
int RET_ADJ = 0;
#ifdef XBYAK32
#ifdef _WIN32
// self is already in ecx ; presumably the thiscall convention of VC -- TODO confirm
const int PARA_ADJ = 0;
// size of the five int arguments
RET_ADJ = 5 * 4;
#else
// self is the first argument on the stack, so a, ..., e are shifted by 4 bytes
const int PARA_ADJ = 4;
mov(ecx, ptr [esp + 4]);
#endif
#endif
// location of self and the five arguments for the selected mode
const struct {
const Reg32e& self;
const Operand& a;
const Operand& b;
const Operand& c;
const Operand& d;
const Operand& e;
} para = {
#if defined(XBYAK64_WIN)
rcx,
edx,
r8d,
r9d,
ptr [rsp + 8 * 5],
ptr [rsp + 8 * 6],
#elif defined(XBYAK64_GCC)
rdi,
esi,
edx,
ecx,
r8d,
r9d,
#else
ecx,
ptr [esp + 4 + PARA_ADJ],
ptr [esp + 8 + PARA_ADJ],
ptr [esp + 12 + PARA_ADJ],
ptr [esp + 16 + PARA_ADJ],
ptr [esp + 20 + PARA_ADJ],
#endif
};
// eax = (int at [self]) + (int at [self + 4]) + a + b + c + d + e
mov(eax, ptr [para.self]);
add(eax, ptr [para.self + 4]);
add(eax, para.a);
add(eax, para.b);
add(eax, para.c);
add(eax, para.d);
add(eax, para.e);
ret(RET_ADJ);
}
};
int main()
{
#ifdef XBYAK64
printf("64bit");
#else
printf("32bit");
#endif
#ifdef _WIN32
puts(" win");
#else
puts(" linux");
#endif
try {
Code code;
int (A::*p)(int, int, int, int, int) const = 0;
#if defined(XBYAK32) && !defined(_WIN32)
// avoid breaking strict-aliasing rules for 32bit gcc
union {
int (A::*p)(int, int, int, int, int) const;
const Xbyak::uint8 *code;
} u;
u.code = code.getCode();
p = u.p;
#else
*(void**)&p = (void*)code.getCode();
#endif
for (int i = 0; i < 10; i++) {
A a;
int t1, t2, t3, t4, t5, x, y;
a.x_ = rand(); a.y_ = rand();
t1 = rand(); t2 = rand(); t3 = rand();
t4 = rand(); t5 = rand();
x = a.func(t1, t2, t3, t4, t5);
y = (a.*p)(t1, t2, t3, t4, t5);
printf("%c %d, %d\n", x == y ? 'o' : 'x', x, y);
}
} catch (Xbyak::Error& e) {
printf("err=%s\n", Xbyak::ConvertErrorToString(e));
return 1;
}
}

224
sample/quantize.cpp Normal file
View File

@ -0,0 +1,224 @@
/*
@author herumi
@date $Date: 2009/12/09 05:40:52 $
JPEG quantize sample
This program generates a quantization routine by using fast division algorithm in run-time.
time(sec)
quality 1(low) 10 50 100(high)
VC2005 8.0 8.0 8.0 8.0
Xbyak 1.6 0.8 0.5 0.5
; generated code at q = 1
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
shr eax,4
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov edx,0BA2E8BA3h
mul eax,edx
shr edx,3
...
; generated code at q = 100
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov dword ptr [edi+4],eax
mov eax,dword ptr [esi+8]
mov dword ptr [edi+8],eax
mov eax,dword ptr [esi+0Ch]
...
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "xbyak/xbyak.h"
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#endif
typedef Xbyak::uint64 uint64;
typedef Xbyak::uint32 uint32;
const int N = 64;
/*
Code generator for a fully unrolled quantizer: for each of the N table entries
it emits code that divides src[i] by the constant qTbl[i] and stores the
quotient to dest[i]. The constants are baked into the generated instructions,
so the generated function never reads the table at run time.
*/
class Quantize : public Xbyak::CodeGenerator {
// Returns the largest shift with (1 << shift) <= x, i.e. floor(log2(x)) for x >= 1.
// Returns -1 when x < 1 (the loop body never runs).
static int ilog2(int x)
{
int shift = 0;
while ((1 << shift) <= x) shift++;
return shift - 1;
}
public:
/*
Emits code for one unsigned division by a constant.
input : esi
output : eax = [esi+offset] / dividend
destroy : edx
NOTE: despite its name, `dividend` is the constant DIVISOR; the value being
divided is the 32bit word at [esi+offset].
NOTE: dividend == 0 must not be passed: the factor-of-two loop below would
never terminate (0 stays even forever). main() clamps the table to >= 1.
NOTE(review): the multiplier selection looks like the usual
"division by invariant multiplication" scheme -- confirm against a reference
before changing any of the arithmetic.
*/
void udiv(uint32 dividend, int offset)
{
mov(eax, ptr[esi + offset]);
/* dividend = odd x 2^exponent */
int exponent = 0, odd = dividend;
while ((odd & 1) == 0) {
odd >>= 1; exponent++;
}
if (odd == 1) { // trivial case
// pure power of two: a right shift is enough (nothing at all for a divisor of 1)
if (exponent) {
shr(eax, exponent);
}
return;
}
// candidate multipliers for dividing by the odd factor
uint64 mLow, mHigh;
int len = ilog2(odd) + 1;
{
uint64 roundUp = uint64(1) << (32 + len);
uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
mLow = roundUp / odd;
mHigh = (roundUp + k) / odd;
}
// shrink the multiplier while the halved bounds still differ
while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) {
mLow >>= 1; mHigh >>= 1; len--;
}
// m : multiplier that gets emitted, a : 1 if m must also be added to the product
uint64 m; int a;
if ((mHigh >> 32) == 0) {
// multiplier fits in 32 bits
m = mHigh; a = 0;
} else {
// otherwise use a rounded multiplier of one bit less plus the add step
len = ilog2(odd);
uint64 roundDown = uint64(1) << (32 + len);
mLow = roundDown / odd;
int r = (int)(roundDown % odd);
m = (r <= (odd >> 1)) ? mLow : mLow + 1;
a = 1;
}
// drop trailing zero bits of the multiplier and shorten the final shift to match
while ((m & 1) == 0) {
m >>= 1; len--;
}
// fold the power-of-two part of the divisor into the final shift
len += exponent;
// edx:eax = eax * m
mov(edx, int(m));
mul(edx);
if (a) {
// 64bit add of m to the product: edx:eax += m
add(eax, int(m));
adc(edx, 0);
}
// the quotient is the high half of the product shifted right by len
if (len) {
shr(edx, len);
}
mov(eax, edx);
}
/*
Generates
quantize(uint32 dest[64], const uint32 src[64]);
The arguments are read from the stack ([esp+4] = dest, [esp+8] = src on
entry); esi and edi are saved and restored around the unrolled body.
*/
Quantize(const uint32 qTbl[64])
{
push(esi);
push(edi);
// the two pushes above moved the arguments 8 bytes further from esp
const int P_ = 4 * 2;
mov(edi, ptr [esp+P_+4]); // dest
mov(esi, ptr [esp+P_+8]); // src
for (int i = 0; i < N; i++) {
// eax = src[i] / qTbl[i], then dest[i] = eax
udiv(qTbl[i], i * 4);
mov(ptr[edi+i*4], eax);
}
pop(edi);
pop(esi);
ret();
}
};
/*
	Plain C++ reference quantizer: dest[i] = src[i] / qTbl[i] for all N entries.
	Every qTbl entry must be non-zero.
*/
void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64])
{
	const uint32 *in = src;
	const uint32 *divisor = qTbl;
	uint32 *const outEnd = dest + N;
	for (uint32 *out = dest; out != outEnd; ++out, ++in, ++divisor) {
		*out = *in / *divisor;
	}
}
/*
	Reads the quality value q (first command line argument, or stdin when no
	argument is given), scales the quantization table by it, checks the
	generated quantizer against the plain C++ quantize() and times both.
	Returns 1 on 64bit builds (not supported) or when q is not a positive
	integer, 0 otherwise.
*/
int main(int argc, char *argv[])
{
#ifdef XBYAK64
	puts("not implemented for 64bit");
	return 1;
#endif
	int q = 0;
	if (argc > 1) {
		q = atoi(argv[1]);
	} else {
		printf("input quantize=");
		// keep q at 0 (rejected below) when no number could be read
		if (scanf("%d", &q) != 1) {
			q = 0;
		}
	}
	printf("q=%d\n", q);
	if (q <= 0) {
		// q == 0 would divide by zero while scaling the table
		puts("quantize must be a positive integer");
		return 1;
	}
	uint32 qTbl[] = {
		16, 11, 10, 16, 24, 40, 51, 61,
		12, 12, 14, 19, 26, 58, 60, 55,
		14, 13, 16, 24, 40, 57, 69, 56,
		14, 17, 22, 29, 51, 87, 80, 62,
		18, 22, 37, 56, 68, 109, 103, 77,
		24, 35, 55, 64, 81, 104, 113, 92,
		49, 64, 78, 87, 103, 121, 120, 101,
		72, 92, 95, 98, 112, 100, 103, 99
	};
	for (int i = 0; i < N; i++) {
		qTbl[i] /= q;
		// a divisor of 0 is not allowed by either quantizer
		if (qTbl[i] == 0) qTbl[i] = 1;
	}
	try {
		uint32 src[N];
		uint32 dest[N];
		uint32 dest2[N];
		for (int i = 0; i < N; i++) {
			src[i] = rand() % 2048;
		}
		Quantize jit(qTbl);
		//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
		// the generated code only reads its first two arguments; the table is baked into it
		void (*quantize2)(uint32*, const uint32*, const uint32 *) = (void (*)(uint32*, const uint32*, const uint32 *))jit.getCode();

		quantize(dest, src, qTbl);
		quantize2(dest2, src, qTbl);
		for (int i = 0; i < N; i++) {
			if (dest[i] != dest2[i]) {
				printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
			}
		}

		const int count = 10000000;
		clock_t begin = clock();
		for (int i = 0; i < count; i++) {
			quantize(dest, src, qTbl);
		}
		printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
		begin = clock();
		for (int i = 0; i < count; i++) {
			quantize2(dest, src, qTbl);
		}
		printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
	} catch (Xbyak::Error err) {
		printf("ERR:%s(%d)\n", Xbyak::ConvertErrorToString(err), err);
	} catch (...) {
		printf("unknown error\n");
	}
	return 0;
}

427
sample/quantize.vcproj Normal file
View File

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="quantize"
ProjectGUID="{D06753BF-E1F3-4578-9B18-08673327F77C}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/quantize.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/quantize.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/quantize.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/quantize.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/quantize.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/quantize.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/quantize.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/quantize.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="quantize.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

150
sample/test0.cpp Normal file
View File

@ -0,0 +1,150 @@
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include "xbyak/xbyak.h"
/*
Generates int f(int n) returning 0 + 1 + ... + n, and doubles as a demo of
Xbyak's "@@"/"@b" anonymous labels and inLocalLabel()/outLocalLabel() scopes.
*/
class Sample : public Xbyak::CodeGenerator {
// declared private with no body in this file, so Sample objects cannot be assigned
void operator=(const Sample&);
public:
// ptr/size are passed on to CodeGenerator(size, ptr); ptr == 0 is the default
// (main() passes its own buffer in the "memory allocated by user" case)
Sample(void *ptr = 0, size_t size = Xbyak::DEFAULT_MAX_CODE_SIZE) : Xbyak::CodeGenerator(size, ptr) {}
// Emits the summing routine; call once before using getCode().
void gen()
{
// bring n into ecx; with neither macro defined no move is emitted, n is then
// expected in ecx already
#ifdef XBYAK32
mov(ecx, ptr [esp + 4]); // n
#elif defined(XBYAK64_GCC)
mov(ecx, edi); // n
#endif
xor(eax, eax); // sum
test(ecx, ecx);
jz(".exit"); // n == 0 : go to the final .exit (<B>) with sum = 0
xor(edx, edx); // i
L("@@");
add(eax, edx);
inc(edx);
/*
sample of local label
*/
inLocalLabel(); // from here
jmp(".exit"); // jmp to not <B> but <A>
nop();
nop();
L(".exit"); // <A> this label is different from <B>
outLocalLabel(); // here
cmp(edx, ecx);
jbe("@b"); // jmp to previous @@
L(".exit"); // <B>
ret();
}
};
/*
Generates int f(int x) returning x + y, where y is fixed at generation time
and embedded in the emitted instruction as a constant.
*/
class AddFunc : public Xbyak::CodeGenerator {
// declared private with no body in this file, so AddFunc objects cannot be assigned
void operator=(const AddFunc&);
public:
AddFunc(int y)
{
#ifdef XBYAK32
// 32bit: x is read from the stack
mov(eax, ptr [esp + 4]);
add(eax, y);
#elif defined(XBYAK64_WIN)
// 64bit Windows: x is taken from rcx; lea forms x + y in one instruction
lea(rax, ptr [rcx + y]);
#else
// remaining case (64bit gcc): x is taken from edi
lea(eax, ptr [edi + y]);
#endif
ret();
}
// the generated code viewed as int (*)(int)
int (*get() const)(int) { return (int (*)(int))getCode(); }
};
/*
	Generates int f(const char *s) that calls atoi(s) and returns its result.
	The generated code is a real (non-tail) call, so it has to set up whatever
	the callee expects on each target.
*/
class CallAtoi : public Xbyak::CodeGenerator {
	// declared private with no body in this file, so CallAtoi objects cannot be assigned
	void operator=(const CallAtoi&);
public:
	CallAtoi()
	{
#ifdef XBYAK32
		// 32bit: arguments are passed on the stack. Our own return address sits
		// on top of the caller's argument, so the string pointer must be pushed
		// again; otherwise atoi would read the return address as its argument.
		mov(eax, ptr [esp + 4]);
		push(eax);
		call((void*)atoi);
		add(esp, 4); // drop the copy pushed above
#else
		// 64bit: the string pointer is already in the first argument register
		// (rcx on Windows, rdi with gcc) and is passed straight through.
#ifdef XBYAK64_WIN
		// 32 bytes of shadow space that the callee may overwrite (without it
		// our return address can be destroyed), plus 8 bytes to keep rsp
		// 16-byte aligned at the call
		const int frame = 32 + 8;
#else
		// 8 bytes to keep rsp 16-byte aligned at the call
		const int frame = 8;
#endif
		sub(rsp, frame);
		// call through a register: atoi may be further than +-2GB from the code buffer
		mov(rax, (size_t)atoi);
		call(rax);
		add(rsp, frame);
#endif
		ret();
	}
	// the generated code viewed as int (*)(const char *)
	int (*get() const)(const char *) { return (int (*)(const char *))getCode(); }
};
/*
	Generates int f(const char *s) that tail-jumps into atoi: the caller's
	argument and return address are left untouched, so atoi returns directly
	to the caller of the generated code.
*/
class JmpAtoi : public Xbyak::CodeGenerator {
	// declared private with no body in this file, so JmpAtoi objects cannot be assigned
	void operator=(const JmpAtoi&);
public:
	JmpAtoi()
	{
#ifdef XBYAK32
		/* the caller has already pushed the string pointer (e.g. "456") */
		jmp((void*)atoi);
#else
		/* 64bit: the string pointer is already in the first argument register */
		// jump through a register: atoi may be further than +-2GB from the code buffer
		mov(rax, (size_t)atoi);
		jmp(rax);
#endif
	}
	// the generated code viewed as int (*)(const char *)
	int (*get() const)(const char *) { return (int (*)(const char *))getCode(); }
};
int main()
{
try {
Sample s;
printf("Xbyak version=%s\n", s.getVersionString());
#ifdef XBYAK64_GCC
puts("64bit mode(gcc)");
#elif defined(XBYAK64_WIN)
puts("64bit mode(win)");
#else
puts("32bit");
#endif
s.gen();
int (*func)(int) = (int (*)(int))s.getCode();
for (int i = 0; i <= 10; i++) {
printf("0 + ... + %d = %d\n", i, func(i));
}
for (int i = 0; i < 10; i++) {
AddFunc a(i);
int (*add)(int) = a.get();
int y = add(i);
printf("%d + %d = %d\n", i, i, y);
}
CallAtoi c;
printf("call atoi(\"123\") = %d\n", c.get()("123"));
JmpAtoi j;
printf("jmp atoi(\"456\") = %d\n", j.get()("456"));
{
// use memory allocated by user
using namespace Xbyak;
const size_t codeSize = 1024;
uint8 buf[codeSize + 16];
uint8 *p = CodeArray::getAlignedAddress(buf);
CodeArray::protect(p, codeSize, true);
Sample s(p, codeSize);
s.gen();
int (*func)(int) = (int (*)(int))s.getCode();
printf("0 + ... + %d = %d\n", 100, func(100));
CodeArray::protect(p, codeSize, false);
}
puts("OK");
} catch (Xbyak::Error err) {
printf("ERR:%s(%d)\n", Xbyak::ConvertErrorToString(err), err);
} catch (...) {
printf("unknown error\n");
}
return 0;
}

427
sample/test0.vcproj Normal file
View File

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="test0"
ProjectGUID="{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test0.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test0.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test0.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test0.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test0.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test0.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test0.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test0.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="test0.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

65
sample/test_util.cpp Normal file
View File

@ -0,0 +1,65 @@
#include <stdio.h>
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
void putCPUinfo()
{
using namespace Xbyak::util;
Cpu cpu;
printf("vendor %s\n", cpu.has(Cpu::tINTEL) ? "intel" : "amd");
static const struct {
Cpu::Type type;
const char *str;
} tbl[] = {
{ Cpu::tMMX, "mmx" },
{ Cpu::tMMX2, "mmx2" },
{ Cpu::tCMOV, "cmov" },
{ Cpu::tSSE, "sse" },
{ Cpu::tSSE2, "sse2" },
{ Cpu::tSSE3, "sse3" },
{ Cpu::tSSSE3, "ssse3" },
{ Cpu::tSSE41, "sse41" },
{ Cpu::tSSE42, "sse42" },
{ Cpu::tPOPCNT, "popcnt" },
{ Cpu::t3DN, "3dn" },
{ Cpu::tE3DN, "e3dn" },
{ Cpu::tSSE4a, "sse4a" },
{ Cpu::tSSE5, "sse5" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
}
printf("\n");
}
#ifdef XBYAK32
// Generator for a tiny routine built from EnableSetEip::setEipTo(eax) followed by ret;
// putEip() calls it as int (*)() and prints the returned value as "eip".
// NOTE(review): presumably setEipTo(eax) loads the current instruction pointer
// into eax -- confirm in xbyak_util.h.
struct EipTest : public Xbyak::util::EnableSetEip<Xbyak::CodeGenerator> {
EipTest()
{
setEipTo(eax);
ret();
}
};
void putEip()
{
EipTest s;
int (*getEip)() = (int(*)())s.getCode();
printf("eip=%08x\n", getEip());
}
#endif
// Prints the build width and the CPU information; 32bit builds also print the eip sample.
int main()
{
#ifndef XBYAK32
	puts("64bit");
	putCPUinfo();
#else
	puts("32bit");
	putCPUinfo();
	putEip();
#endif
	return 0;
}

427
sample/test_util.vcproj Normal file
View File

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="test_util"
ProjectGUID="{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test_util.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test_util.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test_util.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test_util.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test_util.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test_util.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test_util.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test_util.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="test_util.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

382
sample/toyvm.cpp Normal file
View File

@ -0,0 +1,382 @@
/*
toy vm
register A, B : 32bit
PC : program counter
mem_ 4byte x 65536
every instruction is a fixed 4 bytes
every immediate is 16 bit
R = A or B
vldiR, imm ; R = imm
vldR, idx ; R = mem_[idx]
vstR, idx ; mem_[idx] = R
vaddiR, imm ; R += imm
vsubiR, imm ; R -= imm
vaddR, idx ; R += mem_[idx]
vsubR, idx ; R -= mem_[idx]
vputR ; print R
vjnzR, offset; if (R != 0) then jmp(PC += offset(signed))
*/
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <vector>
#include "xbyak/xbyak.h"
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#endif
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
using namespace Xbyak;
/*
	read the CPU time stamp counter (rdtsc instruction) as a 64bit value
	_WIN64 : compiler intrinsic
	WIN32  : MSVC inline asm; rdtsc leaves the counter in edx:eax and the
	         function relies on that pair being the 64bit return value,
	         so no return statement is written
	other  : gcc style inline asm
*/
uint64 getRdtsc()
{
#ifdef _WIN64
	return __rdtsc();
#elif defined(WIN32)
	__asm {
		rdtsc
	}
#else
	/*
		read both halves explicitly.
		The "=A" constraint with a single 64bit variable means the edx:eax
		pair only on 32bit targets; on x86-64 gcc allocates rax OR rdx and
		the upper 32 bits of the counter are lost.
	*/
	unsigned int lo, hi;
	__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return (static_cast<uint64>(hi) << 32) | lo;
#endif
}
class ToyVm : public Xbyak::CodeGenerator {
typedef std::vector<uint32> Buffer;
public:
enum Reg {
A, B
};
enum Code {
LD, LDI, ST, ADD, ADDI, SUB, SUBI, PUT, JNZ,
END_OF_CODE
};
ToyVm()
: mark_(0)
{
::memset(mem_, 0, sizeof(mem_));
}
void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); }
void vld(Reg r, uint16 idx) { encode(LD, r, idx); }
void vst(Reg r, uint16 idx) { encode(ST, r, idx); }
void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); }
void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); }
void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); }
void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); }
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16>(offset)); }
void vput(Reg r) { encode(PUT, r); }
void setMark()
{
mark_ = (int)code_.size();
}
int getMarkOffset()
{
return mark_ - (int)code_.size() - 1;
}
void run()
{
bool debug = false;//true;
uint32 reg[2] = { 0, 0 };
const size_t end = code_.size();
uint32 pc = 0;
for (;;) {
uint32 x = code_[pc];
uint32 code, r, imm;
decode(code, r, imm, x);
if (debug) {
printf("---\n");
printf("A %08x B %08x\n", reg[0], reg[1]);
printf("mem_[] = %08x %08x %08x\n", mem_[0], mem_[1], mem_[2]);
printf("pc=%4d, code=%02x, r=%d, imm=%04x\n", pc, code, r, imm);
}
switch (code) {
case LDI:
reg[r] = imm;
break;
case LD:
reg[r] = mem_[imm];
break;
case ST:
mem_[imm] = reg[r];
break;
case ADD:
reg[r] += mem_[imm];
break;
case ADDI:
reg[r] += imm;
break;
case SUB:
reg[r] -= mem_[imm];
break;
case SUBI:
reg[r] -= imm;
break;
case PUT:
printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]);
break;
case JNZ:
if (reg[r] != 0) pc += static_cast<signed short>(imm);
break;
default:
assert(0);
break;
}
pc++;
if (pc >= end) break;
} // for (;;)
}
void recompile()
{
using namespace Xbyak;
/*
esi : A
edi : B
ebx : mem_
for speed up
mem_[0] : eax
mem_[1] : ecx
mem_[2] : edx
*/
push(ebx);
push(esi);
push(edi);
const Reg32 reg[2] = { esi, edi };
const Reg32 mem(ebx);
const Reg32 memTbl[] = { eax, ecx, edx };
const size_t memTblNum = NUM_OF_ARRAY(memTbl);
for (size_t i = 0; i < memTblNum; i++) xor(memTbl[i], memTbl[i]);
xor(reg[0], reg[0]);
xor(reg[1], reg[1]);
mov(mem, (size_t)mem_);
const size_t end = code_.size();
uint32 pc = 0;
uint32 labelNum = 0;
for (;;) {
uint32 x = code_[pc];
uint32 code, r, imm;
decode(code, r, imm, x);
L(Label::toStr(labelNum++).c_str());
switch (code) {
case LDI:
mov(reg[r], imm);
break;
case LD:
if (imm < memTblNum) {
mov(reg[r], memTbl[imm]);
} else {
mov(reg[r], ptr[mem + imm * 4]);
}
break;
case ST:
if (imm < memTblNum) {
mov(memTbl[imm], reg[r]);
} else {
mov(ptr [mem + imm * 4], reg[r]);
}
break;
case ADD:
if (imm < memTblNum) {
add(reg[r], memTbl[imm]);
} else {
add(reg[r], ptr [mem + imm * 4]);
}
break;
case ADDI:
add(reg[r], imm);
break;
case SUB:
if (imm < memTblNum) {
sub(reg[r], memTbl[imm]);
} else {
sub(reg[r], ptr [mem + imm * 4]);
}
break;
case SUBI:
sub(reg[r], imm);
break;
case PUT:
{
static const char *str = "%c %8d(0x%08x)\n";
push(eax);
push(edx);
push(ecx);
push(reg[r]);
push(reg[r]);
push('A' + r);
push((int)str);
call((void*)printf);
add(esp, 4 * 4);
pop(ecx);
pop(edx);
pop(eax);
}
break;
case JNZ:
test(reg[r], reg[r]);
jnz(Label::toStr(labelNum + static_cast<signed short>(imm)).c_str());
break;
default:
assert(0);
break;
}
pc++;
if (pc >= end) break;
} // for (;;)
pop(edi);
pop(esi);
pop(ebx);
ret();
}
private:
uint32 mem_[65536];
Buffer code_;
int mark_;
void decode(uint32& code, uint32& r, uint32& imm, uint32 x)
{
code = x >> 24;
r = (x >> 16) & 0xff;
imm = x & 0xffff;
}
void encode(Code code, Reg r, uint16 imm = 0)
{
uint32 x = (code << 24) | (r << 16) | imm;
code_.push_back(x);
}
};
class Fib : public ToyVm {
public:
Fib(int n)
{
if (n >= 65536) {
fprintf(stderr, "current version support only imm16\n");
return;
}
/*
A : c
B : temporary
mem_[0] : p
mem_[1] : t
mem_[2] : n
*/
vldi(A, 1); // c
vst(A, 0); // p(1)
vldi(B, static_cast<uint16>(n));
vst(B, 2); // n
// lp
setMark();
vst(A, 1); // t = c
vadd(A, 0); // c += p
vld(B, 1);
vst(B, 0); // p = t
// vput(A);
vld(B, 2);
vsubi(B, 1);
vst(B, 2); // n--
vjnz(B, getMarkOffset());
vput(A);
}
void runByJIT()
{
((void (*)())getCode())();
}
};
/*
	plain C++ version of the loop built by Fib, used as the speed reference.
	The body always runs at least once and n is decremented before it is
	tested.
*/
void fibC(uint32 n)
{
	uint32 prev = 1;
	uint32 cur = 1;
	do {
		const uint32 saved = cur;
		cur += prev;
		prev = saved;
	} while (--n != 0);
	printf("c=%d(0x%08x)\n", cur, cur);
}
int main()
{
try {
const int n = 10000;
uint64 p;
Fib fib(n);
fib.recompile();
p = getRdtsc();
fib.run();
printf("vm %.2fKclk\n", (signed)(getRdtsc() - p) * 1e-3);
p = getRdtsc();
fib.runByJIT();
printf("jit %.2fKclk\n", (signed)(getRdtsc() - p) * 1e-3);
p = getRdtsc();
fibC(n);
printf("native C %.2fKclk\n", (signed)(getRdtsc() - p) * 1e-3);
} catch (Xbyak::Error err) {
printf("ERR:%s(%d)\n", Xbyak::ConvertErrorToString(err), err);
} catch (...) {
printf("unknown error\n");
}
return 0;
}
/*
the code generated by Xbyak
push ebx
push esi
push edi
xor eax,eax
xor ecx,ecx
xor edx,edx
xor esi,esi
xor edi,edi
mov ebx,0EFF58h
mov esi,1
mov eax,esi
mov edi,2710h
mov edx,edi
.lp:
mov ecx,esi
add esi,eax
mov edi,ecx
mov eax,edi
mov edi,edx
sub edi,1
mov edx,edi
test edi,edi
jne .lp
push eax
push edx
push ecx
push esi
push esi
push 41h
push 42C434h
call printf (409342h)
add esp,10h
pop ecx
pop edx
pop eax
pop edi
pop esi
pop ebx
ret
*/

427
sample/toyvm.vcproj Normal file
View File

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="toyvm"
ProjectGUID="{2E41C7AF-39FF-454C-B081-37445378DCB3}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/toyvm.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/toyvm.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/toyvm.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/toyvm.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/toyvm.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/toyvm.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/toyvm.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/toyvm.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="toyvm.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

7
test/6.bat Normal file
View File

@ -0,0 +1,7 @@
@echo off
rem assemble test.asm as win64 with nasm and with yasm and print both listings
rem nasm writes its listing to a.lst, yasm to b.lst
rem `cat` is not a cmd.exe command: a unix style cat has to be on PATH
echo nasm
nasm -l a.lst -f win64 test.asm
cat a.lst
echo yasm
yasm -l b.lst -f win64 test.asm
cat b.lst

35
test/Makefile Normal file
View File

@ -0,0 +1,35 @@
# helper programs for the encoding tests
#   make              : 32bit build (-m32, -DXBYAK32)
#   make MODE_BIT=64  : 64bit build (-m64, -DXBYAK64)
#   make test         : run test_nm.sh for nasm/yasm, 32bit/64bit
TARGET = make_nm normalize_prefix
XBYAK_INC=../xbyak/xbyak.h
ifeq ($(MODE_BIT),64)
XBYAK_OPT=-m64
XBYAK_COPT=-DXBYAK64
else
XBYAK_OPT=-m32
XBYAK_COPT=-DXBYAK32
endif

# these targets are commands, not files: run them even if a file of the
# same name exists
.PHONY: all test clean

all: $(TARGET)

CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
# -fno-operator-names : the sources call xbyak mnemonics such as xor()
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ $(XBYAK_OPT) $(XBYAK_COPT) $(CFLAGS_WARN)

# rebuilt when the generator source or the xbyak header changes
make_nm: make_nm.cpp $(XBYAK_INC)
	g++ $(CFLAGS) make_nm.cpp -o $@

normalize_prefix: normalize_prefix.cpp
	g++ $(CFLAGS) normalize_prefix.cpp -o $@

# not part of TARGET; build with `make test_mmx`
test_mmx: test_mmx.cpp
	g++ $(CFLAGS) test_mmx.cpp -o $@ -lpthread

# test_nm.sh compiles make_nm itself; only the filter has to exist beforehand
test: normalize_prefix
	./test_nm.sh
	./test_nm.sh Y
	./test_nm.sh 64
	./test_nm.sh Y64

clean:
	rm -rf *.o $(TARGET)

9
test/a.bat Normal file
View File

@ -0,0 +1,9 @@
@echo off
rem assemble test.asm as win32 (WIN32 defined) with nasm and with yasm and print the listings
rem both assemblers write to a.lst, so the yasm listing replaces the nasm one
rem `rm` and `cat` are not cmd.exe commands: unix style tools have to be on PATH
echo 32bit
rm a.lst
echo nasm
nasm -l a.lst -f win32 -DWIN32 test.asm
cat a.lst
echo yasm
yasm -l a.lst -f win32 -DWIN32 test.asm
cat a.lst

78
test/address.cpp Normal file
View File

@ -0,0 +1,78 @@
#include <stdio.h>
#include <string.h>
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
/*
	print one "mov ecx, [address]" line to stdout for every combination of
		base  : each entry of regTbl, or no base register
		index : each entry of regTbl except entry 4 (esp/rsp in the callers'
		        tables, which can not be an index register), or no index
		scale : none, *1, *2, *4, *8 (printed only when an index is present)
		disp  : 0, 1, 1000, -1, -1000
	isJIT == false : assembler syntax   mov ecx, [eax+ecx*2+1]
	isJIT == true  : xbyak source       mov (ecx, ptr[eax+ecx*2+1]); dump();
	With neither base nor index the displacement is printed as 0x%08X
	(preceded by "(void*)" for xbyak).

	regTbl    : register names, at most 4 characters each
	regTblNum : number of entries of regTbl
*/
void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
{
	static const int scaleTbl[] = { 0, 1, 2, 4, 8 };
	static const int dispTbl[] = { 0, 1, 1000, -1, -1000 };
	const size_t scaleNum = sizeof(scaleTbl) / sizeof(scaleTbl[0]);
	const size_t dispNum = sizeof(dispTbl) / sizeof(dispTbl[0]);
	/* the extra iteration (== regTblNum) stands for "no register" */
	for (size_t i = 0; i <= regTblNum; i++) {
		/* 0 instead of indexing one past the end of regTbl */
		const char *base = (i < regTblNum) ? regTbl[i] : 0;
		for (size_t j = 0; j <= regTblNum; j++) {
			if (j == 4) continue; /* esp is not index register */
			const char *index = (j < regTblNum) ? regTbl[j] : 0;
			for (size_t k = 0; k < scaleNum; k++) {
				const int scale = scaleTbl[k];
				for (size_t m = 0; m < dispNum; m++) {
					const int disp = dispTbl[m];
					bool isFirst = true; /* nothing printed inside [] yet */
					if (isJIT) {
						printf("mov (ecx, ptr[");
					} else {
						printf("mov ecx, [");
					}
					if (base) {
						printf("%s", base);
						isFirst = false;
					}
					if (index) {
						if (!isFirst) putchar('+');
						printf("%s", index);
						if (scale) printf("*%d", scale);
						isFirst = false;
					}
					if (isFirst) {
						/* displacement only : absolute address */
						if (isJIT) printf("(void*)");
						printf("0x%08X", disp);
					} else {
						/* a negative value brings its own sign */
						if (disp >= 0) {
							putchar('+');
						}
						printf("%d", disp);
					}
					if (isJIT) {
						printf("]); dump();\n");
					} else {
						printf("]\n");
					}
				}
			}
		}
	}
}
/*
	address <phase> [jit]
		phase : "1" selects the 32bit register table; anything else (or no
		        argument) selects the 64bit table, which exists only when
		        XBYAK64 is defined
		jit   : any second argument switches the output from assembler
		        syntax to xbyak source
	The selected mode is reported on stderr.
*/
int main(int argc, char *argv[])
{
	/* skip the program name */
	--argc;
	++argv;
	const bool phase = (argc > 0) && (strcmp(argv[0], "1") == 0);
	const bool isJIT = argc > 1;
	fprintf(stderr, "phase:%c %s\n", phase ? '1' : '2', isJIT ? "jit" : "asm");
	if (!phase) {
#ifdef XBYAK64
		static const char reg64Tbl[][5] = {
			"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
		};
		genAddress(isJIT, reg64Tbl, NUM_OF_ARRAY(reg64Tbl));
#endif
		return 0;
	}
	static const char reg32Tbl[][5] = {
		"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
#ifdef XBYAK64
		"r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
#endif
	};
	genAddress(isJIT, reg32Tbl, NUM_OF_ARRAY(reg32Tbl));
	return 0;
}

100
test/jmp.cpp Normal file
View File

@ -0,0 +1,100 @@
#include <stdio.h>
#include "xbyak/xbyak.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
using namespace Xbyak;
/*
	generates one jmp together with `offset` nops between the jump and its
	label, to check which encoding (short / near) is produced

	reference listing
	 4                    X0:
	 5 00000004 EBFE          jmp short X0
	 6
	 7                    X1:
	 8 00000006 <res 00000001> dummyX1 resb 1
	 9 00000007 EBFD          jmp short X1
	10
	11                    X126:
	12 00000009 <res 0000007E> dummyX126 resb 126
	13 00000087 EB80          jmp short X126
	14
	15                    X127:
	16 00000089 <res 0000007F> dummyX127 resb 127
	17 00000108 E97CFFFFFF    jmp near X127
	18
	19 0000010D EB00          jmp short Y0
	20                    Y0:
	21
	22 0000010F EB01          jmp short Y1
	23 00000111 <res 00000001> dummyY1 resb 1
	24                    Y1:
	25
	26 00000112 EB7F          jmp short Y127
	27 00000114 <res 0000007F> dummyY127 resb 127
	28                    Y127:
	29
	30 00000193 E980000000    jmp near Y128
	31 00000198 <res 00000080> dummyY128 resb 128
	32                    Y128:
*/
struct TestJmp : public CodeGenerator {
	/* emit n nop instructions (none when n <= 0) */
	void putNop(int n)
	{
		while (n-- > 0) {
			nop();
		}
	}
	/*
		offset  : number of nops between the label and the jmp
		isBack  : true  -> label, nops, jmp   (backward jump; isShort is not used)
		          false -> jmp, nops, label   (forward jump)
		isShort : forward jump only; false requests T_NEAR
		A counter shared by all instances is part of the label name.
	*/
	TestJmp(int offset, bool isBack, bool isShort)
	{
		static int count = 0;
		char label[32];
		if (!isBack) {
			sprintf(label, "L(\"Y%d\");\n", count);
			if (isShort) {
				jmp(label);
			} else {
				jmp(label, T_NEAR);
			}
			putNop(offset);
			L(label);
			count++;
			return;
		}
		sprintf(label, "L(\"X%d\");\n", count);
		L(label);
		putNop(offset);
		jmp(label);
		count++;
	}
};
int main()
{
static const struct Tbl {
int offset;
bool isBack;
bool isShort;
const char *result;
} tbl[] = {
{ 0, true, true, "EBFE" },
{ 1, true, true, "EBFD" },
{ 126, true, true, "EB80" },
{ 127, true, false, "E97CFFFFFF" },
{ 0, false, true, "EB00" },
{ 1, false, true, "EB01" },
{ 127, false, true, "EB7F" },
{ 128, false, false, "E980000000" },
};
for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
TestJmp jmp(p->offset, p->isBack, p->isShort);
const uint8 *q = (const uint8*)jmp.getCode();
char buf[32];
if (p->isBack) q += p->offset; /* skip nop */
for (int j = 0; j < jmp.getSize() - p->offset; j++) {
sprintf(&buf[j * 2], "%02X", q[j]);
}
if (strcmp(buf, p->result) != 0) {
printf("error %d assume:%s, err=%s\n", i, p->result, buf);
} else {
printf("ok %d\n", i);
}
}
}

1159
test/make_nm.cpp Normal file

File diff suppressed because it is too large Load Diff

1918
test/nm.cpp Normal file

File diff suppressed because it is too large Load Diff

45
test/nm_frame.cpp Normal file
View File

@ -0,0 +1,45 @@
#include <stdio.h>
#include "xbyak/xbyak.h"
using namespace Xbyak;
/*
	Sample : runs the generated test body nm.cpp inside a CodeGenerator
	member function, so the mnemonic calls in that file resolve to the
	generator's members.
	Any exception raised while encoding is reported on stdout.
*/
class Sample : public CodeGenerator {
	void operator=(const Sample&); // not assignable
public:
	void gen()
	{
		try {
#include "nm.cpp"
		} catch (Xbyak::Error err) {
			printf("ERR:%s(%d)\n", ConvertErrorToString(err), err);
		} catch (...) {
			printf("unknown error\n");
		}
	}
};
#define _STR(x) #x
#define TEST(syntax) err = true; try { syntax; err = false; } catch (Xbyak::Error) { } catch (...) { } if (!err) printf("should be err:%s;\n", _STR(syntax))
class ErrorSample : public CodeGenerator {
void operator=(const ErrorSample&);
public:
void gen()
{
bool err;
TEST(mov(ptr[eax],1));
TEST(test(ptr[eax],1));
TEST(adc(ptr[eax],1));
TEST(setz(eax));
}
};
int main()
{
Sample s;
s.gen();
ErrorSample es;
es.gen();
}

45
test/normalize_prefix.cpp Normal file
View File

@ -0,0 +1,45 @@
/*
normalize prefix
*/
#include <string>
#include <set>
#include <iostream>
#include <memory.h>
typedef unsigned char uint8;
/*
	bring the leading prefix bytes of a hex dump line into a canonical form

	The line is read two characters at a time for as long as the pair is one
	of "66", "67", "F2", "F3" (upper case only). The result is each prefix
	that was seen, once and in ascending order, followed by the rest of the
	line.
		"F36690" -> "66F390"
		"666690" -> "6690"
*/
std::string normalize(const std::string& line)
{
	/* kept in ascending order : this is also the output order */
	static const char *const prefixTbl[] = { "66", "67", "F2", "F3" };
	const size_t prefixNum = sizeof(prefixTbl) / sizeof(prefixTbl[0]);
	bool seen[sizeof(prefixTbl) / sizeof(prefixTbl[0])] = { false };

	size_t pos = 0;
	while (pos < line.size()) {
		size_t hit = prefixNum;
		for (size_t i = 0; i < prefixNum; i++) {
			if (line.compare(pos, 2, prefixTbl[i]) == 0) {
				hit = i;
				break;
			}
		}
		if (hit == prefixNum) break; /* first pair that is not a prefix */
		seen[hit] = true;
		pos += 2;
	}

	std::string out;
	for (size_t i = 0; i < prefixNum; i++) {
		if (seen[i]) out += prefixTbl[i];
	}
	/* the rest is appended as a C string */
	out += line.c_str() + pos;
	return out;
}
int main()
{
std::string line;
while (std::getline(std::cin, line)) {
std::string normalizedLine = normalize(line);
std::cout << normalizedLine << '\n';//std::endl;
}
}

33
test/test_address.bat Normal file
View File

@ -0,0 +1,33 @@
@echo off
rem compare the addressing mode encodings of xbyak with those of nasm
rem usage: test_address [64]
rem   64        : OPT2=-DXBYAK64, nasm output format win64
rem   otherwise : OPT2 empty,     nasm output format win32
rem %OPT% is not set in this file (NOTE(review): presumably extra cl options taken from the environment - confirm)
if /i "%1"=="64" (
set OPT2=-DXBYAK64
set OPT3=win64
) else (
set OPT2=
set OPT3=win32
)
rem run the update script of ..\gen first
pushd ..\gen
call update
popd
rem 64 : run phase 1 and phase 2 of address.cpp, otherwise only phase 1
if /i "%1"=="64" (
call :sub 1
call :sub 2
) else (
call :sub 1
)
goto end
rem :sub <phase>   (inside the subroutine %1 is the phase given to `call :sub`)
rem   a.asm  : assembler source printed by address.exe
rem   ok.lst : third column of the nasm listing of a.asm
rem   nm.cpp : the same cases as xbyak calls, included by nm_frame.cpp
rem   x.lst  : output of nm_frame.exe, compared with ok.lst
:sub
cl address.cpp %OPT% %OPT2%
address %1% > a.asm
nasm -f %OPT3% -l a.lst a.asm
awk "{print $3}" < a.lst > ok.lst
address %1% jit > nm.cpp
cl -I../ -DTEST_NM nm_frame.cpp %OPT% %OPT2%
nm_frame > x.lst
diff x.lst ok.lst
wc x.lst
:end
:end

8
test/test_all.bat Normal file
View File

@ -0,0 +1,8 @@
@echo off
rem run every test script of this directory one after another:
rem mnemonic encoding (test_nm_all), addressing modes (32bit and 64bit), jmp encoding
call test_nm_all
echo *** test addressing ***
call test_address
call test_address 64
echo *** test jmp address ***
call test_jmp
echo *** all test end ***

5
test/test_jmp.bat Normal file
View File

@ -0,0 +1,5 @@
:: run the update script of ..\gen, then compile jmp.cpp with cl and run it
:: %OPT% is not set in this file (NOTE(review): presumably extra cl options taken from the environment - confirm)
pushd ..\gen
call update
popd
cl -I../ -DTEST_NM jmp.cpp %OPT%
jmp

78
test/test_mmx.cpp Normal file
View File

@ -0,0 +1,78 @@
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include "../../include.mie/mie_thread.h"
#include "xbyak/xbyak.h"
class WriteMMX : public Xbyak::CodeGenerator {
public:
WriteMMX()
{
#ifdef XBYAK32
mov(ecx, ptr [esp + 4]);
#endif
movd(mm0, ecx);
ret();
}
void (*set() const)(int x) { return (void (*)(int x))getCode(); }
};
class ReadMMX : public Xbyak::CodeGenerator {
public:
ReadMMX()
{
movd(eax, mm0);
ret();
}
int (*get() const)() { return (int (*)())getCode(); }
};
/*
	Test : thread that writes n into mm0 once, then reads mm0 back every
	1000 (MIE_Sleep units) and reports when the value differs from n.
	The loop never ends.
*/
class Test : public MIE::ThreadBase<Test> {
	int n_;
public:
	Test(int n)
		: n_(n)
	{
	}
	void threadEntry()
	{
		printf("n=%d\n", n_);
		WriteMMX writer;
		writer.set()(n_);
		ReadMMX reader;
		while (true) {
			const int b = reader.get()();
			printf("b=%d\n", b);
			if (b != n_) {
				printf("mm0 has changed!\n");
			}
			MIE::MIE_Sleep(1000);
		}
	}
	void stopThread() { }
};
int main(int argc, char *argv[])
{
#ifdef XBYAK32
puts("32bit");
#else
puts("64bit");
#endif
try {
int n = atoi(argc == 1 ? "1223" : argv[1]);
Test test0(n), test1(n + 1);
test0.beginThread();
test1.beginThread();
test0.joinThread();
test1.joinThread();
} catch (Xbyak::Error err) {
printf("ERR:%s(%d)\n", Xbyak::ConvertErrorToString(err), err);
} catch (...) {
printf("unknown error\n");
}
}

42
test/test_nm.bat Normal file
View File

@ -0,0 +1,42 @@
@echo off
rem compare the instruction encodings of xbyak with those of nasm / yasm
rem usage: test_nm [Y|64|Y64]
rem   (none) : nasm, win32
rem   Y      : yasm, win32
rem   64     : nasm, win64
rem   Y64    : yasm, win64
rem variables set below
rem   EXE    : assembler command
rem   OPT2   : extra defines for cl
rem   OPT3   : assembler output format
rem   FILTER : filter applied to both byte listings (normalize_prefix in the 64bit modes, cat otherwise)
rem   Y      : 1 when yasm is used (selects the awk variant that skips the first listing line)
rem %OPT% is not set in this file (NOTE(review): presumably extra cl options taken from the environment - confirm)
rem NOTE(review): in the default branch -DXBYAK32 is part of EXE (so it goes to nasm) and OPT2 is empty,
rem               while test_nm.sh passes -DXBYAK32 to the compiler - confirm which one is intended
set FILTER=cat
set Y=0
if /i "%1"=="Y" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK32
set OPT3=win32
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe -DXBYAK32
set OPT2=
set OPT3=win32
)
rem run the update script of ..\gen first
pushd ..\gen
call update
popd
rem make_nm without argument prints the assembler source, `make_nm jit` prints the xbyak calls
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
make_nm > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem take column 3 of the listing; a value containing "-" is continued on the next line:
rem keep it without its last character and print it joined with the following value
if /i "%Y%"=="1" (
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
rem nm.cpp is included by nm_frame.cpp; its output x.lst has to equal ok.lst
make_nm jit > nm.cpp
cl -I../ -DTEST_NM nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff x.lst ok.lst
wc x.lst

42
test/test_nm.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/tcsh
# compare the instruction encodings of xbyak with those of nasm / yasm
# usage: ./test_nm.sh [Y|64|Y64]
#   (none) : nasm, 32bit
#   Y      : yasm, 32bit
#   64     : nasm, 64bit
#   Y64    : yasm, 64bit
# EXE    : assembler command
# OPT2   : extra defines for g++
# OPT3   : assembler output format
# FILTER : filter applied to both byte listings (cat = unchanged)

set FILTER=cat

if ($1 == "Y") then
	echo "yasm(32bit)"
	set EXE=yasm
	set OPT2="-DUSE_YASM -DXBYAK32"
	set OPT3=win32
else if ($1 == "64") then
	echo "nasm(64bit)"
	set EXE=nasm
	set OPT2=-DXBYAK64
	set OPT3=win64
	set FILTER=./normalize_prefix
else if ($1 == "Y64") then
	# this branch runs yasm, so say so
	echo "yasm(64bit)"
	set EXE=yasm
	set OPT2="-DUSE_YASM -DXBYAK64"
	set OPT3=win64
	set FILTER=./normalize_prefix
else
	echo "nasm(32bit)"
	set EXE=nasm
	set OPT2=-DXBYAK32
	set OPT3=win32
endif

set CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
echo "compile make_nm.cpp"
g++ $CFLAGS make_nm.cpp -o make_nm

# without argument make_nm prints the assembler source
./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# take column 3 of the listing; a value containing "-" is continued on the
# next line: keep it without its last character and print it joined with the
# following value
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst

# `make_nm jit` prints the same cases as xbyak calls; nm_frame.cpp includes nm.cpp
echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DTEST_NM nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
# "ok" is printed only when both listings are identical; the exit status is always 0
diff ok.lst x.lst && echo "ok"
exit 0

9
test/test_nm_all.bat Normal file
View File

@ -0,0 +1,9 @@
@echo off
rem run test_nm once for each mode it accepts: (none), Y, 64, Y64
echo *** nasm(32bit) ***
call test_nm
echo *** yasm(32bit) ***
call test_nm Y
echo *** nasm(64bit) ***
call test_nm 64
echo *** yasm(64bit) ***
call test_nm Y64

76
xbyak.sln Normal file
View File

@ -0,0 +1,76 @@
þ½Ž¿
Microsoft Visual Studio Solution File, Format Version 10.00
# Visual Studio 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcproj", "{654BD79B-59D3-4B10-BBAA-158BAB272828}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.ActiveCfg = Debug|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.Build.0 = Debug|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.ActiveCfg = Debug|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.Build.0 = Debug|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.ActiveCfg = Release|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.Build.0 = Release|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.ActiveCfg = Release|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.Build.0 = Release|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.ActiveCfg = Debug|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.Build.0 = Debug|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.ActiveCfg = Debug|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.Build.0 = Debug|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.ActiveCfg = Release|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.Build.0 = Release|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.ActiveCfg = Release|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.Build.0 = Release|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.ActiveCfg = Debug|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.Build.0 = Debug|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.ActiveCfg = Debug|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.Build.0 = Debug|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.ActiveCfg = Release|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.Build.0 = Release|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.ActiveCfg = Release|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.Build.0 = Release|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.ActiveCfg = Debug|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.Build.0 = Debug|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.ActiveCfg = Debug|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.Build.0 = Debug|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.ActiveCfg = Release|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.Build.0 = Release|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.ActiveCfg = Release|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.Build.0 = Release|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.ActiveCfg = Debug|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.Build.0 = Debug|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.ActiveCfg = Debug|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.Build.0 = Debug|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.ActiveCfg = Release|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.Build.0 = Release|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.ActiveCfg = Release|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.Build.0 = Release|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.ActiveCfg = Debug|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.Build.0 = Debug|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.ActiveCfg = Debug|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.Build.0 = Debug|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.ActiveCfg = Release|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.Build.0 = Release|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.ActiveCfg = Release|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

1435
xbyak/xbyak.h Normal file

File diff suppressed because it is too large Load Diff

258
xbyak/xbyak_bin2hex.h Normal file
View File

@ -0,0 +1,258 @@
// Binary-spelled byte constants: BXXXXXXXX names the value whose eight bits,
// most significant bit first, are XXXXXXXX. The values are written in hex so
// that each hex digit lines up with one nibble of the name (B00011000 == 0x18).
enum {
	B00000000 = 0x00,
	B00000001 = 0x01,
	B00000010 = 0x02,
	B00000011 = 0x03,
	B00000100 = 0x04,
	B00000101 = 0x05,
	B00000110 = 0x06,
	B00000111 = 0x07,
	B00001000 = 0x08,
	B00001001 = 0x09,
	B00001010 = 0x0A,
	B00001011 = 0x0B,
	B00001100 = 0x0C,
	B00001101 = 0x0D,
	B00001110 = 0x0E,
	B00001111 = 0x0F,
	B00010000 = 0x10,
	B00010001 = 0x11,
	B00010010 = 0x12,
	B00010011 = 0x13,
	B00010100 = 0x14,
	B00010101 = 0x15,
	B00010110 = 0x16,
	B00010111 = 0x17,
	B00011000 = 0x18,
	B00011001 = 0x19,
	B00011010 = 0x1A,
	B00011011 = 0x1B,
	B00011100 = 0x1C,
	B00011101 = 0x1D,
	B00011110 = 0x1E,
	B00011111 = 0x1F,
	B00100000 = 0x20,
	B00100001 = 0x21,
	B00100010 = 0x22,
	B00100011 = 0x23,
	B00100100 = 0x24,
	B00100101 = 0x25,
	B00100110 = 0x26,
	B00100111 = 0x27,
	B00101000 = 0x28,
	B00101001 = 0x29,
	B00101010 = 0x2A,
	B00101011 = 0x2B,
	B00101100 = 0x2C,
	B00101101 = 0x2D,
	B00101110 = 0x2E,
	B00101111 = 0x2F,
	B00110000 = 0x30,
	B00110001 = 0x31,
	B00110010 = 0x32,
	B00110011 = 0x33,
	B00110100 = 0x34,
	B00110101 = 0x35,
	B00110110 = 0x36,
	B00110111 = 0x37,
	B00111000 = 0x38,
	B00111001 = 0x39,
	B00111010 = 0x3A,
	B00111011 = 0x3B,
	B00111100 = 0x3C,
	B00111101 = 0x3D,
	B00111110 = 0x3E,
	B00111111 = 0x3F,
	B01000000 = 0x40,
	B01000001 = 0x41,
	B01000010 = 0x42,
	B01000011 = 0x43,
	B01000100 = 0x44,
	B01000101 = 0x45,
	B01000110 = 0x46,
	B01000111 = 0x47,
	B01001000 = 0x48,
	B01001001 = 0x49,
	B01001010 = 0x4A,
	B01001011 = 0x4B,
	B01001100 = 0x4C,
	B01001101 = 0x4D,
	B01001110 = 0x4E,
	B01001111 = 0x4F,
	B01010000 = 0x50,
	B01010001 = 0x51,
	B01010010 = 0x52,
	B01010011 = 0x53,
	B01010100 = 0x54,
	B01010101 = 0x55,
	B01010110 = 0x56,
	B01010111 = 0x57,
	B01011000 = 0x58,
	B01011001 = 0x59,
	B01011010 = 0x5A,
	B01011011 = 0x5B,
	B01011100 = 0x5C,
	B01011101 = 0x5D,
	B01011110 = 0x5E,
	B01011111 = 0x5F,
	B01100000 = 0x60,
	B01100001 = 0x61,
	B01100010 = 0x62,
	B01100011 = 0x63,
	B01100100 = 0x64,
	B01100101 = 0x65,
	B01100110 = 0x66,
	B01100111 = 0x67,
	B01101000 = 0x68,
	B01101001 = 0x69,
	B01101010 = 0x6A,
	B01101011 = 0x6B,
	B01101100 = 0x6C,
	B01101101 = 0x6D,
	B01101110 = 0x6E,
	B01101111 = 0x6F,
	B01110000 = 0x70,
	B01110001 = 0x71,
	B01110010 = 0x72,
	B01110011 = 0x73,
	B01110100 = 0x74,
	B01110101 = 0x75,
	B01110110 = 0x76,
	B01110111 = 0x77,
	B01111000 = 0x78,
	B01111001 = 0x79,
	B01111010 = 0x7A,
	B01111011 = 0x7B,
	B01111100 = 0x7C,
	B01111101 = 0x7D,
	B01111110 = 0x7E,
	B01111111 = 0x7F,
	B10000000 = 0x80,
	B10000001 = 0x81,
	B10000010 = 0x82,
	B10000011 = 0x83,
	B10000100 = 0x84,
	B10000101 = 0x85,
	B10000110 = 0x86,
	B10000111 = 0x87,
	B10001000 = 0x88,
	B10001001 = 0x89,
	B10001010 = 0x8A,
	B10001011 = 0x8B,
	B10001100 = 0x8C,
	B10001101 = 0x8D,
	B10001110 = 0x8E,
	B10001111 = 0x8F,
	B10010000 = 0x90,
	B10010001 = 0x91,
	B10010010 = 0x92,
	B10010011 = 0x93,
	B10010100 = 0x94,
	B10010101 = 0x95,
	B10010110 = 0x96,
	B10010111 = 0x97,
	B10011000 = 0x98,
	B10011001 = 0x99,
	B10011010 = 0x9A,
	B10011011 = 0x9B,
	B10011100 = 0x9C,
	B10011101 = 0x9D,
	B10011110 = 0x9E,
	B10011111 = 0x9F,
	B10100000 = 0xA0,
	B10100001 = 0xA1,
	B10100010 = 0xA2,
	B10100011 = 0xA3,
	B10100100 = 0xA4,
	B10100101 = 0xA5,
	B10100110 = 0xA6,
	B10100111 = 0xA7,
	B10101000 = 0xA8,
	B10101001 = 0xA9,
	B10101010 = 0xAA,
	B10101011 = 0xAB,
	B10101100 = 0xAC,
	B10101101 = 0xAD,
	B10101110 = 0xAE,
	B10101111 = 0xAF,
	B10110000 = 0xB0,
	B10110001 = 0xB1,
	B10110010 = 0xB2,
	B10110011 = 0xB3,
	B10110100 = 0xB4,
	B10110101 = 0xB5,
	B10110110 = 0xB6,
	B10110111 = 0xB7,
	B10111000 = 0xB8,
	B10111001 = 0xB9,
	B10111010 = 0xBA,
	B10111011 = 0xBB,
	B10111100 = 0xBC,
	B10111101 = 0xBD,
	B10111110 = 0xBE,
	B10111111 = 0xBF,
	B11000000 = 0xC0,
	B11000001 = 0xC1,
	B11000010 = 0xC2,
	B11000011 = 0xC3,
	B11000100 = 0xC4,
	B11000101 = 0xC5,
	B11000110 = 0xC6,
	B11000111 = 0xC7,
	B11001000 = 0xC8,
	B11001001 = 0xC9,
	B11001010 = 0xCA,
	B11001011 = 0xCB,
	B11001100 = 0xCC,
	B11001101 = 0xCD,
	B11001110 = 0xCE,
	B11001111 = 0xCF,
	B11010000 = 0xD0,
	B11010001 = 0xD1,
	B11010010 = 0xD2,
	B11010011 = 0xD3,
	B11010100 = 0xD4,
	B11010101 = 0xD5,
	B11010110 = 0xD6,
	B11010111 = 0xD7,
	B11011000 = 0xD8,
	B11011001 = 0xD9,
	B11011010 = 0xDA,
	B11011011 = 0xDB,
	B11011100 = 0xDC,
	B11011101 = 0xDD,
	B11011110 = 0xDE,
	B11011111 = 0xDF,
	B11100000 = 0xE0,
	B11100001 = 0xE1,
	B11100010 = 0xE2,
	B11100011 = 0xE3,
	B11100100 = 0xE4,
	B11100101 = 0xE5,
	B11100110 = 0xE6,
	B11100111 = 0xE7,
	B11101000 = 0xE8,
	B11101001 = 0xE9,
	B11101010 = 0xEA,
	B11101011 = 0xEB,
	B11101100 = 0xEC,
	B11101101 = 0xED,
	B11101110 = 0xEE,
	B11101111 = 0xEF,
	B11110000 = 0xF0,
	B11110001 = 0xF1,
	B11110010 = 0xF2,
	B11110011 = 0xF3,
	B11110100 = 0xF4,
	B11110101 = 0xF5,
	B11110110 = 0xF6,
	B11110111 = 0xF7,
	B11111000 = 0xF8,
	B11111001 = 0xF9,
	B11111010 = 0xFA,
	B11111011 = 0xFB,
	B11111100 = 0xFC,
	B11111101 = 0xFD,
	B11111110 = 0xFE,
	B11111111 = 0xFF
};

526
xbyak/xbyak_mnemonic.h Normal file
View File

@ -0,0 +1,526 @@
// Returns the library version as a string literal.
const char *getVersionString() const
{
	return "2.23";
}
// Two-operand packed-integer mnemonics (pack*, punpck*, logical ops, averages,
// min/max, add/sub, compares, and shifts whose count is a register/memory operand).
// Each method forwards its operands unchanged to opMMX() with a single opcode
// byte as the third argument; opMMX() is defined outside this file.
// NOTE(review): the destination is typed Mmx; whether Xmm destinations are also
// accepted depends on the register class hierarchy -- confirm in xbyak.h.
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); }
void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); }
void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); }
void pmulhuw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE4); }
void pmulhw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE5); }
void pmullw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD5); }
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
void punpckhbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x68); }
void punpckhwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x69); }
void punpckhdq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6A); }
void punpcklbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x60); }
void punpcklwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x61); }
void punpckldq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x62); }
void pxor(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEF); }
void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); }
void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); }
void pmaxsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEE); }
void pmaxub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDE); }
void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); }
void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); }
void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); }
void paddq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD4); }
void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); }
void psubq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFB); }
void paddb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFC); }
void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); }
void paddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFE); }
void paddsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEC); }
void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); }
void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); }
void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); }
void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); }
void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); }
void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); }
void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); }
void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); }
void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); }
// Shift forms whose count comes from the second operand; the immediate-count
// overloads of the same names take an int instead of an Operand.
void psllw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF1); }
void pslld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF2); }
void psllq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF3); }
void psraw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE1); }
void psrad(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE2); }
void psrlw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD1); }
void psrld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD2); }
void psrlq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD3); }
void psubb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF8); }
void psubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF9); }
void psubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFA); }
void psubsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE8); }
void psubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE9); }
void psubusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD8); }
void psubusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD9); }
// Packed shifts by an immediate count. Each forwards to
// opMMX_IMM(reg, imm8, opcode, ext). The opcode byte selects the element width
// group (0x71 / 0x72 / 0x73) and the last argument selects the operation
// (6, 4, 2 here; 7 and 3 for the byte-wise pslldq/psrldq).
// NOTE(review): the last argument is presumably the ModRM /digit opcode
// extension -- confirm against opMMX_IMM in xbyak.h.
void psllw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 6); }
void pslld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 6); }
void psllq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 6); }
void psraw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 4); }
void psrad(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 4); }
void psrlw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 2); }
void psrld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 2); }
void psrlq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 2); }
// These two take an Xmm destination only.
void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 7); }
void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 3); }
// Packed shuffles with an 8-bit immediate selector. All four share opcode byte
// 0x70 and differ only in the fourth opMMX() argument (0x00, 0xF2, 0xF3, 0x66),
// which presumably selects the prefix byte -- confirm in opMMX.
// NOTE(review): pshufw passes 0x00 here while other tables in this file pass
// 0x100 where no prefix byte is emitted; verify how opMMX treats 0x00.
void pshufw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); }
void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); }
void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); }
void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); }
// SIMD move mnemonics, each provided as a pair of overloads:
//  - (Xmm, Operand): load form, forwarded to opMMX(xmm, op, opcode, prefixArg);
//  - (Address, Xmm): store form, which writes the prefix byte itself with db()
//    when one is needed and then calls opModM(addr, xmm, 0x0F, storeOpcode).
// The prefixArg of the load form matches the byte the store form passes to db();
// movaps/movups pass 0x100 and their store forms emit no prefix byte.
// NOTE(review): 0x100 therefore looks like a "no prefix" sentinel -- confirm in xbyak.h.
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0xF3); }
void movdqu(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); }
void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x100); }
void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); }
void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF3); }
void movss(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x11); }
void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); }
void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); }
void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); }
void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF2); }
void movsd(const Address& addr, const Xmm& xmm) { db(0xF2); opModM(addr, xmm, 0x0F, 0x11); }
void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); }
void movupd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x11); }
// SSE/SSE2 floating-point arithmetic, logical, compare, shuffle and unpack
// mnemonics. Each forwards to
//   opGen(xmm, op, opcode, prefixArg, isXMM_XMMorMEM [, imm8]).
// Within one family the ps/ss/pd/sd variants share the opcode byte and differ
// only in prefixArg: 0x100 (ps), 0xF3 (ss), 0x66 (pd), 0xF2 (sd).
// isXMM_XMMorMEM is passed as the operand-combination check; it and opGen are
// defined outside this file.
void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); }
void addss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF3, isXMM_XMMorMEM); }
void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); }
void addsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF2, isXMM_XMMorMEM); }
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); }
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
// The cmp* forms carry the comparison predicate as a trailing imm8.
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); }
void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); }
void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM_XMMorMEM); }
void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); }
void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); }
void maxss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF3, isXMM_XMMorMEM); }
void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); }
void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); }
void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXMM_XMMorMEM); }
void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); }
void minpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x66, isXMM_XMMorMEM); }
void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); }
void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); }
void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); }
void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); }
void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM_XMMorMEM); }
void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); }
void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); }
void rcpps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0x100, isXMM_XMMorMEM); }
void rcpss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0xF3, isXMM_XMMorMEM); }
void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); }
void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); }
// The shuf* forms carry the lane selector as a trailing imm8.
void shufps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); }
void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); }
void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); }
void sqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF3, isXMM_XMMorMEM); }
void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); }
void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); }
void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); }
void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); }
void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); }
void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); }
void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); }
void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); }
void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM); }
void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); }
// Remaining XMM-destination mnemonics: register-only moves, ordered/unordered
// compares, packed/scalar conversions and the SSE3 horizontal/duplicate group.
// Register-to-register-only forms (both parameters typed Xmm) call opModR directly,
// writing the 0x66 prefix with db() first where one is used.
void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); }
void movhlps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x12); }
void movlhps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x16); }
// Everything below forwards to opGen(xmm, op, opcode, prefixArg, isXMM_XMMorMEM),
// the same call shape as the arithmetic table.
void punpckhqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6D, 0x66, isXMM_XMMorMEM); }
void punpcklqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6C, 0x66, isXMM_XMMorMEM); }
void comiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x100, isXMM_XMMorMEM); }
void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
void comisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x66, isXMM_XMMorMEM); }
void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
void cvtpd2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x66, isXMM_XMMorMEM); }
void cvtps2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x100, isXMM_XMMorMEM); }
void cvtsd2ss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF2, isXMM_XMMorMEM); }
void cvtss2sd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF3, isXMM_XMMorMEM); }
void cvtpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF2, isXMM_XMMorMEM); }
void cvttpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0x66, isXMM_XMMorMEM); }
void cvtdq2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF3, isXMM_XMMorMEM); }
void cvtps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x66, isXMM_XMMorMEM); }
void cvttps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0xF3, isXMM_XMMorMEM); }
void cvtdq2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x100, isXMM_XMMorMEM); }
void addsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0x66, isXMM_XMMorMEM); }
void addsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0xF2, isXMM_XMMorMEM); }
void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM); }
void movshdup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x16, 0xF3, isXMM_XMMorMEM); }
void movsldup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF3, isXMM_XMMorMEM); }
// Conversions that mix register classes. Both parameters are typed as plain
// Operand, and the fifth opGen() argument names the check for the allowed
// combination (isXMM_MMXorMEM, isMMX_XMMorMEM, isXMM_REG32orMEM,
// isREG32_XMMorMEM). Those checks are defined outside this file.
// NOTE(review): operand combinations are presumably rejected at run time by
// opGen rather than by the type system -- confirm in xbyak.h.
void cvtpi2ps(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x100, isXMM_MMXorMEM); }
void cvtps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x100, isMMX_XMMorMEM); }
void cvtsi2ss(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF3, isXMM_REG32orMEM); }
void cvtss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF3, isREG32_XMMorMEM); }
void cvttps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x100, isMMX_XMMorMEM); }
void cvttss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF3, isREG32_XMMorMEM); }
void cvtpi2pd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x66, isXMM_MMXorMEM); }
void cvtpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x66, isMMX_XMMorMEM); }
void cvtsi2sd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF2, isXMM_REG32orMEM); }
void cvtsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF2, isREG32_XMMorMEM); }
void cvttpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x66, isMMX_XMMorMEM); }
void cvttsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF2, isREG32_XMMorMEM); }
// Prefetch hints. All four use the same opcode pair (0x0F, B00011000 == 0x18)
// and differ only in the index of the temporary Reg32 passed to opModM
// (1, 2, 3 for t0/t1/t2 and 0 for nta).
// NOTE(review): the Reg32 index presumably fills the ModRM reg field as the
// hint selector rather than naming a real register -- confirm in opModM.
void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B00011000); }
void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, B00011000); }
void prefetcht2(const Address& addr) { opModM(addr, Reg32(3), 0x0F, B00011000); }
void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, B00011000); }
// High/low half moves. Both operands are plain Operand and are handed to
// opMovXMM(op1, op2, opcode, prefixArg); the h-forms use opcode 0x16, the
// l-forms 0x12, and the ps/pd variants differ in prefixArg (0x100 vs 0x66).
// NOTE(review): which operand may be memory is decided inside opMovXMM, which
// is not visible here.
void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x100); }
void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x100); }
void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x66); }
void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); }
// Condition-code families: for every condition there is a cmovCC / jCC / setCC
// triple built from the same 4-bit condition number n (0..15):
//   cmovCC -> opModRM(..., 0x0F, B01000000 | n)   (B01000000 == 0x40)
//   jCC    -> opJmp(label, type, 0x70 + n, 0x80 + n, 0x0F)
//   setCC  -> opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | n)   (B10010000 == 0x90)
// Synonymous mnemonics (b/nae, nb/ae, e/z, ne/nz, be/na, nbe/a, p/pe, np/po,
// l/nge, nl/ge, le/ng, nle/g) are separate methods with identical arguments.
// jCC takes a label name and a LabelType that defaults to T_AUTO.
// NOTE(review): the two jCC opcode arguments are presumably the short and near
// encodings chosen by opJmp -- confirm in xbyak.h.
void cmovo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0); }
void jo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }
void seto(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 0); }
void cmovno(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1); }
void jno(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }
void setno(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 1); }
void cmovb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
void jb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
void setb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
void cmovnae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
void jnae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
void setnae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
void cmovnb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
void jnb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
void setnb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
void cmovae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
void jae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
void setae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
void cmove(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
void je(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
void sete(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
void cmovz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
void jz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
void setz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
void cmovne(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
void jne(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
void setne(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
void cmovnz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
void jnz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
void setnz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
void cmovbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
void jbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
void setbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
void cmovna(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
void jna(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
void setna(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
void cmovnbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
void jnbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
void setnbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
void cmova(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
void ja(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
void seta(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
void cmovs(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8); }
void js(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }
void sets(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 8); }
void cmovns(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9); }
void jns(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }
void setns(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 9); }
void cmovp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
void jp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
void setp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
void cmovpe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
void jpe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
void setpe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
void cmovnp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
void jnp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
void setnp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
void cmovpo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
void jpo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
void setpo(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
void cmovl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
void jl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
void setl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
void cmovnge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
void jnge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
void setnge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
void cmovnl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
void jnl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
void setnl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
void cmovge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
void jge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
void setge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
void cmovle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
void jle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
void setle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
void cmovng(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
void jng(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
void setng(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
void cmovnle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
void jnle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
void setnle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
void jg(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
void setg(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
// Mode-dependent no-operand mnemonics, each emitting a fixed byte sequence
// through db(). When XBYAK64 is defined only cdqe is declared here; otherwise
// the BCD-adjust and push-all/pop-all mnemonics are declared instead.
// pusha/pushad share byte 0x60 and popa/popad share byte 0x61.
#ifdef XBYAK64
void cdqe() { db(0x48); db(0x98); }
#else
void aaa() { db(0x37); }
void aad() { db(0xD5); db(0x0A); }
void aam() { db(0xD4); db(0x0A); }
void aas() { db(0x3F); }
void daa() { db(0x27); }
void das() { db(0x2F); }
void popad() { db(0x61); }
void popfd() { db(0x9D); }
void pusha() { db(0x60); }
void pushad() { db(0x60); }
void pushfd() { db(0x9C); }
void popa() { db(0x61); }
#endif
// No-operand mnemonics with a fixed encoding, declared regardless of XBYAK64.
// Each method passes the listed byte values to db() in order; db() is defined
// outside this file (presumably it appends one byte to the code buffer).
// cbw/cwd are the 0x66-prefixed forms of the bytes used by cwde/cdq.
// lock() emits only the 0xF0 byte; the caller emits the instruction it applies to.
void cbw() { db(0x66); db(0x98); }
void cdq() { db(0x99); }
void clc() { db(0xF8); }
void cld() { db(0xFC); }
void cli() { db(0xFA); }
void cmc() { db(0xF5); }
void cpuid() { db(0x0F); db(0xA2); }
void cwd() { db(0x66); db(0x99); }
void cwde() { db(0x98); }
void lahf() { db(0x9F); }
void lock() { db(0xF0); }
void nop() { db(0x90); }
void sahf() { db(0x9E); }
void stc() { db(0xF9); }
void std() { db(0xFD); }
void sti() { db(0xFB); }
void emms() { db(0x0F); db(0x77); }
void pause() { db(0xF3); db(0x90); }
void sfence() { db(0x0F); db(0xAE); db(0xF8); }
void lfence() { db(0x0F); db(0xAE); db(0xE8); }
void mfence() { db(0x0F); db(0xAE); db(0xF0); }
void monitor() { db(0x0F); db(0x01); db(0xC8); }
void mwait() { db(0x0F); db(0x01); db(0xC9); }
void rdmsr() { db(0x0F); db(0x32); }
void rdpmc() { db(0x0F); db(0x33); }
void rdtsc() { db(0x0F); db(0x31); }
void wait() { db(0x9B); }
void wbinvd() { db(0x0F); db(0x09); }
void wrmsr() { db(0x0F); db(0x30); }
void xlatb() { db(0xD7); }
// popf/pushf emit the same bytes as the 32-bit-only popfd/pushfd.
void popf() { db(0x9D); }
void pushf() { db(0x9C); }
// x87 FPU mnemonics in their no-operand form. Each emits a fixed two-byte
// sequence through db(): an escape byte in the 0xD8..0xDE range followed by a
// second byte that selects the operation.
// NOTE(review): the second byte of the arithmetic/compare/exchange forms
// (faddp, fcom, fxch, ...) presumably hard-codes st(1) as the implicit
// operand -- confirm against the instruction reference.
void f2xm1() { db(0xD9); db(0xF0); }
void fabs() { db(0xD9); db(0xE1); }
void faddp() { db(0xDE); db(0xC1); }
void fchs() { db(0xD9); db(0xE0); }
void fcom() { db(0xD8); db(0xD1); }
void fcomp() { db(0xD8); db(0xD9); }
void fcompp() { db(0xDE); db(0xD9); }
void fcos() { db(0xD9); db(0xFF); }
void fdecstp() { db(0xD9); db(0xF6); }
void fdivp() { db(0xDE); db(0xF9); }
void fdivrp() { db(0xDE); db(0xF1); }
void fincstp() { db(0xD9); db(0xF7); }
// Constant loads: consecutive second bytes 0xE8..0xEE.
void fld1() { db(0xD9); db(0xE8); }
void fldl2t() { db(0xD9); db(0xE9); }
void fldl2e() { db(0xD9); db(0xEA); }
void fldpi() { db(0xD9); db(0xEB); }
void fldlg2() { db(0xD9); db(0xEC); }
void fldln2() { db(0xD9); db(0xED); }
void fldz() { db(0xD9); db(0xEE); }
void fmulp() { db(0xDE); db(0xC9); }
void fnop() { db(0xD9); db(0xD0); }
void fpatan() { db(0xD9); db(0xF3); }
void fprem() { db(0xD9); db(0xF8); }
void fprem1() { db(0xD9); db(0xF5); }
void fptan() { db(0xD9); db(0xF2); }
void frndint() { db(0xD9); db(0xFC); }
void fscale() { db(0xD9); db(0xFD); }
void fsin() { db(0xD9); db(0xFE); }
void fsincos() { db(0xD9); db(0xFB); }
void fsqrt() { db(0xD9); db(0xFA); }
void fsubp() { db(0xDE); db(0xE9); }
void fsubrp() { db(0xDE); db(0xE1); }
void ftst() { db(0xD9); db(0xE4); }
void fucom() { db(0xDD); db(0xE1); }
void fucomp() { db(0xDD); db(0xE9); }
void fucompp() { db(0xDA); db(0xE9); }
void fxam() { db(0xD9); db(0xE5); }
void fxch() { db(0xD9); db(0xC9); }
void fxtract() { db(0xD9); db(0xF4); }
void fyl2x() { db(0xD9); db(0xF1); }
void fyl2xp1() { db(0xD9); db(0xF9); }
// Two-operand integer ALU instructions. Every mnemonic comes as a pair:
//   (op1, op2) forwards to opRM_RM with the instruction's base opcode;
//   (op, imm)  forwards to opRM_I with the same base opcode plus a 0..7 selector.
// NOTE(review): the selector always equals (base opcode >> 3), which matches the
// ModRM /digit of the x86 immediate-group encodings - confirm in opRM_I.
// NOTE: `and`, `or` and `xor` are alternative operator tokens in standard C++;
// a compiler must treat them as plain identifiers for these declarations to
// parse (e.g. GCC's -fno-operator-names).
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); }
void and(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); }
void and(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); }
void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); }
void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); }
void or(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
void or(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
// Single-operand integer instructions.
// dec/inc forward to opIncDec with a base opcode (0x48 / 0x40) and a selector
// (1 / 0). NOTE(review): presumably the short register form and the /digit of
// the r/m form respectively - confirm in opIncDec.
void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
// The group below shares opcode 0xF6 and differs only in the fourth argument
// (2..7), which matches the ModRM /digit of the x86 "unary group 3" encodings.
// NOTE: `not` is an alternative operator token in standard C++ (see the note
// on operator names for and/or/xor).
void div(const Operand& op) { opR_ModM(op, 0, 3, 6, 0xF6); }
void idiv(const Operand& op) { opR_ModM(op, 0, 3, 7, 0xF6); }
void imul(const Operand& op) { opR_ModM(op, 0, 3, 5, 0xF6); }
void mul(const Operand& op) { opR_ModM(op, 0, 3, 4, 0xF6); }
void neg(const Operand& op) { opR_ModM(op, 0, 3, 3, 0xF6); }
void not(const Operand& op) { opR_ModM(op, 0, 3, 2, 0xF6); }
// Shift and rotate instructions. Each mnemonic has an immediate-count overload
// and a register-count overload (the Reg8 parameter is named cl); both forward
// to opShift with a 0..7 selector that matches the ModRM /digit of the x86
// shift group (rol=0, ror=1, rcl=2, rcr=3, shl/sal=4, shr=5, sar=7).
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); }
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
void rcr(const Operand& op, const Reg8& cl) { opShift(op, cl, 3); }
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
void rol(const Operand& op, const Reg8& cl) { opShift(op, cl, 0); }
void ror(const Operand& op, int imm) { opShift(op, imm, 1); }
void ror(const Operand& op, const Reg8& cl) { opShift(op, cl, 1); }
void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
void sar(const Operand& op, const Reg8& cl) { opShift(op, cl, 7); }
void shl(const Operand& op, int imm) { opShift(op, imm, 4); }
void shl(const Operand& op, const Reg8& cl) { opShift(op, cl, 4); }
void shr(const Operand& op, int imm) { opShift(op, imm, 5); }
void shr(const Operand& op, const Reg8& cl) { opShift(op, cl, 5); }
// sal is an alias of shl: both pass selector 4
void sal(const Operand& op, int imm) { opShift(op, imm, 4); }
void sal(const Operand& op, const Reg8& cl) { opShift(op, cl, 4); }
// Double-precision shifts. The immediate overloads pass the count to opShxd;
// the register overloads pass 0 as the count together with the address of cl.
void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); }
void shld(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xA4, &cl); }
void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); }
void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); }
// Bit scan forward / reverse (0F BC / 0F BD). The two boolean arguments tell
// opModRM whether op is a register of the accepted sizes (16 | i32e, where
// i32e is defined outside this chunk) or a memory operand.
void bsf(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
void bsr(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
// SSSE3 instructions, all routed through opMMX with the per-instruction opcode
// byte, 0x66, an immediate slot and the opcode-map byte (0x38, or 0x3A for
// palignr).
// NOTE(review): 256 is passed where palignr passes its 8-bit immediate, so it
// presumably means "no immediate"; 0x66 is presumably the prefix used for the
// XMM form - confirm both in opMMX.
void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, 256, 0x38); }
void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, 256, 0x38); }
void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, 256, 0x38); }
void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, 256, 0x38); }
void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, 256, 0x38); }
void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, 256, 0x38); }
void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, 256, 0x38); }
void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, 256, 0x38); }
void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, 256, 0x38); }
void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, 256, 0x38); }
void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, 256, 0x38); }
void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, 256, 0x38); }
void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, 256, 0x38); }
void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, 256, 0x38); }
void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, 256, 0x38); }
// the only member of this group with an immediate; imm is truncated to 8 bits
void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a); }
// SSE4 instructions without an immediate operand. All forward to opGen with
// the per-instruction opcode byte, 0x66, the isXMM_XMMorMEM operand predicate,
// 256 in the immediate slot and opcode-map byte 0x38.
// NOTE(review): 256 presumably means "no immediate" (the imm8 variants of
// opGen callers pass a uint8 there) - confirm in opGen.
void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, 256, 0x38); }
// sign-extending packed moves (opcode bytes 0x20..0x25)
void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, 256, 0x38); }
// zero-extending packed moves (opcode bytes 0x30..0x35)
void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, 256, 0x38); }
// packed min / max (opcode bytes 0x38..0x3F)
void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, 256, 0x38); }
// SSE4 instructions that take an 8-bit immediate. Same opGen call shape as the
// immediate-less SSE4 group, but with opcode-map byte 0x3A and the caller's
// imm passed in the immediate slot. imm is an int in the signature and is
// truncated to its low 8 bits by the static_cast.
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void roundps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void roundpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
// packed string compares (opcode bytes 0x60..0x63)
void pcmpestrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
// Memory-only instructions encoded as 0F AE /digit. A Reg32 built from the
// digit (2, 3, 7) is handed to opModM in the register slot, i.e. it supplies
// the ModRM reg field rather than naming a real register operand.
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
// Non-temporal stores of an XMM register (0F 2B / 0F E7).
// NOTE(review): the XMM index is wrapped in a Reg16, presumably so the encoder
// emits the 0x66 prefix these two encodings require - confirm in opModM.
void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); }
void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
// Sign- / zero-extending moves; opMovxx receives the base opcode byte
// (0xBE for movsx, 0xB6 for movzx) and derives the rest from the operands.
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }
// x87 instructions with a memory operand, all routed through
// opFpuMem(addr, b16, b32, b64, ext, ext64).
// NOTE(review): judging by the x87 opcode map, b16/b32/b64 are the opcode
// bytes for 16-, 32- and 64-bit memory operands (0x00 where that size does not
// exist), ext is the ModRM /digit, and a non-zero ext64 overrides the /digit
// for the 64-bit form (used by fild and fistp only) - confirm in opFpuMem.
void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); }
void fiadd(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 0, 0); }
void fcom(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 2, 0); }
void fcomp(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 3, 0); }
void fdiv(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 6, 0); }
void fidiv(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 6, 0); }
void fdivr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 7, 0); }
void fidivr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 7, 0); }
void ficom(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 2, 0); }
void ficomp(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 3, 0); }
void fild(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 0, 5); }
void fist(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0x00, 2, 0); }
void fistp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 3, 7); }
void fisttp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDD, 1, 0); }
void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
void fmul(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 1, 0); }
void fimul(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 1, 0); }
void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); }
void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); }
void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); }
// x87 instructions with two FPU-stack register operands, routed through
// opFpuFpu(reg1, reg2, code1, code2). Each code packs two opcode bytes
// (high byte, then the base of the second byte).
// NOTE(review): judging by the x87 opcode map, code1 is the "st0, st(i)" form
// and code2 the "st(i), st0" form; a code whose high byte is 0x00 marks a form
// that does not exist (fxxxp has no "st0, st(i)" form, fcmovcc/fcomi have no
// "st(i), st0" form) - confirm how opFpuFpu selects and validates them.
void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); }
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); }
void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); }
void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); }
void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); }
void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); }
void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); }
void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); }
void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); }
void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); }
void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); }
void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); }
// for the non-commutative operations the second opcode byte is swapped between
// the normal and reversed mnemonic in the code2 (DC/DE) column: fdiv uses F8
// where fdivr uses F0, and fsub uses E8 where fsubr uses E0
void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); }
void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); }
void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); }
void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); }
void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); }
void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
void fsubp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); }
void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); }
void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); }
// x87 instructions with a single FPU-stack register operand, routed through
// opFpu(reg, code1, code2).
// NOTE(review): code1 is presumably the first opcode byte and code2 the base
// of the second byte to which the register index is added - confirm in opFpu.
void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); }
void fcomp(const Fpu& reg) { opFpu(reg, 0xD8, 0xD8); }
void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); }
void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); }
void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); }
void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); }
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }

231
xbyak/xbyak_util.h Normal file
View File

@ -0,0 +1,231 @@
#ifndef XBYAK_XBYAK_UTIL_H_
#define XBYAK_XBYAK_UTIL_H_
/**
utility class and functions for Xbyak
@note this header is UNDER CONSTRUCTION!
*/
#include "xbyak/xbyak.h"
#ifdef _WIN32
	#if (_MSC_VER < 1400) && defined(XBYAK32)
	/*
		__cpuid(int data[4], int eaxIn) replacement for 32-bit builds with a
		Visual C++ older than 2005 (_MSC_VER < 1400).
		The function is naked, so the arguments are read straight from the stack;
		after the two pushes below, [esp + 8] is the return address,
		[esp + 12] is data and [esp + 16] is eaxIn.
		ebx and esi are saved and restored around the body.
	*/
	static inline __declspec(naked) void __cpuid(int[4], int)
	{
		__asm {
			push	ebx
			push	esi
			mov		eax, dword ptr [esp + 4 * 2 + 8] // eaxIn
			cpuid
			mov		esi, dword ptr [esp + 4 * 2 + 4] // data
			mov		dword ptr [esi], eax
			mov		dword ptr [esi + 4], ebx
			mov		dword ptr [esi + 8], ecx
			mov		dword ptr [esi + 12], edx
			pop		esi
			pop		ebx
			ret
		}
	}
	#else
		#include <intrin.h> // for __cpuid
	#endif
#else
	#ifndef __GNUC_PREREQ
		// true when the compiler reports itself as GCC major.minor or newer.
		// The comparison against __GNUC__/__GNUC_MINOR__ is essential: without it
		// the macro expands to a non-zero constant and every version test passes.
		#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
	#endif
	#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
		#include <cpuid.h> // provides __cpuid(level, a, b, c, d)
	#else
		#if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'
			// ebx cannot be named as an output here, so it is saved, the value
			// cpuid left in it is copied to esi (output "=S"), and it is restored
			#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
		#else
			#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
		#endif
	#endif
#endif
namespace Xbyak { namespace util {
/**
	CPU detection class

	The constructor runs CPUID leaf 0 (vendor string), on AMD parts leaf
	0x80000001 (extended feature flags), and leaf 1 (standard feature flags),
	and stores the result as a bitmask of Type values that has() can test.
	data[0..3] always hold eax, ebx, ecx, edx in that order.
	@note tSSE4a and tSSE5 are declared but never set by this class.
*/
class Cpu {
	unsigned int type_;
	/**
		pack the first four bytes of x into one 32-bit value, x[0] in the low byte
		@note despite the historical name this is the little-endian layout, which
		is how the vendor string bytes are laid out in the CPUID registers
	*/
	unsigned int get32bitAsBE(const char *x) const
	{
		// go through unsigned char so that a byte >= 0x80 is neither sign-extended
		// nor shifted into the sign bit of an int
		return static_cast<unsigned int>(static_cast<unsigned char>(x[0]))
			| (static_cast<unsigned int>(static_cast<unsigned char>(x[1])) << 8)
			| (static_cast<unsigned int>(static_cast<unsigned char>(x[2])) << 16)
			| (static_cast<unsigned int>(static_cast<unsigned char>(x[3])) << 24);
	}
public:
	/**
		run CPUID
		@param eaxIn [in] leaf number placed in eax
		@param data [out] receives eax, ebx, ecx, edx
	*/
	static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
	{
#ifdef _WIN32
		__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
		__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
	}
	enum Type {
		NONE = 0,
		tMMX = 1 << 0,
		tMMX2 = 1 << 1,
		tCMOV = 1 << 2,
		tSSE = 1 << 3,
		tSSE2 = 1 << 4,
		tSSE3 = 1 << 5,
		tSSSE3 = 1 << 6,
		tSSE41 = 1 << 7,
		tSSE42 = 1 << 8,
		tPOPCNT = 1 << 9,

		t3DN = 1 << 16,
		tE3DN = 1 << 17,
		tSSE4a = 1 << 18,
		tSSE5 = 1 << 11,

		tINTEL = 1 << 24,
		tAMD = 1 << 25
	};
	Cpu()
		: type_(NONE)
	{
		unsigned int data[4];
		getCpuid(0, data);
		// last four characters of "GenuineIntel" / "AuthenticAMD", as returned in ecx
		static const char intel[] = "ntel";
		static const char amd[] = "cAMD";
		// keep the vendor word: data[] is overwritten by the extended leaf below,
		// so the Intel test must not look at data[2] afterwards
		const unsigned int vendor = data[2];
		if (vendor == get32bitAsBE(amd)) {
			type_ |= tAMD;
			getCpuid(0x80000001, data);
			// extended feature flags in edx
			if (data[3] & (1U << 31)) type_ |= t3DN;
			if (data[3] & (1U << 15)) type_ |= tCMOV;
			if (data[3] & (1U << 30)) type_ |= tE3DN;
			if (data[3] & (1U << 22)) type_ |= tMMX2;
		} else if (vendor == get32bitAsBE(intel)) {
			type_ |= tINTEL;
		}
		getCpuid(1, data);
		// standard feature flags in ecx
		if (data[2] & (1U << 0)) type_ |= tSSE3;
		if (data[2] & (1U << 9)) type_ |= tSSSE3;
		if (data[2] & (1U << 19)) type_ |= tSSE41;
		if (data[2] & (1U << 20)) type_ |= tSSE42;
		if (data[2] & (1U << 23)) type_ |= tPOPCNT;
		// standard feature flags in edx
		if (data[3] & (1U << 15)) type_ |= tCMOV;
		if (data[3] & (1U << 23)) type_ |= tMMX;
		if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE; // the SSE bit also turns on tMMX2
		if (data[3] & (1U << 26)) type_ |= tSSE2;
	}
	/**
		@return true if any of the bits in type was detected
	*/
	bool has(Type type) const
	{
		return (type & type_) != 0;
	}
};
class Clock {
public:
static inline uint64 getRdtsc()
{
#ifdef _MSC_VER
return __rdtsc();
#else
unsigned int eax, edx;
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
return ((uint64)edx << 32) | eax;
#endif
}
Clock()
: clock_(0)
, count_(0)
{
}
void begin()
{
clock_ -= getRdtsc();
}
void end()
{
clock_ += getRdtsc();
count_++;
}
int getCount() const { return count_; }
uint64 getClock() const { return clock_; }
void clear() { count_ = 0; clock_ = 0; }
private:
uint64 clock_;
int count_;
};
#ifdef XBYAK32
// Helper routines for EnableSetEip::setEipTo(): one set_eip_to_<reg>() per
// general-purpose register except esp. Each loads the dword at [esp] into
// <reg>; when reached by a call instruction with no prologue in between, that
// dword is the caller's return address, i.e. the address following the call.
namespace local {
#ifdef _WIN32
// MSVC variant: the function is declared naked and supplies its own ret, so
// its whole body is the two instructions written in the macro.
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline __declspec(naked) void set_eip_to_ ## x() { \
__asm { mov x, dword ptr [esp] } __asm { ret } \
}
#else
// GCC variant: an ordinary function whose body is a single asm statement; the
// return is generated by the compiler.
// NOTE(review): the function is not naked and the asm declares neither outputs
// nor clobbers, so (%esp) is the return address only if the compiler emits no
// prologue that changes esp (e.g. no frame-pointer push) - confirm for the
// compiler flags in use.
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline void set_eip_to_ ## x() { \
__asm__ volatile("movl (%esp), %" #x); \
}
#endif
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(eax)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ecx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(esi)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edi)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebp)
#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG
} // end of local
/**
	mix-in that adds setEipTo() to a code generator
	@tparam Gen generator base class; it is constructed with (maxSize, userPtr)
	and must provide call()
*/
template<class Gen>
struct EnableSetEip : public Gen {
	EnableSetEip(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
		: Gen(maxSize, userPtr)
	{
	}
	/**
		emit code that loads the current eip into a register
		@param out [in] destination register
		@note out must not be esp; there is no helper for it, so nothing is
		emitted (and the assert fires in debug builds)
	*/
	void setEipTo(const Xbyak::Reg32& out)
	{
#if 0
		Gen::call(Gen::getCurr() + 5);
		Gen::pop(out);
#else
		// pick the helper that copies its own return address into `out`,
		// then emit a single call to it
		void *helper = 0;
		switch (out.getIdx()) {
		case Xbyak::Operand::EAX: helper = reinterpret_cast<void*>(local::set_eip_to_eax); break;
		case Xbyak::Operand::ECX: helper = reinterpret_cast<void*>(local::set_eip_to_ecx); break;
		case Xbyak::Operand::EDX: helper = reinterpret_cast<void*>(local::set_eip_to_edx); break;
		case Xbyak::Operand::EBX: helper = reinterpret_cast<void*>(local::set_eip_to_ebx); break;
		case Xbyak::Operand::ESI: helper = reinterpret_cast<void*>(local::set_eip_to_esi); break;
		case Xbyak::Operand::EDI: helper = reinterpret_cast<void*>(local::set_eip_to_edi); break;
		case Xbyak::Operand::EBP: helper = reinterpret_cast<void*>(local::set_eip_to_ebp); break;
		default:
			assert(0);
			return;
		}
		Gen::call(helper);
#endif
	}
};
#endif
} } // end of util
#endif