diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h index 939ffee..63efccd 100644 --- a/src/xbyak/xbyak.h +++ b/src/xbyak/xbyak.h @@ -9,14 +9,9 @@ @note modified new BSD license http://opensource.org/licenses/BSD-3-Clause */ -#if !defined(XBYAK_USE_OP_NAMES) && !defined(XBYAK_NO_OP_NAMES) +#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not #define XBYAK_NO_OP_NAMES #endif -#ifndef XBYAK_NO_OP_NAMES - #if not +0 // trick to detect whether 'not' is operator or not - #error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()." - #endif -#endif #include // for debug print #include @@ -29,7 +24,9 @@ // #define XBYAK_DISABLE_AVX512 -//#define XBYAK_USE_MMAP_ALLOCATOR +#if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR) + #define XBYAK_USE_MMAP_ALLOCATOR +#endif #if !defined(__GNUC__) || defined(__MINGW32__) #undef XBYAK_USE_MMAP_ALLOCATOR #endif @@ -83,9 +80,12 @@ #include #include #endif -#if defined(__APPLE__) && defined(MAP_JIT) +#if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT) #define XBYAK_USE_MAP_JIT #include + #ifndef MAP_JIT + #define MAP_JIT 0x800 + #endif #endif #if !defined(_MSC_VER) || (_MSC_VER >= 1600) #include @@ -120,7 +120,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5850 /* 0xABCD = A.BC(D) */ + VERSION = 0x5912 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -469,9 +469,8 @@ public: } // err if MMX/FPU/OPMASK/BNDREG void setBit(int bit); - void setOpmaskIdx(int idx, bool ignore_idx0 = false) + void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true) { - if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID); if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET); mask_ = idx; } @@ -555,7 +554,7 @@ inline void Operand::setBit(int bit) { if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR; if (isBit(bit)) return; - if (is(MEM)) { + if (is(MEM | OPMASK)) { bit_ = bit; return; } @@ -1667,6 +1666,7 @@ private: bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx); bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false); if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET); + if (aaa == 0) z = 0; // clear T_z if mask is not set db(0x62); db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3)); db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3)); @@ -1780,6 +1780,7 @@ private: db(longCode); dd(disp - longJmpSize); } } + bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); } template void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) { @@ -1789,7 +1790,7 @@ private: makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); } else { int jmpSize = 0; - if (type == T_NEAR) { + if (isNEAR(type)) { jmpSize = 4; if (longPref) db(longPref); db(longCode); dd(0); @@ -1804,7 +1805,7 @@ private: void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0) { if (isAutoGrow()) { - if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW); + if (!isNEAR(type)) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW); if (size_ + 16 >= maxSize_) growMemory(); if (longPref) db(longPref); db(longCode); @@ -2258,7 +2259,7 @@ public: const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; - const Ymm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; + const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi; const Reg16 ax, cx, dx, bx, sp, bp, si, di; const Reg8 al, cl, dl, bl, ah, ch, dh, bh; @@ -2298,6 +2299,9 @@ public: #ifndef XBYAK_DISABLE_SEGMENT const Segment es, cs, ss, ds, fs, gs; #endif +private: + bool isDefaultJmpNEAR_; +public: void L(const std::string& label) { labelMgr_.defineSlabel(label); } void L(Label& label) { labelMgr_.defineClabel(label); } Label L() { Label label; L(label); return label; } @@ -2317,6 +2321,8 @@ public: void putL(std::string label) { putL_inner(label); } void putL(const Label& label) { putL_inner(label); } + // set default type of `jmp` of undefined label to T_NEAR + void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; } void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); } void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); } void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); } @@ -2575,6 +2581,7 @@ public: #ifndef XBYAK_DISABLE_SEGMENT , es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs) #endif + , isDefaultJmpNEAR_(false) { labelMgr_.set(this); } diff --git a/src/xbyak/xbyak_mnemonic.h b/src/xbyak/xbyak_mnemonic.h index 457a641..2de6ec2 100644 --- a/src/xbyak/xbyak_mnemonic.h +++ b/src/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "5.85"; } +const char *getVersionString() const { return "5.912"; } void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } @@ -1671,17 +1671,17 @@ void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); } void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); } void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); } -void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); } +void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); } void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); } void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); } void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); } -void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); } +void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); } void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); } void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); } void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); } -void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); } +void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); } void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); } -void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); } +void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); } void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); } void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); } void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); } diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h index eefd152..4f79d8f 100644 --- a/src/xbyak/xbyak_util.h +++ b/src/xbyak/xbyak_util.h @@ -1,5 +1,6 @@ #ifndef XBYAK_XBYAK_UTIL_H_ #define XBYAK_XBYAK_UTIL_H_ +#include /** utility class and functions for Xbyak @@ -146,6 +147,11 @@ class Cpu { numCores_[level - 1] = extractBit(data[1], 0, 15); } } + /* + Fallback values in case a hypervisor has 0xB leaf zeroed-out. + */ + numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]); + numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]); } else { /* Failed to deremine num of cores without x2APIC support. @@ -754,7 +760,7 @@ public: }; Profiler() : mode_(None) - , suffix_(0) + , suffix_("") , startAddr_(0) #ifdef XBYAK_USE_PERF , fp_(0) @@ -828,7 +834,16 @@ public: #ifdef XBYAK_USE_PERF if (mode_ == Perf) { if (fp_ == 0) return; - fprintf(fp_, "%llx %zx %s%s\n", (long long)startAddr, funcSize, funcName, suffix_); + fprintf(fp_, "%llx %zx %s%s", (long long)startAddr, funcSize, funcName, suffix_); + /* + perf does not recognize the function name which is less than 3, + so append '_' at the end of the name if necessary + */ + size_t n = strlen(funcName) + strlen(suffix_); + for (size_t i = n; i < 3; i++) { + fprintf(fp_, "_"); + } + fprintf(fp_, "\n"); fflush(fp_); } #endif