Merge branch 'dev'

update-fork
MITSUNARI Shigeo 5 years ago
commit e769bd6cea
  1. 7
      Makefile
  2. 144
      include/cybozu/atomic.hpp
  3. 778
      include/cybozu/socket.hpp
  4. 4
      include/mcl/ec.hpp
  5. 78
      src/fp_generator.hpp
  6. 3
      src/xbyak/xbyak.h
  7. 7
      src/xbyak/xbyak_mnemonic.h
  8. 143
      src/xbyak/xbyak_util.h

@ -28,6 +28,13 @@ endif
ifeq ($(MCL_USE_XBYAK),0)
CFLAGS+=-DMCL_DONT_USE_XBYAK
endif
ifeq ($(MCL_USE_PROF),1)
CFLAGS+=-DMCL_USE_PROF
endif
ifeq ($(MCL_USE_PROF),2)
CFLAGS+=-DMCL_USE_PROF -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/
LDFLAGS+=-L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
endif
##################################################################
MCL_LIB=$(LIB_DIR)/libmcl.a
MCL_SNAME=mcl

@ -0,0 +1,144 @@
#pragma once
/**
@file
@brief atomic operation
@author MITSUNARI Shigeo(@herumi)
@author MITSUNARI Shigeo
*/
#include <cybozu/inttype.hpp>
#ifdef _WIN32
#include <winsock2.h>
#include <windows.h>
#include <intrin.h>
#else
#include <emmintrin.h>
#endif
namespace cybozu {

namespace atomic_local {

/*
    backend selected by operand size
    the primary template is empty, so only the 4 and 8 byte
    specializations below provide any operation
*/
template<size_t N>
struct Tag {};

/* operations for 4 byte operands */
template<>
struct Tag<4> {
    /* *p += y ; return the value *p had before the addition */
    template<class T>
    static inline T AtomicAddSub(T *p, T y)
    {
#ifndef _WIN32
        return static_cast<T>(__sync_fetch_and_add(p, y));
#else
        return (T)_InterlockedExchangeAdd((long*)p, (long)y);
#endif
    }
    /* if (*p == oldValue) *p = newValue ; return the value *p had before */
    template<class T>
    static inline T AtomicCompareExchangeSub(T *p, T newValue, T oldValue)
    {
#ifndef _WIN32
        // note : the gcc builtin takes (ptr, expected, desired)
        return static_cast<T>(__sync_val_compare_and_swap(p, oldValue, newValue));
#else
        return (T)_InterlockedCompareExchange((long*)p, (long)newValue, (long)oldValue);
#endif
    }
    /* *p = newValue ; return the value *p had before */
    template<class T>
    static inline T AtomicExchangeSub(T *p, T newValue)
    {
#ifndef _WIN32
        return static_cast<T>(__sync_lock_test_and_set(p, newValue));
#else
        return (T)_InterlockedExchange((long*)p, (long)newValue);
#endif
    }
};

/*
    operations for 8 byte operands
    add and exchange are compiled only when CYBOZU_OS_BIT == 64 ;
    compare-exchange is always available
*/
template<>
struct Tag<8> {
#if (CYBOZU_OS_BIT == 64)
    template<class T>
    static inline T AtomicAddSub(T *p, T y)
    {
#ifndef _WIN32
        return static_cast<T>(__sync_fetch_and_add(p, y));
#else
        return (T)_InterlockedExchangeAdd64((int64_t*)p, (int64_t)y);
#endif
    }
#endif
    template<class T>
    static inline T AtomicCompareExchangeSub(T *p, T newValue, T oldValue)
    {
#ifndef _WIN32
        return static_cast<T>(__sync_val_compare_and_swap(p, oldValue, newValue));
#else
        return (T)_InterlockedCompareExchange64((int64_t*)p, (int64_t)newValue, (int64_t)oldValue);
#endif
    }
#if (CYBOZU_OS_BIT == 64)
    template<class T>
    static inline T AtomicExchangeSub(T *p, T newValue)
    {
#ifndef _WIN32
        return static_cast<T>(__sync_lock_test_and_set(p, newValue));
#else
        return (T)_InterlockedExchange64((int64_t*)p, (int64_t)newValue);
#endif
    }
#endif
};

} // atomic_local

/**
    atomic operation
    see http://gcc.gnu.org/onlinedocs/gcc-4.4.0/gcc/Atomic-Builtins.html
    http://msdn.microsoft.com/en-us/library/ms683504(VS.85).aspx
*/

/**
    atomic fetch-and-add
        tmp = *p;
        *p += y;
        return tmp;
    the implementation is chosen by sizeof(T)
*/
template<class T>
T AtomicAdd(T *p, T y)
{
    typedef atomic_local::Tag<sizeof(T)> Impl;
    return Impl::AtomicAddSub(p, y);
}

/**
    atomic compare-and-swap
        tmp = *p;
        if (*p == oldValue) *p = newValue;
        return tmp;
    the implementation is chosen by sizeof(T)
*/
template<class T>
T AtomicCompareExchange(T *p, T newValue, T oldValue)
{
    typedef atomic_local::Tag<sizeof(T)> Impl;
    return Impl::AtomicCompareExchangeSub(p, newValue, oldValue);
}

/**
    atomic swap
        tmp = *p;
        *p = newValue;
        return tmp;
    the implementation is chosen by sizeof(T)
*/
template<class T>
T AtomicExchange(T *p, T newValue)
{
    typedef atomic_local::Tag<sizeof(T)> Impl;
    return Impl::AtomicExchangeSub(p, newValue);
}

/**
    memory fence : MemoryBarrier() with MSVC, _mm_mfence() otherwise
*/
inline void mfence()
{
#ifndef _MSC_VER
    _mm_mfence();
#else
    MemoryBarrier();
#endif
}

} // cybozu

@ -0,0 +1,778 @@
#pragma once
/**
@file
@brief tiny socket class
@author MITSUNARI Shigeo(@herumi)
@author MITSUNARI Shigeo
*/
#include <errno.h>
#include <assert.h>
#include <stdio.h>
#ifdef _WIN32
#include <winsock2.h>
#include <ws2tcpip.h> // for socklen_t
#pragma comment(lib, "ws2_32.lib")
#pragma comment(lib, "iphlpapi.lib")
#pragma warning(push)
#pragma warning(disable : 4127) // constant condition
#else
#include <unistd.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <memory.h>
#include <signal.h>
#endif
#ifndef NDEBUG
#include <stdio.h>
#endif
#include <cybozu/atomic.hpp>
#include <cybozu/exception.hpp>
#include <cybozu/itoa.hpp>
#include <string>
#ifdef __linux__
// #define CYBOZU_SOCKET_USE_EPOLL
#include <sys/epoll.h>
#endif
namespace cybozu {
#ifdef _MSC_VER
struct NetErrorNo : public cybozu::ErrorNo {
NetErrorNo(NativeErrorNo err)
: cybozu::ErrorNo(err)
{
}
NetErrorNo()
: cybozu::ErrorNo(WSAGetLastError())
{
}
};
#else
typedef cybozu::ErrorNo NetErrorNo;
#endif
#ifdef CYBOZU_SOCKET_USE_EPOLL
namespace experimental {

/*
    wrapper of struct epoll_event that stores a file descriptor in data.fd
*/
struct EpollEvent {
    struct epoll_event ev_;
    /* start from an all-zero event */
    EpollEvent()
    {
        memset(&ev_, 0, sizeof(ev_));
    }
    /* set the descriptor and the event mask (default EPOLLIN) */
    void set(int fd, uint32_t events = EPOLLIN)
    {
        ev_.events = events;
        ev_.data.fd = fd;
    }
    /* descriptor stored by set() or filled in by Epoll::wait() */
    int getFd() const { return ev_.data.fd; }
};

/*
    owner of an epoll descriptor ; the descriptor is closed by the destructor
    NOTE(review): the class is copyable and a copy would close efd_ twice
*/
class Epoll {
    int efd_;
    /*
        common error handling
        ret >= 0      : return true
        err == NULL   : throw cybozu::Exception
        otherwise     : store errno into *err and return false
    */
    bool verify(const char *msg, int ret, int *err) const {
        if (ret >= 0) return true;
        if (err == 0) throw cybozu::Exception(msg) << cybozu::NetErrorNo();
        *err = errno;
        return false;
    }
public:
    Epoll() : efd_(-1) {}
    /*
        create the epoll descriptor
        throw if err == NULL
    */
    bool init(int *err = 0)
    {
        efd_ = ::epoll_create1(0);
        return verify("Epoll:init", efd_, err);
    }
    ~Epoll()
    {
        if (efd_ >= 0) ::close(efd_);
    }
    /*
        call epoll_ctl(op) for fd
        ev may be NULL (del() passes NULL) ; in that case NULL is handed to
        epoll_ctl instead of forming a member address through a null pointer
        throw if err == NULL
    */
    bool ctrl(int op, int fd, EpollEvent *ev, int *err = 0) {
        struct epoll_event *pev = ev ? &ev->ev_ : 0;
        int ret = ::epoll_ctl(efd_, op, fd, pev);
        return verify("Epoll:ctrl", ret, err);
    }
    /* register fd with the given event mask ; throw if err == NULL */
    bool add(int fd, uint32_t events = EPOLLIN, int *err = 0) {
        EpollEvent ev;
        ev.set(fd, events);
        return ctrl(EPOLL_CTL_ADD, fd, &ev, err);
    }
    /* unregister fd ; throw if err == NULL */
    bool del(int fd, int *err = 0) {
        return ctrl(EPOLL_CTL_DEL, fd, NULL, err);
    }
    /*
        wait for events
        msec :  0 : block
               -1 : return immediately
        (the two values are swapped before calling epoll_wait, whose own
        convention is 0 : return immediately, -1 : block indefinitely)
        return 0 if timeout, -errno if error, otherwise the value of epoll_wait
    */
    int wait(EpollEvent *ev, int maxEv, int msec = 0)
    {
        if (msec == 0) {
            msec = -1;
        } else if (msec == -1) {
            msec = 0;
        }
        int ret = ::epoll_wait(efd_, &ev->ev_, maxEv, msec);
        if (ret == 0) return 0; // timeout
        if (ret < 0) return -errno;
        return ret;
    }
};

/*
    register fd to ep in the constructor and unregister it in the destructor
    the constructor throws if the registration fails (add() is called without err) ;
    the destructor passes an err pointer to del(), so it does not throw
*/
struct AutoLock {
    Epoll& ep_;
    int fd_;
    AutoLock(Epoll& ep, int fd, int events = EPOLLIN)
        : ep_(ep)
        , fd_(fd)
    {
        ep_.add(fd, events);
    }
    ~AutoLock()
    {
        int err;
        ep_.del(fd_, &err);
    }
};

} // cybozu::experimental
#endif
namespace ssl {
class ClientSocket;
};
namespace socket_local {

/* native socket handle type */
#ifndef _WIN32
typedef int SocketHandle;
#else
typedef SOCKET SocketHandle;
#endif

/*
    network start-up / tear-down
    Windows : WSAStartup(2.2) in the constructor (the process exits with
              status 1 if it fails) and WSACleanup in the destructor
    others  : SIGPIPE is set to SIG_IGN in the constructor
*/
struct InitTerm {
    /** call once for init */
    InitTerm()
    {
#ifndef _WIN32
        ::signal(SIGPIPE, SIG_IGN);
#else
        WSADATA data;
        int err = ::WSAStartup(MAKEWORD(2, 2), &data);
        if (err) {
            fprintf(stderr, "WSAStartup failed : %d\n", err);
            exit(1);
        }
#endif
    }
    /** call once for term */
    ~InitTerm()
    {
#ifdef _WIN32
        ::WSACleanup();
#endif
    }
    /* does nothing ; exists so that DummyCall can reference the instance */
    void dummyCall() { }
};

/*
    holder of the InitTerm instance as a static member of a class template
    (presumably so that the definition can live in a header — confirm)
*/
template<int dummy = 0>
struct InstanceIsHere {
    static InitTerm it_;
};

template<int dummy>
InitTerm InstanceIsHere<dummy>::it_;

/* references InstanceIsHere<>::it_ from its constructor */
struct DummyCall {
    DummyCall()
    {
        InstanceIsHere<>::it_.dummyCall();
    }
};

} // cybozu::socket_local
/*
    resolved IPv4 / IPv6 socket address (address + port)
    family_ is 0 until set() or Socket::accept() fills the object
*/
class SocketAddr {
    union {
        // http://www.coins.tsukuba.ac.jp/~syspro/2010/No6_files/sockaddr.html
        struct sockaddr sa; /* 16byte */
        struct sockaddr_in v4; /* 16byte */
        struct sockaddr_in6 v6;
    } addr_;
    socklen_t addrlen_;
    int family_;
    friend class Socket;
    /*
        derive family_ from addrlen_ and sa_family ; throw if they do not
        describe an IPv4 or IPv6 address
    */
    void verify() // call in only Socket::accept
    {
        if (addrlen_ == sizeof(addr_.v4) && addr_.sa.sa_family == AF_INET) {
            family_ = AF_INET;
            return;
        }
        if (addrlen_ == sizeof(addr_.v6) && addr_.sa.sa_family == AF_INET6) {
            family_ = AF_INET6;
            return;
        }
        throw cybozu::Exception("cybozu:SocketAddr:verify") << addrlen_;
    }
public:
    /* empty address (addrlen_ = 0, family_ = 0) */
    SocketAddr()
        : addrlen_(0)
        , family_(0)
    {
    }
    /* same as set(address, port, forceIpV6) */
    SocketAddr(const std::string& address, uint16_t port, bool forceIpV6 = false)
    {
        set(address, port, forceIpV6);
    }
    /*
        resolve address:port with getaddrinfo (TCP stream)
        IPv4 is tried first ; IPv6 (with AI_V4MAPPED) is used when the IPv4
        lookup fails or forceIpV6 is true
        @param address [in] host name or address string
        @param port [in] port number
        @param forceIpV6 [in] use the IPv6 lookup even if the IPv4 one succeeds
        throw cybozu::Exception if no usable address is found
    */
    void set(const std::string& address, uint16_t port, bool forceIpV6 = false)
    {
        char portStr[16];
        CYBOZU_SNPRINTF(portStr, sizeof(portStr), "%d", port);
        memset(&addr_, 0, sizeof(addr_));
        addrlen_ = 0;
        family_ = 0;

        struct addrinfo *result = 0;
        struct addrinfo hints;
        memset(&hints, 0, sizeof(struct addrinfo));
        hints.ai_family = AF_INET;
        hints.ai_socktype = SOCK_STREAM;
        hints.ai_protocol = IPPROTO_TCP;
        hints.ai_flags = AI_NUMERICSERV; // AI_PASSIVE;
        int s = getaddrinfo(address.c_str(), portStr, &hints, &result);
        // s == EAI_AGAIN
        if (s || forceIpV6) {
            // release the list of a successful IPv4 lookup before result is overwritten
            if (s == 0 && result) freeaddrinfo(result);
            result = 0;
            hints.ai_family = AF_INET6;
            hints.ai_flags |= AI_V4MAPPED;
            s = getaddrinfo(address.c_str(), portStr, &hints, &result);
        }
        if (s == 0) {
            bool found = false;
            // take the first entry of the requested family that fits into addr_
            for (const struct addrinfo *p = result; p; p = p->ai_next) {
                const int family = p->ai_family;
                if (family == hints.ai_family) {
                    if (p->ai_addrlen > sizeof(addr_)) {
                        break;
                    }
                    memcpy(&addr_, p->ai_addr, p->ai_addrlen);
                    addrlen_ = (socklen_t)p->ai_addrlen;
                    family_ = family;
                    found = true;
                    break;
                }
            }
            freeaddrinfo(result);
            if (found) return;
        }
        throw cybozu::Exception("SocketAddr:set") << address << port << cybozu::NetErrorNo();
    }
    /* size in bytes of the stored sockaddr */
    socklen_t getSize() const { return addrlen_; }
    /* AF_INET, AF_INET6 or 0 if not set */
    int getFamily() const { return family_; }
    /* pointer to the stored sockaddr */
    const struct sockaddr *get() const { return &addr_.sa; }
    /* port in host byte order ; throw if the family is neither IPv4 nor IPv6 */
    uint16_t getPort() const {
        if (family_ == AF_INET) {
            return ntohs(addr_.v4.sin_port);
        } else if (family_ == AF_INET6) {
            return ntohs(addr_.v6.sin6_port);
        }
        throw cybozu::Exception("SocketAddr:getPort:bad family") << family_;
    }
    /*
        compare addr without port
        an IPv4 address and an IPv6 address are equal if the IPv6 one is the
        IPv4-mapped form (::ffff:a.b.c.d) of the IPv4 one
    */
    bool hasSameAddr(const SocketAddr& rhs) const
    {
        const uint8_t *v4 = 0;
        const uint8_t *v6 = 0;
        if (family_ == AF_INET) {
            if (rhs.family_ == AF_INET) return memcmp(&addr_.v4.sin_addr, &rhs.addr_.v4.sin_addr, sizeof(in_addr)) == 0;
            if (rhs.family_ != AF_INET6) return false;
            v4 = (const uint8_t*)&addr_.v4.sin_addr;
            v6 = (const uint8_t*)&rhs.addr_.v6.sin6_addr;
        } else if (family_ != AF_INET6) {
            return false;
        } else {
            if (rhs.family_ == AF_INET6) return memcmp(&addr_.v6.sin6_addr, &rhs.addr_.v6.sin6_addr, sizeof(in6_addr)) == 0;
            if (rhs.family_ != AF_INET) return false;
            v4 = (const uint8_t*)&rhs.addr_.v4.sin_addr;
            v6 = (const uint8_t*)&addr_.v6.sin6_addr;
        }
        // Ipv6-mapped?
        const uint8_t header[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
        return memcmp(v6, header, 12) == 0 && memcmp(v6 + 12, v4, 4) == 0;
    }
    /*
        "a.b.c.d:port" for IPv4, "[addr]:port" for IPv6
        throw if the family is neither IPv4 nor IPv6 or if inet_ntop fails
    */
    std::string toStr() const
    {
        if (family_ == AF_INET || family_ == AF_INET6) {
            char buf[INET6_ADDRSTRLEN];
            assert(INET_ADDRSTRLEN <= INET6_ADDRSTRLEN);
            const bool isIPv4 = family_ == AF_INET;
            const void *pa = isIPv4 ? (const void*)&addr_.v4.sin_addr : (const void*)&addr_.v6.sin6_addr;
            // not "const void*" because of vc
            const char *p = inet_ntop(family_, const_cast<void*>(pa), buf, sizeof(buf));
            if (!p) throw cybozu::Exception("cybozu:SocketAddr:toStr") << cybozu::NetErrorNo();
            if (isIPv4) return std::string(p) + ':' + cybozu::itoa(getPort());
            return std::string("[") + p + "]:" + cybozu::itoa(getPort());
        }
        throw cybozu::Exception("cybozu:SocketAddr:toStr:bad family_") << family_;
    }
};
/*
socket class
@note owner is moved if copied
*/
class Socket {
friend class cybozu::ssl::ClientSocket;
private:
cybozu::socket_local::SocketHandle sd_;
Socket(const Socket&);
void operator=(const Socket&);
/*
    timeout helpers for SO_SNDTIMEO / SO_RCVTIMEO
    Windows : the option value is an int holding milliseconds
    others  : the option value is a struct timeval
    (the guard is _WIN32 like every other platform switch in this file)
*/
#ifdef _WIN32
/* set the timeout option 'type' to msec */
void setTimeout(int type, int msec)
{
    setSocketOption(type, msec);
}
/* return msec */
int getTimeout(int type) const
{
    return getSocketOption(type);
}
#else
/* set the timeout option 'type' to msec */
void setTimeout(int type, int msec)
{
    struct timeval t;
    t.tv_sec = msec / 1000;
    t.tv_usec = (msec % 1000) * 1000;
    setSocketOption(type, t);
}
/* return msec */
int getTimeout(int type) const
{
    struct timeval t;
    getSocketOption(type, &t);
    return (int)(t.tv_sec * 1000 + t.tv_usec / 1000); /* msec */
}
#endif
/*
    switch the socket between blocking and non-blocking mode with FIONBIO
    (the value handed to FIONBIO is 0 for blocking and 1 for non-blocking)
    throw cybozu::Exception if the ioctl fails
*/
void setBlocking(bool isBlocking)
{
#ifndef _WIN32
    int nonBlocking = isBlocking ? 0 : 1;
    const int rc = ::ioctl(sd_, FIONBIO, &nonBlocking);
#else
    u_long nonBlocking = isBlocking ? 0 : 1;
    const int rc = ::ioctlsocket(sd_, FIONBIO, &nonBlocking);
#endif
    if (rc >= 0) return;
    throw cybozu::Exception("Socket:setBlocking") << cybozu::NetErrorNo() << isBlocking;
}
public:
#ifndef _WIN32
static const int INVALID_SOCKET = -1;
#endif
/* create an object that holds no socket (sd_ == INVALID_SOCKET) */
Socket()
: sd_(INVALID_SOCKET)
{
}
/* true if the object holds a socket handle */
bool isValid() const { return sd_ != INVALID_SOCKET; }
// move : take the handle of rhs and leave INVALID_SOCKET in rhs
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
Socket(Socket&& rhs)
    : sd_(cybozu::AtomicExchange(&rhs.sd_, INVALID_SOCKET))
{
}
#endif
/*
    close and move
    close the socket held by this object, then take the handle of rhs and
    leave INVALID_SOCKET in rhs
    moving an object onto itself is a no-op (without the check the object
    would close its own socket and end up invalid)
    throw cybozu::Exception if closing the current socket fails
*/
void moveFrom(Socket& rhs)
{
    if (this == &rhs) return;
    close();
    sd_ = cybozu::AtomicExchange(&rhs.sd_, INVALID_SOCKET);
}
/*
    assignment moves the handle : same as moveFrom(rhs)
    C++11 or later takes an rvalue reference, older compilers take a
    non-const lvalue reference
*/
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
void operator=(Socket&& rhs)
#else
void operator=(Socket& rhs)
#endif
{
moveFrom(rhs);
}
/* close the socket ; a close error is ignored (DontThrow) */
~Socket()
{
close(cybozu::DontThrow);
}
bool close(bool dontThrow = false)
{
cybozu::socket_local::SocketHandle sd = cybozu::AtomicExchange(&sd_, INVALID_SOCKET);
if (sd == INVALID_SOCKET) return true;
#ifdef _WIN32
// ::shutdown(sd, SD_SEND);
// shutdown is called in closesocket
bool isOK = ::closesocket(sd) == 0;
#else
bool isOK = ::close(sd) == 0;
#endif
if (!dontThrow && !isOK) throw cybozu::Exception("Socket:close") << cybozu::NetErrorNo();
return isOK;
}
/*
    shut down a part of the connection
    how 0 : SHUT_RD ; disallow read
        1 : SHUT_WR ; disallow write
        2 : SHUT_RDWR ; disallow read/write
    @param dontThrow [in] if true, a failure is reported by the return value only
    @return true if ::shutdown succeeded
*/
bool shutdown(int how, bool dontThrow = false)
{
    const bool done = ::shutdown(sd_, how) == 0;
    if (done || dontThrow) return done;
    throw cybozu::Exception("Socket:waitForClose:shutdown") << cybozu::NetErrorNo();
}
/*
send FIN and wait for remote's close().
this function is used for the following situation.
sock.write()
sock.waitForClose()
sock.close()
*/
void waitForClose()
{
if (sd_ == INVALID_SOCKET) return;
// send FIN and this socket can't write any data.
shutdown(1);
// wait for FIN from the peer.
char buf[1];
ssize_t readSize = readSome(buf, sizeof(buf));
if (readSize != 0) {
throw cybozu::Exception("Socket:waitForClose:readSome:bad size") << readSize;
}
}
/*!
    receive data once
    @param buf [out] receive buffer
    @param bufSize [in] receive buffer size(byte)
    @return read size ; at most 0x7fffffff bytes are requested per call
    @note a read interrupted by EINTR is retried (non-Windows)
    throw cybozu::Exception on error
*/
size_t readSome(void *buf, size_t bufSize)
{
    const int chunk = (int)(std::min)((size_t)0x7fffffff, bufSize);
#ifdef _WIN32
    const int got = ::recv(sd_, (char *)buf, chunk, 0);
#else
    ssize_t got;
    do {
        got = ::read(sd_, buf, chunk);
    } while (got < 0 && errno == EINTR);
#endif
    if (got < 0) throw cybozu::Exception("Socket:readSome") << cybozu::NetErrorNo() << bufSize;
    return got;
}
/*!
receive all data unless timeout
@param buf [out] receive buffer
@param bufSize [in] receive buffer size(byte)
*/
void read(void *buf, size_t bufSize)
{
char *p = (char *)buf;
while (bufSize > 0) {
size_t readSize = readSome(p, bufSize);
if (readSize == 0) throw cybozu::Exception("Socket:read:readSize is zero");
p += readSize;
bufSize -= readSize;
}
}
/*!
    write all data without throwing
    @param pb [out] true if all data was written, false if a send failed
    @param buf [in] send buffer
    @param bufSize [in] send buffer size(byte)
    @note at most 0x7fffffff bytes are sent per call of send/write ;
    a write interrupted by EINTR is retried (non-Windows)
*/
void write(bool *pb, const void *buf, size_t bufSize)
{
    const char *cur = (const char *)buf;
    size_t remain = bufSize;
    while (remain > 0) {
        const int chunk = (int)(std::min)(size_t(0x7fffffff), remain);
#ifdef _WIN32
        const int sent = ::send(sd_, cur, chunk, 0);
#else
        const int sent = ::write(sd_, cur, chunk);
        if (sent < 0 && errno == EINTR) continue;
#endif
        if (sent < 0) {
            *pb = false;
            return;
        }
        cur += sent;
        remain -= sent;
    }
    *pb = true;
}
/*
    write all data
    @param buf [in] send buffer
    @param bufSize [in] send buffer size(byte)
    throw cybozu::Exception if the data could not be written
*/
void write(const void *buf, size_t bufSize)
{
    bool ok;
    write(&ok, buf, bufSize);
    if (ok) return;
    throw cybozu::Exception("Socket:write") << cybozu::NetErrorNo() << bufSize;
}
/**
connect to address:port
@param address [in] address
@param port [in] port
@param msec: 0 : block
*/
void connect(const std::string& address, uint16_t port, int msec = 0)
{
SocketAddr addr;
addr.set(address, port);
connect(addr, msec);
}
/**
    connect to resolved socket addr
    @param addr [in] destination
    @param msec [in] 0 : blocking connect
                     otherwise : non-blocking connect, then wait with
                     queryAccept(msec, false) and restore blocking mode
    throw cybozu::Exception if the object already holds a socket or if the
    connection fails ; in the latter case the socket created here is closed
    before the exception leaves, so connect() can be called again
*/
void connect(const cybozu::SocketAddr& addr, int msec = 0)
{
    if (isValid()) throw cybozu::Exception("Socket:connect:already connect");
    sd_ = ::socket(addr.getFamily(), SOCK_STREAM, IPPROTO_TCP);
    if (!isValid()) {
        throw cybozu::Exception("Socket:connect:socket") << cybozu::NetErrorNo();
    }
    try {
        if (msec == 0) {
            if (::connect(sd_, addr.get(), addr.getSize()) < 0) {
                throw cybozu::Exception("Socket:connect") << cybozu::NetErrorNo() << addr.toStr();
            }
        } else {
            setBlocking(false);
            if (::connect(sd_, addr.get(), addr.getSize()) < 0) {
#ifdef _WIN32
                bool inProgress = WSAGetLastError() == WSAEWOULDBLOCK;
#else
                bool inProgress = errno == EINPROGRESS;
#endif
                if (!inProgress) throw cybozu::Exception("Socket:connect:not in progress") << cybozu::NetErrorNo() << addr.toStr();
                if (!queryAccept(msec, false)) throw cybozu::Exception("Socket:connect:timeout") << addr.toStr();
                int err = getSocketOption(SO_ERROR);
                if (err != 0) throw cybozu::Exception("Socket::connect:bad socket") << cybozu::NetErrorNo(err);
            }
            setBlocking(true);
        }
    } catch (...) {
        // the error text is already stored in the exception ; do not keep a half-open socket
        close(cybozu::DontThrow);
        throw;
    }
}
static const int allowIPv4 = 1;
static const int allowIPv6 = 2;
/**
    init for server : create a TCP socket, bind it to port on the wildcard
    address and start listening
    @param port [in] port number
    @param mode [in] allowIPv4, allowIPv6 or both ; an IPv6 socket is used
    whenever allowIPv6 is set, and IPV6_V6ONLY is turned on unless
    allowIPv4 is set as well
    throw cybozu::Exception on error ; if bind or listen fails the socket is
    closed first
*/
void bind(uint16_t port, int mode = allowIPv4 | allowIPv6)
{
    const bool useV6 = (mode & allowIPv6) != 0;
    sd_ = ::socket(useV6 ? AF_INET6 : AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (!isValid()) {
        throw cybozu::Exception("Socket:bind:socket") << cybozu::NetErrorNo();
    }
    setSocketOption(SO_REUSEADDR, 1);
    struct sockaddr_in6 sa6;
    struct sockaddr_in sa4;
    struct sockaddr *sa;
    socklen_t saLen;
    if (useV6) {
        setSocketOption(IPV6_V6ONLY, (mode & allowIPv4) ? 0 : 1, IPPROTO_IPV6);
        memset(&sa6, 0, sizeof(sa6));
        sa6.sin6_family = AF_INET6;
        sa6.sin6_port = htons(port);
        sa = (struct sockaddr*)&sa6;
        saLen = sizeof(sa6);
    } else {
        memset(&sa4, 0, sizeof(sa4));
        sa4.sin_family = AF_INET;
        sa4.sin_port = htons(port);
        sa = (struct sockaddr*)&sa4;
        saLen = sizeof(sa4);
    }
    if (::bind(sd_, sa, saLen) == 0 && ::listen(sd_, SOMAXCONN) == 0) return;
    // remember the error before close() can overwrite it
    cybozu::NetErrorNo keep;
    close(cybozu::DontThrow);
    throw cybozu::Exception("Socket:bind") << keep;
}
/**
return positive if accepted
return zero if timeout
return negative(-errno) if error
*/
int queryAcceptNoThrow(int msec = 1000, bool checkWrite = true)
{
if (sd_ < 0) return -EBADF;
#ifdef CYBOZU_SOCKET_USE_EPOLL
int err;
experimental::Epoll ep;
if (!ep.init(&err)) return -err;
uint32_t events = checkWrite ? EPOLLIN : EPOLLOUT;
experimental::AutoLock al(ep, sd_, events);
experimental::EpollEvent ev;
int ret = ep.wait(&ev, 1, msec);
if (ret != 1) return ret;
assert(ev.getFd() == sd_);
return ret;
#else
#ifndef _WIN32
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms739169.aspx
if (sd_ >= FD_SETSIZE) return -EMFILE;
#endif
struct timeval timeout;
timeout.tv_sec = msec / 1000;
timeout.tv_usec = (msec % 1000) * 1000;
fd_set fds;
FD_ZERO(&fds);
FD_SET((unsigned)sd_, &fds);
int fdNum;
if (checkWrite) {
fdNum = ::select((int)sd_ + 1, &fds, 0, 0, &timeout);
} else {
fdNum = ::select((int)sd_ + 1, 0, &fds, 0, &timeout);
}
if (fdNum < 0) return -errno;
return fdNum;
#endif
}
/**
    throwing version of queryAcceptNoThrow
    return true if acceptable, otherwise false
    return false if msec (default : one second) passed
        while (!server.queryAccept()) {
        }
        client.accept(server);
    throw cybozu::Exception if queryAcceptNoThrow reports an error
*/
bool queryAccept(int msec = 1000, bool checkWrite = true)
{
    const int n = queryAcceptNoThrow(msec, checkWrite);
    if (n >= 0) return n != 0;
    throw cybozu::Exception("Socket:queryAccept") << cybozu::NetErrorNo(-n);
}
/**
    accept for server
    @param client [out] receives the accepted socket
    @param paddr [out] if not NULL, receives the address of the peer
    throw cybozu::Exception if ::accept fails, or if the returned peer
    address is neither IPv4 nor IPv6
    @note the handle previously stored in client is overwritten, not closed
*/
void accept(Socket& client, SocketAddr *paddr = 0) const
{
    if (paddr) {
        struct sockaddr *psa = &paddr->addr_.sa;
        paddr->addrlen_ = sizeof(paddr->addr_);
        client.sd_ = ::accept(sd_, psa, &paddr->addrlen_);
    } else {
        client.sd_ = ::accept(sd_, 0, 0);
    }
    // report the accept error itself ; the address is meaningful only after a successful accept
    if (!client.isValid()) throw cybozu::Exception("Socket:accept") << cybozu::NetErrorNo();
    if (paddr) paddr->verify();
}
/*
    setsockopt wrapper : the option value is the object 'value' itself
    (sizeof(T) bytes starting at &value)
    throw cybozu::Exception on error
*/
template<typename T>
void setSocketOption(int optname, const T& value, int level = SOL_SOCKET)
{
    const char *raw = cybozu::cast<const char*>(&value);
    if (setsockopt(sd_, level, optname, raw, sizeof(T)) == 0) return;
    throw cybozu::Exception("Socket:setSocketOption") << cybozu::NetErrorNo();
}
/*
    getsockopt wrapper : the option value is stored into *value
    throw cybozu::Exception on error
*/
template<typename T>
void getSocketOption(int optname, T* value, int level = SOL_SOCKET) const
{
    socklen_t len = (socklen_t)sizeof(T);
    char *raw = cybozu::cast<char*>(value);
    if (getsockopt(sd_, level, optname, raw, &len) == 0) return;
    throw cybozu::Exception("Socket:getSocketOption") << cybozu::NetErrorNo();
}
/* get an int option of level SOL_SOCKET ; throw cybozu::Exception on error */
int getSocketOption(int optname) const
{
    int value;
    getSocketOption(optname, &value);
    return value;
}
/**
    setup linger (SO_LINGER)
    @param l_onoff [in] value of linger.l_onoff
    @param l_linger [in] value of linger.l_linger
    throw cybozu::Exception on error
*/
void setLinger(uint16_t l_onoff, uint16_t l_linger)
{
    struct linger linger;
    linger.l_onoff = l_onoff;
    linger.l_linger = l_linger;
    // pass the struct itself ; passing &linger would make setSocketOption
    // send the bytes of a pointer (T = struct linger*) to setsockopt
    setSocketOption(SO_LINGER, linger);
}
/**
get receive buffer size (SO_RCVBUF)
@return buffer size(byte) as reported by getSocketOption
@note an error is reported by an exception thrown from getSocketOption, not by a return value
*/
int getReceiveBufferSize() const
{
return getSocketOption(SO_RCVBUF);
}
/**
set receive buffer size (SO_RCVBUF)
@param size [in] buffer size(byte)
@note an error is reported by an exception thrown from setSocketOption
*/
void setReceiveBufferSize(int size)
{
setSocketOption(SO_RCVBUF, size);
}
/**
get send buffer size (SO_SNDBUF)
@return buffer size(byte) as reported by getSocketOption
@note an error is reported by an exception thrown from getSocketOption, not by a return value
*/
int getSendBufferSize() const
{
return getSocketOption(SO_SNDBUF);
}
/**
set send buffer size (SO_SNDBUF)
@param size [in] buffer size(byte)
@note an error is reported by an exception thrown from setSocketOption
*/
void setSendBufferSize(int size)
{
setSocketOption(SO_SNDBUF, size);
}
/**
set send timeout (SO_SNDTIMEO)
@param msec [in] msec
@note forwards to setTimeout
*/
void setSendTimeout(int msec)
{
setTimeout(SO_SNDTIMEO, msec);
}
/**
set receive timeout (SO_RCVTIMEO)
@param msec [in] msec
@note forwards to setTimeout
*/
void setReceiveTimeout(int msec)
{
setTimeout(SO_RCVTIMEO, msec);
}
/**
get send timeout(msec) (SO_SNDTIMEO)
@note forwards to getTimeout
*/
int getSendTimeout() const
{
return getTimeout(SO_SNDTIMEO);
}
/**
get receive timeout(msec) (SO_RCVTIMEO)
@note forwards to getTimeout
*/
int getReceiveTimeout() const
{
return getTimeout(SO_RCVTIMEO);
}
};
} // cybozu
#ifdef _WIN32
#pragma warning(pop)
#endif

@ -7,9 +7,7 @@
http://opensource.org/licenses/BSD-3-Clause
*/
#include <stdlib.h>
#include <cybozu/exception.hpp>
#include <mcl/op.hpp>
#include <mcl/util.hpp>
#include <mcl/fp.hpp>
#include <mcl/ecparam.hpp>
//#define MCL_EC_USE_AFFINE

@ -127,71 +127,6 @@ if (rm.isReg()) { \
namespace fp {
/*
    writer of /tmp/perf-<pid>.map entries for generated code (Linux only)
    nothing is written unless the environment variable MCL_PERF is "1" ;
    on other platforms every member function is a no-op
*/
struct Profiler {
    FILE *fp_;            // map file, 0 while disabled
    const char *suf_;     // suffix appended to each name
    const uint8_t *prev_; // start address of the next entry
    Profiler() : fp_(0), suf_(0), prev_(0) {}
    ~Profiler()
    {
        close();
    }
    /*
        close the current file, then open the map file if MCL_PERF is "1"
        @param suf [in] suffix appended to each name
        @param prev [in] start address of the first entry
        throw cybozu::Exception if the file can't be opened
    */
    void init(const char *suf, const uint8_t *prev)
    {
#ifndef __linux__
        (void)suf;
        (void)prev;
#else
        close();
        const char *env = getenv("MCL_PERF");
        const bool enabled = env != 0 && strcmp(env, "1") == 0;
        if (!enabled) return;
        fprintf(stderr, "use perf suf=%s\n", suf);
        suf_ = suf;
        const int pid = getpid();
        char path[128];
        snprintf(path, sizeof(path), "/tmp/perf-%d.map", pid);
        fp_ = fopen(path, "wb");
        if (fp_ == 0) throw cybozu::Exception("PerMap") << path;
        prev_ = prev;
#endif
    }
    /* close the map file if it is open */
    void close()
    {
#ifdef __linux__
        if (fp_ != 0) {
            fclose(fp_);
            fp_ = 0;
            prev_ = 0;
        }
#endif
    }
    /* write one entry "<addr> <size> <name><suffix>" */
    void set(const uint8_t *p, size_t n, const char *name) const
    {
#ifndef __linux__
        (void)p;
        (void)n;
        (void)name;
#else
        if (fp_ == 0) return;
        fprintf(fp_, "%llx %zx %s%s\n", (long long)p, n, name, suf_);
#endif
    }
    /*
        write the entry [prev_, cur) under name, then continue from cur
    */
    void set(const char *name, const uint8_t *cur)
    {
#ifndef __linux__
        (void)name;
        (void)cur;
#else
        set(prev_, cur - prev_, name);
        prev_ = cur;
#endif
    }
};
struct FpGenerator : Xbyak::CodeGenerator {
typedef Xbyak::RegExp RegExp;
typedef Xbyak::Reg64 Reg64;
@ -268,7 +203,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
int pn_;
int FpByte_;
bool isFullBit_;
Profiler prof_;
Xbyak::util::Profiler prof_;
/*
@param op [in] ; use op.p, op.N, op.isFullBit
@ -331,9 +266,16 @@ private:
FpByte_ = int(op.maxN * sizeof(uint64_t));
isFullBit_ = op.isFullBit;
// printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_);
#ifdef MCL_USE_PROF
static char suf[] = "_0";
prof_.init(suf, getCurr());
suf[1]++;
const char *s = getenv("MCL_PROF");
if (s && s[0] && s[1] == '\0') {
prof_.init(s[0] - '0');
prof_.setStartAddr(getCurr());
prof_.setNameSuffix(suf);
suf[1]++;
}
#endif
op.fp_addPre = gen_addSubPre(true, pn_);
prof_.set("Fp_addPre", getCurr());

@ -113,7 +113,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5790 /* 0xABCD = A.BC(D) */
VERSION = 0x5802 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -551,6 +551,7 @@ inline void Operand::setBit(int bit)
idx_ = idx;
kind_ = kind;
bit_ = bit;
if (bit >= 128) return; // keep mask_ and rounding_
mask_ = 0;
rounding_ = 0;
return;

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.79"; }
const char *getVersionString() const { return "5.802"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@ -1684,6 +1684,8 @@ void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 |
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
@ -1709,6 +1711,7 @@ void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
@ -1769,6 +1772,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }

@ -54,6 +54,20 @@
#endif
#endif
#ifdef XBYAK_USE_VTUNE
// -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
#include <jitprofiling.h>
#ifdef _MSC_VER
#pragma comment(lib, "libittnotify.lib")
#endif
#ifdef __linux__
#include <dlfcn.h>
#endif
#endif
#ifdef __linux__
#define XBYAK_USE_PERF
#endif
namespace Xbyak { namespace util {
typedef enum {
@ -331,6 +345,8 @@ public:
static const Type tAVX512_VNNI = uint64(1) << 54;
static const Type tAVX512_BITALG = uint64(1) << 55;
static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
static const Type tAVX512_BF16 = uint64(1) << 57;
static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
Cpu()
: type_(NONE)
@ -410,6 +426,12 @@ public:
if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
}
// EAX=07H, ECX=1
getCpuidEx(7, 1, data);
if (type_ & tAVX512F) {
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
}
}
}
@ -722,5 +744,126 @@ private:
};
#endif
/*
    registration of JIT-generated functions to a profiler
    Perf  : append "<addr> <size> <name><suffix>" lines to /tmp/perf-<pid>.map
            (available only if XBYAK_USE_PERF is defined)
    VTune : notify each function through iJIT_NotifyEvent
            (available only if XBYAK_USE_VTUNE is defined)
    the mode stays None if the requested backend is not compiled in or
    can't be started, and set() then does nothing
*/
class Profiler {
    int mode_;
    const char *suffix_; // never NULL ; it is passed to "%s"
    const void *startAddr_;
#ifdef XBYAK_USE_PERF
    FILE *fp_;
#endif
public:
    enum {
        None = 0,
        Perf = 1,
        VTune = 2
    };
    Profiler()
        : mode_(None)
        , suffix_("")
        , startAddr_(0)
#ifdef XBYAK_USE_PERF
        , fp_(0)
#endif
    {
    }
    // append suffix to funcName ; NULL means no suffix
    void setNameSuffix(const char *suffix)
    {
        suffix_ = suffix ? suffix : "";
    }
    // start address used by the next set(funcName, endAddr)
    void setStartAddr(const void *startAddr)
    {
        startAddr_ = startAddr;
    }
    /*
        select the backend
        @param mode [in] None, Perf or VTune ; any other value is treated as None
        a failure is reported to stderr and leaves the mode None
    */
    void init(int mode)
    {
        mode_ = None;
        switch (mode) {
        default:
        case None:
            return;
        case Perf:
#ifdef XBYAK_USE_PERF
            close();
            {
                const int pid = getpid();
                char name[128];
                snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
                fp_ = fopen(name, "a+");
                if (fp_ == 0) {
                    fprintf(stderr, "can't open %s\n", name);
                    return;
                }
            }
            mode_ = Perf;
#endif
            return;
        case VTune:
#ifdef XBYAK_USE_VTUNE
            dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling
            if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) {
                fprintf(stderr, "VTune profiling is not active\n");
                return;
            }
            mode_ = VTune;
#endif
            return;
        }
    }
    ~Profiler()
    {
        close();
    }
    // close the perf map file if it is open
    void close()
    {
#ifdef XBYAK_USE_PERF
        if (fp_ == 0) return;
        fclose(fp_);
        fp_ = 0;
#endif
    }
    /*
        register the function [startAddr, startAddr + funcSize) as
        funcName + suffix ; does nothing if the mode is None
    */
    void set(const char *funcName, const void *startAddr, size_t funcSize) const
    {
        if (mode_ == None) return;
#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE)
        (void)funcName;
        (void)startAddr;
        (void)funcSize;
#endif
#ifdef XBYAK_USE_PERF
        if (mode_ == Perf) {
            if (fp_ == 0) return;
            fprintf(fp_, "%llx %zx %s%s\n", (long long)startAddr, funcSize, funcName, suffix_);
            // flush each line so that the file is complete whenever it is read
            fflush(fp_);
        }
#endif
#ifdef XBYAK_USE_VTUNE
        if (mode_ != VTune) return;
        char className[] = "";
        char fileName[] = "";
        iJIT_Method_Load jmethod = {};
        jmethod.method_id = iJIT_GetNewMethodID();
        jmethod.class_file_name = className;
        jmethod.source_file_name = fileName;
        jmethod.method_load_address = const_cast<void*>(startAddr);
        jmethod.method_size = funcSize;
        jmethod.line_number_size = 0;
        char buf[128];
        snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_);
        jmethod.method_name = buf;
        iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod);
#endif
    }
    /*
        for continuous set
        funcSize = endAddr - <previous set endAddr>
        the first call uses the address given to setStartAddr()
    */
    void set(const char *funcName, const void *endAddr)
    {
        set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_);
        startAddr_ = endAddr;
    }
};
} } // end of util
#endif

Loading…
Cancel
Save