diff --git a/Makefile b/Makefile index 39a415c..364c17e 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,13 @@ endif ifeq ($(MCL_USE_XBYAK),0) CFLAGS+=-DMCL_DONT_USE_XBYAK endif +ifeq ($(MCL_USE_PROF),1) + CFLAGS+=-DMCL_USE_PROF +endif +ifeq ($(MCL_USE_PROF),2) + CFLAGS+=-DMCL_USE_PROF -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ + LDFLAGS+=-L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl +endif ################################################################## MCL_LIB=$(LIB_DIR)/libmcl.a MCL_SNAME=mcl diff --git a/include/cybozu/atomic.hpp b/include/cybozu/atomic.hpp new file mode 100644 index 0000000..4ecade1 --- /dev/null +++ b/include/cybozu/atomic.hpp @@ -0,0 +1,144 @@ +#pragma once +/** + @file + @brief atomic operation + + @author MITSUNARI Shigeo(@herumi) + @author MITSUNARI Shigeo +*/ +#include +#ifdef _WIN32 +#include +#include +#include +#else +#include +#endif + +namespace cybozu { + +namespace atomic_local { + +template +struct Tag {}; + +template<> +struct Tag<4> { + template + static inline T AtomicAddSub(T *p, T y) + { +#ifdef _WIN32 + return (T)_InterlockedExchangeAdd((long*)p, (long)y); +#else + return static_cast(__sync_fetch_and_add(p, y)); +#endif + } + + template + static inline T AtomicCompareExchangeSub(T *p, T newValue, T oldValue) + { +#ifdef _WIN32 + return (T)_InterlockedCompareExchange((long*)p, (long)newValue, (long)oldValue); +#else + return static_cast(__sync_val_compare_and_swap(p, oldValue, newValue)); +#endif + } + + template + static inline T AtomicExchangeSub(T *p, T newValue) + { +#ifdef _WIN32 + return (T)_InterlockedExchange((long*)p, (long)newValue); +#else + return static_cast(__sync_lock_test_and_set(p, newValue)); +#endif + } +}; + +template<> +struct Tag<8> { +#if (CYBOZU_OS_BIT == 64) + template + static inline T AtomicAddSub(T *p, T y) + { +#ifdef _WIN32 + return (T)_InterlockedExchangeAdd64((int64_t*)p, (int64_t)y); +#else + return static_cast(__sync_fetch_and_add(p, y)); +#endif + } +#endif + + 
template + static inline T AtomicCompareExchangeSub(T *p, T newValue, T oldValue) + { +#ifdef _WIN32 + return (T)_InterlockedCompareExchange64((int64_t*)p, (int64_t)newValue, (int64_t)oldValue); +#else + return static_cast(__sync_val_compare_and_swap(p, oldValue, newValue)); +#endif + } + +#if (CYBOZU_OS_BIT == 64) + template + static inline T AtomicExchangeSub(T *p, T newValue) + { +#ifdef _WIN32 + return (T)_InterlockedExchange64((int64_t*)p, (int64_t)newValue); +#else + return static_cast(__sync_lock_test_and_set(p, newValue)); +#endif + } +#endif +}; + +} // atomic_local + +/** + atomic operation + see http://gcc.gnu.org/onlinedocs/gcc-4.4.0/gcc/Atomic-Builtins.html + http://msdn.microsoft.com/en-us/library/ms683504(VS.85).aspx +*/ +/** + tmp = *p; + *p += y; + return tmp; +*/ +template +T AtomicAdd(T *p, T y) +{ + return atomic_local::Tag::AtomicAddSub(p, y); +} + +/** + tmp = *p; + if (*p == oldValue) *p = newValue; + return tmp; +*/ +template +T AtomicCompareExchange(T *p, T newValue, T oldValue) +{ + return atomic_local::Tag::AtomicCompareExchangeSub(p, newValue, oldValue); +} + +/** + tmp = *p; + *p = newValue; + return tmp; +*/ +template +T AtomicExchange(T *p, T newValue) +{ + return atomic_local::Tag::AtomicExchangeSub(p, newValue); +} + +inline void mfence() +{ +#ifdef _MSC_VER + MemoryBarrier(); +#else + _mm_mfence(); +#endif +} + +} // cybozu diff --git a/include/cybozu/socket.hpp b/include/cybozu/socket.hpp new file mode 100644 index 0000000..b470c94 --- /dev/null +++ b/include/cybozu/socket.hpp @@ -0,0 +1,778 @@ +#pragma once +/** + @file + @brief tiny socket class + + @author MITSUNARI Shigeo(@herumi) + @author MITSUNARI Shigeo +*/ +#include +#include +#include +#ifdef _WIN32 + #include + #include // for socklen_t + #pragma comment(lib, "ws2_32.lib") + #pragma comment(lib, "iphlpapi.lib") + #pragma warning(push) + #pragma warning(disable : 4127) // constant condition +#else + #include + #include + #include + #include + #include + #include + 
#include + #include +#endif +#ifndef NDEBUG + #include +#endif + +#include +#include +#include +#include + +#ifdef __linux__ +// #define CYBOZU_SOCKET_USE_EPOLL + #include +#endif + +namespace cybozu { + +#ifdef _MSC_VER +struct NetErrorNo : public cybozu::ErrorNo { + NetErrorNo(NativeErrorNo err) + : cybozu::ErrorNo(err) + { + } + NetErrorNo() + : cybozu::ErrorNo(WSAGetLastError()) + { + } +}; +#else +typedef cybozu::ErrorNo NetErrorNo; +#endif + +#ifdef CYBOZU_SOCKET_USE_EPOLL + +namespace experimental { + +struct EpollEvent { + struct epoll_event ev_; + EpollEvent() + { + memset(&ev_, 0, sizeof(ev_)); + } + void set(int fd, uint32_t events = EPOLLIN) + { + ev_.events = events; + ev_.data.fd = fd; + } + int getFd() const { return ev_.data.fd; } +}; + +class Epoll { + int efd_; + bool verify(const char *msg, int ret, int *err) const { + if (ret >= 0) return true; + if (err == 0) throw cybozu::Exception(msg) << cybozu::NetErrorNo(); + *err = errno; + return false; + } +public: + Epoll() : efd_(-1) {} + bool init(int *err = 0) + { + efd_ = ::epoll_create1(0); + return verify("Epoll:init", efd_, err); + } + ~Epoll() + { + if (efd_ >= 0) ::close(efd_); + } + /* + throw if err == NULL; ev may be NULL (del() passes NULL), in which case a null event pointer is handed to epoll_ctl + */ + bool ctrl(int op, int fd, EpollEvent *ev, int *err = 0) { + int ret = ::epoll_ctl(efd_, op, fd, ev ? &ev->ev_ : 0); + return verify("Epoll:ctrl", ret, err); + } + bool add(int fd, uint32_t events = EPOLLIN, int *err = 0) { + EpollEvent ev; + ev.set(fd, events); + return ctrl(EPOLL_CTL_ADD, fd, &ev, err); + } + bool del(int fd, int *err = 0) { + return ctrl(EPOLL_CTL_DEL, fd, NULL, err); + } + /* + msec : 0 : block + */ + int wait(EpollEvent *ev, int maxEv, int msec = 0) + { + /* + 0 : return immediately + -1 : block indefinitely + */ + if (msec == 0) { + msec = -1; + } else if (msec == -1) { + msec = 0; + } + int ret = ::epoll_wait(efd_, &ev->ev_, maxEv, msec); + if (ret == 0) return 0; // timeout + if (ret < 0) return -errno; + return ret; + } +}; + +struct AutoLock { + Epoll& ep_; + int 
fd_; + AutoLock(Epoll& ep, int fd, int events = EPOLLIN) + : ep_(ep) + , fd_(fd) + { + ep_.add(fd, events); + } + ~AutoLock() + { + int err; + ep_.del(fd_, &err); + } +}; + +} // cybozu::experimental +#endif + +namespace ssl { +class ClientSocket; +}; + +namespace socket_local { + +#ifdef _WIN32 + typedef SOCKET SocketHandle; +#else + typedef int SocketHandle; +#endif + +struct InitTerm { + /** call once for init */ + InitTerm() + { +#ifdef _WIN32 + WSADATA data; + int err = ::WSAStartup(MAKEWORD(2, 2), &data); + if (err) { + fprintf(stderr, "WSAStartup failed : %d\n", err); + exit(1); + } +#else + ::signal(SIGPIPE, SIG_IGN); +#endif + } + /** call once for term */ + ~InitTerm() + { +#ifdef _WIN32 + ::WSACleanup(); +#endif + } + void dummyCall() { } +}; + +template +struct InstanceIsHere { static InitTerm it_; }; + +template +InitTerm InstanceIsHere::it_; + +struct DummyCall { + DummyCall() { InstanceIsHere<>::it_.dummyCall(); } +}; + +} // cybozu::socket_local + +class SocketAddr { + union { + // http://www.coins.tsukuba.ac.jp/~syspro/2010/No6_files/sockaddr.html + struct sockaddr sa; /* 16byte */ + struct sockaddr_in v4; /* 16byte */ + struct sockaddr_in6 v6; + } addr_; + socklen_t addrlen_; + int family_; + friend class Socket; + void verify() // call in only Socket::accept + { + if (addrlen_ == sizeof(addr_.v4) && addr_.sa.sa_family == AF_INET) { + family_ = AF_INET; + return; + } + if (addrlen_ == sizeof(addr_.v6) && addr_.sa.sa_family == AF_INET6) { + family_ = AF_INET6; + return; + } + throw cybozu::Exception("cybozu:SocketAddr:verify") << addrlen_; + } +public: + SocketAddr() + : addrlen_(0) + , family_(0) + { + } + SocketAddr(const std::string& address, uint16_t port, bool forceIpV6 = false) + { + set(address, port, forceIpV6); + } + void set(const std::string& address, uint16_t port, bool forceIpV6 = false) + { + char portStr[16]; + CYBOZU_SNPRINTF(portStr, sizeof(portStr), "%d", port); + memset(&addr_, 0, sizeof(addr_)); + addrlen_ = 0; + family_ = 0; + 
+ struct addrinfo *result = 0; + struct addrinfo hints; + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_flags = AI_NUMERICSERV; // AI_PASSIVE; + const int s = getaddrinfo(address.c_str(), portStr, &hints, &result); + // s == EAI_AGAIN + if (s || forceIpV6) { + hints.ai_family = AF_INET6; + hints.ai_flags |= AI_V4MAPPED; + if (getaddrinfo(address.c_str(), portStr, &hints, &result)) { + goto ERR_EXIT; + } + } + { + bool found = false; + for (const struct addrinfo *p = result; p; p = p->ai_next) { + const int family = p->ai_family; + if (family == hints.ai_family) { + if (p->ai_addrlen > sizeof(addr_)) { + break; + } + memcpy(&addr_, p->ai_addr, p->ai_addrlen); + addrlen_ = (socklen_t)p->ai_addrlen; + family_ = family; + found = true; + break; + } + } + freeaddrinfo(result); + if (found) return; + } + ERR_EXIT: + throw cybozu::Exception("SocketAddr:set") << address << port << cybozu::NetErrorNo(); + } + socklen_t getSize() const { return addrlen_; } + int getFamily() const { return family_; } + const struct sockaddr *get() const { return &addr_.sa; } + uint16_t getPort() const { + if (family_ == AF_INET) { + return ntohs(addr_.v4.sin_port); + } else if (family_ == AF_INET6) { + return ntohs(addr_.v6.sin6_port); + } + throw cybozu::Exception("SocketAddr:getPort:bad family") << family_; + } + // compare addr without port + bool hasSameAddr(const SocketAddr& rhs) const + { + const uint8_t *v4 = 0; + const uint8_t *v6 = 0; + if (family_ == AF_INET) { + if (rhs.family_ == AF_INET) return memcmp(&addr_.v4.sin_addr, &rhs.addr_.v4.sin_addr, sizeof(in_addr)) == 0; + if (rhs.family_ != AF_INET6) return false; + v4 = (const uint8_t*)&addr_.v4.sin_addr; + v6 = (const uint8_t*)&rhs.addr_.v6.sin6_addr; + } else if (family_ != AF_INET6) { + return false; + } else { + if (rhs.family_ == AF_INET6) return memcmp(&addr_.v6.sin6_addr, &rhs.addr_.v6.sin6_addr, sizeof(in6_addr)) == 
0; + if (rhs.family_ != AF_INET) return false; + v4 = (const uint8_t*)&rhs.addr_.v4.sin_addr; + v6 = (const uint8_t*)&addr_.v6.sin6_addr; + } + // Ipv6-mapped? + const uint8_t header[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff }; + return memcmp(v6, header, 12) == 0 && memcmp(v6 + 12, v4, 4) == 0; + } + std::string toStr() const + { + if (family_ == AF_INET || family_ == AF_INET6) { + char buf[INET6_ADDRSTRLEN]; + assert(INET_ADDRSTRLEN <= INET6_ADDRSTRLEN); + const bool isIPv4 = family_ == AF_INET; + const void *pa = isIPv4 ? (const void*)&addr_.v4.sin_addr : (const void*)&addr_.v6.sin6_addr; + // not "const void*" because of vc + const char *p = inet_ntop(family_, const_cast(pa), buf, sizeof(buf)); + if (!p) throw cybozu::Exception("cybozu:SocketAddr:toStr") << cybozu::NetErrorNo(); + if (isIPv4) return std::string(p) + ':' + cybozu::itoa(getPort()); + return std::string("[") + p + "]:" + cybozu::itoa(getPort()); + } + throw cybozu::Exception("cybozu:SocketAddr:toStr:bad family_") << family_; + } +}; +/* + socket class + @note ownership of the socket handle is moved (not duplicated) when a Socket is assigned +*/ +class Socket { + friend class cybozu::ssl::ClientSocket; +private: + cybozu::socket_local::SocketHandle sd_; + Socket(const Socket&); + void operator=(const Socket&); +#ifdef _WIN32 + void setTimeout(int type, int msec) + { + setSocketOption(type, msec); + } + /* return msec */ + int getTimeout(int type) const + { + return getSocketOption(type); + } +#else + void setTimeout(int type, int msec) + { + struct timeval t; + t.tv_sec = msec / 1000; + t.tv_usec = (msec % 1000) * 1000; + setSocketOption(type, t); + } + /* return msec */ + int getTimeout(int type) const + { + struct timeval t; + getSocketOption(type, &t); + return t.tv_sec * 1000 + t.tv_usec / 1000; /* msec */ + } +#endif + void setBlocking(bool isBlocking) + { +#ifdef _WIN32 + u_long val = isBlocking ? 0 : 1; + int ret = ::ioctlsocket(sd_, FIONBIO, &val); +#else + int val = isBlocking ? 
0 : 1; + int ret = ::ioctl(sd_, FIONBIO, &val); +#endif + if (ret < 0) throw cybozu::Exception("Socket:setBlocking") << cybozu::NetErrorNo() << isBlocking; + } +public: +#ifndef _WIN32 + static const int INVALID_SOCKET = -1; +#endif + Socket() + : sd_(INVALID_SOCKET) + { + } + + bool isValid() const { return sd_ != INVALID_SOCKET; } + + // move +#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11 + Socket(Socket&& rhs) + : sd_(INVALID_SOCKET) + { + sd_ = cybozu::AtomicExchange(&rhs.sd_, sd_); + } +#endif + // close and move + void moveFrom(Socket& rhs) + { + close(); + sd_ = cybozu::AtomicExchange(&rhs.sd_, INVALID_SOCKET); + } +#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11 + void operator=(Socket&& rhs) +#else + void operator=(Socket& rhs) +#endif + { + moveFrom(rhs); + } + + ~Socket() + { + close(cybozu::DontThrow); + } + + bool close(bool dontThrow = false) + { + cybozu::socket_local::SocketHandle sd = cybozu::AtomicExchange(&sd_, INVALID_SOCKET); + if (sd == INVALID_SOCKET) return true; +#ifdef _WIN32 + // ::shutdown(sd, SD_SEND); + // shutdown is called in closesocket + bool isOK = ::closesocket(sd) == 0; +#else + bool isOK = ::close(sd) == 0; +#endif + if (!dontThrow && !isOK) throw cybozu::Exception("Socket:close") << cybozu::NetErrorNo(); + return isOK; + } + /* + how 0 : SHUT_RD ; disallow read + 1 : SHUT_WR ; disallow write + 2 : SHUT_RDWR ; disallow read/write + */ + bool shutdown(int how, bool dontThrow = false) + { + bool isOK = ::shutdown(sd_, how) == 0; + if (!dontThrow && !isOK) throw cybozu::Exception("Socket:waitForClose:shutdown") << cybozu::NetErrorNo(); + return isOK; + } + /* + send FIN and wait for remote's close(). + this function is used for the following situation. + sock.write() + sock.waitForClose() + sock.close() + */ + void waitForClose() + { + if (sd_ == INVALID_SOCKET) return; + // send FIN and this socket can't write any data. + shutdown(1); + // wait for FIN from the peer. 
+ char buf[1]; + ssize_t readSize = readSome(buf, sizeof(buf)); + if (readSize != 0) { + throw cybozu::Exception("Socket:waitForClose:readSome:bad size") << readSize; + } + } + + /*! + receive data + @param buf [out] receive buffer + @param bufSize [in] receive buffer size(byte) + @note return read size + */ + size_t readSome(void *buf, size_t bufSize) + { + int size = (int)(std::min)((size_t)0x7fffffff, bufSize); +#ifdef _WIN32 + int readSize = ::recv(sd_, (char *)buf, size, 0); +#else + RETRY: + ssize_t readSize = ::read(sd_, buf, size); + if (readSize < 0 && errno == EINTR) goto RETRY; +#endif + if (readSize < 0) throw cybozu::Exception("Socket:readSome") << cybozu::NetErrorNo() << bufSize; + return readSize; + } + + /*! + receive all data unless timeout + @param buf [out] receive buffer + @param bufSize [in] receive buffer size(byte) + */ + void read(void *buf, size_t bufSize) + { + char *p = (char *)buf; + while (bufSize > 0) { + size_t readSize = readSome(p, bufSize); + if (readSize == 0) throw cybozu::Exception("Socket:read:readSize is zero"); + p += readSize; + bufSize -= readSize; + } + } + /*! 
+ write all data; *pb is set to true when everything was written and to false when the underlying send/write failed + @param buf [in] send buffer + @param bufSize [in] send buffer size(byte) + */ + void write(bool *pb, const void *buf, size_t bufSize) + { + const char *p = (const char *)buf; + while (bufSize > 0) { + int size = (int)(std::min)(size_t(0x7fffffff), bufSize); +#ifdef _WIN32 + int writeSize = ::send(sd_, p, size, 0); +#else + int writeSize = ::write(sd_, p, size); + if (writeSize < 0 && errno == EINTR) continue; +#endif + if (writeSize < 0) { + *pb = false; + return; + } + p += writeSize; + bufSize -= writeSize; + } + *pb = true; + } + void write(const void *buf, size_t bufSize) + { + bool b; + write(&b, buf, bufSize); + if (!b) throw cybozu::Exception("Socket:write") << cybozu::NetErrorNo() << bufSize; + } + /** + connect to address:port + @param address [in] address + @param port [in] port + @param msec: 0 : block + */ + void connect(const std::string& address, uint16_t port, int msec = 0) + { + SocketAddr addr; + addr.set(address, port); + connect(addr, msec); + } + /** + connect to resolved socket addr + */ + void connect(const cybozu::SocketAddr& addr, int msec = 0) + { + if (isValid()) throw cybozu::Exception("Socket:connect:already connect"); + sd_ = ::socket(addr.getFamily(), SOCK_STREAM, IPPROTO_TCP); + if (!isValid()) { + throw cybozu::Exception("Socket:connect:socket") << cybozu::NetErrorNo(); + } + if (msec == 0) { + if (::connect(sd_, addr.get(), addr.getSize()) < 0) { + throw cybozu::Exception("Socket:connect") << cybozu::NetErrorNo() << addr.toStr(); + } + } else { + setBlocking(false); + if (::connect(sd_, addr.get(), addr.getSize()) < 0) { +#ifdef _WIN32 + bool inProgress = WSAGetLastError() == WSAEWOULDBLOCK; +#else + bool inProgress = errno == EINPROGRESS; +#endif + if (!inProgress) throw cybozu::Exception("Socket:connect:not in progress") << cybozu::NetErrorNo() << addr.toStr(); + if (!queryAccept(msec, false)) throw cybozu::Exception("Socket:connect:timeout") << addr.toStr(); + int err = getSocketOption(SO_ERROR); + if (err != 
0) throw cybozu::Exception("Socket::connect:bad socket") << cybozu::NetErrorNo(err); + } + setBlocking(true); + } + } + + static const int allowIPv4 = 1; + static const int allowIPv6 = 2; + /** + init for server + @param port [in] port number + */ + void bind(uint16_t port, int mode = allowIPv4 | allowIPv6) + { + const int family = (mode & allowIPv6) ? AF_INET6 : AF_INET; + sd_ = ::socket(family, SOCK_STREAM, IPPROTO_TCP); + if (!isValid()) { + throw cybozu::Exception("Socket:bind:socket") << cybozu::NetErrorNo(); + } + setSocketOption(SO_REUSEADDR, 1); + struct sockaddr_in6 addr6; + struct sockaddr_in addr4; + struct sockaddr *addr; + socklen_t addrLen; + if (mode & allowIPv6) { + setSocketOption(IPV6_V6ONLY, (mode & allowIPv4) ? 0 : 1, IPPROTO_IPV6); + memset(&addr6, 0, sizeof(addr6)); + addr6.sin6_family = AF_INET6; + addr6.sin6_port = htons(port); + addr = (struct sockaddr*)&addr6; + addrLen = sizeof(addr6); + } else { + memset(&addr4, 0, sizeof(addr4)); + addr4.sin_family = AF_INET; + addr4.sin_port = htons(port); + addr = (struct sockaddr*)&addr4; + addrLen = sizeof(addr4); + } + if (::bind(sd_, addr, addrLen) == 0) { + if (::listen(sd_, SOMAXCONN) == 0) { + return; + } + } + cybozu::NetErrorNo keep; + close(cybozu::DontThrow); + throw cybozu::Exception("Socket:bind") << keep; + } + + /** + return positive if accepted + return zero if timeout + return negative(-errno) if error + */ + int queryAcceptNoThrow(int msec = 1000, bool checkWrite = true) + { + if (sd_ < 0) return -EBADF; +#ifdef CYBOZU_SOCKET_USE_EPOLL + int err; + experimental::Epoll ep; + if (!ep.init(&err)) return -err; + uint32_t events = checkWrite ? 
EPOLLIN : EPOLLOUT; + experimental::AutoLock al(ep, sd_, events); + experimental::EpollEvent ev; + int ret = ep.wait(&ev, 1, msec); + if (ret != 1) return ret; + assert(ev.getFd() == sd_); + return ret; +#else +#ifndef _WIN32 + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms739169.aspx + if (sd_ >= FD_SETSIZE) return -EMFILE; +#endif + struct timeval timeout; + timeout.tv_sec = msec / 1000; + timeout.tv_usec = (msec % 1000) * 1000; + fd_set fds; + FD_ZERO(&fds); + FD_SET((unsigned)sd_, &fds); + int fdNum; + if (checkWrite) { + fdNum = ::select((int)sd_ + 1, &fds, 0, 0, &timeout); + } else { + fdNum = ::select((int)sd_ + 1, 0, &fds, 0, &timeout); + } + if (fdNum < 0) return -errno; + return fdNum; +#endif + } + /** + return true if acceptable, otherwise false + return false if one second passed + while (!server.queryAccept()) { + } + client.accept(server); + */ + bool queryAccept(int msec = 1000, bool checkWrite = true) + { + int ret = queryAcceptNoThrow(msec, checkWrite); + if (ret < 0) throw cybozu::Exception("Socket:queryAccept") << cybozu::NetErrorNo(-ret); + return ret > 0; + } + + /** + accept for server + */ + void accept(Socket& client, SocketAddr *paddr = 0) const + { + if (paddr) { + struct sockaddr *psa = &paddr->addr_.sa; + paddr->addrlen_ = sizeof(paddr->addr_); + client.sd_ = ::accept(sd_, psa, &paddr->addrlen_); + paddr->verify(); + } else { + client.sd_ = ::accept(sd_, 0, 0); + } + if (!client.isValid()) throw cybozu::Exception("Socket:accept") << cybozu::NetErrorNo(); + } + + template + void setSocketOption(int optname, const T& value, int level = SOL_SOCKET) + { + bool isOK = setsockopt(sd_, level, optname, cybozu::cast(&value), sizeof(T)) == 0; + if (!isOK) throw cybozu::Exception("Socket:setSocketOption") << cybozu::NetErrorNo(); + } + template + void getSocketOption(int optname, T* value, int level = SOL_SOCKET) const + { + socklen_t len = (socklen_t)sizeof(T); + bool isOK = getsockopt(sd_, level, optname, cybozu::cast(value), 
&len) == 0; + if (!isOK) throw cybozu::Exception("Socket:getSocketOption") << cybozu::NetErrorNo(); + } + int getSocketOption(int optname) const + { + int ret; + getSocketOption(optname, &ret); + return ret; + } + /** + setup linger + */ + void setLinger(uint16_t l_onoff, uint16_t l_linger) + { + struct linger linger; + linger.l_onoff = l_onoff; + linger.l_linger = l_linger; + setSocketOption(SO_LINGER, &linger); + } + /** + get receive buffer size + @retval positive buffer size(byte) + @retval -1 error + */ + int getReceiveBufferSize() const + { + return getSocketOption(SO_RCVBUF); + } + /** + set receive buffer size + @param size [in] buffer size(byte) + */ + void setReceiveBufferSize(int size) + { + setSocketOption(SO_RCVBUF, size); + } + /** + get send buffer size + @retval positive buffer size(byte) + @retval -1 error + */ + int getSendBufferSize() const + { + return getSocketOption(SO_SNDBUF); + } + /** + sed send buffer size + @param size [in] buffer size(byte) + */ + void setSendBufferSize(int size) + { + setSocketOption(SO_SNDBUF, size); + } + /** + set send timeout + @param msec [in] msec + */ + void setSendTimeout(int msec) + { + setTimeout(SO_SNDTIMEO, msec); + } + /** + set receive timeout + @param msec [in] msec + */ + void setReceiveTimeout(int msec) + { + setTimeout(SO_RCVTIMEO, msec); + } + /** + get send timeout(msec) + */ + int getSendTimeout() const + { + return getTimeout(SO_SNDTIMEO); + } + /** + get receive timeout(msec) + */ + int getReceiveTimeout() const + { + return getTimeout(SO_RCVTIMEO); + } +}; + +} // cybozu + +#ifdef _WIN32 + #pragma warning(pop) +#endif diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index 3686239..1ab508a 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -7,9 +7,7 @@ http://opensource.org/licenses/BSD-3-Clause */ #include -#include -#include -#include +#include #include //#define MCL_EC_USE_AFFINE diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 97bb886..6185bb0 100644 --- 
a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -127,71 +127,6 @@ if (rm.isReg()) { \ namespace fp { -struct Profiler { - FILE *fp_; - const char *suf_; - const uint8_t *prev_; - Profiler() - : fp_(0) - , suf_(0) - , prev_(0) - { - } - void init(const char *suf, const uint8_t *prev) - { -#ifdef __linux__ - close(); - const char *s = getenv("MCL_PERF"); - if (s == 0 || strcmp(s, "1") != 0) return; - fprintf(stderr, "use perf suf=%s\n", suf); - suf_ = suf; - const int pid = getpid(); - char name[128]; - snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid); - fp_ = fopen(name, "wb"); - if (fp_ == 0) throw cybozu::Exception("PerMap") << name; - prev_ = prev; -#else - (void)suf; - (void)prev; -#endif - } - ~Profiler() - { - close(); - } - void close() - { -#ifdef __linux__ - if (fp_ == 0) return; - fclose(fp_); - fp_ = 0; - prev_ = 0; -#endif - } - void set(const uint8_t *p, size_t n, const char *name) const - { -#ifdef __linux__ - if (fp_ == 0) return; - fprintf(fp_, "%llx %zx %s%s\n", (long long)p, n, name, suf_); -#else - (void)p; - (void)n; - (void)name; -#endif - } - void set(const char *name, const uint8_t *cur) - { -#ifdef __linux__ - set(prev_, cur - prev_, name); - prev_ = cur; -#else - (void)name; - (void)cur; -#endif - } -}; - struct FpGenerator : Xbyak::CodeGenerator { typedef Xbyak::RegExp RegExp; typedef Xbyak::Reg64 Reg64; @@ -268,7 +203,7 @@ struct FpGenerator : Xbyak::CodeGenerator { int pn_; int FpByte_; bool isFullBit_; - Profiler prof_; + Xbyak::util::Profiler prof_; /* @param op [in] ; use op.p, op.N, op.isFullBit @@ -331,9 +266,16 @@ private: FpByte_ = int(op.maxN * sizeof(uint64_t)); isFullBit_ = op.isFullBit; // printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_); +#ifdef MCL_USE_PROF static char suf[] = "_0"; - prof_.init(suf, getCurr()); - suf[1]++; + const char *s = getenv("MCL_PROF"); + if (s && s[0] && s[1] == '\0') { + prof_.init(s[0] - '0'); + prof_.setStartAddr(getCurr()); + prof_.setNameSuffix(suf); + suf[1]++; + } +#endif 
op.fp_addPre = gen_addSubPre(true, pn_); prof_.set("Fp_addPre", getCurr()); diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h index c28a536..64b4ee3 100644 --- a/src/xbyak/xbyak.h +++ b/src/xbyak/xbyak.h @@ -113,7 +113,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5790 /* 0xABCD = A.BC(D) */ + VERSION = 0x5802 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -551,6 +551,7 @@ inline void Operand::setBit(int bit) idx_ = idx; kind_ = kind; bit_ = bit; + if (bit >= 128) return; // keep mask_ and rounding_ mask_ = 0; rounding_ = 0; return; diff --git a/src/xbyak/xbyak_mnemonic.h b/src/xbyak/xbyak_mnemonic.h index 2733c61..893a588 100644 --- a/src/xbyak/xbyak_mnemonic.h +++ b/src/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "5.79"; } +const char *getVersionString() const { return "5.802"; } void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } @@ -1684,6 +1684,8 @@ void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); } +void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } +void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } void vcvtpd2qq(const Xmm& x, const Operand& 
op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); } void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); } void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); } @@ -1709,6 +1711,7 @@ void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); } +void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); } void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); } void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); } void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); } @@ -1769,6 +1772,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); } void vmovdqu8(const Xmm& 
x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); } +void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); } void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); } void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); } void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); } diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h index c2474c5..04c661c 100644 --- a/src/xbyak/xbyak_util.h +++ b/src/xbyak/xbyak_util.h @@ -54,6 +54,20 @@ #endif #endif +#ifdef XBYAK_USE_VTUNE + // -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl + #include + #ifdef _MSC_VER + #pragma comment(lib, "libittnotify.lib") + #endif + #ifdef __linux__ + #include + #endif +#endif +#ifdef __linux__ + #define XBYAK_USE_PERF +#endif + namespace Xbyak { namespace util { typedef enum { @@ -331,6 +345,8 @@ public: static const Type tAVX512_VNNI = uint64(1) << 54; static const Type tAVX512_BITALG = uint64(1) << 55; static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56; + static const Type tAVX512_BF16 = uint64(1) << 57; + static const Type tAVX512_VP2INTERSECT = uint64(1) << 58; Cpu() : type_(NONE) @@ -410,6 +426,12 @@ public: if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ; if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW; if (EDX & 
(1U << 3)) type_ |= tAVX512_4FMAPS; + if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT; + } + // EAX=07H, ECX=1 + getCpuidEx(7, 1, data); + if (type_ & tAVX512F) { + if (EAX & (1U << 5)) type_ |= tAVX512_BF16; + } } } @@ -722,5 +744,126 @@ private: }; #endif +class Profiler { + int mode_; + const char *suffix_; + const void *startAddr_; +#ifdef XBYAK_USE_PERF + FILE *fp_; +#endif +public: + enum { + None = 0, + Perf = 1, + VTune = 2 + }; + Profiler() + : mode_(None) + , suffix_("") /* empty, not NULL: set() formats suffix_ with %s even if setNameSuffix() was never called */ + , startAddr_(0) +#ifdef XBYAK_USE_PERF + , fp_(0) +#endif + { + } + // append suffix to funcName + void setNameSuffix(const char *suffix) + { + suffix_ = suffix; + } + void setStartAddr(const void *startAddr) + { + startAddr_ = startAddr; + } + void init(int mode) + { + mode_ = None; + switch (mode) { + default: + case None: + return; + case Perf: +#ifdef XBYAK_USE_PERF + close(); + { + const int pid = getpid(); + char name[128]; + snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid); + fp_ = fopen(name, "a+"); + if (fp_ == 0) { + fprintf(stderr, "can't open %s\n", name); + return; + } + } + mode_ = Perf; +#endif + return; + case VTune: +#ifdef XBYAK_USE_VTUNE + dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling + if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) { + fprintf(stderr, "VTune profiling is not active\n"); + return; + } + mode_ = VTune; +#endif + return; + } + } + ~Profiler() + { + close(); + } + void close() + { +#ifdef XBYAK_USE_PERF + if (fp_ == 0) return; + fclose(fp_); + fp_ = 0; +#endif + } + void set(const char *funcName, const void *startAddr, size_t funcSize) const + { + if (mode_ == None) return; +#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE) + (void)funcName; + (void)startAddr; + (void)funcSize; +#endif +#ifdef XBYAK_USE_PERF + if (mode_ == Perf) { + if (fp_ == 0) return; + fprintf(fp_, "%llx %zx %s%s\n", (long long)startAddr, funcSize, funcName, suffix_); + fflush(fp_); + } +#endif +#ifdef XBYAK_USE_VTUNE + if (mode_ != 
VTune) return; + char className[] = ""; + char fileName[] = ""; + iJIT_Method_Load jmethod = {}; + jmethod.method_id = iJIT_GetNewMethodID(); + jmethod.class_file_name = className; + jmethod.source_file_name = fileName; + jmethod.method_load_address = const_cast(startAddr); + jmethod.method_size = funcSize; + jmethod.line_number_size = 0; + char buf[128]; + snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_); + jmethod.method_name = buf; + iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod); +#endif + } + /* + for continuous set + funcSize = endAddr - + */ + void set(const char *funcName, const void *endAddr) + { + set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_); + startAddr_ = endAddr; + } +}; + } } // end of util #endif