Skip to content
Snippets Groups Projects
Unverified Commit 0da03aa7 authored by Juncheng's avatar Juncheng Committed by GitHub
Browse files

Add hwloc for topology detection (#5291)


* Add hwloc for topology detection

* refine

* fix

* SetAffinityByDevice

* fix ninja build

* fix

* fix

Co-authored-by: default avatarLuyang <flowingsun007@163.com>
Co-authored-by: default avatarShenghang Tsai <jackalcooper@gmail.com>
Co-authored-by: default avataroneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
parent 69e73c1a
No related branches found
No related tags found
No related merge requests found
Showing
with 525 additions and 96 deletions
......@@ -34,6 +34,8 @@ if (WITH_TENSORRT)
include(tensorrt)
endif()
include(hwloc)
option(CUDA_STATIC "" ON)
if (BUILD_CUDA)
......@@ -235,6 +237,14 @@ if(BUILD_RDMA)
endif()
endif()
if(BUILD_HWLOC)
list(APPEND oneflow_third_party_dependencies hwloc)
list(APPEND oneflow_third_party_libs ${HWLOC_STATIC_LIBRARIES})
list(APPEND oneflow_third_party_libs ${PCIACCESS_STATIC_LIBRARIES})
list(APPEND ONEFLOW_INCLUDE_SRC_DIRS ${HWLOC_INCLUDE_DIR})
add_definitions(-DWITH_HWLOC)
endif()
include_directories(${ONEFLOW_INCLUDE_SRC_DIRS})
if(WITH_XLA)
......
include(ExternalProject)
if(UNIX AND NOT APPLE)
set(BUILD_HWLOC_DEFAULT ON)
else()
set(BUILD_HWLOC_DEFAULT OFF)
endif()
option(BUILD_HWLOC "" ${BUILD_HWLOC_DEFAULT})
if(BUILD_HWLOC)
set(PCIACCESS_INSTALL ${THIRD_PARTY_DIR}/pciaccess)
set(PCIACCESS_INCLUDE_DIR ${PCIACCESS_INSTALL}/include)
set(PCIACCESS_LIBRARY_DIR ${PCIACCESS_INSTALL}/lib)
set(PCIACCESS_LIBRARY_NAMES libpciaccess.a)
foreach(LIBRARY_NAME ${PCIACCESS_LIBRARY_NAMES})
list(APPEND PCIACCESS_STATIC_LIBRARIES ${PCIACCESS_LIBRARY_DIR}/${LIBRARY_NAME})
endforeach()
set(HWLOC_INSTALL ${THIRD_PARTY_DIR}/hwloc)
set(HWLOC_INCLUDE_DIR ${HWLOC_INSTALL}/include)
set(HWLOC_LIBRARY_DIR ${HWLOC_INSTALL}/lib)
set(HWLOC_LIBRARY_NAMES libhwloc.a)
foreach(LIBRARY_NAME ${HWLOC_LIBRARY_NAMES})
list(APPEND HWLOC_STATIC_LIBRARIES ${HWLOC_LIBRARY_DIR}/${LIBRARY_NAME})
endforeach()
if(THIRD_PARTY)
include(ProcessorCount)
ProcessorCount(PROC_NUM)
set(XORG_MACROS_INSTALL ${THIRD_PARTY_DIR}/xorg-macros)
set(XORG_MACROS_TAR_URL https://github.com/freedesktop/xorg-macros/archive/refs/tags/util-macros-1.19.1.tar.gz)
use_mirror(VARIABLE XORG_MACROS_TAR_URL URL ${XORG_MACROS_TAR_URL})
set(XORG_MACROS_URL_HASH 37afda9e9b44ecb9b2c16293bacd0e21)
set(XORG_MACROS_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/xorg-macros)
set(XORG_MACROS_PKG_CONFIG_DIR ${XORG_MACROS_INSTALL}/share/pkgconfig)
ExternalProject_Add(xorg-macros
PREFIX xorg-macros
URL ${XORG_MACROS_TAR_URL}
URL_HASH MD5=${XORG_MACROS_URL_HASH}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ${XORG_MACROS_SOURCE_DIR}/src/xorg-macros/autogen.sh COMMAND ${XORG_MACROS_SOURCE_DIR}/src/xorg-macros/configure --prefix=${XORG_MACROS_INSTALL}
BUILD_COMMAND make -j${PROC_NUM}
INSTALL_COMMAND make install
)
set(PCIACCESS_TAR_URL https://github.com/freedesktop/xorg-libpciaccess/archive/refs/tags/libpciaccess-0.16.tar.gz)
use_mirror(VARIABLE PCIACCESS_TAR_URL URL ${PCIACCESS_TAR_URL})
set(PCIACCESS_URL_HASH 92e2b604e294a9160bc977c000507340)
set(PCIACCESS_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/pciaccess)
set(PCIACCESS_CFLAGS "-O3 -fPIC")
ExternalProject_Add(pciaccess
PREFIX pciaccess
URL ${PCIACCESS_TAR_URL}
URL_HASH MD5=${PCIACCESS_URL_HASH}
UPDATE_COMMAND ""
PATCH_COMMAND cp ${XORG_MACROS_INSTALL}/share/aclocal/xorg-macros.m4 ${PCIACCESS_SOURCE_DIR}/src/pciaccess/m4
CONFIGURE_COMMAND ${PCIACCESS_SOURCE_DIR}/src/pciaccess/autogen.sh COMMAND ${PCIACCESS_SOURCE_DIR}/src/pciaccess/configure --prefix=${PCIACCESS_INSTALL}
BUILD_COMMAND make -j${PROC_NUM} CFLAGS=${PCIACCESS_CFLAGS}
BUILD_BYPRODUCTS ${PCIACCESS_STATIC_LIBRARIES}
INSTALL_COMMAND make install
DEPENDS xorg-macros
)
set(HWLOC_TAR_URL https://github.com/open-mpi/hwloc/archive/refs/tags/hwloc-2.4.1.tar.gz)
use_mirror(VARIABLE HWLOC_TAR_URL URL ${HWLOC_TAR_URL})
set(HWLOC_URL_HASH ac25fc7c2a665b7914c6c21b782f1c4f)
set(HWLOC_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hwloc)
set(HWLOC_CFLAGS "-O3 -fPIC")
ExternalProject_Add(hwloc
PREFIX hwloc
URL ${HWLOC_TAR_URL}
URL_HASH MD5=${HWLOC_URL_HASH}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ${HWLOC_SOURCE_DIR}/src/hwloc/autogen.sh COMMAND ${HWLOC_SOURCE_DIR}/src/hwloc/configure --prefix=${HWLOC_INSTALL} PKG_CONFIG_PATH=${PCIACCESS_INSTALL}/lib/pkgconfig --disable-libxml2 --enable-static
BUILD_COMMAND make -j${PROC_NUM} CFLAGS=${HWLOC_CFLAGS}
BUILD_BYPRODUCTS ${HWLOC_STATIC_LIBRARIES}
INSTALL_COMMAND make install
DEPENDS pciaccess
)
endif(THIRD_PARTY)
endif(BUILD_HWLOC)
......@@ -31,7 +31,11 @@ BasicDeviceDescriptorList::~BasicDeviceDescriptorList() = default;
size_t BasicDeviceDescriptorList::DeviceCount() const { return device_descriptor_list_.size(); }
std::shared_ptr<const DeviceDescriptor> BasicDeviceDescriptorList::GetDevice(size_t ordinal) const {
return device_descriptor_list_.at(ordinal);
if (ordinal < device_descriptor_list_.size()) {
return device_descriptor_list_.at(ordinal);
} else {
return nullptr;
}
}
} // namespace device
......
......@@ -35,6 +35,7 @@ constexpr char kJsonKeyComputeCapabilityMajor[] = "compute_capability_major";
constexpr char kJsonKeyComputeCapabilityMinor[] = "compute_capability_minor";
constexpr char kJsonKeyMemoryClockRate[] = "memory_clock_rate_khz";
constexpr char kJsonKeyMemoryBusWidth[] = "memory_bus_width_bit";
constexpr char kJsonKeyPCIBusID[] = "pci_bus_id";
} // namespace
......@@ -47,6 +48,7 @@ struct CudaDeviceDescriptor::Impl {
int32_t compute_capability_minor{};
int32_t memory_clock_rate_khz{};
int32_t memory_bus_width_bit{};
std::string pci_bus_id;
};
CudaDeviceDescriptor::CudaDeviceDescriptor() { impl_.reset(new Impl()); }
......@@ -75,6 +77,8 @@ int32_t CudaDeviceDescriptor::MemoryClockRateKHz() const { return impl_->memory_
int32_t CudaDeviceDescriptor::MemoryBusWidthBit() const { return impl_->memory_bus_width_bit; }
const std::string& CudaDeviceDescriptor::PCIBusID() const { return impl_->pci_bus_id; }
std::shared_ptr<const CudaDeviceDescriptor> CudaDeviceDescriptor::Query(int32_t ordinal) {
cudaDeviceProp prop{};
OF_CUDA_CHECK(cudaGetDeviceProperties(&prop, ordinal));
......@@ -87,6 +91,15 @@ std::shared_ptr<const CudaDeviceDescriptor> CudaDeviceDescriptor::Query(int32_t
desc->impl_->compute_capability_minor = prop.minor;
desc->impl_->memory_clock_rate_khz = prop.memoryClockRate;
desc->impl_->memory_bus_width_bit = prop.memoryBusWidth;
char pci_bus_id_buf[sizeof("00000000:00:00.0")];
if (cudaDeviceGetPCIBusId(pci_bus_id_buf, sizeof(pci_bus_id_buf), ordinal) == cudaSuccess) {
for (int i = 0; i < sizeof(pci_bus_id_buf) - 1; ++i) {
pci_bus_id_buf[i] = static_cast<char>(std::tolower(pci_bus_id_buf[i]));
}
desc->impl_->pci_bus_id = pci_bus_id_buf;
} else {
desc->impl_->pci_bus_id = "";
}
return std::shared_ptr<const CudaDeviceDescriptor>(desc);
}
......@@ -100,6 +113,7 @@ void CudaDeviceDescriptor::Serialize(std::string* serialized) const {
json_object[kJsonKeyComputeCapabilityMinor] = impl_->compute_capability_minor;
json_object[kJsonKeyMemoryClockRate] = impl_->memory_clock_rate_khz;
json_object[kJsonKeyMemoryBusWidth] = impl_->memory_bus_width_bit;
json_object[kJsonKeyPCIBusID] = impl_->pci_bus_id;
*serialized = json_object.dump(2);
}
......@@ -115,6 +129,7 @@ std::shared_ptr<const CudaDeviceDescriptor> CudaDeviceDescriptor::Deserialize(
desc->impl_->compute_capability_minor = json_object[kJsonKeyComputeCapabilityMinor];
desc->impl_->memory_clock_rate_khz = json_object[kJsonKeyMemoryClockRate];
desc->impl_->memory_bus_width_bit = json_object[kJsonKeyMemoryBusWidth];
desc->impl_->pci_bus_id = json_object[kJsonKeyPCIBusID];
return std::shared_ptr<const CudaDeviceDescriptor>(desc);
}
......
......@@ -40,6 +40,7 @@ class CudaDeviceDescriptor : public DeviceDescriptor {
int32_t ComputeCapabilityMinor() const;
int32_t MemoryClockRateKHz() const;
int32_t MemoryBusWidthBit() const;
const std::string& PCIBusID() const;
void Serialize(std::string* serialized) const;
static std::shared_ptr<const CudaDeviceDescriptor> Query(int32_t ordinal);
static std::shared_ptr<const CudaDeviceDescriptor> Deserialize(const std::string& serialized);
......
......@@ -54,10 +54,7 @@ class CudaDeviceDescriptorClass : public DeviceDescriptorClass {
return std::make_shared<const BasicDeviceDescriptorList>(devices);
}
const std::string& Name() const override {
static const std::string name = kCudaDeviceDescriptorClassName;
return name;
}
std::string Name() const override { return kCudaDeviceDescriptorClassName; }
void SerializeDeviceDescriptorList(const std::shared_ptr<const DeviceDescriptorList>& list,
std::string* serialized) const override {
......
......@@ -14,8 +14,9 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/device/cuda_util.h"
#include "oneflow/core/common/platform.h"
#include "oneflow/core/common/global.h"
#include "oneflow/core/device/node_device_descriptor_manager.h"
#include "oneflow/core/device/cuda_device_descriptor.h"
namespace oneflow {
......@@ -130,90 +131,33 @@ size_t GetAvailableGpuMemSize(int dev_id) {
return prop.totalGlobalMem;
}
#ifdef OF_PLATFORM_POSIX
namespace {
void ParseCpuMask(const std::string& cpu_mask, cpu_set_t* cpu_set) {
CPU_ZERO_S(sizeof(cpu_set_t), cpu_set);
const char* const head = cpu_mask.c_str();
const char* const tail = head + cpu_mask.size();
const char* pos = head;
std::vector<uint64_t> masks;
while (pos < tail) {
char* end_pos = nullptr;
const uint64_t mask = std::strtoul(pos, &end_pos, 16);
if (pos != head) {
CHECK_EQ(end_pos - pos, 8);
} else {
CHECK_NE(end_pos, pos);
CHECK_LE(end_pos - pos, 8);
}
if (end_pos < tail) { CHECK_EQ(*end_pos, ','); }
masks.push_back(mask);
pos = end_pos + 1;
}
int32_t cpu = 0;
for (int64_t i = masks.size() - 1; i >= 0; i--) {
for (uint64_t b = 0; b < 32; b++) {
if ((masks.at(i) & (1UL << b)) != 0) { CPU_SET_S(cpu, sizeof(cpu_set_t), cpu_set); }
cpu += 1;
}
}
}
std::string CudaDeviceGetCpuMask(int32_t dev_id) {
std::vector<char> pci_bus_id_buf(sizeof("0000:00:00.0"));
OF_CUDA_CHECK(cudaDeviceGetPCIBusId(pci_bus_id_buf.data(),
static_cast<int>(pci_bus_id_buf.size()), dev_id));
for (int32_t i = 0; i < pci_bus_id_buf.size(); ++i) {
pci_bus_id_buf[i] = std::tolower(pci_bus_id_buf[i]);
}
const std::string pci_bus_id(pci_bus_id_buf.data(), pci_bus_id_buf.size() - 1);
const std::string pci_bus_id_short = pci_bus_id.substr(0, sizeof("0000:00") - 1);
const std::string local_cpus_file =
"/sys/class/pci_bus/" + pci_bus_id_short + "/device/" + pci_bus_id + "/local_cpus";
char* cpu_map_path = realpath(local_cpus_file.c_str(), nullptr);
CHECK_NOTNULL(cpu_map_path);
std::ifstream is(cpu_map_path);
std::string cpu_mask;
CHECK(std::getline(is, cpu_mask).good());
is.close();
free(cpu_map_path);
return cpu_mask;
}
void CudaDeviceGetCpuAffinity(int32_t dev_id, cpu_set_t* cpu_set) {
const std::string cpu_mask = CudaDeviceGetCpuMask(dev_id);
ParseCpuMask(cpu_mask, cpu_set);
std::function<void(void**, size_t)> GetCudaMallocHostFn(int32_t dev) {
auto default_fn = [](void** ptr, size_t size) { cudaMallocHost(ptr, size); };
auto manager = Global<device::NodeDeviceDescriptorManager>::Get();
if (manager == nullptr) { return default_fn; }
auto node_desc = manager->GetLocalNodeDeviceDescriptor();
auto cuda_device = std::dynamic_pointer_cast<const device::CudaDeviceDescriptor>(
node_desc->GetDevice(device::kCudaDeviceDescriptorClassName, dev));
if (!cuda_device) { return default_fn; }
auto saved_affinity = node_desc->Topology()->GetMemoryAffinity();
if (!saved_affinity) { return default_fn; }
auto device_affinity =
node_desc->Topology()->GetMemoryAffinityByPCIBusID(cuda_device->PCIBusID());
if (!device_affinity) { return default_fn; }
return [device_affinity, saved_affinity, node_desc, default_fn](void** ptr, size_t size) {
node_desc->Topology()->SetMemoryAffinity(device_affinity);
default_fn(ptr, size);
node_desc->Topology()->SetMemoryAffinity(saved_affinity);
};
}
} // namespace
#endif
void NumaAwareCudaMallocHost(int32_t dev, void** ptr, size_t size) {
#ifdef OF_PLATFORM_POSIX
cpu_set_t new_cpu_set;
CudaDeviceGetCpuAffinity(dev, &new_cpu_set);
cpu_set_t saved_cpu_set;
CHECK_EQ(sched_getaffinity(0, sizeof(cpu_set_t), &saved_cpu_set), 0);
CHECK_EQ(sched_setaffinity(0, sizeof(cpu_set_t), &new_cpu_set), 0);
OF_CUDA_CHECK(cudaMallocHost(ptr, size));
CHECK_EQ(sched_setaffinity(0, sizeof(cpu_set_t), &saved_cpu_set), 0);
#else
UNIMPLEMENTED();
#endif
}
void CudaDeviceSetCpuAffinity(int32_t dev) {
#ifdef OF_PLATFORM_POSIX
cpu_set_t new_cpu_set;
CudaDeviceGetCpuAffinity(dev, &new_cpu_set);
CHECK_EQ(sched_setaffinity(0, sizeof(cpu_set_t), &new_cpu_set), 0);
#else
UNIMPLEMENTED();
#endif
auto fn = GetCudaMallocHostFn(dev);
fn(ptr, size);
}
cudaDataType_t GetCudaDataType(DataType val) {
......
......@@ -31,7 +31,7 @@ class DeviceClassRegistryStorage {
~DeviceClassRegistryStorage() = default;
void Register(std::shared_ptr<const DeviceDescriptorClass> descriptor_class) {
std::lock_guard<std::mutex> lock(mutex_);
const std::string& name = descriptor_class->Name();
const std::string name = descriptor_class->Name();
if (!name2index_.emplace(name, classes_.size()).second) { abort(); }
classes_.emplace_back(std::make_shared<std::string>(name), std::move(descriptor_class));
}
......
......@@ -28,7 +28,7 @@ class DeviceDescriptorClass {
virtual ~DeviceDescriptorClass() = default;
virtual std::shared_ptr<const DeviceDescriptorList> QueryDeviceDescriptorList() const = 0;
virtual const std::string& Name() const = 0;
virtual std::string Name() const = 0;
virtual void SerializeDeviceDescriptorList(
const std::shared_ptr<const DeviceDescriptorList>& list, std::string* serialized) const = 0;
virtual std::shared_ptr<const DeviceDescriptorList> DeserializeDeviceDescriptorList(
......
......@@ -31,6 +31,19 @@ constexpr char kJsonKeyGUID[] = "guid";
constexpr char kJsonKeyPort[] = "port";
constexpr char kJsonKeyLankLayer[] = "link_layer";
constexpr char kJsonValueLinkLayerInfiniBand[] = "InfiniBand";
constexpr char kJsonKeyPCIBusID[] = "pci_bus_id";
void GetPCIBusID(const std::string& name, std::string* pci_bus_id) {
#ifdef __linux__
const std::string device_path = "/sys/class/infiniband/" + name + "/device";
const char* device_real_path = realpath(device_path.data(), nullptr);
if (device_real_path == nullptr) { return; }
const std::string device_real_path_str = device_real_path;
const size_t pos = device_real_path_str.rfind('/');
if (pos == std::string::npos) { return; }
*pci_bus_id = device_real_path_str.substr(pos + 1);
#endif
}
} // namespace
......@@ -40,6 +53,7 @@ struct NetIBDeviceDescriptor::Impl {
uint64_t guid{};
uint8_t port{};
NetIBDeviceDescriptorLinkLayer link_layer{};
std::string pci_bus_id;
};
NetIBDeviceDescriptor::NetIBDeviceDescriptor() { impl_.reset(new Impl()); }
......@@ -58,6 +72,8 @@ NetIBDeviceDescriptorLinkLayer NetIBDeviceDescriptor::LinkLayer() const {
return impl_->link_layer;
}
const std::string& NetIBDeviceDescriptor::PCIBusID() const { return impl_->pci_bus_id; }
void NetIBDeviceDescriptor::Serialize(std::string* serialized) const {
nlohmann::json json_object;
json_object[kJsonKeyOrdinal] = impl_->ordinal;
......@@ -69,6 +85,7 @@ void NetIBDeviceDescriptor::Serialize(std::string* serialized) const {
} else {
UNIMPLEMENTED();
}
json_object[kJsonKeyPCIBusID] = impl_->pci_bus_id;
*serialized = json_object.dump(2);
}
......@@ -106,6 +123,7 @@ std::shared_ptr<const NetIBDeviceDescriptor> NetIBDeviceDescriptor::Query(int32_
} else {
UNIMPLEMENTED();
}
GetPCIBusID(desc->impl_->name, &desc->impl_->pci_bus_id);
return std::shared_ptr<const NetIBDeviceDescriptor>(desc);
}
......@@ -123,6 +141,7 @@ std::shared_ptr<const NetIBDeviceDescriptor> NetIBDeviceDescriptor::Deserialize(
} else {
UNIMPLEMENTED();
}
desc->impl_->pci_bus_id = json_object[kJsonKeyPCIBusID];
return std::shared_ptr<const NetIBDeviceDescriptor>(desc);
}
......
......@@ -44,6 +44,7 @@ class NetIBDeviceDescriptor : public DeviceDescriptor {
uint64_t GUID() const;
uint8_t Port() const;
NetIBDeviceDescriptorLinkLayer LinkLayer() const;
const std::string& PCIBusID() const;
void Serialize(std::string* serialized) const;
static std::shared_ptr<const NetIBDeviceDescriptor> Query(int32_t ordinal, ibv_context* context,
uint8_t port);
......
......@@ -65,10 +65,7 @@ class NetIBDeviceDescriptorClass : public DeviceDescriptorClass {
return std::make_shared<const BasicDeviceDescriptorList>(devices);
}
const std::string& Name() const override {
static const std::string name = kNetIBDeviceDescriptorClassName;
return name;
}
std::string Name() const override { return kNetIBDeviceDescriptorClassName; }
void SerializeDeviceDescriptorList(const std::shared_ptr<const DeviceDescriptorList>& list,
std::string* serialized) const override {
......
......@@ -28,6 +28,19 @@ namespace {
constexpr char kJsonKeyOrdinal[] = "ordinal";
constexpr char kJsonKeyName[] = "name";
constexpr char kJsonKeyAddress[] = "address";
constexpr char kJsonKeyPCIBusID[] = "pci_bus_id";
void GetPCIBusID(const std::string& name, std::string* pci_bus_id) {
#ifdef __linux__
const std::string device_path = "/sys/class/net/" + name + "/device";
const char* device_real_path = realpath(device_path.data(), nullptr);
if (device_real_path == nullptr) { return; }
const std::string device_real_path_str = device_real_path;
const size_t pos = device_real_path_str.rfind('/');
if (pos == std::string::npos) { return; }
*pci_bus_id = device_real_path_str.substr(pos + 1);
#endif
}
} // namespace
......@@ -35,6 +48,7 @@ struct NetSocketDeviceDescriptor::Impl {
int32_t ordinal{};
std::string name;
std::string address;
std::string pci_bus_id;
};
NetSocketDeviceDescriptor::NetSocketDeviceDescriptor() { impl_.reset(new Impl()); }
......@@ -47,11 +61,14 @@ const std::string& NetSocketDeviceDescriptor::Name() const { return impl_->name;
const std::string& NetSocketDeviceDescriptor::Address() const { return impl_->address; }
const std::string& NetSocketDeviceDescriptor::PCIBusID() const { return impl_->pci_bus_id; }
void NetSocketDeviceDescriptor::Serialize(std::string* serialized) const {
nlohmann::json json_object;
json_object[kJsonKeyOrdinal] = impl_->ordinal;
json_object[kJsonKeyName] = impl_->name;
json_object[kJsonKeyAddress] = impl_->address;
json_object[kJsonKeyPCIBusID] = impl_->pci_bus_id;
*serialized = json_object.dump(2);
}
......@@ -61,6 +78,7 @@ std::shared_ptr<const NetSocketDeviceDescriptor> NetSocketDeviceDescriptor::Quer
desc->impl_->ordinal = ordinal;
desc->impl_->name = name;
desc->impl_->address = address;
GetPCIBusID(name, &desc->impl_->pci_bus_id);
return std::shared_ptr<const NetSocketDeviceDescriptor>(desc);
}
......@@ -71,6 +89,7 @@ std::shared_ptr<const NetSocketDeviceDescriptor> NetSocketDeviceDescriptor::Dese
desc->impl_->ordinal = json_object[kJsonKeyOrdinal];
desc->impl_->name = json_object[kJsonKeyName];
desc->impl_->address = json_object[kJsonKeyAddress];
desc->impl_->pci_bus_id = json_object[kJsonKeyPCIBusID];
return std::shared_ptr<const NetSocketDeviceDescriptor>(desc);
}
......
......@@ -36,6 +36,7 @@ class NetSocketDeviceDescriptor : public DeviceDescriptor {
int32_t Ordinal() const;
const std::string& Name() const;
const std::string& Address() const;
const std::string& PCIBusID() const;
void Serialize(std::string* serialized) const;
static std::shared_ptr<const NetSocketDeviceDescriptor> Query(int32_t ordinal,
const std::string& name,
......
......@@ -78,10 +78,7 @@ class NetSocketDeviceDescriptorClass : public DeviceDescriptorClass {
std::vector<std::shared_ptr<const DeviceDescriptor>>{devices.begin(), devices.end()});
}
const std::string& Name() const override {
static const std::string name = kNetSocketDeviceDescriptorClassName;
return name;
}
std::string Name() const override { return kNetSocketDeviceDescriptorClassName; }
void SerializeDeviceDescriptorList(const std::shared_ptr<const DeviceDescriptorList>& list,
std::string* serialized) const override {
......
......@@ -16,7 +16,11 @@ limitations under the License.
#include "oneflow/core/device/node_device_descriptor.h"
#include "oneflow/core/device/device_descriptor_class.h"
#include "oneflow/core/common/str_util.h"
#include "oneflow/core/persistence/tee_persistent_log_stream.h"
#include <json.hpp>
#ifdef WITH_HWLOC
#include <hwloc.h>
#endif // WITH_HWLOC
namespace oneflow {
......@@ -28,6 +32,215 @@ constexpr char kJsonKeyClasses[] = "classes";
constexpr char kJsonKeyClassName[] = "class_name";
constexpr char kJsonKeySerializedDescriptorList[] = "serialized_descriptor_list";
constexpr char kJsonKeyHostMemorySize[] = "host_memory_size_bytes";
constexpr char kJsonKeyTopology[] = "topology";
class DummyCPUAffinityDescriptor : public TopologyCPUAffinityDescriptor {
public:
DummyCPUAffinityDescriptor() = default;
~DummyCPUAffinityDescriptor() override = default;
};
class DummyMemoryAffinityDescriptor : public TopologyMemoryAffinityDescriptor {
public:
DummyMemoryAffinityDescriptor() = default;
~DummyMemoryAffinityDescriptor() override = default;
};
class DummyTopologyDescriptor : public TopologyDescriptor {
public:
DummyTopologyDescriptor() = default;
~DummyTopologyDescriptor() override = default;
std::shared_ptr<const TopologyCPUAffinityDescriptor> GetCPUAffinity() const override {
return std::make_shared<const DummyCPUAffinityDescriptor>();
}
std::shared_ptr<const TopologyMemoryAffinityDescriptor> GetMemoryAffinity() const override {
return std::make_shared<const DummyMemoryAffinityDescriptor>();
}
std::shared_ptr<const TopologyCPUAffinityDescriptor> GetCPUAffinityByPCIBusID(
const std::string& bus_id) const override {
return std::make_shared<const DummyCPUAffinityDescriptor>();
}
std::shared_ptr<const TopologyMemoryAffinityDescriptor> GetMemoryAffinityByPCIBusID(
const std::string& bus_id) const override {
return std::make_shared<const DummyMemoryAffinityDescriptor>();
}
void SetCPUAffinity(
const std::shared_ptr<const TopologyCPUAffinityDescriptor>& affinity) const override {}
void SetMemoryAffinity(
const std::shared_ptr<const TopologyMemoryAffinityDescriptor>& affinity) const override {}
};
#ifdef WITH_HWLOC
class HWLocCPUAffinityDescriptor : public TopologyCPUAffinityDescriptor {
public:
OF_DISALLOW_COPY_AND_MOVE(HWLocCPUAffinityDescriptor);
explicit HWLocCPUAffinityDescriptor(hwloc_cpuset_t hwloc_cpu_set)
: hwloc_cpu_set_(hwloc_cpu_set) {}
~HWLocCPUAffinityDescriptor() override { hwloc_bitmap_free(hwloc_cpu_set_); }
hwloc_cpuset_t HWLocCPUSet() const { return hwloc_cpu_set_; }
private:
hwloc_cpuset_t hwloc_cpu_set_;
};
class HWLocMemoryAffinityDescriptor : public TopologyMemoryAffinityDescriptor {
public:
OF_DISALLOW_COPY_AND_MOVE(HWLocMemoryAffinityDescriptor);
explicit HWLocMemoryAffinityDescriptor(hwloc_bitmap_t hwloc_bitmap, hwloc_membind_policy_t policy)
: hwloc_bitmap_(hwloc_bitmap), policy_(policy) {}
~HWLocMemoryAffinityDescriptor() override { hwloc_bitmap_free(hwloc_bitmap_); }
hwloc_bitmap_t HWLocBitmap() const { return hwloc_bitmap_; }
hwloc_membind_policy_t HWLocPolicy() const { return policy_; }
private:
hwloc_bitmap_t hwloc_bitmap_;
hwloc_membind_policy_t policy_;
};
class HWLocTopologyDescriptor : public TopologyDescriptor {
public:
~HWLocTopologyDescriptor() override { hwloc_topology_destroy(topology_); }
std::shared_ptr<const TopologyCPUAffinityDescriptor> GetCPUAffinity() const override {
hwloc_bitmap_t set = hwloc_bitmap_alloc();
if (hwloc_get_cpubind(topology_, set, HWLOC_CPUBIND_THREAD) != 0) { return nullptr; }
return std::make_shared<const HWLocCPUAffinityDescriptor>(set);
}
std::shared_ptr<const TopologyMemoryAffinityDescriptor> GetMemoryAffinity() const override {
hwloc_bitmap_t set = hwloc_bitmap_alloc();
hwloc_membind_policy_t policy;
if (hwloc_get_membind(topology_, set, &policy, HWLOC_MEMBIND_THREAD) != 0) { return nullptr; }
return std::make_shared<const HWLocMemoryAffinityDescriptor>(set, policy);
}
std::shared_ptr<const TopologyCPUAffinityDescriptor> GetCPUAffinityByPCIBusID(
const std::string& bus_id) const override {
hwloc_obj_t non_io_ancestor = GetNonIOAncestorByPCIBusID(bus_id);
if (non_io_ancestor == nullptr) { return nullptr; }
if (non_io_ancestor->cpuset == nullptr) { return nullptr; }
return std::make_shared<const HWLocCPUAffinityDescriptor>(
hwloc_bitmap_dup(non_io_ancestor->cpuset));
}
std::shared_ptr<const TopologyMemoryAffinityDescriptor> GetMemoryAffinityByPCIBusID(
const std::string& bus_id) const override {
hwloc_obj_t non_io_ancestor = GetNonIOAncestorByPCIBusID(bus_id);
if (non_io_ancestor == nullptr) { return nullptr; }
if (non_io_ancestor->nodeset == nullptr) { return nullptr; }
return std::make_shared<const HWLocMemoryAffinityDescriptor>(
hwloc_bitmap_dup(non_io_ancestor->nodeset), HWLOC_MEMBIND_BIND);
}
void SetCPUAffinity(
const std::shared_ptr<const TopologyCPUAffinityDescriptor>& affinity) const override {
auto hwloc_affinity = std::dynamic_pointer_cast<const HWLocCPUAffinityDescriptor>(affinity);
if (!hwloc_affinity) { return; }
hwloc_set_cpubind(topology_, hwloc_affinity->HWLocCPUSet(), HWLOC_CPUBIND_THREAD);
}
void SetMemoryAffinity(
const std::shared_ptr<const TopologyMemoryAffinityDescriptor>& affinity) const override {
auto hwloc_affinity = std::dynamic_pointer_cast<const HWLocMemoryAffinityDescriptor>(affinity);
if (!hwloc_affinity) { return; }
hwloc_set_membind(topology_, hwloc_affinity->HWLocBitmap(), hwloc_affinity->HWLocPolicy(),
HWLOC_MEMBIND_THREAD);
}
static std::shared_ptr<const HWLocTopologyDescriptor> Query() {
hwloc_topology_t topology = nullptr;
do {
if (hwloc_topology_init(&topology) != 0) { break; }
if (hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL) != 0) { break; }
if (hwloc_topology_load(topology) != 0) { break; }
auto* desc = new HWLocTopologyDescriptor(topology);
return std::shared_ptr<const HWLocTopologyDescriptor>(desc);
} while (false);
if (topology != nullptr) { hwloc_topology_destroy(topology); }
return nullptr;
}
static std::shared_ptr<const HWLocTopologyDescriptor> Deserialize(const std::string& serialized) {
hwloc_topology_t topology = nullptr;
do {
if (hwloc_topology_init(&topology) != 0) { break; }
if (hwloc_topology_set_xmlbuffer(topology, serialized.data(),
static_cast<int>(serialized.size()))
!= 0) {
break;
}
if (hwloc_topology_load(topology) != 0) { break; }
auto* desc = new HWLocTopologyDescriptor(topology);
return std::shared_ptr<const HWLocTopologyDescriptor>(desc);
} while (false);
if (topology != nullptr) { hwloc_topology_destroy(topology); }
return nullptr;
}
void Serialize(std::string* serialized) const {
char* buffer;
int len;
if (hwloc_topology_export_xmlbuffer(topology_, &buffer, &len, 0) == 0) {
*serialized = buffer;
hwloc_free_xmlbuffer(topology_, buffer);
}
}
private:
hwloc_obj_t GetNonIOAncestorByPCIBusID(const std::string& pci_bus_id) const {
hwloc_obj_t device = hwloc_get_pcidev_by_busidstring(topology_, pci_bus_id.data());
if (device == nullptr) { return nullptr; }
hwloc_obj_t non_io_ancestor = hwloc_get_non_io_ancestor_obj(topology_, device);
return non_io_ancestor;
}
explicit HWLocTopologyDescriptor(hwloc_topology_t topology) : topology_(topology) {}
hwloc_topology_t topology_;
};
#endif // WITH_HWLOC
std::shared_ptr<const TopologyDescriptor> QueryTopologyDescriptor() {
std::shared_ptr<const TopologyDescriptor> topology;
#ifdef WITH_HWLOC
topology = HWLocTopologyDescriptor::Query();
#endif // WITH_HWLOC
if (!topology) { topology.reset(new DummyTopologyDescriptor()); }
return topology;
}
std::shared_ptr<const TopologyDescriptor> DeserializeTopologyDescriptor(
const std::string& serialized) {
std::shared_ptr<const TopologyDescriptor> topology;
if (serialized.empty()) {
topology.reset(new DummyTopologyDescriptor());
} else {
#ifdef WITH_HWLOC
topology = HWLocTopologyDescriptor::Deserialize(serialized);
#else
UNIMPLEMENTED();
#endif // WITH_HWLOC
}
if (!topology) { topology.reset(new DummyTopologyDescriptor()); }
return topology;
}
void SerializeTopologyDescriptor(const std::shared_ptr<const TopologyDescriptor>& topology,
std::string* serialized) {
#ifdef WITH_HWLOC
auto hwloc_topology = std::dynamic_pointer_cast<const HWLocTopologyDescriptor>(topology);
if (hwloc_topology) { hwloc_topology->Serialize(serialized); }
#endif // WITH_HWLOC
}
} // namespace
......@@ -35,6 +248,7 @@ struct NodeDeviceDescriptor::Impl {
std::unordered_map<std::string, std::shared_ptr<const DeviceDescriptorList>>
class_name2descriptor_list;
size_t host_memory_size_bytes{};
std::shared_ptr<const TopologyDescriptor> topology;
};
NodeDeviceDescriptor::NodeDeviceDescriptor() { impl_.reset(new Impl()); }
......@@ -49,12 +263,29 @@ bool NodeDeviceDescriptor::HasDeviceClass(const std::string& class_name) const {
std::shared_ptr<const DeviceDescriptorList> NodeDeviceDescriptor::GetDeviceDescriptorList(
const std::string& class_name) const {
auto it = impl_->class_name2descriptor_list.find(class_name);
CHECK(it != impl_->class_name2descriptor_list.end());
return it->second;
if (it != impl_->class_name2descriptor_list.end()) {
return it->second;
} else {
return nullptr;
}
}
std::shared_ptr<const DeviceDescriptor> NodeDeviceDescriptor::GetDevice(
const std::string& class_name, size_t ordinal) const {
const auto device_list = GetDeviceDescriptorList(class_name);
if (device_list) {
return device_list->GetDevice(ordinal);
} else {
return nullptr;
}
}
size_t NodeDeviceDescriptor::HostMemorySizeBytes() const { return impl_->host_memory_size_bytes; }
std::shared_ptr<const TopologyDescriptor> NodeDeviceDescriptor::Topology() const {
return impl_->topology;
}
void NodeDeviceDescriptor::Serialize(std::string* serialized) const {
nlohmann::json json_object;
json_object[kJsonKeyHostMemorySize] = impl_->host_memory_size_bytes;
......@@ -67,6 +298,9 @@ void NodeDeviceDescriptor::Serialize(std::string* serialized) const {
{{kJsonKeyClassName, clz->Name()},
{kJsonKeySerializedDescriptorList, serialized_descriptor_list}});
}
std::string serialized_topology;
SerializeTopologyDescriptor(impl_->topology, &serialized_topology);
json_object[kJsonKeyTopology] = serialized_topology;
*serialized = json_object.dump();
}
......@@ -77,6 +311,11 @@ void NodeDeviceDescriptor::DumpSummary(const std::string& path) const {
CHECK(clz);
clz->DumpDeviceDescriptorListSummary(pair.second, JoinPath(classes_base, pair.first));
}
std::string serialized_topology;
SerializeTopologyDescriptor(impl_->topology, &serialized_topology);
if (!serialized_topology.empty()) {
TeePersistentLogStream::Create(JoinPath(path, "topology"))->Write(serialized_topology);
}
}
std::shared_ptr<const NodeDeviceDescriptor> NodeDeviceDescriptor::Query() {
......@@ -89,6 +328,7 @@ std::shared_ptr<const NodeDeviceDescriptor> NodeDeviceDescriptor::Query() {
desc->impl_->class_name2descriptor_list.emplace(descriptor_class->Name(),
descriptor_class->QueryDeviceDescriptorList());
}
desc->impl_->topology = QueryTopologyDescriptor();
return std::shared_ptr<const NodeDeviceDescriptor>(desc);
}
......@@ -107,6 +347,7 @@ std::shared_ptr<const NodeDeviceDescriptor> NodeDeviceDescriptor::Deserialize(
const auto descriptor_list = clz->DeserializeDeviceDescriptorList(serialized_descriptor_list);
desc->impl_->class_name2descriptor_list.emplace(class_name, descriptor_list);
}
desc->impl_->topology = DeserializeTopologyDescriptor(json_object[kJsonKeyTopology]);
return std::shared_ptr<const NodeDeviceDescriptor>(desc);
}
......
......@@ -17,6 +17,7 @@ limitations under the License.
#define ONEFLOW_CORE_DEVICE_NODE_DEVICE_DESCRIPTOR_H_
#include "oneflow/core/device/device_descriptor_list.h"
#include "oneflow/core/device/topology_descriptor.h"
namespace oneflow {
......@@ -29,7 +30,10 @@ class NodeDeviceDescriptor {
bool HasDeviceClass(const std::string& class_name) const;
std::shared_ptr<const DeviceDescriptorList> GetDeviceDescriptorList(
const std::string& class_name) const;
std::shared_ptr<const DeviceDescriptor> GetDevice(const std::string& class_name,
size_t ordinal) const;
size_t HostMemorySizeBytes() const;
std::shared_ptr<const TopologyDescriptor> Topology() const;
void Serialize(std::string* serialized) const;
void DumpSummary(const std::string& path) const;
......
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/device/topology_descriptor.h"
namespace oneflow {
namespace device {
void TopologyDescriptor::SetCPUAffinityByPCIBusID(const std::string& bus_id) const {
SetCPUAffinity(GetCPUAffinityByPCIBusID(bus_id));
}
void TopologyDescriptor::SetMemoryAffinityByPCIBusID(const std::string& bus_id) const {
SetMemoryAffinity(GetMemoryAffinityByPCIBusID(bus_id));
}
} // namespace device
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_DEVICE_TOPOLOGY_DESCRIPTOR_H_
#define ONEFLOW_CORE_DEVICE_TOPOLOGY_DESCRIPTOR_H_
#include <string>
#include <memory>
namespace oneflow {
namespace device {
class TopologyCPUAffinityDescriptor {
public:
virtual ~TopologyCPUAffinityDescriptor() = default;
};
class TopologyMemoryAffinityDescriptor {
public:
virtual ~TopologyMemoryAffinityDescriptor() = default;
};
class TopologyDescriptor {
public:
virtual ~TopologyDescriptor() = default;
virtual std::shared_ptr<const TopologyCPUAffinityDescriptor> GetCPUAffinity() const = 0;
virtual std::shared_ptr<const TopologyMemoryAffinityDescriptor> GetMemoryAffinity() const = 0;
virtual std::shared_ptr<const TopologyCPUAffinityDescriptor> GetCPUAffinityByPCIBusID(
const std::string& bus_id) const = 0;
virtual std::shared_ptr<const TopologyMemoryAffinityDescriptor> GetMemoryAffinityByPCIBusID(
const std::string& bus_id) const = 0;
virtual void SetCPUAffinity(
const std::shared_ptr<const TopologyCPUAffinityDescriptor>& affinity) const = 0;
virtual void SetMemoryAffinity(
const std::shared_ptr<const TopologyMemoryAffinityDescriptor>& affinity) const = 0;
virtual void SetCPUAffinityByPCIBusID(const std::string& bus_id) const;
virtual void SetMemoryAffinityByPCIBusID(const std::string& bus_id) const;
};
} // namespace device
} // namespace oneflow
#endif // ONEFLOW_CORE_DEVICE_TOPOLOGY_DESCRIPTOR_H_
......@@ -45,7 +45,6 @@ message Resource {
optional uint64 rdma_recv_msg_buf_mbyte = 10 [default = 6];
optional uint64 reserved_host_mem_mbyte = 12 [default = 500];
optional uint64 reserved_device_mem_mbyte = 13 [default = 500];
optional bool enable_numa_aware_cuda_malloc_host = 14 [default = false];
optional int32 compute_thread_pool_size = 15;
optional bool thread_enable_local_message_queue = 103 [default = false];
optional bool enable_thread_local_cache = 16 [default = true];
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment