diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
index 2a3b087323b2d9c6cf2913e558742893f021ed4f..a13aa7f725ae9f7e7e27619787de1bf68bd95d8b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
@@ -44,68 +44,45 @@ class StreamInterface {
 
 class GpuDeviceProperties {
  public:
-  GpuDeviceProperties() : initialized_(false), first_(true), device_properties_(nullptr) {}
+  // Returns the single shared instance. The function-local static is
+  // initialized on first use (thread-safe since C++11); the object is
+  // heap-allocated and never deleted.
+  static const GpuDeviceProperties& instance() {
+    static const GpuDeviceProperties& kInstance = *new GpuDeviceProperties();
 
-  ~GpuDeviceProperties() {
-    if (device_properties_) {
-      delete[] device_properties_;
-    }
+    return kInstance;
   }
 
   EIGEN_STRONG_INLINE const gpuDeviceProp_t& get(int device) const { return device_properties_[device]; }
 
-  EIGEN_STRONG_INLINE bool isInitialized() const { return initialized_; }
-
-  void initialize() {
-    if (!initialized_) {
-      // Attempts to ensure proper behavior in the case of multiple threads
-      // calling this function simultaneously. This would be trivial to
-      // implement if we could use std::mutex, but unfortunately mutex don't
-      // compile with nvcc, so we resort to atomics and thread fences instead.
-      // Note that if the caller uses a compiler that doesn't support c++11 we
-      // can't ensure that the initialization is thread safe.
-      if (first_.exchange(false)) {
-        // We're the first thread to reach this point.
-        int num_devices;
-        gpuError_t status = gpuGetDeviceCount(&num_devices);
-        if (status != gpuSuccess) {
-          std::cerr << "Failed to get the number of GPU devices: " << gpuGetErrorString(status) << std::endl;
-          gpu_assert(status == gpuSuccess);
-        }
-        device_properties_ = new gpuDeviceProp_t[num_devices];
-        for (int i = 0; i < num_devices; ++i) {
-          status = gpuGetDeviceProperties(&device_properties_[i], i);
-          if (status != gpuSuccess) {
-            std::cerr << "Failed to initialize GPU device #" << i << ": " << gpuGetErrorString(status) << std::endl;
-            gpu_assert(status == gpuSuccess);
-          }
-        }
-
-        std::atomic_thread_fence(std::memory_order_release);
-        initialized_ = true;
-      } else {
-        // Wait for the other thread to inititialize the properties.
-        while (!initialized_) {
-          std::atomic_thread_fence(std::memory_order_acquire);
-          std::this_thread::sleep_for(std::chrono::milliseconds(1000));
-        }
+ private:
+  GpuDeviceProperties() = default;
+
+  // Queries the properties of every device counted by gpuGetDeviceCount.
+  static std::vector<gpuDeviceProp_t> GetDeviceProperties() {
+    int num_devices = 0;
+    gpuError_t status = gpuGetDeviceCount(&num_devices);
+    if (status != gpuSuccess) {
+      std::cerr << "Failed to get the number of GPU devices: " << gpuGetErrorString(status) << std::endl;
+      gpu_assert(status == gpuSuccess);
+    }
+    std::vector<gpuDeviceProp_t> device_properties(num_devices);
+    for (int i = 0; i < num_devices; ++i) {
+      // Write into the local vector; a static member function cannot touch device_properties_.
+      status = gpuGetDeviceProperties(&device_properties[i], i);
+      if (status != gpuSuccess) {
+        std::cerr << "Failed to initialize GPU device #" << i << ": " << gpuGetErrorString(status) << std::endl;
+        gpu_assert(status == gpuSuccess);
       }
     }
+
+    return device_properties;
   }
 
- private:
-  volatile bool initialized_;
-  std::atomic<bool> first_;
-  gpuDeviceProp_t* device_properties_;
+  std::vector<gpuDeviceProp_t> device_properties_ = GetDeviceProperties();
 };
 
-EIGEN_ALWAYS_INLINE const GpuDeviceProperties& GetGpuDeviceProperties() {
-  static GpuDeviceProperties* deviceProperties = new GpuDeviceProperties();
-  if (!deviceProperties->isInitialized()) {
-    deviceProperties->initialize();
-  }
-  return *deviceProperties;
-}
+EIGEN_ALWAYS_INLINE const GpuDeviceProperties& GetGpuDeviceProperties() { return GpuDeviceProperties::instance(); }
 
 EIGEN_ALWAYS_INLINE const gpuDeviceProp_t& GetGpuDeviceProperties(int device) {
   return GetGpuDeviceProperties().get(device);