From 6ed717d2a7e9cdcf570ea485e0fc84980bf94161 Mon Sep 17 00:00:00 2001 From: Chiming Zhang Date: Mon, 25 Mar 2024 01:19:32 -0700 Subject: [PATCH] VITIS-11112 HIP Binding: Memory Management. (#7983) * Initial implementation of some HIP memory APIs. (#11) Signed-off-by: Chiming Zhang * Add hipMemset(). (#12) Signed-off-by: Chiming Zhang * minor fix to hipHostMalloc() (#13) Signed-off-by: Chiming Zhang * Fix the typo. (#14) Signed-off-by: Chiming Zhang * fix some issues from code review. Signed-off-by: Chiming Zhang * fix minor error in error.cpp. Signed-off-by: Chiming Zhang * remove new operation from memory_database::GetInstance(). Signed-off-by: Chiming Zhang * Fix some issues from code review. Signed-off-by: Chiming Zhang * change class name error_state to error. Signed-off-by: Chiming Zhang * remove some duplicate code. Signed-off-by: Chiming Zhang * fix some issues. Signed-off-by: Chiming Zhang * fix some issues. Signed-off-by: Chiming Zhang * fix more issues. Signed-off-by: Chiming Zhang * fix more issues. Signed-off-by: Chiming Zhang --------- Signed-off-by: Chiming Zhang --- src/runtime_src/hip/api/CMakeLists.txt | 1 + src/runtime_src/hip/api/hip_error.cpp | 117 ++++++++ src/runtime_src/hip/api/hip_memory.cpp | 256 ++++++++++------ src/runtime_src/hip/core/CMakeLists.txt | 1 + src/runtime_src/hip/core/error.cpp | 141 +++++++++ src/runtime_src/hip/core/error.h | 48 +++ src/runtime_src/hip/core/memory.cpp | 377 +++++++++++++++++++++++- src/runtime_src/hip/core/memory.h | 238 ++++++++++++++- 8 files changed, 1083 insertions(+), 96 deletions(-) create mode 100644 src/runtime_src/hip/api/hip_error.cpp create mode 100644 src/runtime_src/hip/core/error.cpp create mode 100644 src/runtime_src/hip/core/error.h diff --git a/src/runtime_src/hip/api/CMakeLists.txt b/src/runtime_src/hip/api/CMakeLists.txt index c18e74a6846..36e3c32f5e6 100644 --- a/src/runtime_src/hip/api/CMakeLists.txt +++ b/src/runtime_src/hip/api/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(hip_api_library_objects OBJECT hip_stream.cpp hip_memory.cpp hip_event.cpp + hip_error.cpp ) target_include_directories(hip_api_library_objects diff --git a/src/runtime_src/hip/api/hip_error.cpp b/src/runtime_src/hip/api/hip_error.cpp new file mode 100644 index 00000000000..ac3c8d435f3 --- /dev/null +++ b/src/runtime_src/hip/api/hip_error.cpp @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Device, Inc. All rights reserved. + +#include "core/common/error.h" +#include "hip/config.h" +#include "hip/hip_runtime_api.h" +#include "hip/core/device.h" +#include "hip/core/error.h" +#include + +namespace xrt::core::hip +{ + static hipError_t + hip_peek_last_error() + { + return error::instance().peek_last_error(); + } + + static hipError_t + hip_get_last_error() + { + hipError_t last_error = error::instance().peek_last_error(); + error::instance().reset_last_error(); + return last_error; + } + +} // xrt::core::hip + +// Return hip error as text string form. +hipError_t +hipDrvGetErrorName(hipError_t hipError, + const char **errorName) +{ + try { + *errorName = xrt::core::hip::error::get_error_name(hipError); + return hipSuccess; + } catch (const std::exception &ex) { + xrt_core::send_exception_message(ex.what()); + } + return hipErrorInvalidValue; +} + +// Return handy text string message to explain the error which occurred. +hipError_t +hipDrvGetErrorString(hipError_t hipError, + const char **errorString) +{ + try { + // TODO: return more detailed error string instead of error name + *errorString = xrt::core::hip::error::get_error_name(hipError); + return hipSuccess; + } catch (const std::exception &ex) { + xrt_core::send_exception_message(ex.what()); + } + return hipErrorInvalidValue; +} + +// Return handy text string message to explain the error which occurred. +const char * +hipGetErrorString(hipError_t hipError) +{ + const char *error_string = nullptr; + try { + // TODO: return more detailed error string instead of error name + error_string = xrt::core::hip::error::get_error_name(hipError); + } catch (const std::exception &ex) { + xrt_core::send_exception_message(ex.what()); + } + return error_string; +} + +// Return hip error as text string form. +const char * +hipGetErrorName(hipError_t hipError) +{ + const char *error_name = nullptr; + try { + error_name = xrt::core::hip::error::get_error_name(hipError); + } catch (const std::exception &ex) { + xrt_core::send_exception_message(ex.what()); + } + return error_name; +} + +template hipError_t +handle_hip_error_error(F && f) +{ + hipError_t last_error = hipSuccess; + try { + return f(); + } catch (const std::exception &ex) { + xrt_core::send_exception_message(ex.what()); + } + return last_error; +} + +// return last error returned by any HIP API call and resets the stored error code +hipError_t +hipExtGetLastError() +{ + return hipGetLastError(); +} + +// return last error returned by any HIP API call and resets the stored error code +hipError_t +hipGetLastError(void) +{ + return handle_hip_error_error([&] { return xrt::core::hip::hip_get_last_error(); }); +} + +// Return last error returned by any HIP runtime API call. +hipError_t hipPeekAtLastError() +{ + return handle_hip_error_error([&] { return xrt::core::hip::hip_peek_last_error(); }); +} + +//hipError_t handle_hip_error([&] { xrt::core::hip::hipMemCpy(dst, src, sizeBytes, kind); }); diff --git a/src/runtime_src/hip/api/hip_memory.cpp b/src/runtime_src/hip/api/hip_memory.cpp index 0a038b46b26..0ccea53ee46 100644 --- a/src/runtime_src/hip/api/hip_memory.cpp +++ b/src/runtime_src/hip/api/hip_memory.cpp @@ -3,6 +3,7 @@ #include #include "core/common/error.h" +#include "core/common/unistd.h" #include "hip/config.h" #include "hip/core/device.h" #include "hip/core/memory.h" @@ -10,183 +11,262 @@ namespace xrt::core::hip { + static std::shared_ptr + get_current_device() + { + // TODO: get REAL current hip device + auto dev = device_cache.get(0); + if (dev == nullptr) + { + if (hipInit(0) != hipSuccess) + { + throw std::runtime_error("hipInit() failed!"); + } + dev = device_cache.get(0); + } + return dev; + } // Allocate memory on the device. static void - hip_malloc(void **ptr, size_t size) + hip_malloc(void** ptr, size_t size) { - throw std::runtime_error("Not implemented"); + assert(ptr); + assert(size > 0); + + auto dev = get_current_device(); + assert(dev); + + auto hip_mem = std::make_shared(dev, size); + auto dev_addr = hip_mem->get_addr(address_type::hip_address_type_device); + if (dev_addr != 0) + { + memory_database::instance().insert_addr(address_type::hip_address_type_device, reinterpret_cast(dev_addr), size, hip_mem); + *ptr = reinterpret_cast(dev_addr); + return; + } + auto host_addr = hip_mem->get_addr(address_type::hip_address_type_host); + memory_database::instance().insert_addr(address_type::hip_address_type_host, reinterpret_cast(host_addr), size, hip_mem); + *ptr = host_addr; } // Allocates device accessible host memory. static void - hip_host_malloc(void **ptr, size_t size, unsigned int flags) + hip_host_malloc(void** ptr, size_t size, unsigned int flags) { - throw std::runtime_error("Not implemented"); + assert(ptr); + assert(size > 0); + + auto dev = get_current_device(); + assert(dev); + + auto hip_mem = std::make_shared(dev, size, flags); + auto host_addr = hip_mem->get_addr(address_type::hip_address_type_host); + memory_database::instance().insert_addr(address_type::hip_address_type_host, reinterpret_cast(host_addr), size, hip_mem); + *ptr = host_addr; } // Free memory allocated by the hipHostMalloc(). static void - hip_host_free(void *ptr) + hip_host_free(void* ptr) { - throw std::runtime_error("Not implemented"); + memory_database::instance().delete_addr(reinterpret_cast(ptr)); } // Free memory allocated by the hipMalloc(). static void - hip_free(void *ptr) + hip_free(void* ptr) { - throw std::runtime_error("Not implemented"); + memory_database::instance().delete_addr(reinterpret_cast(ptr)); } // Register host memory so it can be accessed from the current device. static void - hip_host_register(void *hostPtr, size_t sizeBytes, unsigned int flags) + hip_host_register(void* hostPtr, size_t size, unsigned int flags) { - throw std::runtime_error("Not implemented"); + auto dev = get_current_device(); + assert(dev); + + auto hip_mem = std::make_shared(dev, size, hostPtr, flags); + auto host_addr = hip_mem->get_addr(address_type::hip_address_type_host); + memory_database::instance().insert_addr(address_type::hip_address_type_host, reinterpret_cast(host_addr), size, hip_mem); } // Un-register host pointer. static void - hip_host_unregister(void *hostPtr) + hip_host_unregister(void* hostPtr) { - throw std::runtime_error("Not implemented"); + memory_database::instance().delete_addr(reinterpret_cast(hostPtr)); } // Get Device pointer from Host Pointer allocated through hipHostMalloc(). static void - hip_host_get_device_pointer(void **devPtr, void *hstPtr, unsigned int flags) + hip_host_get_device_pointer(void** devPtr, void* hstPtr, unsigned int flags) + { + assert(devPtr); + + *devPtr = nullptr; + auto hip_mem = memory_database::instance().get_hip_mem_from_host_addr(hstPtr); + if (hip_mem != nullptr) + { + *devPtr = hip_mem->get_addr(address_type::hip_address_type_device); + } + } + + static void + hip_memcpy_host2device(void* dst, const void* src, size_t sizeBytes) + { + auto hip_mem = memory_database::instance().get_hip_mem_from_addr(dst); + hip_mem->copy_from(src, sizeBytes); + } + + static void + hip_memcpy_host2host(void* dst, const void* src, size_t sizeBytes) + { + memcpy(dst, src, sizeBytes); + } + + static void + hip_memcpy_device2host(void* dst, const void* src, size_t sizeBytes) + { + auto hip_mem = memory_database::instance().get_hip_mem_from_addr(src); + hip_mem->copy_to(dst, sizeBytes); + } + + static void + hip_memcpy_device2device(void* dst, const void* src, size_t sizeBytes) { - throw std::runtime_error("Not implemented"); + auto hip_mem_src = memory_database::instance().get_hip_mem_from_addr(src); + auto hip_mem_dst = memory_database::instance().get_hip_mem_from_addr(dst); + + hip_mem_dst->copy_from(hip_mem_src.get(), sizeBytes); } // Copy data from src to dst. static void - hip_memcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind) + hip_memcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) + { + switch (kind) + { + case hipMemcpyHostToDevice: + hip_memcpy_host2device(dst, src, sizeBytes); + break; + + case hipMemcpyDeviceToHost: + hip_memcpy_device2host(dst, src, sizeBytes); + break; + + case hipMemcpyDeviceToDevice: + hip_memcpy_device2device(dst, src, sizeBytes); + break; + + case hipMemcpyHostToHost: + hip_memcpy_host2host(dst, src, sizeBytes); + break; + + default: + break; + }; + } + + // fill data to dst. + static void + hip_memset(void* dst, int value, size_t sizeBytes) { - throw std::runtime_error("Not implemented"); + auto hip_mem = memory_database::instance().get_hip_mem_from_addr(dst); + assert(hip_mem->get_type() != xrt::core::hip::memory_type::hip_memory_type_invalid); + + auto host_src = aligned_alloc(xrt_core::getpagesize(), sizeBytes); + memset(host_src, value, sizeBytes); + + hip_mem->copy_from(host_src, sizeBytes); + + free(host_src); } } // xrt::core::hip +template hipError_t +handle_hip_memory_error(F && f) +{ + try { + f(); + return hipSuccess; + } catch (const std::exception &ex) { + xrt_core::send_exception_message(ex.what()); + } + return hipErrorUnknown; +} + // Allocate memory on the device. hipError_t -hipMalloc(void **ptr, size_t size) +hipMalloc(void** ptr, size_t size) { if (size == 0) { *ptr = nullptr; return hipSuccess; } - try { - xrt::core::hip::hip_malloc(ptr, size); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_malloc(ptr, size); }); } // Allocates device accessible host memory. hipError_t -hipHostMalloc(void **ptr, size_t size, unsigned int flags) +hipHostMalloc(void** ptr, size_t size, unsigned int flags) { if (size == 0) { *ptr = nullptr; return hipSuccess; } - try { - xrt::core::hip::hip_host_malloc(ptr, size, flags); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_host_malloc(ptr, size, flags); }); } // Free memory allocated by the hipHostMalloc(). hipError_t -hipHostFree(void *ptr) +hipHostFree(void* ptr) { - try { - xrt::core::hip::hip_host_free(ptr); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_host_free(ptr); }); } // Free memory allocated by the hipMalloc(). hipError_t -hipFree(void *ptr) +hipFree(void* ptr) { - try { - xrt::core::hip::hip_free(ptr); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_free(ptr); }); } // Register host memory so it can be accessed from the current device. hipError_t -hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) +hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { - try { - xrt::core::hip::hip_host_register(hostPtr, sizeBytes, flags); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_host_register(hostPtr, sizeBytes, flags); }); } // Un-register host pointer. hipError_t -hipHostUnregister(void *hostPtr) +hipHostUnregister(void* hostPtr) { - try { - xrt::core::hip::hip_host_unregister(hostPtr); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_host_unregister(hostPtr); }); } // Get Device pointer from Host Pointer allocated through hipHostMalloc. hipError_t -hipHostGetDevicePointer(void **devPtr, void *hstPtr, unsigned int flags) +hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags) { - try { - xrt::core::hip::hip_host_get_device_pointer(devPtr, hstPtr, flags); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_host_get_device_pointer(devPtr, hstPtr, flags); }); } // Copy data from src to dst. hipError_t -hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind) +hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { - try { - xrt::core::hip::hip_memcpy(dst, src, sizeBytes, kind); - return hipSuccess; - } - catch (const std::exception &ex) { - xrt_core::send_exception_message(ex.what()); - } - return hipErrorUnknown; + return handle_hip_memory_error([&] { xrt::core::hip::hip_memcpy(dst, src, sizeBytes, kind); }); +} + +// Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant byte value value. +hipError_t +hipMemset(void* dst, int value, size_t sizeBytes) +{ + return handle_hip_memory_error([&] { xrt::core::hip::hip_memset(dst, value, sizeBytes); }); } diff --git a/src/runtime_src/hip/core/CMakeLists.txt b/src/runtime_src/hip/core/CMakeLists.txt index 53e51317f54..2abc780f812 100644 --- a/src/runtime_src/hip/core/CMakeLists.txt +++ b/src/runtime_src/hip/core/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(hip_core_library_objects OBJECT memory.cpp module.cpp stream.cpp + error.cpp ) target_include_directories(hip_core_library_objects diff --git a/src/runtime_src/hip/core/error.cpp b/src/runtime_src/hip/core/error.cpp new file mode 100644 index 00000000000..714c3a5549f --- /dev/null +++ b/src/runtime_src/hip/core/error.cpp @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Device, Inc. All rights reserved. +#include "hip/config.h" +#include "hip/hip_runtime_api.h" +#include "error.h" + +#define HIP_ERROR_NAME_PAIR(x) \ + { \ + x, #x \ + } + +namespace xrt::core::hip +{ + + thread_local static error* hip_error_state = nullptr; + + error::error() + : m_last_error(hipSuccess) + { + if (hip_error_state) + { + throw std::runtime_error + ("Multiple instances of hip error detected, only one per thread\n" + "can be loaded at any given time."); + } + hip_error_state = this; + } + + error& + error::instance() + { + if (!hip_error_state) + { + thread_local static error err_st; + } + + if (hip_error_state) + { + return *hip_error_state; + } + + throw std::runtime_error("error singleton is not loaded"); + } + + static std::map hip_error_names = + { + HIP_ERROR_NAME_PAIR(hipSuccess), + HIP_ERROR_NAME_PAIR(hipErrorInvalidValue), + HIP_ERROR_NAME_PAIR(hipErrorOutOfMemory), + HIP_ERROR_NAME_PAIR(hipErrorMemoryAllocation), + HIP_ERROR_NAME_PAIR(hipErrorNotInitialized), + HIP_ERROR_NAME_PAIR(hipErrorInitializationError), + HIP_ERROR_NAME_PAIR(hipErrorDeinitialized), + HIP_ERROR_NAME_PAIR(hipErrorProfilerDisabled), + HIP_ERROR_NAME_PAIR(hipErrorProfilerNotInitialized), + HIP_ERROR_NAME_PAIR(hipErrorProfilerAlreadyStarted), + HIP_ERROR_NAME_PAIR(hipErrorProfilerAlreadyStopped), + HIP_ERROR_NAME_PAIR(hipErrorInvalidConfiguration), + HIP_ERROR_NAME_PAIR(hipErrorInvalidPitchValue), + HIP_ERROR_NAME_PAIR(hipErrorInvalidSymbol), + HIP_ERROR_NAME_PAIR(hipErrorInvalidDevicePointer), + HIP_ERROR_NAME_PAIR(hipErrorInvalidMemcpyDirection), + HIP_ERROR_NAME_PAIR(hipErrorInsufficientDriver), + HIP_ERROR_NAME_PAIR(hipErrorMissingConfiguration), + HIP_ERROR_NAME_PAIR(hipErrorPriorLaunchFailure), + HIP_ERROR_NAME_PAIR(hipErrorInvalidDeviceFunction), + HIP_ERROR_NAME_PAIR(hipErrorNoDevice), + HIP_ERROR_NAME_PAIR(hipErrorInvalidDevice), + HIP_ERROR_NAME_PAIR(hipErrorInvalidImage), + HIP_ERROR_NAME_PAIR(hipErrorInvalidContext), + HIP_ERROR_NAME_PAIR(hipErrorContextAlreadyCurrent), + HIP_ERROR_NAME_PAIR(hipErrorMapFailed), + HIP_ERROR_NAME_PAIR(hipErrorMapBufferObjectFailed), + HIP_ERROR_NAME_PAIR(hipErrorUnmapFailed), + HIP_ERROR_NAME_PAIR(hipErrorArrayIsMapped), + HIP_ERROR_NAME_PAIR(hipErrorAlreadyMapped), + HIP_ERROR_NAME_PAIR(hipErrorNoBinaryForGpu), + HIP_ERROR_NAME_PAIR(hipErrorAlreadyAcquired), + HIP_ERROR_NAME_PAIR(hipErrorNotMapped), + HIP_ERROR_NAME_PAIR(hipErrorNotMappedAsArray), + HIP_ERROR_NAME_PAIR(hipErrorNotMappedAsPointer), + HIP_ERROR_NAME_PAIR(hipErrorECCNotCorrectable), + HIP_ERROR_NAME_PAIR(hipErrorUnsupportedLimit), + HIP_ERROR_NAME_PAIR(hipErrorContextAlreadyInUse), + HIP_ERROR_NAME_PAIR(hipErrorPeerAccessUnsupported), + HIP_ERROR_NAME_PAIR(hipErrorInvalidKernelFile), + HIP_ERROR_NAME_PAIR(hipErrorInvalidGraphicsContext), + HIP_ERROR_NAME_PAIR(hipErrorInvalidSource), + HIP_ERROR_NAME_PAIR(hipErrorFileNotFound), + HIP_ERROR_NAME_PAIR(hipErrorSharedObjectSymbolNotFound), + HIP_ERROR_NAME_PAIR(hipErrorSharedObjectInitFailed), + HIP_ERROR_NAME_PAIR(hipErrorOperatingSystem), + HIP_ERROR_NAME_PAIR(hipErrorInvalidHandle), + HIP_ERROR_NAME_PAIR(hipErrorInvalidResourceHandle), + HIP_ERROR_NAME_PAIR(hipErrorIllegalState), + HIP_ERROR_NAME_PAIR(hipErrorNotFound), + HIP_ERROR_NAME_PAIR(hipErrorNotReady), + HIP_ERROR_NAME_PAIR(hipErrorIllegalAddress), + HIP_ERROR_NAME_PAIR(hipErrorLaunchOutOfResources), + HIP_ERROR_NAME_PAIR(hipErrorLaunchTimeOut), + HIP_ERROR_NAME_PAIR(hipErrorPeerAccessAlreadyEnabled), + HIP_ERROR_NAME_PAIR(hipErrorPeerAccessNotEnabled), + HIP_ERROR_NAME_PAIR(hipErrorSetOnActiveProcess), + HIP_ERROR_NAME_PAIR(hipErrorContextIsDestroyed), + HIP_ERROR_NAME_PAIR(hipErrorAssert), + HIP_ERROR_NAME_PAIR(hipErrorHostMemoryAlreadyRegistered), + HIP_ERROR_NAME_PAIR(hipErrorHostMemoryNotRegistered), + HIP_ERROR_NAME_PAIR(hipErrorLaunchFailure), + HIP_ERROR_NAME_PAIR(hipErrorCooperativeLaunchTooLarge), + HIP_ERROR_NAME_PAIR(hipErrorNotSupported), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureUnsupported), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureInvalidated), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureMerge), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureUnmatched), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureUnjoined), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureIsolation), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureImplicit), + HIP_ERROR_NAME_PAIR(hipErrorCapturedEvent), + HIP_ERROR_NAME_PAIR(hipErrorStreamCaptureWrongThread), + HIP_ERROR_NAME_PAIR(hipErrorGraphExecUpdateFailure), + HIP_ERROR_NAME_PAIR(hipErrorUnknown), + HIP_ERROR_NAME_PAIR(hipErrorRuntimeMemory), + HIP_ERROR_NAME_PAIR(hipErrorRuntimeOther), + HIP_ERROR_NAME_PAIR(hipErrorTbd), + }; + + const char * + error::get_error_name(hipError_t err) + { + const char *error_name = nullptr; + + auto itr = hip_error_names.find(err); + if (itr != hip_error_names.end()) + { + error_name = itr->second.c_str(); + } + + return error_name; + } + +} diff --git a/src/runtime_src/hip/core/error.h b/src/runtime_src/hip/core/error.h new file mode 100644 index 00000000000..60d3a763c65 --- /dev/null +++ b/src/runtime_src/hip/core/error.h @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Device, Inc. All rights reserved. +#ifndef xrthip_error_h +#define xrthip_error_h + +#include "core/common/device.h" +#include "core/common/api/bo.h" +#include "xrt/config.h" +#include "xrt/device/hal.h" +#include "xrt/util/range.h" + +namespace xrt::core::hip +{ + class error + { + public: + static error& + instance(); + + static const char* + get_error_name(hipError_t err); + + hipError_t + peek_last_error() + { + return m_last_error; + } + + void + reset_last_error() + { + m_last_error = hipSuccess; + } + + void + set_last_error(hipError_t err) + { + m_last_error = err; + } + + protected: + error(); + + private: + hipError_t m_last_error; + }; // class error +} +#endif // xrthip_error_h \ No newline at end of file diff --git a/src/runtime_src/hip/core/memory.cpp b/src/runtime_src/hip/core/memory.cpp index 4f6903bd14e..34ca2f4dbe4 100644 --- a/src/runtime_src/hip/core/memory.cpp +++ b/src/runtime_src/hip/core/memory.cpp @@ -1,11 +1,380 @@ // SPDX-License-Identifier: Apache-2.0 -// Copyright (C) 2023 Advanced Micro Device, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Device, Inc. All rights reserved. +#ifdef _WIN32 +#include +#include +#else +#include +#endif + +#include "core/common/unistd.h" #include "device.h" #include "memory.h" +#include "hip/config.h" +#include "hip/hip_runtime_api.h" + +namespace xrt::core::hip +{ + + memory::memory(std::shared_ptr dev, size_t sz) + : m_device(std::move(dev)), m_size(sz), m_type(memory_type::hip_memory_type_device), m_hip_flags(0), m_host_mem(nullptr), m_bo(nullptr), m_sync_host_mem_required(false) + { + assert(m_device); + + // TODO: support non-npu device that may require delayed xrt::bo allocation until xrt kernel is created + init_xrt_bo(); + } + + memory::memory(std::shared_ptr dev, size_t sz, unsigned int flags) + : m_device(std::move(dev)), m_size(sz), m_type(memory_type::hip_memory_type_host), m_hip_flags(flags), m_host_mem(nullptr), m_bo(nullptr), m_sync_host_mem_required(false) + { + assert(m_device); + + switch (m_hip_flags) + { + case hipHostMallocDefault: + case hipHostMallocPortable: + // allocate pinned memory on host only, xrt::bo object will not be allocated. + m_host_mem = reinterpret_cast(aligned_alloc(xrt_core::getpagesize(), m_size)); + assert(m_host_mem); + lock_pages(m_host_mem, m_size); + break; + + case hipHostMallocMapped: + init_xrt_bo(); + m_host_mem = reinterpret_cast(m_bo->map()); + break; + + case hipHostMallocWriteCombined: + init_xrt_bo(); + m_host_mem = reinterpret_cast(m_bo->map()); + lock_pages(m_host_mem, m_size); + break; + + default: + break; + } + } + + void + memory::lock_pages(void *addr, size_t size) + { +#ifdef _WIN32 + VirtualLock(addr, size); +#else + mlock(addr, size); +#endif + } + + void + memory::init_xrt_bo() + { + auto xrt_device = m_device->get_xrt_device(); + m_bo = std::make_shared(xrt_device, m_size); + } + + void + memory::validate() + { + // validate() is requied only on non-npu device that require delayed xrt::bo allocation until xrt kernel is created + assert(m_type == memory_type::hip_memory_type_device); + + if (m_bo == nullptr) + { + auto xrt_device = m_device->get_xrt_device(); + m_bo = std::make_shared(xrt_device, m_size, XRT_BO_FLAGS_HOST_ONLY, m_group); + + if (m_sync_host_mem_required == true) + { + m_bo->write(m_host_mem, m_size, 0); + m_bo->sync(XCL_BO_SYNC_BO_TO_DEVICE); + m_sync_host_mem_required = false; + } + } + } + + void + memory::sync(sync_direction drtn) + { + assert(m_bo); + + if (m_sync_host_mem_required) + { + switch (drtn) + { + case sync_direction::sync_from_host_to_device: + m_bo->write(m_host_mem, m_size, 0); + m_bo->sync(XCL_BO_SYNC_BO_TO_DEVICE); + break; + + case sync_direction::sync_from_device_to_host: + m_bo->sync(XCL_BO_SYNC_BO_FROM_DEVICE); + m_bo->read(m_host_mem, m_size, 0); + break; + + default: + break; + }; + } + } + + void + memory::free_mem() + { + if (m_type != memory_type::hip_memory_type_registered && + m_host_mem != nullptr) + { + free(m_host_mem); + } + } + + void* + memory::get_addr(address_type type) + { + switch (type) + { + case address_type::hip_address_type_device: + return get_device_addr(); + break; + + case address_type::hip_address_type_host: + return get_host_addr(); + break; + + default: + assert(0); + break; + }; + return nullptr; + } + + void* + memory::get_device_addr() + { + if (m_bo != nullptr) + { + return reinterpret_cast(m_bo->address()); + } + return nullptr; + } + + void + memory::copy_from(const xrt::core::hip::memory *src, size_t size, size_t src_offset, size_t offset) + { + auto src_bo = src->get_xrt_bo(); + assert(src_bo); + if (m_bo != nullptr) + { + m_bo->copy(*src_bo, size, src_offset, offset); + m_bo->sync(XCL_BO_SYNC_BO_TO_DEVICE); + } + else + { + src->copy_to(m_host_mem, size, offset, src_offset); + m_sync_host_mem_required = true; + } + } + + void + memory::copy_from(const void *host_src, size_t size, size_t src_offset, size_t offset) + { + auto src_hip_mem = memory_database::instance().get_hip_mem_from_host_addr(host_src); + if (src_hip_mem != nullptr && + src_hip_mem->get_type() == memory_type::hip_memory_type_host) + { + // pinned hip mem + assert(src_hip_mem->get_hip_flags() == hipHostMallocDefault || src_hip_mem->get_hip_flags() == hipHostMallocPortable); + + // TODO: get better performance by avoiding two step copy in case of copying from pinned host mem + } + + const unsigned char *src_ptr = reinterpret_cast(host_src); + src_ptr += src_offset; + if (m_bo != nullptr) + { + m_bo->write(src_ptr, size, offset); + m_bo->sync(XCL_BO_SYNC_BO_TO_DEVICE); + } + else + { + memcpy(m_host_mem, src_ptr, size); + m_sync_host_mem_required = true; + } + } + + void + memory::copy_to(void *host_dst, size_t size, size_t dst_offset, size_t offset) const + { + unsigned char *dst_ptr = reinterpret_cast(host_dst); + dst_ptr += dst_offset; + if (m_bo != nullptr) + { + m_bo->sync(XCL_BO_SYNC_BO_FROM_DEVICE); + m_bo->read(dst_ptr, size, offset); + } + else + { + memcpy(dst_ptr, m_host_mem, size); + } + } + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + memory_database* memory_database::m_memory_database = nullptr; + + memory_database& memory_database::instance() + { + if (!m_memory_database) + { + static memory_database mem_db; + } + return *m_memory_database; + } + + memory_database::memory_database() + : m_hostAddrMap(), m_devAddrMap() + { + if (m_memory_database) + { + throw std::runtime_error + ("Multiple instances of hip memory_database detected, only one\n" + "can be loaded at any given time."); + } + m_memory_database = this; + } + + memory_database::~memory_database() + { + m_hostAddrMap.clear(); + m_devAddrMap.clear(); + } + + void + memory_database::insert_host_addr(void *host_addr, size_t size, std::shared_ptr hip_mem) + { + m_hostAddrMap.insert({address_range_key(reinterpret_cast(host_addr), size), hip_mem}); + } + + void + memory_database::delete_host_addr(void *host_addr) + { + m_hostAddrMap.erase(address_range_key(reinterpret_cast(host_addr), 0)); + } + + void + memory_database::insert_device_addr(uint64_t dev_addr, size_t size, std::shared_ptr hip_mem) + { + m_devAddrMap.insert({address_range_key(dev_addr, size), hip_mem}); + } + + void + memory_database::delete_device_addr(uint64_t dev_addr) + { + m_devAddrMap.erase(address_range_key(dev_addr, 0)); + } + + void + memory_database::insert_addr(address_type type, uint64_t addr, size_t size, std::shared_ptr hip_mem) + { + switch (type) + { + case address_type::hip_address_type_device: + m_devAddrMap.insert({address_range_key(addr, size), hip_mem}); + break; + case address_type::hip_address_type_host: + m_hostAddrMap.insert({address_range_key(addr, size), hip_mem}); + break; + + default: + break; + }; + } + + void + memory_database::delete_addr(uint64_t addr) + { + m_devAddrMap.erase(address_range_key(addr, 0)); + m_hostAddrMap.erase(address_range_key(addr, 0)); + } + + std::shared_ptr + memory_database::get_hip_mem_from_host_addr(void *host_addr) + { + auto itr = m_hostAddrMap.find(address_range_key(reinterpret_cast(host_addr), 0)); + if (itr == m_hostAddrMap.end()) + { + return nullptr; + } + else + { + return itr->second; + } + } + + std::shared_ptr + memory_database::get_hip_mem_from_host_addr(const void *host_addr) + { + auto itr = m_hostAddrMap.find(address_range_key(reinterpret_cast(host_addr), 0)); + if (itr == m_hostAddrMap.end()) + { + return nullptr; + } + else + { + return itr->second; + } + } + + std::shared_ptr + memory_database::get_hip_mem_from_device_addr(void *dev_addr) + { + auto itr = m_devAddrMap.find(address_range_key(reinterpret_cast(dev_addr), 0)); + if (itr == m_devAddrMap.end()) + { + return nullptr; + } + else + { + return itr->second; + } + } + + std::shared_ptr + memory_database::get_hip_mem_from_device_addr(const void *dev_addr) + { + auto itr = m_devAddrMap.find(address_range_key(reinterpret_cast(dev_addr), 0)); + if (itr == m_devAddrMap.end()) + { + return nullptr; + } + else + { + return itr->second; + } + } -namespace xrt::core::hip { + std::shared_ptr + memory_database::get_hip_mem_from_addr(void *addr) + { + auto hip_mem = get_hip_mem_from_device_addr(addr); + if (hip_mem != nullptr) + { + return hip_mem; + } + hip_mem = get_hip_mem_from_host_addr(addr); + return hip_mem; + } -// Implementation + std::shared_ptr + memory_database::get_hip_mem_from_addr(const void *addr) + { + auto hip_mem = get_hip_mem_from_device_addr(addr); + if (hip_mem != nullptr) + { + return hip_mem; + } + hip_mem = get_hip_mem_from_host_addr(addr); + return hip_mem; + } -} //namespace xrt::core::hip +} // namespace xrt::core::hip diff --git a/src/runtime_src/hip/core/memory.h b/src/runtime_src/hip/core/memory.h index 44c61a45e79..38c5d293545 100644 --- a/src/runtime_src/hip/core/memory.h +++ b/src/runtime_src/hip/core/memory.h @@ -1,17 +1,247 @@ // SPDX-License-Identifier: Apache-2.0 -// Copyright (C) 2023 Advanced Micro Device, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Device, Inc. All rights reserved. #ifndef xrthip_memory_h #define xrthip_memory_h +#include "core/common/device.h" +#include "experimental/xrt_bo.h" +#include "experimental/xrt_ext.h" +#include "device.h" +#include "xrt/config.h" +#include "xrt/device/hal.h" +#include "xrt/util/range.h" -namespace xrt::core::hip { +namespace xrt::core::hip +{ + enum class memory_type : int + { + hip_memory_type_host = 0, + hip_memory_type_device, + hip_memory_type_managed, + hip_memory_type_registered, + hip_memory_type_invalid + }; + + enum class address_type : int + { + hip_address_type_host = 0, + hip_address_type_device + }; + + enum class sync_direction : int + { + sync_from_host_to_device = 0, + sync_from_device_to_host + }; + + class memory + { + + public: + memory(std::shared_ptr dev) + : m_device(std::move(dev)), m_size(0), m_type(memory_type::hip_memory_type_invalid), m_hip_flags(0), m_host_mem(nullptr), m_bo(nullptr), m_sync_host_mem_required(false) + { + assert(m_device); + init_xrt_bo(); + } + + memory(std::shared_ptr dev, size_t sz); + + memory(std::shared_ptr dev, size_t sz, unsigned int flags); + + // construct from user host buffer + memory(std::shared_ptr dev, size_t sz, void *host_mem, unsigned int flags) + : m_device(std::move(dev)), m_size(sz), m_type(memory_type::hip_memory_type_registered), m_hip_flags(flags), m_host_mem(reinterpret_cast(host_mem)), m_bo(nullptr), m_sync_host_mem_required(true) + { + assert(m_device); + + // user ptr BO is not supported on NPU Linux driver, hence sync between host_mem and internal xrt::bo object is required before and after kernel run + // TODO: set m_sync_host_mem_required to true for device that support user ptr BO + init_xrt_bo(); + } + + ~memory() + { + free_mem(); + } + + static void + lock_pages(void* addr, size_t size); + + void + validate(); + + void + sync(sync_direction); + + void + copy_from(const xrt::core::hip::memory *src, size_t size, size_t src_offset = 0, size_t offset = 0); + + void + copy_from(const void *host_src, size_t size, size_t src_offset = 0, size_t offset = 0); + + void + copy_to(void *host_dst, size_t size, size_t dst_offset = 0, size_t offset = 0) const; + + void + set_device(std::shared_ptr device) + { + m_device = device; + } + + void* + get_addr(address_type type); + + std::shared_ptr + get_xrt_bo() const + { + return m_bo; + } + + std::shared_ptr + get_xrt_bo() + { + return m_bo; + } + + unsigned int + get_hip_flags() const + { + return m_hip_flags; + } + + memory_type + get_type() const + { + return m_type; + } + + memory_group + get_group() const + { + return m_group; + } + + void + set_group(memory_group group) + { + m_group = group; + } + + std::shared_ptr + get_device() + { + return m_device; + } + + protected: + void* + get_host_addr() + { + return m_host_mem; + } + + void* + get_device_addr(); + + private: + std::shared_ptr m_device; + size_t m_size; + memory_type m_type; + unsigned int m_hip_flags; // hipHostMallocMapped etc. + unsigned char *m_host_mem; // host copy to store user data + std::shared_ptr m_bo; + xrt::memory_group m_group; + bool m_sync_host_mem_required; //true if sync between host copy and bo is required. + + void + free_mem(); + void + init_xrt_bo(); + }; -class memory +class address_range_key { +public: + address_range_key() : address(0), size(0) {} + address_range_key(uint64_t addr, size_t sz) : address(addr), size(sz) {} + uint64_t address; + size_t size; }; - + +struct addre_sz_key_compare +{ + bool operator() (const address_range_key& lhs, const address_range_key& rhs) const + { + return ((lhs.address + lhs.size) < rhs.address); + } +}; + +using addr_map = std::map, addre_sz_key_compare>; + +class memory_database +{ +private: + addr_map m_hostAddrMap; + addr_map m_devAddrMap; + +protected: + memory_database(); + + static memory_database* m_memory_database; + + void + insert_host_addr(void* host_addr, size_t size, std::shared_ptr hip_mem); + + void + delete_host_addr(void* host_addr); + + void + insert_device_addr(uint64_t dev_addr, size_t size, std::shared_ptr hip_mem); + + void + delete_device_addr(uint64_t dev_addr); + +public: + ~memory_database(); + + static memory_database& + instance(); + + addr_map& + get_hostaddr_map() + { + return m_hostAddrMap; + } + + void + insert_addr(address_type type, uint64_t addr, size_t size, std::shared_ptr hip_mem); + + void + delete_addr(uint64_t addr); + + std::shared_ptr + get_hip_mem_from_addr(void* addr); + + std::shared_ptr + get_hip_mem_from_addr(const void* addr); + + std::shared_ptr + get_hip_mem_from_host_addr(void* host_addr); + + std::shared_ptr + get_hip_mem_from_host_addr(const void* host_addr); + + std::shared_ptr + get_hip_mem_from_device_addr(void* dev_addr); + + std::shared_ptr + get_hip_mem_from_device_addr(const void* dev_addr); +}; + + } // xrt::core::hip #endif