Skip to content

Commit

Permalink
sycl: Increase logging.
Browse files Browse the repository at this point in the history
  • Loading branch information
fweig committed Oct 28, 2024
1 parent bf8dfdf commit 88a1943
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
.vscode/
build/
docs/html
compile_commands.json
.cache

# Prerequisites
*.d
Expand Down
27 changes: 23 additions & 4 deletions src/xpu/detail/platform/sycl/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@

#define XPU_DETAIL_ASSERT(x) assert(x)

#if 1
#define trace(message, ...) printf("sycl_driver::%s:%d: " message "\n", __FUNCTION__, __LINE__, ##__VA_ARGS__)
#else
#define trace(...) ((void)(0))
#endif

// Pull printf into global namespace, to be consistent with other backends.
using sycl::ext::oneapi::experimental::printf;

Expand Down Expand Up @@ -361,20 +367,29 @@ struct xpu::detail::action_runner<xpu::detail::kernel_tag, K, void(K::*)(xpu::ke
static int call(kernel_launch_info launch_info, Args... args) {
dim block_dim = K::block_size::value;
dim grid_dim{};
bool uses_y_dim = block_dim.y > 0;
bool uses_z_dim = block_dim.z > 0;
launch_info.g.get_compute_grid(block_dim, grid_dim);

XPU_LOG("Calling kernel '%s' [block_dim = (%d, %d, %d), grid_dim = (%d, %d, %d)] with SYCL driver.", type_name<K>(), block_dim.x, block_dim.y, block_dim.z, grid_dim.x, grid_dim.y, grid_dim.z);

auto *driver = static_cast<sycl_driver *>(backend::get(sycl));

sycl::queue queue = driver->get_queue(launch_info.queue_handle);
sycl::device device = queue.get_device();
trace("Run kernel '%s' on queue '%p' on '%s'", type_name<K>(), launch_info.queue_handle, device.get_info<sycl::info::device::name>().c_str());

sycl::range<3> global_range{size_t(grid_dim.x), size_t(grid_dim.y), size_t(grid_dim.z)};
sycl::range<3> local_range{size_t(block_dim.x), size_t(block_dim.y), size_t(block_dim.z)};
sycl::range<3> local_range{size_t(block_dim.x), uses_y_dim ? size_t(block_dim.y) : 0, uses_z_dim ? size_t(block_dim.z) : 0};
cmem_traits<constants> cmem_traits{};
auto cmem_buffers = cmem_traits.make_buffers();

global_range = global_range * local_range;

if (device.is_cpu()) {
local_range = sycl::range(1, 1, 1);
}

XPU_LOG("Calling kernel '%s' [local_range = (%d, %d, %d), global_range = (%d, %d, %d)] with SYCL driver.", type_name<K>(), local_range[0], local_range[1], local_range[2], global_range[0], global_range[1], global_range[2]);

sycl::event ev = queue.submit([&](sycl::handler &cgh) {
sycl::local_accessor<shared_memory, 0> shared_memory_acc{cgh};
auto cmem_accessors = cmem_traits.make_accessors(cmem_buffers, cgh);
Expand All @@ -383,21 +398,25 @@ struct xpu::detail::action_runner<xpu::detail::kernel_tag, K, void(K::*)(xpu::ke
#if defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20240000
sycl::stream out{0, 0, cgh};
#endif
cgh.parallel_for<K>(sycl::nd_range<3>{global_range, local_range}, [=](sycl::nd_item<3> item) {
cgh.parallel_for<K>(sycl::nd_range<3>(global_range, local_range), [=](sycl::nd_item<3> item) {
#if defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20240000
// WTF: old versions of icpx sometimes optimizes out the kernel call (when using O2)
// if we dont add the print statement
if (item.get_global_id(0) == static_cast<size_t>(-1)) {
out << "";
}
#endif
sycl::ext::oneapi::experimental::printf("HELLO\n");

shared_memory &smem = shared_memory_acc;
tpos pos{internal_ctor, item};
context ctx{internal_ctor, pos, smem, cmem};
K{}(ctx, args...);
});
});

queue.wait_and_throw();

if (launch_info.ms != nullptr) {
ev.wait();
int64_t nanoseconds = ev.get_profiling_info<sycl::info::event_profiling::command_end>() - ev.get_profiling_info<sycl::info::event_profiling::command_start>();
Expand Down
21 changes: 19 additions & 2 deletions src/xpu/detail/platform/sycl/sycl_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,19 @@

using namespace xpu::detail;

#if 0
#define trace(message, ...) printf("sycl_driver::%s:%d: " message "\n", __FUNCTION__, __LINE__, ##__VA_ARGS__)
#else
#define trace(...) ((void)(0))
#endif

error sycl_driver::setup() {
if (config::profile) {
m_prop_list = sycl::property_list{sycl::property::queue::enable_profiling(), sycl::property::queue::in_order()};
} else {
m_prop_list = sycl::property_list{sycl::property::queue::in_order()};
}
trace("Create context for device '%s'", m_device_obj.get_info<sycl::info::device::name>().c_str());
m_context = sycl::context{m_device_obj};
return 0;
}
Expand All @@ -35,9 +42,13 @@ error sycl_driver::free(void *ptr) {
}

error sycl_driver::create_queue(void **queue, int device) {
auto q = std::make_unique<sycl::queue>(sycl::device::get_devices()[device], m_prop_list);
auto odevice = sycl::device::get_devices()[device];

auto q = std::make_unique<sycl::queue>(odevice, m_prop_list);
m_queues.emplace_back(std::move(q));
*queue = m_queues.back().get();

trace("Create queue '%p' for device '%s'", *queue, odevice.get_info<sycl::info::device::name>().c_str());
return 0;
}

Expand All @@ -57,6 +68,8 @@ error sycl_driver::synchronize_queue(void *handle) {
}

error sycl_driver::memcpy_async(void *dst, const void *src, size_t bytes, void *handle, double *ms) {
trace("Copy %zu bytes in queue '%p'", bytes, handle);

auto q = get_queue(handle);
if (ms == nullptr) {
q.memcpy(dst, src, bytes);
Expand Down Expand Up @@ -93,6 +106,7 @@ error sycl_driver::set_device(int device) {
m_device_obj = sycl::device::get_devices()[device];
m_context = sycl::context{m_device_obj};
m_device = device;
trace("Select device '%s'", m_device_obj.get_info<sycl::info::device::name>().c_str());
return 0;
}

Expand Down Expand Up @@ -140,15 +154,18 @@ error sycl_driver::get_ptr_prop(const void *ptr, int *device, mem_type *type) {
break;
}

XPU_LOG("sycl_driver::get_ptr_prop: %p type is %d", ptr, *type);

if (*type == mem_host) {
*device = -1;
return 0;
}

sycl::device dev = sycl::get_pointer_device(ptr, m_context);
// XPU_LOG("sycl_driver::pointer_get_device: %s", dev.get_info<sycl::info::device::name>().c_str());
trace("sycl_driver::pointer_get_device: %s\n", dev.get_info<sycl::info::device::name>().c_str());
*device = get_device_id(dev);
} catch (sycl::exception &e) {
XPU_LOG("sycl_driver::get_ptr_prop: Caught SYCL exception: '%s'", e.what());
*type = mem_host;
*device = -1;
}
Expand Down
2 changes: 2 additions & 0 deletions src/xpu/detail/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ void runtime::get_ptr_prop(const void *ptr, ptr_prop *prop) {
RAISE_INTERNAL_ERROR();
}

XPU_LOG("runtime::get_ptr_prop: For address %p got type %d and platform device %d for driver '%s'", ptr, mem_type, platform_device, driver_to_str(driver_type));

prop->type = mem_type;
prop->dev = get_device(driver_type, platform_device);
return;
Expand Down

0 comments on commit 88a1943

Please sign in to comment.