Spaces:
Sleeping
Sleeping
Anton Mitkov
commited on
Commit
·
6799437
1
Parent(s):
4d12916
sycl: Adding additional cpy dbg print output (llama/14034)
Browse files- ggml/src/ggml-sycl/common.hpp +19 -22
- ggml/src/ggml-sycl/cpy.cpp +1 -2
- ggml/src/ggml-sycl/ggml-sycl.cpp +13 -13
ggml/src/ggml-sycl/common.hpp
CHANGED
|
@@ -513,9 +513,9 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
|
|
| 513 |
|
| 514 |
bool gpu_has_xmx(sycl::device &dev);
|
| 515 |
|
| 516 |
-
template <int N, class T>
|
| 517 |
if (LIKELY(!g_ggml_sycl_debug)) {
|
| 518 |
-
return;
|
| 519 |
}
|
| 520 |
std::stringstream ss;
|
| 521 |
ss << prefix << "=[";
|
|
@@ -526,29 +526,26 @@ template <int N, class T> void debug_print_array(const std::string & prefix, con
|
|
| 526 |
ss << array[N - 1];
|
| 527 |
}
|
| 528 |
ss << "]";
|
| 529 |
-
|
| 530 |
}
|
| 531 |
|
| 532 |
-
inline
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
GGML_SYCL_DEBUG("%s=", prefix.c_str());
|
| 538 |
if (tensor) {
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
}
|
| 545 |
-
if (ggml_is_permuted(tensor)) {
|
| 546 |
-
GGML_SYCL_DEBUG(";permuted");
|
| 547 |
-
}
|
| 548 |
} else {
|
| 549 |
-
|
| 550 |
}
|
| 551 |
-
|
|
|
|
| 552 |
}
|
| 553 |
|
| 554 |
// Use scope_op_debug_print to log operations coming from running a model
|
|
@@ -564,10 +561,10 @@ struct scope_op_debug_print {
|
|
| 564 |
return;
|
| 565 |
}
|
| 566 |
GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
|
| 567 |
-
|
| 568 |
if (dst) {
|
| 569 |
for (std::size_t i = 0; i < num_src; ++i) {
|
| 570 |
-
|
| 571 |
}
|
| 572 |
}
|
| 573 |
GGML_SYCL_DEBUG("%s\n", suffix.data());
|
|
|
|
| 513 |
|
| 514 |
bool gpu_has_xmx(sycl::device &dev);
|
| 515 |
|
| 516 |
+
template <int N, class T> std::string debug_get_array_str(const std::string & prefix, const T array[N]) {
|
| 517 |
if (LIKELY(!g_ggml_sycl_debug)) {
|
| 518 |
+
return "";
|
| 519 |
}
|
| 520 |
std::stringstream ss;
|
| 521 |
ss << prefix << "=[";
|
|
|
|
| 526 |
ss << array[N - 1];
|
| 527 |
}
|
| 528 |
ss << "]";
|
| 529 |
+
return ss.str();
|
| 530 |
}
|
| 531 |
|
| 532 |
+
inline std::string debug_get_tensor_str(const std::string &prefix,
|
| 533 |
+
const ggml_tensor *tensor, const std::string &suffix = "") {
|
| 534 |
+
std::stringstream ss;
|
| 535 |
+
if (LIKELY(!g_ggml_sycl_debug)) { return ss.str(); }
|
| 536 |
+
ss << prefix.c_str() << "=";
|
|
|
|
| 537 |
if (tensor) {
|
| 538 |
+
ss << "'" << tensor->name << "':type=" << ggml_type_name(tensor->type);
|
| 539 |
+
ss << debug_get_array_str<GGML_MAX_DIMS>(";ne", tensor->ne);
|
| 540 |
+
ss << debug_get_array_str<GGML_MAX_DIMS>(";nb", tensor->nb);
|
| 541 |
+
|
| 542 |
+
if (!ggml_is_contiguous(tensor)) { ss << ";strided"; }
|
| 543 |
+
if (ggml_is_permuted(tensor)) { ss << ";permuted"; }
|
|
|
|
|
|
|
|
|
|
| 544 |
} else {
|
| 545 |
+
ss << "nullptr";
|
| 546 |
}
|
| 547 |
+
ss << suffix;
|
| 548 |
+
return ss.str();
|
| 549 |
}
|
| 550 |
|
| 551 |
// Use scope_op_debug_print to log operations coming from running a model
|
|
|
|
| 561 |
return;
|
| 562 |
}
|
| 563 |
GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
|
| 564 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" dst", dst).c_str());
|
| 565 |
if (dst) {
|
| 566 |
for (std::size_t i = 0; i < num_src; ++i) {
|
| 567 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str("\tsrc" + std::to_string(i), dst->src[i]).c_str());
|
| 568 |
}
|
| 569 |
}
|
| 570 |
GGML_SYCL_DEBUG("%s\n", suffix.data());
|
ggml/src/ggml-sycl/cpy.cpp
CHANGED
|
@@ -723,8 +723,7 @@ static void ggml_cpy_q4_1_q4_1(const char * cx, char * cdst, const int ne, const
|
|
| 723 |
|
| 724 |
void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
|
| 725 |
// Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
|
| 726 |
-
scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0,
|
| 727 |
-
std::string(" src0 type=") + ggml_type_name(src0->type));
|
| 728 |
const int64_t ne = ggml_nelements(src0);
|
| 729 |
GGML_ASSERT(ne == ggml_nelements(src1));
|
| 730 |
|
|
|
|
| 723 |
|
| 724 |
void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
|
| 725 |
// Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
|
| 726 |
+
scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0, debug_get_tensor_str("\tsrc0", src0));
|
|
|
|
| 727 |
const int64_t ne = ggml_nelements(src0);
|
| 728 |
GGML_ASSERT(ne == ggml_nelements(src1));
|
| 729 |
|
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
|
@@ -347,7 +347,7 @@ static enum ggml_status
|
|
| 347 |
ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
| 348 |
ggml_tensor *tensor) try {
|
| 349 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 350 |
-
|
| 351 |
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
|
| 352 |
|
| 353 |
if (tensor->view_src != NULL) {
|
|
@@ -385,7 +385,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
|
| 385 |
const void *data, size_t offset,
|
| 386 |
size_t size) try {
|
| 387 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 388 |
-
|
| 389 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 390 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 391 |
ggml_sycl_set_device(ctx->device);
|
|
@@ -413,7 +413,7 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer,
|
|
| 413 |
void *data, size_t offset,
|
| 414 |
size_t size) try {
|
| 415 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 416 |
-
|
| 417 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 418 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 419 |
|
|
@@ -444,8 +444,8 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
|
|
| 444 |
ggml_tensor *dst) try {
|
| 445 |
bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
|
| 446 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 447 |
-
|
| 448 |
-
|
| 449 |
GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
|
| 450 |
if (is_cpy_supported) {
|
| 451 |
ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
|
|
@@ -525,7 +525,7 @@ catch (sycl::exception const &exc) {
|
|
| 525 |
static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
|
| 526 |
size_t offset, size_t size) {
|
| 527 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 528 |
-
|
| 529 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
|
| 530 |
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
|
| 531 |
SYCL_CHECK(ggml_sycl_set_device(ctx->device));
|
|
@@ -805,7 +805,7 @@ static enum ggml_status
|
|
| 805 |
ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
| 806 |
ggml_tensor *tensor) try {
|
| 807 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 808 |
-
|
| 809 |
GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
|
| 810 |
|
| 811 |
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
|
|
@@ -891,7 +891,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
|
| 891 |
ggml_tensor *tensor, const void *data,
|
| 892 |
size_t offset, size_t size) try {
|
| 893 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 894 |
-
|
| 895 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 896 |
// split tensors must always be set in their entirety at once
|
| 897 |
GGML_ASSERT(offset == 0);
|
|
@@ -947,7 +947,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
|
|
| 947 |
const ggml_tensor *tensor, void *data,
|
| 948 |
size_t offset, size_t size) try {
|
| 949 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 950 |
-
|
| 951 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 952 |
// split tensors must always be set in their entirety at once
|
| 953 |
GGML_ASSERT(offset == 0);
|
|
@@ -3863,7 +3863,7 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
|
|
| 3863 |
const void *data, size_t offset,
|
| 3864 |
size_t size) try {
|
| 3865 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3866 |
-
|
| 3867 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 3868 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3869 |
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
@@ -3884,7 +3884,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
|
|
| 3884 |
void *data, size_t offset,
|
| 3885 |
size_t size) try {
|
| 3886 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3887 |
-
|
| 3888 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 3889 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3890 |
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
@@ -3907,8 +3907,8 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
|
|
| 3907 |
bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
|
| 3908 |
ggml_backend_buffer_is_sycl(src->buffer);
|
| 3909 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3910 |
-
|
| 3911 |
-
|
| 3912 |
GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
|
| 3913 |
if (is_cpy_supported) {
|
| 3914 |
/*
|
|
|
|
| 347 |
ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
| 348 |
ggml_tensor *tensor) try {
|
| 349 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 350 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str());
|
| 351 |
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
|
| 352 |
|
| 353 |
if (tensor->view_src != NULL) {
|
|
|
|
| 385 |
const void *data, size_t offset,
|
| 386 |
size_t size) try {
|
| 387 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 388 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
|
| 389 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 390 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 391 |
ggml_sycl_set_device(ctx->device);
|
|
|
|
| 413 |
void *data, size_t offset,
|
| 414 |
size_t size) try {
|
| 415 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 416 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
|
| 417 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 418 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 419 |
|
|
|
|
| 444 |
ggml_tensor *dst) try {
|
| 445 |
bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
|
| 446 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 447 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str());
|
| 448 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str());
|
| 449 |
GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
|
| 450 |
if (is_cpy_supported) {
|
| 451 |
ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
|
|
|
|
| 525 |
static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
|
| 526 |
size_t offset, size_t size) {
|
| 527 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 528 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
|
| 529 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
|
| 530 |
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
|
| 531 |
SYCL_CHECK(ggml_sycl_set_device(ctx->device));
|
|
|
|
| 805 |
ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
| 806 |
ggml_tensor *tensor) try {
|
| 807 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 808 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str());
|
| 809 |
GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
|
| 810 |
|
| 811 |
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
|
|
|
|
| 891 |
ggml_tensor *tensor, const void *data,
|
| 892 |
size_t offset, size_t size) try {
|
| 893 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 894 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
|
| 895 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 896 |
// split tensors must always be set in their entirety at once
|
| 897 |
GGML_ASSERT(offset == 0);
|
|
|
|
| 947 |
const ggml_tensor *tensor, void *data,
|
| 948 |
size_t offset, size_t size) try {
|
| 949 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 950 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
|
| 951 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 952 |
// split tensors must always be set in their entirety at once
|
| 953 |
GGML_ASSERT(offset == 0);
|
|
|
|
| 3863 |
const void *data, size_t offset,
|
| 3864 |
size_t size) try {
|
| 3865 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3866 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
|
| 3867 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 3868 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3869 |
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
|
|
| 3884 |
void *data, size_t offset,
|
| 3885 |
size_t size) try {
|
| 3886 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3887 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
|
| 3888 |
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
|
| 3889 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3890 |
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
|
|
| 3907 |
bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
|
| 3908 |
ggml_backend_buffer_is_sycl(src->buffer);
|
| 3909 |
GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
|
| 3910 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str());
|
| 3911 |
+
GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str());
|
| 3912 |
GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
|
| 3913 |
if (is_cpy_supported) {
|
| 3914 |
/*
|