Anton Mitkov committed on
Commit
6799437
·
1 Parent(s): 4d12916

sycl: Adding additional cpy dbg print output (llama/14034)

Browse files
ggml/src/ggml-sycl/common.hpp CHANGED
@@ -513,9 +513,9 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
513
 
514
  bool gpu_has_xmx(sycl::device &dev);
515
 
516
- template <int N, class T> void debug_print_array(const std::string & prefix, const T array[N]) {
517
  if (LIKELY(!g_ggml_sycl_debug)) {
518
- return;
519
  }
520
  std::stringstream ss;
521
  ss << prefix << "=[";
@@ -526,29 +526,26 @@ template <int N, class T> void debug_print_array(const std::string & prefix, con
526
  ss << array[N - 1];
527
  }
528
  ss << "]";
529
- GGML_SYCL_DEBUG("%s", ss.str().c_str());
530
  }
531
 
532
- inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor,
533
- const std::string & suffix = "") {
534
- if (LIKELY(!g_ggml_sycl_debug)) {
535
- return;
536
- }
537
- GGML_SYCL_DEBUG("%s=", prefix.c_str());
538
  if (tensor) {
539
- GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));
540
- debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne);
541
- debug_print_array<GGML_MAX_DIMS>(";nb", tensor->nb);
542
- if (!ggml_is_contiguous(tensor)) {
543
- GGML_SYCL_DEBUG(";strided");
544
- }
545
- if (ggml_is_permuted(tensor)) {
546
- GGML_SYCL_DEBUG(";permuted");
547
- }
548
  } else {
549
- GGML_SYCL_DEBUG("nullptr");
550
  }
551
- GGML_SYCL_DEBUG("%s", suffix.c_str());
 
552
  }
553
 
554
  // Use scope_op_debug_print to log operations coming from running a model
@@ -564,10 +561,10 @@ struct scope_op_debug_print {
564
  return;
565
  }
566
  GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
567
- debug_print_tensor(" dst", dst);
568
  if (dst) {
569
  for (std::size_t i = 0; i < num_src; ++i) {
570
- debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
571
  }
572
  }
573
  GGML_SYCL_DEBUG("%s\n", suffix.data());
 
513
 
514
  bool gpu_has_xmx(sycl::device &dev);
515
 
516
+ template <int N, class T> std::string debug_get_array_str(const std::string & prefix, const T array[N]) {
517
  if (LIKELY(!g_ggml_sycl_debug)) {
518
+ return "";
519
  }
520
  std::stringstream ss;
521
  ss << prefix << "=[";
 
526
  ss << array[N - 1];
527
  }
528
  ss << "]";
529
+ return ss.str();
530
  }
531
 
532
+ inline std::string debug_get_tensor_str(const std::string &prefix,
533
+ const ggml_tensor *tensor, const std::string &suffix = "") {
534
+ std::stringstream ss;
535
+ if (LIKELY(!g_ggml_sycl_debug)) { return ss.str(); }
536
+ ss << prefix.c_str() << "=";
 
537
  if (tensor) {
538
+ ss << "'" << tensor->name << "':type=" << ggml_type_name(tensor->type);
539
+ ss << debug_get_array_str<GGML_MAX_DIMS>(";ne", tensor->ne);
540
+ ss << debug_get_array_str<GGML_MAX_DIMS>(";nb", tensor->nb);
541
+
542
+ if (!ggml_is_contiguous(tensor)) { ss << ";strided"; }
543
+ if (ggml_is_permuted(tensor)) { ss << ";permuted"; }
 
 
 
544
  } else {
545
+ ss << "nullptr";
546
  }
547
+ ss << suffix;
548
+ return ss.str();
549
  }
550
 
551
  // Use scope_op_debug_print to log operations coming from running a model
 
561
  return;
562
  }
563
  GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
564
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" dst", dst).c_str());
565
  if (dst) {
566
  for (std::size_t i = 0; i < num_src; ++i) {
567
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str("\tsrc" + std::to_string(i), dst->src[i]).c_str());
568
  }
569
  }
570
  GGML_SYCL_DEBUG("%s\n", suffix.data());
ggml/src/ggml-sycl/cpy.cpp CHANGED
@@ -723,8 +723,7 @@ static void ggml_cpy_q4_1_q4_1(const char * cx, char * cdst, const int ne, const
723
 
724
  void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
725
  // Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
726
- scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0,
727
- std::string(" src0 type=") + ggml_type_name(src0->type));
728
  const int64_t ne = ggml_nelements(src0);
729
  GGML_ASSERT(ne == ggml_nelements(src1));
730
 
 
723
 
724
  void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
725
  // Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
726
+ scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0, debug_get_tensor_str("\tsrc0", src0));
 
727
  const int64_t ne = ggml_nelements(src0);
728
  GGML_ASSERT(ne == ggml_nelements(src1));
729
 
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -347,7 +347,7 @@ static enum ggml_status
347
  ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
348
  ggml_tensor *tensor) try {
349
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
350
- debug_print_tensor(": tensor=", tensor, "\n");
351
  ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
352
 
353
  if (tensor->view_src != NULL) {
@@ -385,7 +385,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
385
  const void *data, size_t offset,
386
  size_t size) try {
387
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
388
- debug_print_tensor(": tensor=", tensor);
389
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
390
  ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
391
  ggml_sycl_set_device(ctx->device);
@@ -413,7 +413,7 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer,
413
  void *data, size_t offset,
414
  size_t size) try {
415
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
416
- debug_print_tensor(": tensor=", tensor);
417
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
418
  ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
419
 
@@ -444,8 +444,8 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
444
  ggml_tensor *dst) try {
445
  bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
446
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
447
- debug_print_tensor(": dst=", dst);
448
- debug_print_tensor(" src=", src);
449
  GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
450
  if (is_cpy_supported) {
451
  ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
@@ -525,7 +525,7 @@ catch (sycl::exception const &exc) {
525
  static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
526
  size_t offset, size_t size) {
527
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
528
- debug_print_tensor(": tensor=", tensor);
529
  GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
530
  ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
531
  SYCL_CHECK(ggml_sycl_set_device(ctx->device));
@@ -805,7 +805,7 @@ static enum ggml_status
805
  ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
806
  ggml_tensor *tensor) try {
807
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
808
- debug_print_tensor(": tensor=", tensor, "\n");
809
  GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
810
 
811
  ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
@@ -891,7 +891,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
891
  ggml_tensor *tensor, const void *data,
892
  size_t offset, size_t size) try {
893
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
894
- debug_print_tensor(": tensor=", tensor);
895
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
896
  // split tensors must always be set in their entirety at once
897
  GGML_ASSERT(offset == 0);
@@ -947,7 +947,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
947
  const ggml_tensor *tensor, void *data,
948
  size_t offset, size_t size) try {
949
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
950
- debug_print_tensor(": tensor=", tensor);
951
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
952
  // split tensors must always be set in their entirety at once
953
  GGML_ASSERT(offset == 0);
@@ -3863,7 +3863,7 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
3863
  const void *data, size_t offset,
3864
  size_t size) try {
3865
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
3866
- debug_print_tensor(": tensor=", tensor);
3867
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
3868
  ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
3869
  ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
@@ -3884,7 +3884,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
3884
  void *data, size_t offset,
3885
  size_t size) try {
3886
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
3887
- debug_print_tensor(": tensor=", tensor);
3888
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
3889
  ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
3890
  ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
@@ -3907,8 +3907,8 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
3907
  bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
3908
  ggml_backend_buffer_is_sycl(src->buffer);
3909
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
3910
- debug_print_tensor(": dst=", dst);
3911
- debug_print_tensor(" src=", src);
3912
  GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
3913
  if (is_cpy_supported) {
3914
  /*
 
347
  ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
348
  ggml_tensor *tensor) try {
349
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
350
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str());
351
  ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
352
 
353
  if (tensor->view_src != NULL) {
 
385
  const void *data, size_t offset,
386
  size_t size) try {
387
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
388
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
389
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
390
  ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
391
  ggml_sycl_set_device(ctx->device);
 
413
  void *data, size_t offset,
414
  size_t size) try {
415
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
416
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
417
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
418
  ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
419
 
 
444
  ggml_tensor *dst) try {
445
  bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
446
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
447
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str());
448
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str());
449
  GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
450
  if (is_cpy_supported) {
451
  ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
 
525
  static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
526
  size_t offset, size_t size) {
527
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
528
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
529
  GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
530
  ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
531
  SYCL_CHECK(ggml_sycl_set_device(ctx->device));
 
805
  ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
806
  ggml_tensor *tensor) try {
807
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
808
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str());
809
  GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
810
 
811
  ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
 
891
  ggml_tensor *tensor, const void *data,
892
  size_t offset, size_t size) try {
893
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
894
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
895
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
896
  // split tensors must always be set in their entirety at once
897
  GGML_ASSERT(offset == 0);
 
947
  const ggml_tensor *tensor, void *data,
948
  size_t offset, size_t size) try {
949
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
950
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
951
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
952
  // split tensors must always be set in their entirety at once
953
  GGML_ASSERT(offset == 0);
 
3863
  const void *data, size_t offset,
3864
  size_t size) try {
3865
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
3866
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
3867
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
3868
  ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
3869
  ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
3884
  void *data, size_t offset,
3885
  size_t size) try {
3886
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
3887
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
3888
  GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
3889
  ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
3890
  ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
3907
  bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
3908
  ggml_backend_buffer_is_sycl(src->buffer);
3909
  GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
3910
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str());
3911
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str());
3912
  GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
3913
  if (is_cpy_supported) {
3914
  /*