38
38
#pragma GCC visibility push(protected)
39
39
#endif
40
40
41
- static void l_dump_printf_buffer (cl_event event, cl_kernel kernel,
41
+ static size_t l_dump_printf_buffer (cl_event event, cl_kernel kernel,
42
42
unsigned size);
43
43
static void decode_string (std::string &print_data);
44
44
@@ -719,12 +719,13 @@ static std::string::const_iterator get_data_elem_at_offset(
719
719
return end_of_string;
720
720
}
721
721
722
- static void l_dump_printf_buffer (cl_event event, cl_kernel kernel,
722
+ static size_t l_dump_printf_buffer (cl_event event, cl_kernel kernel,
723
723
unsigned size) {
724
724
unsigned global_offset; // the location in the printf buffer
725
725
unsigned single_printf_offset; // the offset of a single printf
726
726
void (*hal_dma_fn)(cl_event, const void *, void *, size_t ) = 0 ;
727
727
int src_on_host = 1 ;
728
+ size_t dumped_buffer_size = 0 ;
728
729
#ifdef _WIN32
729
730
__declspec (align (64 )) char
730
731
buffer[ACL_PRINTF_BUFFER_TOTAL_SIZE]; // Aligned to 64, for dma transfers
@@ -743,11 +744,11 @@ static void l_dump_printf_buffer(cl_event event, cl_kernel kernel,
743
744
if (!verify_types ()) {
744
745
printf (" Host data types are incompatible with ACL compiler, ignoring "
745
746
" printfs...\n " );
746
- return ;
747
+ return dumped_buffer_size ;
747
748
}
748
749
749
750
if (printf_infos.empty ())
750
- return ;
751
+ return dumped_buffer_size ;
751
752
752
753
// Memory is on the device if all of these are true:
753
754
// The memory is not SVM or the device does not support SVM.
@@ -783,9 +784,10 @@ static void l_dump_printf_buffer(cl_event event, cl_kernel kernel,
783
784
if (size > (unsigned int ) kernel->processed_buffer_size ){
784
785
printf (" ZIBAI has something to print! \n " );
785
786
hal_dma_fn (NULL , unprocessed_begin, buffer, size - kernel->processed_buffer_size );
787
+ dumped_buffer_size = size - kernel->processed_buffer_size ;
786
788
} else {
787
789
printf (" Zibai Added, nothing to print \n " );
788
- return ;
790
+ return dumped_buffer_size ;
789
791
}
790
792
791
793
@@ -839,7 +841,7 @@ static void l_dump_printf_buffer(cl_event event, cl_kernel kernel,
839
841
if (!success) {
840
842
acl_print_debug_msg (
841
843
" corrupt printf data, ignoring remaining printfs...\n " );
842
- return ;
844
+ return dumped_buffer_size ;
843
845
}
844
846
845
847
#ifdef DEBUG
@@ -902,7 +904,7 @@ static void l_dump_printf_buffer(cl_event event, cl_kernel kernel,
902
904
if (vector_size == -1 ) {
903
905
acl_print_debug_msg (" wrong vector specifier in printf call, ignoring "
904
906
" remaining printfs...\n " );
905
- return ;
907
+ return dumped_buffer_size ;
906
908
}
907
909
908
910
// get the length specifier
@@ -923,7 +925,7 @@ static void l_dump_printf_buffer(cl_event event, cl_kernel kernel,
923
925
if (size_of_data == 0 ) {
924
926
acl_print_debug_msg (" wrong length modifier in printf call, ignoring "
925
927
" remaining printfs...\n " );
926
- return ;
928
+ return dumped_buffer_size ;
927
929
}
928
930
929
931
for (i = 0 ; i < vector_size; i++) {
@@ -979,6 +981,7 @@ static void l_dump_printf_buffer(cl_event event, cl_kernel kernel,
979
981
#ifdef DEBUG
980
982
printf (" exiting acl_dump_buffer...\n " );
981
983
#endif
984
+ return dumped_buffer_size;
982
985
}
983
986
984
987
//
@@ -1030,14 +1033,14 @@ void acl_process_printf_buffer(void *user_data, acl_device_op_t *op) {
1030
1033
1031
1034
// Grab the printf data and emit it.
1032
1035
cl_uint num_bytes = op->info .num_printf_bytes_pending ;
1033
- l_dump_printf_buffer (event, kernel, num_bytes);
1036
+ size_t dumped_buffer_size = l_dump_printf_buffer (event, kernel, num_bytes);
1034
1037
1035
1038
// Mark this printf work as done. Must do this *before* unstalling
1036
1039
// the kernel, to avoid a race against the kernel filling up the
1037
1040
// buffer again.
1038
1041
// Zibai Testing
1039
1042
if (op->info .debug_dump == 1 ){
1040
- kernel->processed_buffer_size += num_bytes; // TODO, Fix this, should be += processed bytes
1043
+ kernel->processed_buffer_size += dumped_buffer_size;
1041
1044
} else {
1042
1045
// Full dump, clear this global var
1043
1046
kernel->processed_buffer_size = 0 ;
@@ -1049,9 +1052,11 @@ void acl_process_printf_buffer(void *user_data, acl_device_op_t *op) {
1049
1052
acl_memory_barrier ();
1050
1053
1051
1054
// Allow the kernel to continue running.
1052
- // Zibai Testing!
1053
- // acl_get_hal()->unstall_kernel(
1054
- // event->cmd.info.ndrange_kernel.device->def.physical_device_id, op->id);
1055
+ // We don't need to unstall the kernel during the early flushing during debug.
1056
+ if (op->info .debug_dump == 0 ){
1057
+ acl_get_hal ()->unstall_kernel (
1058
+ event->cmd .info .ndrange_kernel .device ->def .physical_device_id , op->id );
1059
+ }
1055
1060
}
1056
1061
}
1057
1062
0 commit comments