Skip to content

Commit 952b4d4

Browse files
author
zibai.wang
committed
added printf early flushing
1 parent e8a57e6 commit 952b4d4

File tree

5 files changed

+82
-82
lines changed

5 files changed

+82
-82
lines changed

include/acl_types.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -922,7 +922,10 @@ typedef struct _cl_kernel {
922922
// Eventually this should be an array of ACLDeviceBinaries similar
923923
// to how cl_program contains an array of dev_prog.
924924
const acl_device_binary_t *dev_bin;
925-
size_t processed_buffer_size;
925+
926+
// In ACL_HAL_DEBUG mode, the printf buffer may be dumped before the kernel ends.
927+
// Therefore, we need to keep track of how much data has been processed.
928+
size_t processed_printf_buffer_size;
926929
} _cl_kernel;
927930

928931
ACL_DECLARE_CL_OBJECT_ALLOC_FUNCTIONS(cl_kernel);
@@ -1347,7 +1350,10 @@ typedef struct {
13471350
// it sees the CL_COMPLETE status for the first time, but it won't
13481351
// change after that.
13491352
cl_uint num_printf_bytes_pending;
1350-
int debug_dump = 0; // TODO, initialize to 0, somewhere
1353+
1354+
// Indicates whether this operation dumps the printf buffer before the kernel
1355+
// finishes, for debugging purposes.
1356+
int debug_dump_printf = 0;
13511357
} acl_device_op_info_t;
13521358

13531359
// An operation to be performed on a device.

src/acl_hal_mmd.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,6 @@ void acl_hal_mmd_get_device_status(cl_uint num_devices,
919919
assert(devices[idevice]->opened_count > 0);
920920

921921
physical_device_id = devices[idevice]->def.physical_device_id;
922-
printf("Zibai Added, going to check kernel status! \n");
923922
acl_kernel_if_check_kernel_status(&kern[physical_device_id]);
924923
}
925924
}

src/acl_kernel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2420,7 +2420,7 @@ static int l_init_kernel(cl_kernel kernel, cl_program program,
24202420
kernel->printf_device_buffer = 0; // Default is none.
24212421
kernel->printf_device_ptr = 0; // Default is none.
24222422
// Zibai Testing
2423-
kernel->processed_buffer_size = 0;
2423+
kernel->processed_printf_buffer_size = 0;
24242424
if (!accel_def->printf_format_info.empty()) {
24252425
auto gmem_idx = static_cast<size_t>(
24262426
acl_get_default_memory(kernel->dev_bin->get_devdef()));

src/acl_kernel_if.cpp

Lines changed: 44 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1369,7 +1369,7 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
13691369
":: Calling acl_process_printf_buffer_fn with "
13701370
"activation_id=%d and printf_size=%u.\n",
13711371
activation_id, printf_size);
1372-
// update status, which will dump the printf buffer, set debug_dump = 0
1372+
// update status, which will dump the printf buffer, set debug_dump_printf = 0
13731373
acl_process_printf_buffer_fn(activation_id, (int)printf_size, 0);
13741374

13751375
ACL_KERNEL_IF_DEBUG_MSG(
@@ -1378,7 +1378,6 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
13781378

13791379
acl_kernel_cra_write(kern, k, KERNEL_OFFSET_CSR, new_csr);
13801380
continue;
1381-
// ZIBAI TODO FIGURE OUT: Why Doesn't printf buffer need to be cleared, after this? Probably Handled outside of runtime?
13821381
}
13831382
}
13841383

@@ -1433,7 +1432,6 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
14331432
kern->accel_job_ids[k][next_queue_back] = -1;
14341433

14351434
#ifdef TEST_PROFILING_HARDWARE
1436-
// ZIBAI TODO: What is this doing?
14371435
// Test readback of fake profile data using the acl_hal_mmd function that
14381436
// would be called from the acl runtime.
14391437
ACL_KERNEL_IF_DEBUG_MSG(
@@ -1496,12 +1494,41 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
14961494
}
14971495
}
14981496

1497+
// Early (debug) flush of the printf buffer for accelerator `k`.
//
// When the accelerator has printf calls (accel_num_printfs[k] > 0), this
// reads the current printf buffer size through acl_kernel_cra_read() at
// KERNEL_OFFSET_PRINTF_BUFFER_SIZE and forwards it, together with the
// activation id stored in the slot following accel_queue_back[k], to
// acl_process_printf_buffer_fn with the last argument set to 1.
// When the accelerator has no printf calls, nothing is read or forwarded.
//
// kern: kernel interface whose per-accelerator queue state is consulted.
// k:    index of the accelerator within `kern`.
//
// NOTE(review): the last argument is presumably the debug_dump_printf flag
// consumed by the printf pickup code -- confirm against the callback bound
// to acl_process_printf_buffer_fn.
// NOTE(review): other code in this file stores -1 in accel_job_ids slots and
// tests them with `>= 0`, so the activation id forwarded here may be -1;
// verify that the callback tolerates that value.
void acl_kernel_if_debug_dump_printf(acl_kernel_if *kern, unsigned k){
1498+
acl_assert_locked();
1499+
unsigned int printf_size = 0; // Zero-initialized defensively; only written by the CRA read below.
1500+
int activation_id;
1501+
unsigned int next_queue_back;
1502+
1503+
// Compute the slot after accel_queue_back[k], wrapping to 0 once the last
// index (accel_invoc_queue_depth[k] - 1) has been reached.
if (kern->accel_queue_back[k] == (int)kern->accel_invoc_queue_depth[k] - 1)
1504+
next_queue_back = 0;
1505+
else
1506+
next_queue_back = kern->accel_queue_back[k] + 1;
1507+
1508+
// Only accelerators that contain printf calls have a buffer to flush.
if (kern->accel_num_printfs[k] > 0) {
1509+
acl_kernel_cra_read(kern, k, KERNEL_OFFSET_PRINTF_BUFFER_SIZE,
1510+
&printf_size);
1511+
assert(printf_size <= ACL_PRINTF_BUFFER_TOTAL_SIZE);
1512+
// NOTE(review): printf_size is unsigned but is formatted with %d below,
// while the second message uses %u -- consider aligning the specifiers.
ACL_KERNEL_IF_DEBUG_MSG(kern,
1513+
":: Accelerator %d printf buffer size is %d.\n",
1514+
k, printf_size);
1515+
activation_id = kern->accel_job_ids[k][next_queue_back];
1516+
ACL_KERNEL_IF_DEBUG_MSG(kern,
1517+
":: Calling acl_process_printf_buffer_fn with "
1518+
"activation_id=%d and printf_size=%u.\n",
1519+
activation_id, printf_size);
1520+
1521+
// Pass 1 as the final argument to request an early debug dump
// (debug_dump_printf = 1).
1522+
acl_process_printf_buffer_fn(activation_id, (int)printf_size, 1);
1523+
}
1524+
1525+
}
1526+
14991527
void acl_kernel_if_dump_status(acl_kernel_if *kern) {
15001528
int expect_kernel = 0;
15011529
unsigned k, i;
15021530
acl_assert_locked();
15031531

1504-
kern->io.printf(" Zibai Added, kernel may be hung?\n");
15051532
for (k = 0; k < kern->num_accel; ++k) {
15061533
for (i = 0; i < kern->accel_invoc_queue_depth[k]; ++i) {
15071534
if (kern->accel_job_ids[k][i] >= 0) {
@@ -1513,10 +1540,6 @@ void acl_kernel_if_dump_status(acl_kernel_if *kern) {
15131540
if (!expect_kernel)
15141541
return;
15151542

1516-
kern->io.printf("No kernel updates in approximately 10 seconds for device %u",
1517-
kern->physical_device_id);
1518-
kern->io.printf(" ... a kernel may be hung?\n");
1519-
15201543
for (k = 0; k < kern->num_accel; ++k) {
15211544
unsigned int csr;
15221545

@@ -1542,37 +1565,8 @@ void acl_kernel_if_dump_status(acl_kernel_if *kern) {
15421565
if (ACL_KERNEL_READ_BIT(csr, KERNEL_CSR_LMEM_INVALID_BANK))
15431566
kern->io.printf(" lm_bank_exception");
15441567

1545-
//Testing start Zibai
1546-
unsigned int printf_size = 0; // Do I have to initialize it to 0?
1547-
int activation_id;
1548-
1549-
int next_queue_back2;
1550-
1551-
if (kern->accel_queue_back[k] == (int)kern->accel_invoc_queue_depth[k] - 1)
1552-
next_queue_back2 = 0;
1553-
else
1554-
next_queue_back2 = kern->accel_queue_back[k] + 1;
1555-
1556-
if (kern->accel_num_printfs[k] > 0) {
1557-
acl_kernel_cra_read(kern, k, KERNEL_OFFSET_PRINTF_BUFFER_SIZE,
1558-
&printf_size);
1559-
assert(printf_size <= ACL_PRINTF_BUFFER_TOTAL_SIZE);
1560-
ACL_KERNEL_IF_DEBUG_MSG(kern,
1561-
":: Accelerator %d printf buffer size is %d.\n",
1562-
k, printf_size);
1563-
activation_id = kern->accel_job_ids[k][next_queue_back2];
1564-
ACL_KERNEL_IF_DEBUG_MSG(kern,
1565-
":: Calling acl_process_printf_buffer_fn with "
1566-
"activation_id=%d and printf_size=%u.\n",
1567-
activation_id, printf_size);
1568-
1569-
// set debug_dump to 1
1570-
acl_process_printf_buffer_fn(activation_id, (int)printf_size, 1);
1571-
//acl_process_printf_buffer_fn(activation_id, 64, 0);
1572-
}
1573-
1574-
//Testing end Zibai
1575-
// Zibai TODO: What is the below code this doing?
1568+
//Dump the printf buffer to stdout
1569+
acl_kernel_if_debug_dump_printf(kern, k);
15761570

15771571
unsigned buffered_kernel_invocation = 0;
15781572
for (i = 0; i < kern->accel_invoc_queue_depth[k]; ++i) {
@@ -1611,15 +1605,18 @@ void acl_kernel_if_check_kernel_status(acl_kernel_if *kern) {
16111605
#endif
16121606
acl_assert_locked();
16131607

1614-
// Print kernel status if it hasn't done anything in a while
1615-
// If multiple thread calls this, only one will print every 10 seconds
1616-
kern->io.printf("Zibai Added, Are we calling acl_kernel_if_dump_status? \n");
16171608
if (kern->last_kern_update != 0 &&
1618-
(acl_kernel_if_get_time_us(kern) - kern->last_kern_update >
1619-
10 * 1000000)) {
1620-
kern->last_kern_update = acl_kernel_if_get_time_us(kern);
1621-
if (kern->io.debug_verbosity > 0)
1622-
kern->io.printf("Zibai Added, calling acl_kernel_if_dump_status! \n");
1609+
(acl_kernel_if_get_time_us(kern) - kern->last_kern_update >
1610+
10 * 1000000)) {
1611+
kern->last_kern_update = acl_kernel_if_get_time_us(kern);
1612+
kern->io.printf("No kernel updates in approximately 10 seconds for device %u",
1613+
kern->physical_device_id);
1614+
kern->io.printf(" ... a kernel may be hung?\n");
1615+
acl_kernel_if_dump_status(kern);
1616+
} else if (kern->io.debug_verbosity >= 3) {
1617+
// If ACL_HAL_DEBUG >= 3, the status is printed even when no hang has been detected.
1618+
// If there are spare cycles, it will be called at most every 5 seconds to dump the status and flush the printf buffer
1619+
// 5 seconds is configured in acl_thread->acl_wait_for_device_update()
16231620
acl_kernel_if_dump_status(kern);
16241621
}
16251622

src/acl_printf.cpp

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -770,29 +770,25 @@ static size_t l_dump_printf_buffer(cl_event event, cl_kernel kernel,
770770
hal_dma_fn = acl_get_hal()->copy_globalmem_to_hostmem;
771771
}
772772

773-
// void* b = (void*)((char*)kernel->printf_device_buffer->block_allocation->range.begin + kernel->processed_buffer_size);
774-
// // hal_dma_fn(NULL, b, buffer, size - 64);
775-
776-
//hal_dma_fn(NULL, b, buffer, size - 64);
777-
// test is giving segfault if I use size - 64, because test doesn't have that much data
778-
// What if I don't use -64
779-
printf("total size is %d \n", size);
780-
printf("kernel prcessed buffer size is %d \n", kernel->processed_buffer_size);
781-
782-
void* unprocessed_begin = (void*)((char*)kernel->printf_device_buffer->block_allocation->range.begin + kernel->processed_buffer_size);
783-
printf("Hang may begin here?? \n");
784-
if (size > (unsigned int) kernel->processed_buffer_size){
785-
printf("ZIBAI has something to print! \n");
786-
hal_dma_fn(NULL, unprocessed_begin, buffer, size - kernel->processed_buffer_size);
787-
dumped_buffer_size = size - kernel->processed_buffer_size;
773+
// Gate this message on the HAL debug verbosity (ACL_HAL_DEBUG) rather than on ACL_DEBUG.
774+
if (acl_get_hal()->get_debug_verbosity &&
775+
acl_get_hal()->get_debug_verbosity() > 0) {
776+
printf("Previously processed buffer size is %d \n", kernel->processed_printf_buffer_size);
777+
}
778+
779+
// Check whether the entire printf buffer has already been processed
780+
if (size > (unsigned int) kernel->processed_printf_buffer_size){
781+
void* unprocessed_begin = (void*)((char*)kernel->printf_device_buffer->block_allocation->range.begin + kernel->processed_printf_buffer_size);
782+
hal_dma_fn(NULL, unprocessed_begin, buffer, size - kernel->processed_printf_buffer_size);
783+
dumped_buffer_size = size - kernel->processed_printf_buffer_size;
788784
} else{
789-
printf("Zibai Added, nothing to print \n");
785+
if (acl_get_hal()->get_debug_verbosity &&
786+
acl_get_hal()->get_debug_verbosity() > 0) {
787+
printf("All Printf() buffer has already been dumped \n");
788+
}
790789
return dumped_buffer_size;
791790
}
792791

793-
794-
//hal_dma_fn(NULL, kernel->printf_device_buffer->block_allocation->range.begin, buffer, size);
795-
796792
#ifdef DEBUG
797793
if (debug_mode > 0) {
798794
printf("acl_dump_printf_buffer at %p size=%d\n", buffer, size);
@@ -987,7 +983,7 @@ static size_t l_dump_printf_buffer(cl_event event, cl_kernel kernel,
987983
//
988984
// Schedule enqueue read buffer to read printf buffer
989985
// The activation ID is the device op ID.
990-
void acl_schedule_printf_buffer_pickup(int activation_id, int size, int debug_dump) {
986+
void acl_schedule_printf_buffer_pickup(int activation_id, int size, int debug_dump_printf) {
991987
acl_device_op_queue_t *doq = &(acl_platform.device_op_queue);
992988

993989
// This function can potentially be called by a HAL that does not use the
@@ -1010,10 +1006,11 @@ void acl_schedule_printf_buffer_pickup(int activation_id, int size, int debug_du
10101006
acl_device_op_t *op = doq->op + activation_id;
10111007
op->info.num_printf_bytes_pending = (cl_uint)size;
10121008

1013-
if (debug_dump == 1){
1014-
op->info.debug_dump = 1;
1009+
// Propagate the operation info
1010+
if (debug_dump_printf == 1){
1011+
op->info.debug_dump_printf = 1;
10151012
}else {
1016-
op->info.debug_dump = 0;
1013+
op->info.debug_dump_printf = 0;
10171014
}
10181015
}
10191016
// Signal all waiters.
@@ -1035,16 +1032,17 @@ void acl_process_printf_buffer(void *user_data, acl_device_op_t *op) {
10351032
cl_uint num_bytes = op->info.num_printf_bytes_pending;
10361033
size_t dumped_buffer_size = l_dump_printf_buffer(event, kernel, num_bytes);
10371034

1035+
if (op->info.debug_dump_printf == 1){
1036+
// Update the already processed buffer size
1037+
kernel->processed_printf_buffer_size += dumped_buffer_size;
1038+
} else{
1039+
// Full dump, reset this variable
1040+
kernel->processed_printf_buffer_size = 0;
1041+
}
1042+
10381043
// Mark this printf work as done. Must do this *before* unstalling
10391044
// the kernel, to avoid a race against the kernel filling up the
10401045
// buffer again.
1041-
// Zibai Testing
1042-
if (op->info.debug_dump == 1){
1043-
kernel->processed_buffer_size += dumped_buffer_size;
1044-
} else{
1045-
// Full dump, clear this global var
1046-
kernel->processed_buffer_size = 0;
1047-
}
10481046
op->info.num_printf_bytes_pending = 0;
10491047

10501048
// Ensure kernel IRQ doesn't race with us to update the
@@ -1053,7 +1051,7 @@ void acl_process_printf_buffer(void *user_data, acl_device_op_t *op) {
10531051

10541052
// Allow the kernel to continue running.
10551053
// The kernel does not need to be unstalled after an early debug flush.
1056-
if (op->info.debug_dump == 0){
1054+
if (op->info.debug_dump_printf == 0){
10571055
acl_get_hal()->unstall_kernel(
10581056
event->cmd.info.ndrange_kernel.device->def.physical_device_id, op->id);
10591057
}

0 commit comments

Comments
 (0)