@@ -3968,7 +3968,7 @@ TEST(acl_kernel_reprogram_scheduler, switch_prog) {
3968
3968
// set MEM_MIGRATE2.1 to COMPLETE +
3969
3969
// set MEM_MIGRATE2.2 to RUNNING +
3970
3970
// set MEM_MIGRATE2.2 to COMPLETE +
3971
- // submit KERNEL2 to device = 5
3971
+ // submit KERNEL2 to device = 10
3972
3972
CHECK_EQUAL (offset + 15 , m_devlog.num_ops );
3973
3973
3974
3974
// Should have copied the memory over.
@@ -4332,6 +4332,180 @@ TEST(acl_kernel_reprogram_scheduler, switch_prog) {
4332
4332
CHECK_EQUAL (CL_SUCCESS, clReleaseCommandQueue (cq2));
4333
4333
}
4334
4334
4335
+ TEST (acl_kernel_reprogram_scheduler, device_global_reprogram) {
4336
+ // In this test, we will force the device to contain reprogram
4337
+ // device global. The device will be first reprogrammed eagerly
4338
+ // due to the clCreateProgramWithBinary call, then when the
4339
+ // kernel is enqueued, another reprogram should be scheduled
4340
+ // even though the device is already programmed with the right
4341
+ // binary, due to the presence of the device global.
4342
+
4343
+ // Force device to contain device global
4344
+ m_device->def .autodiscovery_def .device_global_mem_defs .insert (
4345
+ {" dev_glob1" ,
4346
+ {/* address */ 1024 ,
4347
+ /* size */ 1024 ,
4348
+ /* host_access */ ACL_DEVICE_GLOBAL_HOST_ACCESS_READ_WRITE,
4349
+ /* init_mode */ ACL_DEVICE_GLOBAL_INIT_MODE_REPROGRAM,
4350
+ /* implement_in_csr */ false }});
4351
+
4352
+ // Initial eager reprogram
4353
+ int offset = m_devlog.num_ops ;
4354
+ CHECK_EQUAL (3 , offset);
4355
+
4356
+ acl_device_program_info_t *dp0 = check_dev_prog (m_program0);
4357
+
4358
+ m_context->reprogram_buf_read_callback = read_mem_callback;
4359
+ m_context->reprogram_buf_write_callback = write_mem_callback;
4360
+
4361
+ // A device side buffer
4362
+ cl_int status = CL_INVALID_VALUE;
4363
+ cl_mem mem = clCreateBuffer (m_context, CL_MEM_READ_WRITE, 2048 , 0 , &status);
4364
+ CHECK_EQUAL (CL_SUCCESS, status);
4365
+ CHECK (mem);
4366
+ memset (mem->host_mem .aligned_ptr , ' X' , mem->size );
4367
+ memset (mem->block_allocation ->range .begin , ' x' , mem->size );
4368
+
4369
+ CHECK_EQUAL (1 , m_context->device_buffers_have_backing_store );
4370
+ CHECK_EQUAL (0 , mem->block_allocation ->region ->is_host_accessible );
4371
+ CHECK_EQUAL (0 , mem->writable_copy_on_host );
4372
+
4373
+ cl_kernel k = get_kernel (m_program0);
4374
+
4375
+ // Just the initial program load.
4376
+ CHECK_EQUAL (m_first_dev_bin, m_device->last_bin );
4377
+ CHECK_EQUAL (m_first_dev_bin, m_device->loaded_bin );
4378
+
4379
+ cl_event ue = get_user_event ();
4380
+ cl_event k_e = 0 ;
4381
+
4382
+ CHECK_EQUAL (CL_SUCCESS, clSetKernelArg (k, 0 , sizeof (cl_mem), &mem));
4383
+ CHECK_EQUAL (CL_SUCCESS, clSetKernelArg (k, 1 , sizeof (cl_mem), &mem));
4384
+ CHECK_EQUAL (CL_SUCCESS, clEnqueueTask (m_cq, k, 1 , &ue, &k_e));
4385
+ CHECK_EQUAL (CL_COMMAND_TASK, k_e->cmd .type );
4386
+
4387
+ // Only initial programming has occurred.
4388
+ // Has 3 transitions logged: SUBMITTED, RUNNING, COMPLETE
4389
+ CHECK_EQUAL (m_first_dev_bin, m_device->last_bin );
4390
+ CHECK_EQUAL (m_first_dev_bin, m_device->loaded_bin );
4391
+
4392
+ acl_print_debug_msg (" Forcing user event completion\n " );
4393
+ CHECK_EQUAL (CL_SUCCESS, clSetUserEventStatus (ue, CL_COMPLETE));
4394
+ CHECK_EQUAL (CL_SUCCESS, clReleaseEvent (ue));
4395
+
4396
+ // Should have recorded that we loaded the program.
4397
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->last_bin );
4398
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->loaded_bin );
4399
+
4400
+ // submit device global forced REPROGRAM +
4401
+ // set REPROGRAM to RUNNING +
4402
+ // set REPROGRAM to COMPLETE +
4403
+ // set MEM_MIGRATE 1 to RUNNING +
4404
+ // set MEM_MIGRATE 1 to COMPLETE +
4405
+ // set MEM_MIGRATE 2 to RUNNING +
4406
+ // set MEM_MIGRATE 2 to COMPLETE +
4407
+ // submit KERNEL = 8
4408
+ CHECK_EQUAL (offset + 8 , m_devlog.num_ops );
4409
+ const acl_device_op_t *op0submit = &(m_devlog.before [3 ]);
4410
+ const acl_device_op_t *op0running = &(m_devlog.before [4 ]);
4411
+ const acl_device_op_t *op0complete = &(m_devlog.before [5 ]);
4412
+
4413
+ // Device global forced reprogram
4414
+ CHECK_EQUAL (ACL_DEVICE_OP_REPROGRAM, op0submit->info .type );
4415
+ CHECK_EQUAL (0 , op0submit->id );
4416
+ CHECK (op0submit->info .event );
4417
+ CHECK_EQUAL (CL_SUBMITTED, op0submit->status );
4418
+ CHECK_EQUAL (0 , op0submit->info .num_printf_bytes_pending );
4419
+ CHECK_EQUAL (1 , op0submit->first_in_group );
4420
+ CHECK_EQUAL (0 , op0submit->last_in_group );
4421
+
4422
+ CHECK_EQUAL (ACL_DEVICE_OP_REPROGRAM, op0running->info .type );
4423
+ CHECK_EQUAL (0 , op0running->id );
4424
+ CHECK (op0running->info .event );
4425
+ CHECK_EQUAL (CL_RUNNING, op0running->status );
4426
+ CHECK_EQUAL (0 , op0running->info .num_printf_bytes_pending );
4427
+ CHECK_EQUAL (1 , op0running->first_in_group );
4428
+ CHECK_EQUAL (0 , op0running->last_in_group );
4429
+
4430
+ CHECK_EQUAL (ACL_DEVICE_OP_REPROGRAM, op0complete->info .type );
4431
+ CHECK_EQUAL (0 , op0complete->id );
4432
+ CHECK (op0complete->info .event );
4433
+ CHECK_EQUAL (CL_COMPLETE, op0complete->status );
4434
+ CHECK_EQUAL (0 , op0complete->info .num_printf_bytes_pending );
4435
+ CHECK_EQUAL (1 , op0complete->first_in_group );
4436
+ CHECK_EQUAL (0 , op0complete->last_in_group );
4437
+
4438
+ // The device is still programmed with the same program.
4439
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->last_bin );
4440
+ CHECK_EQUAL (&(dp0->device_binary ), m_device->loaded_bin );
4441
+
4442
+ const acl_device_op_t *op1submit = &(m_devlog.before [10 ]);
4443
+ CHECK_EQUAL (ACL_DEVICE_OP_KERNEL, op1submit->info .type );
4444
+ CHECK_EQUAL (k_e, op1submit->info .event );
4445
+ CHECK_EQUAL (CL_SUBMITTED, op1submit->status );
4446
+ CHECK_EQUAL (0 , op1submit->info .num_printf_bytes_pending );
4447
+ CHECK_EQUAL (0 , op1submit->first_in_group ); // reprogram is first
4448
+ CHECK_EQUAL (1 , op1submit->last_in_group );
4449
+
4450
+ // The user-level event is linked to the kernel device op now.
4451
+ CHECK_EQUAL (op1submit->id , k_e->current_device_op ->id );
4452
+
4453
+ // Pretend to start the kernel
4454
+ acl_print_debug_msg (" Say kernel is running\n " );
4455
+ ACL_LOCKED (acl_receive_kernel_update (k_e->current_device_op ->id , CL_RUNNING));
4456
+ CHECK_EQUAL (CL_RUNNING, k_e->current_device_op ->execution_status );
4457
+
4458
+ ACL_LOCKED (acl_idle_update (m_context));
4459
+
4460
+ // Now we have a "running" transition
4461
+ CHECK_EQUAL (offset + 9 , m_devlog.num_ops );
4462
+ const acl_device_op_t *op2a = &(m_devlog.after [11 ]);
4463
+ CHECK_EQUAL (ACL_DEVICE_OP_KERNEL, op2a->info .type );
4464
+ CHECK_EQUAL (k_e, op2a->info .event );
4465
+ CHECK_EQUAL (CL_RUNNING, op2a->status );
4466
+ CHECK_EQUAL (0 , op2a->info .num_printf_bytes_pending );
4467
+ CHECK_EQUAL (0 , op2a->first_in_group );
4468
+ CHECK_EQUAL (1 , op2a->last_in_group );
4469
+
4470
+ // The running status was propagated up to the user-level event.
4471
+ CHECK_EQUAL (CL_RUNNING, k_e->execution_status );
4472
+
4473
+ acl_print_debug_msg (" Say kernel is complete\n " );
4474
+ ACL_LOCKED (
4475
+ acl_receive_kernel_update (k_e->current_device_op ->id , CL_COMPLETE));
4476
+ CHECK_EQUAL (CL_COMPLETE, k_e->current_device_op ->execution_status );
4477
+
4478
+ ACL_LOCKED (acl_idle_update (m_context));
4479
+ // Now we have a "complete" transition
4480
+ CHECK_EQUAL (offset + 10 , m_devlog.num_ops );
4481
+ const acl_device_op_t *op3a = &(m_devlog.after [12 ]);
4482
+ CHECK_EQUAL (ACL_DEVICE_OP_KERNEL, op3a->info .type );
4483
+ CHECK_EQUAL (k_e, op3a->info .event );
4484
+ CHECK_EQUAL (CL_COMPLETE, op3a->status );
4485
+ CHECK_EQUAL (0 , op3a->info .num_printf_bytes_pending );
4486
+ CHECK_EQUAL (0 , op3a->first_in_group );
4487
+ CHECK_EQUAL (1 , op3a->last_in_group );
4488
+
4489
+ // Completion timestamp has propagated up to the user level event.
4490
+ CHECK_EQUAL (acl_platform.device_op_queue .op [op3a->id ].timestamp [CL_COMPLETE],
4491
+ k_e->timestamp [CL_COMPLETE]);
4492
+
4493
+ // Completion wipes out the downlink.
4494
+ CHECK_EQUAL (0 , k_e->current_device_op );
4495
+
4496
+ // And let go.
4497
+ // (Don't check for CL_INVALID_EVENT on a second release of each of
4498
+ // these events because the events might be reused.)
4499
+ CHECK_EQUAL (CL_SUCCESS, clReleaseMemObject (mem));
4500
+ CHECK_EQUAL (CL_SUCCESS, clReleaseEvent (k_e));
4501
+ CHECK_EQUAL (CL_SUCCESS, clReleaseKernel (k));
4502
+
4503
+ // Clean up device global
4504
+ m_device->def .autodiscovery_def .device_global_mem_defs .clear ();
4505
+
4506
+ acl_print_debug_msg (" DONE!\n " );
4507
+ }
4508
+
4335
4509
TEST (acl_kernel_reprogram_scheduler, use_host_buf_as_arg) {
4336
4510
// Must be able to use a host-side buffer as a kernel argument.
4337
4511
cl_int status = 0 ;
0 commit comments