Skip to content

Commit 8cfe0db

Browse files
committed
Device global Dedicated interface Runtime changes
1. Added new command ops and device ops for clEnqueueReadGlobalVariableINTEL and clEnqueueWriteGlobalVariableINTEL. 2. Both ops mainly call the new MMD read/write functions in the simulator MMD. 3. Added a brief unit test for the enqueue functionality.
1 parent dd1af57 commit 8cfe0db

16 files changed

+983
-51
lines changed

include/CL/cl_ext.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2442,6 +2442,67 @@ clEnqueueWriteHostPipeINTEL(
24422442
const cl_event* event_wait_list,
24432443
cl_event* event) CL_API_SUFFIX__VERSION_1_0;
24442444

2445+
2446+
/**********************************
2447+
* cl_intel_global_variable_access *
2448+
***********************************/
2449+
2450+
#define CL_COMMAND_READ_GLOBAL_VARIABLE_INTEL 0x418E
2451+
#define CL_COMMAND_WRITE_GLOBAL_VARIABLE_INTEL 0x418F
2452+
2453+
extern CL_API_ENTRY cl_int CL_API_CALL
2454+
clEnqueueReadGlobalVariableINTEL(
2455+
cl_command_queue command_queue,
2456+
cl_program program,
2457+
const char* name,
2458+
cl_bool blocking_read,
2459+
size_t size,
2460+
size_t offset,
2461+
void* ptr,
2462+
cl_uint num_events_in_wait_list,
2463+
const cl_event* event_wait_list,
2464+
cl_event* event) CL_API_SUFFIX__VERSION_1_0;
2465+
2466+
2467+
typedef cl_int (CL_API_CALL *
2468+
clEnqueueReadGlobalVariableINTEL_fn)(
2469+
cl_command_queue command_queue,
2470+
cl_program program,
2471+
const char* name,
2472+
cl_bool blocking_read,
2473+
size_t size,
2474+
size_t offset,
2475+
const void* ptr,
2476+
cl_uint num_events_in_wait_list,
2477+
const cl_event* event_wait_list,
2478+
cl_event* event) CL_API_SUFFIX__VERSION_1_0;
2479+
2480+
extern CL_API_ENTRY cl_int CL_API_CALL
2481+
clEnqueueWriteGlobalVariableINTEL(
2482+
cl_command_queue command_queue,
2483+
cl_program program,
2484+
const char* name,
2485+
cl_bool blocking_write,
2486+
size_t size,
2487+
size_t offset,
2488+
const void* ptr,
2489+
cl_uint num_events_in_wait_list,
2490+
const cl_event* event_wait_list,
2491+
cl_event* event) CL_API_SUFFIX__VERSION_1_0;
2492+
2493+
typedef cl_int (CL_API_CALL *
2494+
clEnqueueWriteGlobalVariableINTEL_fn)(
2495+
cl_command_queue command_queue,
2496+
cl_program program,
2497+
const char* name,
2498+
cl_bool blocking_read,
2499+
size_t size,
2500+
size_t offset,
2501+
void* ptr,
2502+
cl_uint num_events_in_wait_list,
2503+
const cl_event* event_wait_list,
2504+
cl_event* event) CL_API_SUFFIX__VERSION_1_0;
2505+
24452506
/******************************************
24462507
* cl_intel_mem_channel_property extension *
24472508
*******************************************/

include/acl_hal.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,15 @@ typedef struct {
256256

257257
size_t (*write_csr)(unsigned int physical_device_id, uintptr_t offset,
258258
const void *ptr, size_t size);
259+
260+
/// device global read and write function pointers
261+
int (*simulation_device_global_interface_read)(
262+
unsigned int physical_device_id, const char *interface_name,
263+
void *host_addr, size_t dev_addr, size_t size);
264+
int (*simulation_device_global_interface_write)(
265+
unsigned int physical_device_id, const char *interface_name,
266+
const void *host_addr, size_t dev_addr, size_t size);
267+
259268
} acl_hal_t;
260269

261270
/// Linked list of MMD library names to load.

include/acl_hal_mmd.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,15 @@ typedef struct {
155155
// has been written by the runtime.
156156
void (*aocl_mmd_simulation_set_kernel_cra_address_map)(
157157
int handle, const std::vector<uintptr_t> &kernel_csr_address_map);
158+
159+
// Read and Write from/into the specific device global address. They are only
160+
// supported on the simulator device for now.
161+
int (*aocl_mmd_simulation_device_global_interface_read)(
162+
int handle, const char *interface_name, void *host_addr, size_t dev_addr,
163+
size_t size);
164+
int (*aocl_mmd_simulation_device_global_interface_write)(
165+
int handle, const char *interface_name, const void *host_addr,
166+
size_t dev_addr, size_t size);
158167
} acl_mmd_dispatch_t;
159168

160169
typedef struct {

include/acl_mem.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,17 @@ cl_bool acl_is_sub_or_parent_buffer(cl_mem mem);
9393
void CL_CALLBACK acl_free_allocation_after_event_completion(
9494
cl_event event, cl_int event_command_exec_status, void *callback_data);
9595

96+
// Submit a device global read operation to the device op queue
97+
cl_int acl_submit_read_device_global_device_op(cl_event event);
98+
// Submit a device global write device operation to the device op queue
99+
cl_int acl_submit_write_device_global_device_op(cl_event event);
100+
101+
// Read from a device global
102+
void acl_read_device_global(void *user_data, acl_device_op_t *op);
103+
104+
// Write into a device global
105+
void acl_write_device_global(void *user_data, acl_device_op_t *op);
106+
96107
#ifdef __GNUC__
97108
#pragma GCC visibility pop
98109
#endif

include/acl_types.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,17 @@ typedef struct {
613613
// constructor malloc related
614614
} host_pipe_info;
615615

616+
struct {
617+
// Used for device global ops
618+
size_t offset;
619+
void *read_ptr;
620+
const void *write_ptr;
621+
uint64_t device_global_addr;
622+
const char *name;
623+
size_t size;
624+
unsigned int physical_device_id;
625+
} device_global_info;
626+
616627
// Reprogram the device, without an associated kernel enqueue.
617628
// This is used to hide the latency of device programming on host
618629
// program startup.
@@ -1327,6 +1338,10 @@ typedef enum {
13271338
// Program based hostpipe read or write
13281339
ACL_DEVICE_OP_HOSTPIPE_READ,
13291340
ACL_DEVICE_OP_HOSTPIPE_WRITE,
1341+
1342+
// Device Global read or write
1343+
ACL_DEVICE_OP_DEVICE_GLOBAL_READ,
1344+
ACL_DEVICE_OP_DEVICE_GLOBAL_WRITE,
13301345
ACL_NUM_DEVICE_OP_TYPES
13311346

13321347
} acl_device_op_type_t;
@@ -1359,6 +1374,9 @@ typedef enum {
13591374
ACL_CONFLICT_HOSTPIPE_WRITE // Acts like a hostpipe write from the host
13601375
// channel
13611376
,
1377+
ACL_CONFLICT_DEVICE_GLOBAL_READ, // Acts like a Device Global Read
1378+
ACL_CONFLICT_DEVICE_GLOBAL_WRITE, // Acts like a Device Global Write
1379+
13621380
ACL_NUM_CONFLICT_TYPES
13631381
} acl_device_op_conflict_type_t;
13641382

@@ -1575,6 +1593,8 @@ typedef struct acl_device_op_queue_t {
15751593
void (*log_update)(void *, acl_device_op_t *, int new_status);
15761594
void (*hostpipe_read)(void *, acl_device_op_t *);
15771595
void (*hostpipe_write)(void *, acl_device_op_t *);
1596+
void (*device_global_read)(void *, acl_device_op_t *);
1597+
void (*device_global_write)(void *, acl_device_op_t *);
15781598
void *user_data; // The first argument provided to the callbacks.
15791599

15801600
} acl_device_op_queue_t;

src/acl_command.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,14 @@ int acl_submit_command(cl_event event) {
380380
result = acl_submit_write_program_hostpipe_device_op(event);
381381
break;
382382

383+
case CL_COMMAND_READ_GLOBAL_VARIABLE_INTEL:
384+
result = acl_submit_read_device_global_device_op(event);
385+
break;
386+
387+
case CL_COMMAND_WRITE_GLOBAL_VARIABLE_INTEL:
388+
result = acl_submit_write_device_global_device_op(event);
389+
break;
390+
383391
default:
384392
acl_print_debug_msg(" acl_submit_command: unknown cmd type %d\n",
385393
event->cmd.type);

src/acl_device_op.cpp

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,55 +109,65 @@ static unsigned char conflict_matrix_half_duplex
109109
[ACL_NUM_CONFLICT_TYPES][ACL_NUM_CONFLICT_TYPES] = {
110110

111111
// NONE, MEM_READ, MEM_WRITE, MEM_RW, KERNEL,
112-
// PROGRAM, HOSTPIPE_READ, HOSTPIPE_WRITE
112+
// PROGRAM, HOSTPIPE_READ, HOSTPIPE_WRITE,
113+
// DEVICE_GLOBAL_READ, DEVICE_GLOBAL_WRITE
113114
// NONE vs.
114-
{0, 0, 0, 0, 0, 1, 0, 0}
115+
{0, 0, 0, 0, 0, 1, 0, 0, 0, 0}
115116
// MEM_READ vs.
116117
,
117-
{0, 1, 1, 1, 0, 1, 1, 1}
118+
{0, 1, 1, 1, 0, 1, 1, 1, 1, 1}
118119
// MEM_WRITE vs.
119120
,
120-
{0, 1, 1, 1, 0, 1, 1, 1}
121+
{0, 1, 1, 1, 0, 1, 1, 1, 1, 1}
121122
// MEM_RW vs.
122123
,
123-
{0, 1, 1, 1, 0, 1, 1, 1}
124+
{0, 1, 1, 1, 0, 1, 1, 1, 1, 1}
124125
// KERNEL vs.
125126
,
126-
{0, 0, 0, 0, 0, 1, 0, 0}
127+
{0, 0, 0, 0, 0, 1, 0, 0, 0, 0}
127128
// PROGRAM vs.
128129
,
129-
{1, 1, 1, 1, 1, 1, 1, 1},
130+
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
130131
// HOSTPIPE_READ vs.
131-
{0, 1, 1, 1, 0, 1, 0, 0},
132+
{0, 1, 1, 1, 0, 1, 0, 0, 1, 1},
132133
// HOSTPIPE_WRITE vs.
133-
{0, 1, 1, 1, 0, 1, 0, 0}};
134+
{0, 1, 1, 1, 0, 1, 0, 0, 1, 1},
135+
// DEVICE_GLOBAL_READ vs.
136+
{0, 1, 1, 1, 0, 1, 1, 1, 1, 1},
137+
// DEVICE_GLOBAL_WRITE vs.
138+
{0, 1, 1, 1, 0, 1, 1, 1, 1, 1}};
134139

135140
static unsigned char conflict_matrix_full_duplex
136141
[ACL_NUM_CONFLICT_TYPES][ACL_NUM_CONFLICT_TYPES] = {
137142

138143
// NONE, MEM_READ, MEM_WRITE, MEM_RW, KERNEL,
139144
// PROGRAM, HOSTPIPE_READ, HOSTPIPE_WRITE
145+
// DEVICE_GLOBAL_READ, DEVICE_GLOBAL_WRITE
140146
// NONE vs.
141-
{0, 0, 0, 0, 0, 1, 0, 0}
147+
{0, 0, 0, 0, 0, 1, 0, 0, 0, 0}
142148
// MEM_READ vs.
143149
,
144-
{0, 1, 0, 1, 0, 1, 1, 1}
150+
{0, 1, 0, 1, 0, 1, 1, 1, 1, 0}
145151
// MEM_WRITE vs.
146152
,
147-
{0, 0, 1, 1, 0, 1, 1, 1}
153+
{0, 0, 1, 1, 0, 1, 1, 1, 0, 1}
148154
// MEM_RW vs.
149155
,
150-
{0, 1, 1, 1, 0, 1, 1, 1}
156+
{0, 1, 1, 1, 0, 1, 1, 1, 1, 1}
151157
// KERNEL vs.
152158
,
153-
{0, 0, 0, 0, 0, 1, 0, 0}
159+
{0, 0, 0, 0, 0, 1, 0, 0, 0, 0}
154160
// PROGRAM vs.
155161
,
156-
{1, 1, 1, 1, 1, 1, 1, 1},
162+
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
157163
// HOSTPIPE_READ vs.
158-
{0, 1, 1, 1, 0, 1, 0, 0},
164+
{0, 1, 1, 1, 0, 1, 0, 0, 0, 0},
159165
// HOSTPIPE_WRITE vs.
160-
{0, 1, 1, 1, 0, 1, 0, 0}};
166+
{0, 1, 1, 1, 0, 1, 0, 0, 0, 0},
167+
// DEVICE_GLOBAL_READ vs.
168+
{0, 1, 0, 1, 0, 1, 1, 1, 1, 0},
169+
// DEVICE_GLOBAL_WRITE vs.
170+
{0, 0, 1, 1, 0, 1, 1, 1, 0, 1}};
161171

162172
static const char *l_type_name(int op_type) {
163173
switch (op_type) {
@@ -191,6 +201,12 @@ static const char *l_type_name(int op_type) {
191201
case ACL_DEVICE_OP_HOSTPIPE_WRITE:
192202
return "HOSTPIPE_WRITE";
193203
break;
204+
case ACL_DEVICE_OP_DEVICE_GLOBAL_READ:
205+
return "DEVICE_GLOBAL_READ";
206+
break;
207+
case ACL_DEVICE_OP_DEVICE_GLOBAL_WRITE:
208+
return "DEVICE_GLOBAL_WRITE";
209+
break;
194210
default:
195211
return "<err>";
196212
break;
@@ -279,6 +295,8 @@ void acl_init_device_op_queue_limited(acl_device_op_queue_t *doq,
279295
doq->usm_memcpy = acl_usm_memcpy;
280296
doq->hostpipe_read = acl_read_program_hostpipe;
281297
doq->hostpipe_write = acl_write_program_hostpipe;
298+
doq->device_global_read = acl_read_device_global;
299+
doq->device_global_write = acl_write_device_global;
282300
doq->log_update = 0;
283301

284302
for (i = 0; i < ACL_MAX_DEVICE; i++) {
@@ -334,6 +352,12 @@ acl_device_op_conflict_type_t acl_device_op_conflict_type(acl_device_op_t *op) {
334352
case ACL_DEVICE_OP_HOSTPIPE_WRITE:
335353
result = ACL_CONFLICT_HOSTPIPE_WRITE;
336354
break;
355+
case ACL_DEVICE_OP_DEVICE_GLOBAL_READ:
356+
result = ACL_CONFLICT_DEVICE_GLOBAL_READ;
357+
break;
358+
case ACL_DEVICE_OP_DEVICE_GLOBAL_WRITE:
359+
result = ACL_CONFLICT_DEVICE_GLOBAL_WRITE;
360+
break;
337361
case ACL_DEVICE_OP_NONE:
338362
case ACL_NUM_DEVICE_OP_TYPES:
339363
result = ACL_CONFLICT_NONE;
@@ -630,6 +654,15 @@ l_get_devices_affected_for_op(acl_device_op_t *op, unsigned int physical_ids[],
630654
num_devices_affected = 1;
631655
}
632656
break;
657+
case ACL_DEVICE_OP_DEVICE_GLOBAL_READ:
658+
case ACL_DEVICE_OP_DEVICE_GLOBAL_WRITE:
659+
if (acl_event_is_valid(event) &&
660+
acl_command_queue_is_valid(event->command_queue)) {
661+
physical_ids[0] = event->command_queue->device->def.physical_device_id;
662+
conflicts[0] = acl_device_op_conflict_type(op);
663+
num_devices_affected = 1;
664+
}
665+
break;
633666
case ACL_DEVICE_OP_NONE:
634667
case ACL_NUM_DEVICE_OP_TYPES:
635668
break;
@@ -995,7 +1028,9 @@ unsigned l_update_device_op_queue_once(acl_device_op_queue_t *doq) {
9951028
op->info.type == ACL_DEVICE_OP_MEM_TRANSFER_WRITE ||
9961029
op->info.type == ACL_DEVICE_OP_MEM_TRANSFER_COPY ||
9971030
op->info.type == ACL_DEVICE_OP_HOSTPIPE_READ ||
998-
op->info.type == ACL_DEVICE_OP_HOSTPIPE_WRITE)) {
1031+
op->info.type == ACL_DEVICE_OP_HOSTPIPE_WRITE ||
1032+
op->info.type == ACL_DEVICE_OP_DEVICE_GLOBAL_READ ||
1033+
op->info.type == ACL_DEVICE_OP_DEVICE_GLOBAL_WRITE)) {
9991034
if (!acl_mem_op_requires_transfer(op->info.event->cmd)) {
10001035
is_conflicting = 0;
10011036
}
@@ -1347,6 +1382,12 @@ void acl_submit_device_op(acl_device_op_queue_t *doq, acl_device_op_t *op) {
13471382
case ACL_DEVICE_OP_HOSTPIPE_WRITE:
13481383
DOIT(hostpipe_write, op);
13491384
break;
1385+
case ACL_DEVICE_OP_DEVICE_GLOBAL_READ:
1386+
DOIT(device_global_read, op);
1387+
break;
1388+
case ACL_DEVICE_OP_DEVICE_GLOBAL_WRITE:
1389+
DOIT(device_global_write, op);
1390+
break;
13501391
default:
13511392
break;
13521393
}

src/acl_event.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,10 @@ static void l_release_command_resources(acl_command_info_t &cmd) {
654654
case CL_COMMAND_WRITE_HOST_PIPE_INTEL:
655655
// Nothing to cleanup
656656
break;
657+
case CL_COMMAND_READ_GLOBAL_VARIABLE_INTEL:
658+
case CL_COMMAND_WRITE_GLOBAL_VARIABLE_INTEL:
659+
// Cleanup is handled by the host free.
660+
break;
657661
default:
658662
break;
659663
}

src/acl_hal_mmd.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,13 @@ size_t acl_hal_mmd_read_csr(unsigned int physical_device_id, uintptr_t offset,
171171
size_t acl_hal_mmd_write_csr(unsigned int physical_device_id, uintptr_t offset,
172172
const void *ptr, size_t size);
173173

174+
int acl_hal_mmd_simulation_device_global_interface_read(
175+
unsigned int physical_device_id, const char *interface_name,
176+
void *host_addr, size_t dev_addr, size_t size);
177+
int acl_hal_mmd_simulation_device_global_interface_write(
178+
unsigned int physical_device_id, const char *interface_name,
179+
const void *host_addr, size_t dev_addr, size_t size);
180+
174181
static size_t acl_kernel_if_read(acl_bsp_io *io, dev_addr_t src, char *dest,
175182
size_t size);
176183
static size_t acl_kernel_if_write(acl_bsp_io *io, dev_addr_t dest,
@@ -367,6 +374,8 @@ static acl_hal_t acl_hal_mmd = {
367374
acl_hal_mmd_simulation_set_kernel_cra_address_map, // simulation_set_kernel_cra_address_map
368375
acl_hal_mmd_read_csr, // read_csr
369376
acl_hal_mmd_write_csr, // write_csr
377+
acl_hal_mmd_simulation_device_global_interface_read, // simulation_device_global_interface_read
378+
acl_hal_mmd_simulation_device_global_interface_write, // simulation_device_global_interface_write
370379
};
371380

372381
// This will contain the device physical id to tell us which device across all
@@ -2891,3 +2900,21 @@ size_t acl_hal_mmd_write_csr(unsigned int physical_device_id, uintptr_t offset,
28912900
device_info[physical_device_id].handle, NULL, size, (const void *)ptr,
28922901
kernel_interface, (size_t)offset);
28932902
}
2903+
2904+
int acl_hal_mmd_simulation_device_global_interface_read(
2905+
unsigned int physical_device_id, const char *interface_name,
2906+
void *host_addr, size_t dev_addr, size_t size) {
2907+
return device_info[physical_device_id]
2908+
.mmd_dispatch->aocl_mmd_simulation_device_global_interface_read(
2909+
device_info[physical_device_id].handle, interface_name, host_addr,
2910+
dev_addr, size);
2911+
}
2912+
2913+
int acl_hal_mmd_simulation_device_global_interface_write(
2914+
unsigned int physical_device_id, const char *interface_name,
2915+
const void *host_addr, size_t dev_addr, size_t size) {
2916+
return device_info[physical_device_id]
2917+
.mmd_dispatch->aocl_mmd_simulation_device_global_interface_write(
2918+
device_info[physical_device_id].handle, interface_name, host_addr,
2919+
dev_addr, size);
2920+
}

0 commit comments

Comments
 (0)