diff --git a/src/acl_mem.cpp b/src/acl_mem.cpp index 39539fad..52979348 100644 --- a/src/acl_mem.cpp +++ b/src/acl_mem.cpp @@ -1521,6 +1521,10 @@ ACL_EXPORT CL_API_ENTRY cl_mem CL_API_CALL clCreateImageIntelFPGA( BAIL(local_errcode_ret); } + if (image_desc == NULL) { + BAIL_INFO(CL_INVALID_IMAGE_DESCRIPTOR, context, "image_desc is NULL"); + } + local_errcode_ret = clGetSupportedImageFormats( context, flags, image_desc->image_type, 0, NULL, &num_image_formats); if (local_errcode_ret != CL_SUCCESS) { @@ -2026,14 +2030,14 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( "Pointer argument cannot be NULL"); } - if (image != NULL) { - src_element_size = acl_get_image_element_size( - image->context, image->fields.image_objs.image_format, &errcode_ret); - if (errcode_ret != CL_SUCCESS) { - return errcode_ret; - } - } else { - src_element_size = 0; + if (image == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + src_element_size = acl_get_image_element_size( + image->context, image->fields.image_objs.image_format, &errcode_ret); + if (errcode_ret != CL_SUCCESS) { + return errcode_ret; } tmp_src_offset[0] = origin[0]; @@ -2060,8 +2064,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImageIntelFPGA( image->fields.image_objs.image_desc->image_width * src_element_size; } - if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D && - image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY && + if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || + image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { tmp_slice_pitch = tmp_row_pitch; } else { @@ -2123,16 +2127,16 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( size_t dst_element_size; std::scoped_lock lock{acl_mutex_wrapper}; - if (image != NULL) { - dst_element_size = acl_get_image_element_size( - image->context, image->fields.image_objs.image_format, &errcode_ret); - if (errcode_ret != CL_SUCCESS) { - return errcode_ret; - } - } else { + if (image == NULL) { return CL_INVALID_MEM_OBJECT; } + dst_element_size = acl_get_image_element_size( + image->context, image->fields.image_objs.image_format, &errcode_ret); + if (errcode_ret != CL_SUCCESS) { + return errcode_ret; + } + if (!acl_command_queue_is_valid(command_queue)) { return CL_INVALID_COMMAND_QUEUE; } @@ -2161,8 +2165,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImageIntelFPGA( image->fields.image_objs.image_desc->image_width * dst_element_size; } - if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D && - image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY && + if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || + image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { tmp_slice_pitch = tmp_row_pitch; } else { @@ -2227,16 +2231,16 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( cl_event tmp_event; std::scoped_lock lock{acl_mutex_wrapper}; - if (image != NULL) { - dst_element_size = acl_get_image_element_size( - image->context, image->fields.image_objs.image_format, &errcode_ret); - if (errcode_ret != CL_SUCCESS) { - return errcode_ret; - } - } else { + if (image == NULL) { return CL_INVALID_MEM_OBJECT; } + dst_element_size = acl_get_image_element_size( + image->context, image->fields.image_objs.image_format, &errcode_ret); + if (errcode_ret != CL_SUCCESS) { + return errcode_ret; + } + if (!acl_command_queue_is_valid(command_queue)) { return CL_INVALID_COMMAND_QUEUE; } @@ -2351,8 +2355,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImageIntelFPGA( region[0] * dst_element_size; // Width of each row of the memory that ptr points to. - if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D && - image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY && + if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || + image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { tmp_slice_pitch = tmp_row_pitch; } else { @@ -2554,8 +2558,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBufferIntelFPGA( tmp_row_pitch = src_image->fields.image_objs.image_desc->image_width * src_element_size; - if (src_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D && - src_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY && + if (src_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || + src_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || src_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { tmp_slice_pitch = tmp_row_pitch; } else { @@ -2649,8 +2653,8 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImageIntelFPGA( tmp_row_pitch = dst_image->fields.image_objs.image_desc->image_width * dst_element_size; - if (dst_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D && - dst_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY && + if (dst_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || + dst_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || dst_image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { tmp_slice_pitch = tmp_row_pitch; } else { @@ -2706,7 +2710,7 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( cl_int status; size_t element_size; size_t tmp_row_pitch; - size_t tmp_slice_pitch; + size_t tmp_slice_pitch = 0; std::scoped_lock lock{acl_mutex_wrapper}; if (image != NULL) { @@ -2767,19 +2771,22 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapImageIntelFPGA( image_slice_pitch == NULL) { BAIL_INFO(CL_INVALID_VALUE, command_queue->context, "Invalid slice pitch provided"); - } else { - if (image->mem_object_type == CL_MEM_OBJECT_IMAGE2D || - image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || - image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { - if (image_slice_pitch != NULL) { - *image_slice_pitch = 0; - } - } else if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { - *image_slice_pitch = tmp_row_pitch; - } else { - *image_slice_pitch = - image->fields.image_objs.image_desc->image_height * tmp_row_pitch; + } + + if (image->mem_object_type == CL_MEM_OBJECT_IMAGE2D || + image->mem_object_type == CL_MEM_OBJECT_IMAGE1D || + image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + if (image_slice_pitch != NULL) { + *image_slice_pitch = 0; } + } else if (image->mem_object_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + *image_slice_pitch = tmp_row_pitch; + } else { + *image_slice_pitch = + image->fields.image_objs.image_desc->image_height * tmp_row_pitch; + } + + if (image_slice_pitch != NULL) { tmp_slice_pitch = *image_slice_pitch; } @@ -2954,9 +2961,9 @@ CL_API_ENTRY void *CL_API_CALL clEnqueueMapBufferIntelFPGA( ~(CL_MAP_READ | CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { BAIL_INFO(CL_INVALID_VALUE, context, "Invalid or unsupported flags"); } - if (((map_flags & CL_MAP_READ) & + if (((map_flags & CL_MAP_READ) && (map_flags & CL_MAP_WRITE_INVALIDATE_REGION)) || - ((map_flags & CL_MAP_WRITE) & + ((map_flags & CL_MAP_WRITE) && (map_flags & CL_MAP_WRITE_INVALIDATE_REGION))) { BAIL_INFO( CL_INVALID_VALUE, context, @@ -4062,7 +4069,6 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( cl_event local_event = 0; unsigned int physical_id; unsigned int mem_id; - int *needs_release_on_fail; std::scoped_lock lock{acl_mutex_wrapper}; @@ -4107,10 +4113,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( // Try to reserve space for all the buffers to be moved. If we fail, we need // to know which buffers to deallocate: - needs_release_on_fail = (int *)malloc(sizeof(int) * num_mem_objects); - for (i = 0; i < num_mem_objects; ++i) { - needs_release_on_fail[i] = 0; - } + std::vector needs_release_on_fail(num_mem_objects, false); status = CL_SUCCESS; for (i = 0; i < num_mem_objects; ++i) { @@ -4125,7 +4128,7 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( status = CL_MEM_OBJECT_ALLOCATION_FAILURE; break; } - needs_release_on_fail[i] = 1; + needs_release_on_fail[i] = true; } mem_objects[i]->reserved_allocations_count[physical_id][mem_id]++; } @@ -4141,7 +4144,6 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( } mem_objects[i]->reserved_allocations_count[physical_id][mem_id]--; } - free(needs_release_on_fail); return status; } @@ -4152,7 +4154,6 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( CL_COMMAND_MIGRATE_MEM_OBJECTS, &local_event); if (status != CL_SUCCESS) { - free(needs_release_on_fail); return status; // already signalled callback } @@ -4198,8 +4199,6 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA( acl_idle_update(command_queue->context); // Clean up early } - free(needs_release_on_fail); - return CL_SUCCESS; } @@ -6820,12 +6819,12 @@ static void acl_dump_mem_internal(cl_mem mem) { (mem->block_allocation->region->uses_host_system_malloc ? "is malloc" : "not malloc")); + printf(" .begin %p\n", + mem->block_allocation->range.begin); + printf(" .end %p\n", + mem->block_allocation->range.next); } printf(" .mappings %d\n", mem->mapping_count); - printf(" .begin %p\n", - mem->block_allocation->range.begin); - printf(" .end %p\n", - mem->block_allocation->range.next); acl_print_debug_msg(" .size %lu\n", mem->size); printf(" .host_ptr %p\n", mem->fields.buffer_objs.host_ptr); diff --git a/test/acl_mem_test.cpp b/test/acl_mem_test.cpp index 2ca3cd40..d38d54ee 100644 --- a/test/acl_mem_test.cpp +++ b/test/acl_mem_test.cpp @@ -2270,6 +2270,65 @@ MT_TEST(acl_mem, read_write_image) { size_t origin[3]; size_t region[3]; + // 1D Image + { + cl_image_format image_format_1D = {CL_R, CL_FLOAT}; + cl_image_desc image_desc_1D = { + CL_MEM_OBJECT_IMAGE1D, width, 0, 0, 0, 0, 0, 0, 0, {NULL}}; + image1 = clCreateImage(m_context, 0, &image_format_1D, &image_desc_1D, 0, + &status); + CHECK_EQUAL(CL_SUCCESS, status); + image2 = clCreateImage(m_context, 0, &image_format_1D, &image_desc_1D, 0, + &status); + CHECK_EQUAL(CL_SUCCESS, status); + + origin[0] = 0; + origin[1] = 0; + origin[2] = 0; + + region[0] = width; + region[1] = 1; + region[2] = 1; + + input_ptr = (float *)acl_malloc(sizeof(float) * (width)); + CHECK(input_ptr != 0); + output_ptr = (float *)acl_malloc(sizeof(float) * (width)); + CHECK(output_ptr != 0); + for (size_t row = 0; row < width; ++row) { + input_ptr[row] = (float)row + (float)1.5; + output_ptr[row] = 0.0; + } + + status = clEnqueueWriteImage(m_cq, image1, CL_TRUE, origin, region, 0, 0, + input_ptr, 0, NULL, NULL); + CHECK_EQUAL(CL_SUCCESS, status); + + buffer1 = clCreateBuffer(m_context, 0, sizeof(float) * width, 0, &status); + CHECK_EQUAL(CL_SUCCESS, status); + + // Copy from 1D image to buffer + status = clEnqueueCopyImageToBuffer(m_cq, image1, buffer1, origin, region, + 0, 0, NULL, NULL); + CHECK_EQUAL(CL_SUCCESS, status); + status = clEnqueueCopyBufferToImage(m_cq, buffer1, image2, 0, origin, + region, 0, NULL, NULL); + CHECK_EQUAL(CL_SUCCESS, status); + status = clEnqueueReadImage(m_cq, image2, CL_TRUE, origin, region, 0, 0, + output_ptr, 0, NULL, NULL); + CHECK_EQUAL(CL_SUCCESS, status); + + // Compare contents of first pointer with second pointer + for (size_t row = 0; row < width; ++row) { + CHECK(input_ptr[row] == output_ptr[row]); + } + + CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(image1)); + CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(image2)); + CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(buffer1)); + acl_free(input_ptr); + acl_free(output_ptr); + } + // 2D Image cl_image_format image_format = {CL_R, CL_FLOAT}; cl_image_desc image_desc = {