From 8f3027387a18bb0dbf0c298b8efdf617b7085833 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 18 May 2023 09:29:27 +0100 Subject: Fix some Wformat size_t warnings (#1726) Printing a `size_t` requires the `%zu` specifier. Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_get_linear_ids.cpp | 14 +++++++++----- .../cl_khr_command_buffer/basic_command_buffer.h | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/test_conformance/basic/test_get_linear_ids.cpp b/test_conformance/basic/test_get_linear_ids.cpp index 3496fd0b..ee7dfb2f 100644 --- a/test_conformance/basic/test_get_linear_ids.cpp +++ b/test_conformance/basic/test_get_linear_ids.cpp @@ -104,15 +104,19 @@ test_get_linear_ids(cl_device_id device, cl_context context, cl_command_queue qu switch (dims) { case 1: - log_info(" testing offset=%u global=%u local=%u...\n", gwo[0], gws[0], lws[0]); + log_info(" testing offset=%zu global=%zu local=%zu...\n", gwo[0], + gws[0], lws[0]); break; case 2: - log_info(" testing offset=(%u,%u) global=(%u,%u) local=(%u,%u)...\n", - gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]); + log_info(" testing offset=(%zu,%zu) global=(%zu,%zu) " + "local=(%zu,%zu)...\n", + gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]); break; case 3: - log_info(" testing offset=(%u,%u,%u) global=(%u,%u,%u) local=(%u,%u,%u)...\n", - gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], lws[1], lws[2]); + log_info(" testing offset=(%zu,%zu,%zu) global=(%zu,%zu,%zu) " + "local=(%zu,%zu,%zu)...\n", + gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], + lws[1], lws[2]); break; } diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h index a20229e0..b1d36024 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h @@ -28,7 +28,7 @@ { \ if (reference 
!= result) \ { \ - log_error("Expected %d was %d at index %u\n", reference, result, \ + log_error("Expected %d was %d at index %zu\n", reference, result, \ index); \ return TEST_FAIL; \ } \ -- cgit v1.2.3 From 4f62adf1ca2cadc864dac844e643a20aa8b76b29 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 23 May 2023 09:43:25 +0100 Subject: computeinfo: fix use of uninitialized config_size_ret (#1727) The variable `config_size_ret` is only assigned to inside the `if`. If the condition is false, we would read uninitialized data. Signed-off-by: Sven van Haastregt --- test_conformance/computeinfo/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index 9cecabea..b1d73af3 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -439,8 +439,8 @@ int getPlatformConfigInfo(cl_platform_id platform, config_info* info) err = clGetPlatformInfo(platform, info->opcode, config_size_set, &info->config.cl_name_version_single, &config_size_ret); + size_err = config_size_set != config_size_ret; } - size_err = config_size_set != config_size_ret; break; default: log_error("Unknown config type: %d\n", info->config_type); @@ -585,8 +585,8 @@ int getConfigInfo(cl_device_id device, config_info* info) err = clGetDeviceInfo(device, info->opcode, config_size_set, &info->config.cl_name_version_single, &config_size_ret); + size_err = config_size_set != config_size_ret; } - size_err = config_size_set != config_size_ret; break; default: log_error("Unknown config type: %d\n", info->config_type); -- cgit v1.2.3 From 957e3b398500c5436283300b56d74466cfc36338 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 24 May 2023 11:11:23 +0100 Subject: Convert some if-else chains to switch statements (#1730) All of these if-else chains compare against enums, which is better done using switch statements. 
This helps avoid some `-Wsometimes-uninitialized` warnings of variables that are assigned inside the switch. Signed-off-by: Sven van Haastregt --- .../allocations/allocation_execute.cpp | 36 +++++--- test_conformance/api/test_null_buffer_arg.cpp | 25 +++--- test_conformance/images/clCopyImage/test_loops.cpp | 98 ++++++++++------------ test_conformance/images/clFillImage/test_loops.cpp | 55 ++++++------ 4 files changed, 110 insertions(+), 104 deletions(-) diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp index 9d0e8777..5a77c3a7 100644 --- a/test_conformance/allocations/allocation_execute.cpp +++ b/test_conformance/allocations/allocation_execute.cpp @@ -79,20 +79,30 @@ int check_image(cl_command_queue queue, cl_mem mem) { return -1; } - if (type == CL_MEM_OBJECT_BUFFER) { - log_error("Expected image object, not buffer.\n"); - return -1; - } else if (type == CL_MEM_OBJECT_IMAGE2D) { - error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL); - if (error) { - print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH."); - return -1; - } - error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL); - if (error) { - print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT."); + switch (type) + { + case CL_MEM_OBJECT_BUFFER: + log_error("Expected image object, not buffer.\n"); return -1; - } + case CL_MEM_OBJECT_IMAGE2D: + error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, + NULL); + if (error) + { + print_error(error, + "clGetMemObjectInfo failed for CL_IMAGE_WIDTH."); + return -1; + } + error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), + &height, NULL); + if (error) + { + print_error(error, + "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT."); + return -1; + } + break; + default: log_error("unexpected object type"); return -1; } diff --git a/test_conformance/api/test_null_buffer_arg.cpp 
b/test_conformance/api/test_null_buffer_arg.cpp index 75bdd479..83fcb636 100644 --- a/test_conformance/api/test_null_buffer_arg.cpp +++ b/test_conformance/api/test_null_buffer_arg.cpp @@ -64,16 +64,21 @@ static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel, cl_int status; const char *typestr; - if (type == NON_NULL_PATH) { - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); - typestr = "non-NULL"; - } else if (type == ADDROF_NULL_PATH) { - test_buf = NULL; - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); - typestr = "&NULL"; - } else if (type == NULL_PATH) { - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL); - typestr = "NULL"; + switch (type) + { + case NON_NULL_PATH: + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "non-NULL"; + break; + case ADDROF_NULL_PATH: + test_buf = NULL; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "&NULL"; + break; + case NULL_PATH: + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL); + typestr = "NULL"; + break; } log_info("Testing setKernelArgs with %s buffer.\n", typestr); diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp index 6ee1e536..e839cfdf 100644 --- a/test_conformance/images/clCopyImage/test_loops.cpp +++ b/test_conformance/images/clCopyImage/test_loops.cpp @@ -41,60 +41,52 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q } } - if( testMethod == k1D ) + switch (testMethod) { - name = "1D -> 1D"; - imageType = CL_MEM_OBJECT_IMAGE1D; - } - else if( testMethod == k2D ) - { - name = "2D -> 2D"; - imageType = CL_MEM_OBJECT_IMAGE2D; - } - else if( testMethod == k3D ) - { - name = "3D -> 3D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k1DArray ) - { - name = "1D array -> 1D array"; - imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; - } - else if( testMethod == k2DArray ) - { - name = "2D 
array -> 2D array"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - } - else if( testMethod == k2DTo3D ) - { - name = "2D -> 3D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k3DTo2D ) - { - name = "3D -> 2D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k2DArrayTo2D ) - { - name = "2D array -> 2D"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - } - else if( testMethod == k2DTo2DArray ) - { - name = "2D -> 2D array"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - } - else if( testMethod == k2DArrayTo3D ) - { - name = "2D array -> 3D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k3DTo2DArray ) - { - name = "3D -> 2D array"; - imageType = CL_MEM_OBJECT_IMAGE3D; + case k1D: + name = "1D -> 1D"; + imageType = CL_MEM_OBJECT_IMAGE1D; + break; + case k2D: + name = "2D -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE2D; + break; + case k3D: + name = "3D -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k1DArray: + name = "1D array -> 1D array"; + imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + case k2DArray: + name = "2D array -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case k2DTo3D: + name = "2D -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k3DTo2D: + name = "3D -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k2DArrayTo2D: + name = "2D array -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case k2DTo2DArray: + name = "2D -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case k2DArrayTo3D: + name = "2D array -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k3DTo2DArray: + name = "3D -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; } if(gTestMipmaps) diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp index 759f48d2..126ea0eb 100644 --- a/test_conformance/images/clFillImage/test_loops.cpp +++ 
b/test_conformance/images/clFillImage/test_loops.cpp @@ -33,35 +33,34 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q cl_mem_object_type imageType; test_func test_fn; - if ( testMethod == k1D ) + switch (testMethod) { - name = "1D Image Fill"; - imageType = CL_MEM_OBJECT_IMAGE1D; - test_fn = &test_fill_image_set_1D; - } - else if ( testMethod == k2D ) - { - name = "2D Image Fill"; - imageType = CL_MEM_OBJECT_IMAGE2D; - test_fn = &test_fill_image_set_2D; - } - else if ( testMethod == k1DArray ) - { - name = "1D Image Array Fill"; - imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; - test_fn = &test_fill_image_set_1D_array; - } - else if ( testMethod == k2DArray ) - { - name = "2D Image Array Fill"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - test_fn = &test_fill_image_set_2D_array; - } - else if ( testMethod == k3D ) - { - name = "3D Image Fill"; - imageType = CL_MEM_OBJECT_IMAGE3D; - test_fn = &test_fill_image_set_3D; + case k1D: + name = "1D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE1D; + test_fn = &test_fill_image_set_1D; + break; + case k2D: + name = "2D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE2D; + test_fn = &test_fill_image_set_2D; + break; + case k1DArray: + name = "1D Image Array Fill"; + imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + test_fn = &test_fill_image_set_1D_array; + break; + case k2DArray: + name = "2D Image Array Fill"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + test_fn = &test_fill_image_set_2D_array; + break; + case k3D: + name = "3D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE3D; + test_fn = &test_fill_image_set_3D; + break; + default: log_error("Unhandled method\n"); return -1; } log_info( "Running %s tests...\n", name ); -- cgit v1.2.3 From 3e8898ffeb1478c96c440a158db2e7d662b26a30 Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Wed, 24 May 2023 16:55:25 +0100 Subject: Deduplicate test_basic int2float/float2int (#1537) Merge int2float,float2int. 
Signed-off-by: John Kesapides --- test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/test_int2float.cpp | 183 +++++++++++++++--------------- 2 files changed, 91 insertions(+), 94 deletions(-) diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index dde3311d..adf24bd8 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -11,7 +11,7 @@ set(${MODULE_NAME}_SOURCES test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp test_imagedim.cpp test_vloadstore.cpp - test_int2float.cpp test_float2int.cpp + test_int2float.cpp test_createkernelsinprogram.cpp test_hostptr.cpp test_explicit_s2v.cpp diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp index 3a8458c9..c5afc244 100644 --- a/test_conformance/basic/test_int2float.cpp +++ b/test_conformance/basic/test_int2float.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -21,123 +21,120 @@ #include #include +#include +#include #include "procs.h" -const char *int2float_kernel_code = -"__kernel void test_int2float(__global int *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (float)src[tid];\n" -"\n" -"}\n"; +namespace { +const char *int2float_kernel_code = R"( +__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) +{ + int tid = get_global_id(0); + + dst[tid] = (TYPE_Y)src[tid]; +})"; -int -verify_int2float(cl_int *inptr, cl_float *outptr, int n) +template const char *Type2str() { return ""; } +template <> const char *Type2str() { return "int"; } +template <> const char *Type2str() { return "float"; } + +template void generate_random_inputs(std::vector &v) { - int i; + RandomSeed seed(gRandomSeed); - for (i=0; i bool equal_value(Tx a, Ty b) { - cl_mem streams[2]; - cl_int *input_ptr; - cl_float *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[1]; - int err; - int i; - MTdata d; - - input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + return a == (Tx)b; +} - d = init_genrand( gRandomSeed ); - for (i=0; i +int verify_X2Y(std::vector input, std::vector output, + const char *test_name) +{ - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) + if (!std::equal(output.begin(), output.end(), input.begin(), + equal_value)) { - log_error("clWriteArray failed\n"); + log_error("%s test 
failed\n", test_name); return -1; } - err = create_single_kernel_helper(context, &program, &kernel, 1, &int2float_kernel_code, "test_int2float"); - if (err != CL_SUCCESS) - { - log_error("create_single_kernel_helper failed\n"); - return -1; - } + log_info("%s test passed\n", test_name); + return 0; +} +template +int test_X2Y(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements, const char *test_name) +{ + clMemWrapper streams[2]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } - threads[0] = (size_t)num_elements; - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } + std::vector input(num_elements); + std::vector output(num_elements); - err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Tx) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Ty) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); - err = verify_int2float(input_ptr, output_ptr, num_elements); + generate_random_inputs(input); - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(output_ptr); + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(Tx) * num_elements, input.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer 
failed."); - return err; -} + std::string build_options; + build_options.append("-DTYPE_X=").append(Type2str()); + build_options.append(" -DTYPE_Y=").append(Type2str()); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &int2float_kernel_code, "test_X2Y", + build_options.c_str()); + test_error(err, "create_single_kernel_helper failed."); + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + test_error(err, "clSetKernelArg failed."); + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(Ty) * num_elements, output.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); + err = verify_X2Y(input, output, test_name); + return err; +} +} +int test_int2float(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_X2Y(device, context, queue, num_elements, + "INT2FLOAT"); +} +int test_float2int(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_X2Y(device, context, queue, num_elements, + "FLOAT2INT"); +} -- cgit v1.2.3 From 4dece20f7db75819eafa35981ca7c01cee70ca4b Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Mon, 29 May 2023 15:04:04 +0200 Subject: Added cl_khr_fp16 extension support for test_vector_swizzle from basic (#1729) * Added cl_khr_fp16 extension support for vector_swizzle from basic (issue #142, basic) * Added code review related fix --- test_conformance/basic/test_vector_swizzle.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp index 884bcf36..fdbc8919 100644 --- 
a/test_conformance/basic/test_vector_swizzle.cpp +++ b/test_conformance/basic/test_vector_swizzle.cpp @@ -22,6 +22,8 @@ #include "procs.h" #include "harness/testHarness.h" +static std::string pragma_extension; + template struct TestInfo { }; @@ -629,7 +631,9 @@ static int test_vectype(const char* type_name, cl_device_id device, clProgramWrapper program; clKernelWrapper kernel; - const char* xyzw_source = TestInfo::kernel_source_xyzw; + std::string program_src = + pragma_extension + std::string(TestInfo::kernel_source_xyzw); + const char* xyzw_source = program_src.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw", buildOptions.c_str()); @@ -643,7 +647,9 @@ static int test_vectype(const char* type_name, cl_device_id device, clProgramWrapper program; clKernelWrapper kernel; - const char* sN_source = TestInfo::kernel_source_sN; + std::string program_src = + pragma_extension + std::string(TestInfo::kernel_source_sN); + const char* sN_source = program_src.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN", buildOptions.c_str()); @@ -660,7 +666,9 @@ static int test_vectype(const char* type_name, cl_device_id device, const Version device_version = get_device_cl_version(device); if (device_version >= Version(3, 0)) { - const char* rgba_source = TestInfo::kernel_source_rgba; + std::string program_src = + pragma_extension + std::string(TestInfo::kernel_source_rgba); + const char* rgba_source = program_src.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &rgba_source, "test_vector_swizzle_rgba", buildOptions.c_str()); @@ -689,6 +697,7 @@ int test_vector_swizzle(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { int hasDouble = is_extension_available(device, "cl_khr_fp64"); + int hasHalf = is_extension_available(device, "cl_khr_fp16"); int result = TEST_PASS; result |= test_type("char", 
device, context, queue); @@ -703,8 +712,14 @@ int test_vector_swizzle(cl_device_id device, cl_context context, result |= test_type("ulong", device, context, queue); } result |= test_type("float", device, context, queue); + if (hasHalf) + { + pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + result |= test_type("half", device, context, queue); + } if (hasDouble) { + pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; result |= test_type("double", device, context, queue); } return result; -- cgit v1.2.3 From 35b8db35c657c8ed14ba564e2fb65490cf0a2c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 30 May 2023 17:43:58 +0200 Subject: Add check for CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR after completion. (#1740) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add check for CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR after completion. Added check for state CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR in state info tests. Signed-off-by: Paweł Jastrzębski * Add wait for event. Wait for event to guarantee that a command-buffer has finished executing by this point. Signed-off-by: Paweł Jastrzębski * Add new event to wait for. 
Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../command_buffer_get_command_buffer_info.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index 3ce410c0..d46b2888 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -240,9 +240,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest clEventWrapper trigger_event = clCreateUserEvent(context, &error); test_error(error, "clCreateUserEvent failed"); + clEventWrapper execute_event; // enqueued command buffer blocked on user event error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1, - &trigger_event, nullptr); + &trigger_event, &execute_event); test_error(error, "clEnqueueCommandBufferKHR failed"); // verify pending state @@ -255,6 +256,13 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest test_error(signal_error, "clSetUserEventStatus failed"); + error = clWaitForEvents(1, &execute_event); + test_error(error, "Unable to wait for execute event"); + + // verify executable state + error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR); + test_error(error, "verify_state failed"); + return CL_SUCCESS; } -- cgit v1.2.3 From 4cb39b8c140563a5fda7b375ee919f084cd3bc11 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 30 May 2023 17:48:09 +0200 Subject: Added cl_khr_fp16 extension support for test_hiloeo from basic (#1721) * Added cl_khr_fp16 support for hiloeo test from basic (issue #142, basic) * Added correction due to compiler warning * Cosmetic correction --- test_conformance/basic/test_hiloeo.cpp | 363 +++++++++++---------------------- 1 file changed, 
119 insertions(+), 244 deletions(-) diff --git a/test_conformance/basic/test_hiloeo.cpp b/test_conformance/basic/test_hiloeo.cpp index 3470ad00..4e921a6e 100644 --- a/test_conformance/basic/test_hiloeo.cpp +++ b/test_conformance/basic/test_hiloeo.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,14 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - +#include +#include #include #include -#include #include #include - +#include #include "procs.h" @@ -31,9 +30,10 @@ int odd_offset( int index, int vectorSize ) { return index * 2 + 1; } typedef int (*OffsetFunc)( int index, int vectorSize ); static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset }; -typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName ); static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" }; -static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" }; +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", + "int", "uint", "long", "ulong", + "half", "float", "double" }; static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16}; static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16}; @@ -45,43 +45,41 @@ static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4}; // strcat(gentype, vector_size_names[out_vector_idx[i]]); static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"}; -static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; +static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 2, 4, 8 }; static int CheckResults( 
void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ); int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { - cl_int *input_ptr, *output_ptr, *p; int err; - cl_uint i; int hasDouble = is_extension_available( device, "cl_khr_fp64" ); + int hasHalf = is_extension_available(device, "cl_khr_fp16"); cl_uint vectorSize, operatorToUse; cl_uint type; - MTdata d; + MTdataHolder d(gRandomSeed); int expressionMode; int numExpressionModes = 2; size_t length = sizeof(cl_int) * 4 * n_elems; - input_ptr = (cl_int*)malloc(length); - output_ptr = (cl_int*)malloc(length); + std::vector input_ptr(4 * n_elems); + std::vector output_ptr(4 * n_elems); - p = input_ptr; - d = init_genrand( gRandomSeed ); - for (i=0; i<4 * (cl_uint) n_elems; i++) - p[i] = genrand_int32(d); - free_mtdata(d); d = NULL; + for (cl_uint i = 0; i < 4 * (cl_uint)n_elems; i++) + input_ptr[i] = genrand_int32(d); for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) { // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes size_t elementCount = length / kSizes[type]; - cl_mem streams[2]; + clMemWrapper streams[2]; // skip double if unavailable if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" ))) continue; + if (!hasHalf && (0 == strcmp(test_str_names[type], "half"))) continue; + if( !gHasLong && (( 0 == strcmp( test_str_names[type], "long" )) || ( 0 == strcmp( test_str_names[type], "ulong" )))) @@ -104,12 +102,9 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, return -1; } - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, + input_ptr.data(), 0, NULL, NULL); + test_error(err, 
"clEnqueueWriteBuffer failed\n"); for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ ) { @@ -118,8 +113,8 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) { for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) { - cl_program program = NULL; - cl_kernel kernel = NULL; + clProgramWrapper program; + clKernelWrapper kernel; cl_uint outVectorSize = out_vector_idx[vectorSize]; char expression[1024]; @@ -139,92 +134,64 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, "}\n" }; - if(expressionMode == 0) { - sprintf(expression, "srcA[tid]"); - } else if(expressionMode == 1) { - switch(vector_sizes[vectorSize]) { - case 16: - sprintf(expression, - "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))", - test_str_names[type] - ); - break; - case 8: - sprintf(expression, - "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))", - test_str_names[type] - ); - break; - case 4: - sprintf(expression, - "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))", - test_str_names[type] - ); - break; - case 3: - sprintf(expression, - "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))", - test_str_names[type] - ); - break; - case 2: - sprintf(expression, - "((%s2)(srcA[tid].s0, srcA[tid].s1))", - test_str_names[type] - ); - break; - default : - sprintf(expression, "srcA[tid]"); - log_info("Default\n"); - } - } else { - sprintf(expression, "srcA[tid]"); + if (expressionMode == 1 && vector_sizes[vectorSize] != 1) + { + std::ostringstream sstr; + const char 
*index_chars[] = { "0", "1", "2", "3", + "4", "5", "6", "7", + "8", "9", "A", "B", + "C", "D", "E", "f" }; + sstr << "((" << test_str_names[type] + << std::to_string(vector_sizes[vectorSize]) + << ")("; + for (unsigned i = 0; i < vector_sizes[vectorSize]; i++) + sstr << " srcA[tid].s" << index_chars[i] << ","; + sstr.seekp(-1, sstr.cur); + sstr << "))"; + std::snprintf(expression, sizeof(expression), "%s", + sstr.str().c_str()); + } + else + { + std::snprintf(expression, sizeof(expression), + "srcA[tid]"); } if (0 == strcmp( test_str_names[type], "double" )) source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + if (0 == strcmp(test_str_names[type], "half")) + source[0] = + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + char kernelName[128]; snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] ); err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); - if (err) - return -1; + test_error(err, "create_single_kernel_helper failed\n"); err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } + test_error(err, "clSetKernelArg failed\n"); //Wipe the output buffer clean uint32_t pattern = 0xdeadbeef; - memset_pattern4( output_ptr, &pattern, length ); - err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + memset_pattern4(output_ptr.data(), &pattern, length); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, + length, output_ptr.data(), 0, + NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed\n"); size_t size = elementCount / (vector_aligns[vectorSize]); err = 
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } + test_error(err, "clEnqueueNDRangeKernel failed\n"); - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + length, output_ptr.data(), 0, + NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed\n"); - char *inP = (char *)input_ptr; - char *outP = (char *)output_ptr; + char *inP = (char *)input_ptr.data(); + char *outP = (char *)output_ptr.data(); outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) - ( vector_sizes[ out_vector_idx[vectorSize] ] ) ); // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) ); @@ -240,180 +207,88 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, inP += kSizes[type] * ( vector_aligns[vectorSize] ); outP += kSizes[type] * ( vector_aligns[outVectorSize] ); } - - clReleaseKernel( kernel ); - clReleaseProgram( program ); log_info( "." 
); fflush( stdout ); } } } - - clReleaseMemObject( streams[0] ); - clReleaseMemObject( streams[1] ); log_info( "done\n" ); } log_info("HiLoEO test passed\n"); - - free(input_ptr); - free(output_ptr); - return err; } -static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ) +template +cl_int verify(void *in, void *out, size_t elementCount, int type, + int vectorSize, int operatorToUse, size_t cmpVectorSize) { - cl_ulong array[8]; + size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + size_t elementSize = kSizes[type]; + OffsetFunc f = offsetFuncs[operatorToUse]; + cl_ulong array[8]; void *p = array; - size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]]; - size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]]; - // was 1 << (vectorSize-1); - OffsetFunc f = offsetFuncs[ operatorToUse ]; - size_t elementSize = kSizes[type]; - - if(vector_size_names[vectorSize][0] == '3') { - if(operatorToUse_names[operatorToUse][0] == 'h' || - operatorToUse_names[operatorToUse][0] == 'o') // hi or odd - { - cmpVectorSize = 1; // special case for vec3 ignored values - } - } - switch( elementSize ) - { - case 1: - { - char *i = (char*)in; - char *o = (char*)out; - size_t j; - cl_uint k; - OffsetFunc f = offsetFuncs[ operatorToUse ]; - - for( k = 0; k < elementCount; k++ ) - { - char *o2 = (char*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", %d", i[j] ); - log_info( " } --> { %d", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", %d", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; + std::ostringstream ss; - 
case 2: - { - short *i = (short*)in; - short *o = (short*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - short *o2 = (short*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", %d", i[j] ); - log_info( " } --> { %d", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", %d", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; + T *i = (T *)in, *o = (T *)out; - case 4: - { - int *i = (int*)in; - int *o = (int*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - int *o2 = (int *)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - for( j = 0; j < cmpVectorSize; j++ ) + for (cl_uint k = 0; k < elementCount; k++) + { + T *o2 = (T *)p; + for (size_t j = 0; j < halfVectorSize; j++) + o2[j] = i[f((int)j, (int)halfVectorSize * 2)]; + + if (memcmp(o, o2, elementSize * cmpVectorSize)) { - /* Allow float nans to be binary different */ - if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j]))) - { - log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", 0x%8.8x", i[j] ); - log_info( " } --> { 0x%8.8x", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", 0x%8.8x", o[j] ); - log_info( " }\n" ); + ss << "\n" + << k << ") Failure for" << test_str_names[type] + << vector_size_names[vectorSize] << '.' 
+ << operatorToUse_names[operatorToUse] << " { " + << "0x" << std::setfill('0') << std::setw(elementSize * 2) + << std::hex << i[0]; + + for (size_t j = 1; j < halfVectorSize * 2; j++) ss << ", " << i[j]; + ss << " } --> { " << o[0]; + for (size_t j = 1; j < halfVectorSize; j++) ss << ", " << o[j]; + ss << " }\n"; return -1; - } } i += 2 * halfVectorSize; o += halfVectorSize; - } - } - break; - - case 8: - { - cl_ulong *i = (cl_ulong*)in; - cl_ulong *o = (cl_ulong*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - cl_ulong *o2 = (cl_ulong*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", 0x%16.16llx", i[j] ); - log_info( " } --> { 0x%16.16llx", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", 0x%16.16llx", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; - - default: - log_info( "Internal error. 
Unknown data type\n" ); - return -2; } - return 0; } +static int CheckResults(void *in, void *out, size_t elementCount, int type, + int vectorSize, int operatorToUse) +{ + size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + size_t elementSize = kSizes[type]; + if (vector_size_names[vectorSize][0] == '3') + { + if (operatorToUse_names[operatorToUse][0] == 'h' + || operatorToUse_names[operatorToUse][0] == 'o') // hi or odd + { + cmpVectorSize = 1; // special case for vec3 ignored values + } + } + switch (elementSize) + { + case 1: + return verify(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + case 2: + return verify(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + case 4: + return verify(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + case 8: + return verify(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + default: log_info("Internal error. Unknown data type\n"); return -2; + } +} -- cgit v1.2.3 From 969238050574393c0cd2ba28f595e4b078af0b59 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 30 May 2023 09:49:31 -0600 Subject: Semaphore test: Use blocking semaphores (#1675) Semaphore spec has been updated to reflect the fact that semaphores will be in the appropriate - pending signal or pending wait - state when returning from clEnqueueSignalSemaphore or clEnqueueWaitSemaphore commands: KhronosGroup/OpenCL-Docs#882 Deleted the following tests to match the updated spec: semaphores_order_1 - Test calls EnqueueWaitSemaphore before calling EnqueueSignalSemaphore and expects this wait to succeed. This behavior is not compatible with the recent spec updates to semaphores. semaphores_order_2 & semaphores_order_3 - Calling clEnqueueSignalSemaphoresKHR with a dependency on a user event may cause the implementation to block until the user event is complete. This is unsafe usage of clEnqueueSignalSemaphoresKHR and may lead to deadlock. 
semaphores_invalid_command - This test checks for specific behavior when waiting on a semaphore in an invalid state. According to the spec, this is undefined behavior, and therefore cannot be tested directly. Co-authored-by: Joshua Kelly --- .../extensions/cl_khr_semaphore/main.cpp | 6 +- .../extensions/cl_khr_semaphore/procs.h | 12 +- .../cl_khr_semaphore/test_semaphores.cpp | 403 +-------------------- 3 files changed, 5 insertions(+), 416 deletions(-) diff --git a/test_conformance/extensions/cl_khr_semaphore/main.cpp b/test_conformance/extensions/cl_khr_semaphore/main.cpp index ab9699b0..0ae7206a 100644 --- a/test_conformance/extensions/cl_khr_semaphore/main.cpp +++ b/test_conformance/extensions/cl_khr_semaphore/main.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,11 +34,7 @@ test_definition test_list[] = { ADD_TEST_VERSION(semaphores_multi_signal, Version(1, 2)), ADD_TEST_VERSION(semaphores_multi_wait, Version(1, 2)), ADD_TEST_VERSION(semaphores_queries, Version(1, 2)), - ADD_TEST_VERSION(semaphores_order_1, Version(1, 2)), - ADD_TEST_VERSION(semaphores_order_2, Version(1, 2)), - ADD_TEST_VERSION(semaphores_order_3, Version(1, 2)), ADD_TEST_VERSION(semaphores_import_export_fd, Version(1, 2)), - ADD_TEST_VERSION(semaphores_invalid_command, Version(1, 2)), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/extensions/cl_khr_semaphore/procs.h b/test_conformance/extensions/cl_khr_semaphore/procs.h index 06651af4..f7c1aaa3 100644 --- a/test_conformance/extensions/cl_khr_semaphore/procs.h +++ b/test_conformance/extensions/cl_khr_semaphore/procs.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -41,17 +41,7 @@ extern int test_semaphores_multi_wait(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_semaphores_queries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_semaphores_order_1(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements); -extern int test_semaphores_order_2(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements); -extern int test_semaphores_order_3(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements); extern int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_semaphores_invalid_command(cl_device_id deviceID, - cl_context context, - cl_command_queue queue, - int num_elements); diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp index 7d03bff3..36bb8ad5 100644 --- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp +++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2022 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -646,303 +646,6 @@ int test_semaphores_queries(cl_device_id deviceID, cl_context context, return TEST_PASS; } -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first one deferred wait, then one non deferred signal. 
-int test_semaphores_order_1(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Wait semaphore (dependency on user_event) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event, - &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr, - &signal_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure signal event is completed while wait event is not - test_assert_event_complete(signal_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event - err = 
clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema); - test_error(err, "Could not release semaphore"); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then one deferred wait. Unblock -// signal, then unblock wait. When wait completes, unblock the other signal. -int test_semaphores_order_2(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. 
" - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_1, &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_2, &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3, - &wait_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user 
event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are completed except for second signal - test_assert_event_complete(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_complete(wait_event); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema); - test_error(err, "Could not release semaphore"); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then two deferred waits. Unblock -// one signal and one wait (both blocked by the same user event). When wait -// completes, unblock the other signal. Then unblock the other wait. -int test_semaphores_order_3(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. 
" - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_1, &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_2, &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_1_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3, - &wait_1_event); - test_error(err, "Could not wait semaphore"); - - // Wait semaphore (dependency on user_event_2) - clEventWrapper wait_2_event; - err = 
clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_2, - &wait_2_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure only second signal and second wait completed - cl_event event_list[] = { signal_2_event, wait_2_event }; - err = clWaitForEvents(2, event_list); - test_error(err, "Could not wait for events"); - - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(wait_1_event); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_1_event); - test_assert_event_complete(wait_2_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema); - test_error(err, "Could not release semaphore"); - - return TEST_PASS; -} - // Test it is possible to export a semaphore to a sync fd and import the same // sync fd to a new semaphore int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, @@ -985,6 +688,8 @@ int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), static_cast( CL_SEMAPHORE_HANDLE_SYNC_FD_KHR), + static_cast( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), 0 }; cl_semaphore_khr sema_1 = @@ -1039,107 +744,5 @@ int 
test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, err = clReleaseSemaphoreKHR(sema_2); test_error(err, "Could not release semaphore"); - return TEST_PASS; -} - -// Test that an invalid semaphore command results in the invalidation of the -// command's event and the dependencies' events -int test_semaphores_invalid_command(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphores - cl_semaphore_properties_khr sema_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema_1 = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - cl_semaphore_khr sema_2 = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore_1 (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 1, - &user_event_1, &signal_1_event); - 
test_error(err, "Could not signal semaphore"); - - // Wait semaphore_1 and semaphore_2 (dependency on user_event_1) - clEventWrapper wait_event; - cl_semaphore_khr sema_list[] = { sema_1, sema_2 }; - err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1, - &user_event_1, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore_1 (dependency on wait_event and user_event_2) - clEventWrapper signal_2_event; - cl_event wait_list[] = { user_event_2, wait_event }; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 2, wait_list, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are not completed - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event_1 (expect failure as waiting on semaphore_2 is not - // allowed (unsignaled) - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_assert_error(err != CL_SUCCESS, - "signal_2_event completed unexpectedly"); - - // Ensure signal_1 is completed while others failed (the second signal - // should fail as it depends on wait) - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - test_assert_event_complete(signal_1_event); - test_assert_event_terminated(wait_event); - test_assert_event_terminated(signal_2_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema_1); - test_error(err, "Could not release semaphore"); - - err = clReleaseSemaphoreKHR(sema_2); - test_error(err, "Could not release semaphore"); - return TEST_PASS; } \ No newline at end of file -- cgit v1.2.3 From c58ead9aeaa7baaf8bcbec8642e79d3ce1cc1e09 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 30 May 2023 17:52:06 +0200 Subject: Added cl_khr_fp16 extension 
support for test_astype from basic (#1706) * Added support for cl_khr_fp16 extension in test_astype from basic (issue #142, basic) * Added correction to iterate over vector of types * Fixed case with both fp16 and fp64 supported * Cosmetic corrections due to code review * Cosmetic corrections due to code review --- test_conformance/basic/test_astype.cpp | 211 ++++++++++++++------------------- test_conformance/basic/utils.h | 41 +++++++ 2 files changed, 130 insertions(+), 122 deletions(-) create mode 100644 test_conformance/basic/utils.h diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 7281f904..08a4cb85 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -15,61 +15,39 @@ // #include "harness/compat.h" +#include #include #include -#include #include #include +#include - -#include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" +#include "procs.h" +#include "utils.h" -static const char *astype_kernel_pattern = -"%s\n" -"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( src[ tid ] );\n" -" dst[ tid ] = tmp;\n" -"}\n"; - -static const char *astype_kernel_pattern_V3srcV3dst = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the third and fifth argument, each of which -// should be a "3", when copying from the printf for astype_kernel_pattern - -static const char *astype_kernel_pattern_V3dst = -"%s\n" -"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s3 tmp = as_%s3( src[ tid ] );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the fifth argument, which -// should be a "3", when copying from the printf for 
astype_kernel_pattern +// clang-format off +static char extension[128] = { 0 }; +static char strLoad[128] = { 0 }; +static char strStore[128] = { 0 }; +static const char *regLoad = "as_%s%s(src[tid]);\n"; +static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n"; +static const char *regStore = "dst[tid] = tmp;\n"; +static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n"; -static const char *astype_kernel_pattern_V3src = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n" +static const char* astype_kernel_pattern[] = { +extension, +"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" "{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" dst[ tid ] = tmp;\n" -"}\n"; -// in the printf, remove the third argument, which -// should be a "3", when copying from the printf for astype_kernel_pattern +" int tid = get_global_id( 0 );\n", +" %s%s tmp = ", strLoad, +" ", strStore, +"}\n"}; +// clang-format on int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType, unsigned int vecSize, unsigned int outVecSize, @@ -81,68 +59,60 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q clKernelWrapper kernel; clMemWrapper streams[ 2 ]; - char programSrc[ 10240 ]; size_t threads[ 1 ], localThreads[ 1 ]; size_t typeSize = get_explicit_type_size( inVecType ); size_t outTypeSize = get_explicit_type_size(outVecType); char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - MTdata d; - - - - // Create program - if(outVecSize == 3 && vecSize == 3) { - // astype_kernel_pattern_V3srcV3dst - sprintf( programSrc, astype_kernel_pattern_V3srcV3dst, - (outVecType == kDouble || inVecType == kDouble) ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), // sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] ); - } else if(outVecSize == 3) { - // astype_kernel_pattern_V3dst - sprintf( programSrc, astype_kernel_pattern_V3dst, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType )); - - } else if(vecSize == 3) { - // astype_kernel_pattern_V3src - sprintf( programSrc, astype_kernel_pattern_V3src, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ),// sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } else { - sprintf( programSrc, astype_kernel_pattern, - (outVecType == kDouble || inVecType == kDouble) ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } - - const char *ptr = programSrc; + MTdataHolder d(gRandomSeed); + + std::ostringstream sstr; + if (outVecType == kDouble || inVecType == kDouble) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (outVecType == kHalf || inVecType == kHalf) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + strcpy(extension, sstr.str().c_str()); + + if (vecSize == 3) + std::snprintf(strLoad, sizeof(strLoad), v3Load, + get_explicit_type_name(outVecType), sizeNames[outVecSize], + get_explicit_type_name(inVecType)); + else + std::snprintf(strLoad, sizeof(strLoad), regLoad, + get_explicit_type_name(outVecType), + sizeNames[outVecSize]); + + if (outVecSize == 3) + std::snprintf(strStore, sizeof(strStore), v3Store, + get_explicit_type_name(outVecType)); + else + std::snprintf(strStore, sizeof(strStore), "%s", regStore); + + auto str = + concat_kernel(astype_kernel_pattern, + sizeof(astype_kernel_pattern) / sizeof(const char *)); + std::string kernelSource = + str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize], + get_explicit_type_name(outVecType), sizeNames[outVecSize], + get_explicit_type_name(outVecType), sizeNames[outVecSize]); + + const char *ptr = kernelSource.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); test_error( error, "Unable to create testing kernel" ); - // Create some input values size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize; - char *inBuffer = (char*)malloc( inBufferSize ); + std::vector inBuffer(inBufferSize); size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize; - 
char *outBuffer = (char*)malloc( outBufferSize ); + std::vector outBuffer(outBufferSize); - d = init_genrand( gRandomSeed ); - generate_random_data( inVecType, numElements * vecSize, - d, inBuffer ); - free_mtdata(d); d = NULL; + generate_random_data(inVecType, numElements * vecSize, d, + &inBuffer.front()); // Create I/O streams and set arguments - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error ); + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, + &inBuffer.front(), &error); test_error( error, "Unable to create I/O stream" ); streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error ); test_error( error, "Unable to create I/O stream" ); @@ -161,15 +131,15 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); test_error( error, "Unable to run kernel" ); - // Get the results and compare // The beauty is that astype is supposed to return the bit pattern as a different type, which means // the output should have the exact same bit pattern as the input. No interpretation necessary! - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize, + &outBuffer.front(), 0, NULL, NULL); test_error( error, "Unable to read results" ); - char *expected = inBuffer; - char *actual = outBuffer; + char *expected = &inBuffer.front(); + char *actual = &outBuffer.front(); size_t compSize = typeSize*vecSize; if(outTypeSize*outVecSize < compSize) { compSize = outTypeSize*outVecSize; @@ -178,8 +148,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q if(outVecSize == 4 && vecSize == 3) { // as_type4(vec3) should compile but produce undefined results?? 
- free(inBuffer); - free(outBuffer); return 0; } @@ -188,8 +156,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q // as_typen(vecm) should compile and run but produce // implementation-defined results for m != n // and n*sizeof(type) = sizeof(vecm) - free(inBuffer); - free(outBuffer); return 0; } @@ -203,17 +169,14 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q GetDataVectorString( expected, typeSize, vecSize, expectedString ), GetDataVectorString( actual, typeSize, vecSize, actualString ) ); log_error("Src is :\n%s\n----\n%d threads %d localthreads\n", - programSrc, (int)threads[0],(int) localThreads[0]); - free(inBuffer); - free(outBuffer); + kernelSource.c_str(), (int)threads[0], + (int)localThreads[0]); return 1; } expected += typeSize * vecSize; actual += outTypeSize * outVecSize; } - free(inBuffer); - free(outBuffer); return 0; } @@ -223,31 +186,39 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way // for us to verify what is "valid". 
So the only thing we can test are types that match in size independent // of the element count (char -> uchar, etc) - ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + const std::vector vecTypes = { kChar, kUChar, kShort, + kUShort, kInt, kUInt, + kLong, kULong, kFloat, + kHalf, kDouble }; + const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx; size_t inTypeSize, outTypeSize; int error = 0; - for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ ) + bool fp16Support = is_extension_available(device, "cl_khr_fp16"); + bool fp64Support = is_extension_available(device, "cl_khr_fp64"); + + auto skip_type = [&](ExplicitType et) { + if ((et == kLong || et == kULong) && !gHasLong) + return true; + else if (et == kDouble && !fp64Support) + return true; + else if (et == kHalf && !fp16Support) + return true; + return false; + }; + + for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++) { inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]); - if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) - continue; - - if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong ) - continue; + if (skip_type(vecTypes[inTypeIdx])) continue; - for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ ) + for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++) { outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]); - if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) { - continue; - } - if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong ) - continue; + if (skip_type(vecTypes[outTypeIdx])) continue; // change this check if( inTypeIdx == outTypeIdx ) { @@ -259,7 +230,6 @@ int 
test_astype(cl_device_id device, cl_context context, cl_command_queue queue, for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) { - for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++) { if(vecSizes[sizeIdx]*inTypeSize != @@ -268,10 +238,7 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, continue; } error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems ); - - } - } if(get_explicit_type_size(vecTypes[inTypeIdx]) == get_explicit_type_size(vecTypes[outTypeIdx])) { diff --git a/test_conformance/basic/utils.h b/test_conformance/basic/utils.h new file mode 100644 index 00000000..3f6bf64d --- /dev/null +++ b/test_conformance/basic/utils.h @@ -0,0 +1,41 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef BASIC_UTILS_H +#define BASIC_UTILS_H + +#include +#include + +inline std::string concat_kernel(const char *sstr[], int num) +{ + std::string res; + for (int i = 0; i < num; i++) res += std::string(sstr[i]); + return res; +} + +template +inline std::string str_sprintf(const std::string &str, Args... args) +{ + int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) 
+ 1; + if (str_size <= 0) throw std::runtime_error("Formatting error."); + size_t s = static_cast(str_size); + std::unique_ptr buffer(new char[s]); + std::snprintf(buffer.get(), s, str.c_str(), args...); + return std::string(buffer.get(), buffer.get() + s - 1); +} + +#endif // BASIC_UTIL_H -- cgit v1.2.3 From b3c1401d482252f7f65110a4bff8721c02a34b72 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 30 May 2023 17:52:27 +0200 Subject: Added cl_khr_fp16 extension support for test_async_strided_copy from basic (issue #142, basic) (#1711) --- test_conformance/basic/test_async_strided_copy.cpp | 87 ++++++++++++---------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp index c456f38d..4a848c0f 100644 --- a/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/basic/test_async_strided_copy.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -20,15 +20,16 @@ #include #include #include - - +#include #include "procs.h" #include "harness/conversions.h" +// clang-format off + static const char *async_strided_global_to_local_kernel = "%s\n" // optional pragma string -"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" +"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" "{\n" " int i;\n" // Zero the local storage first @@ -46,7 +47,7 @@ static const char *async_strided_global_to_local_kernel = static const char *async_strided_local_to_global_kernel = "%s\n" // optional pragma string -"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" +"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" "{\n" " int i;\n" // Zero the local storage first @@ -63,6 +64,7 @@ static const char *async_strided_local_to_global_kernel = " wait_group_events( 1, &event );\n" "}\n" ; +// clang-format on int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride) { @@ -71,8 +73,7 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu clKernelWrapper kernel; clMemWrapper streams[ 2 ]; size_t threads[ 1 ], localThreads[ 1 ]; - void *inBuffer, *outBuffer; - MTdata d; + MTdataHolder d(gRandomSeed); char vecNameString[64]; vecNameString[0] = 0; if (vecSize == 1) @@ -94,10 +95,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu char programSource[4096]; programSource[0]=0; char *programPtr; - sprintf(programSource, kernelCode, - 
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - "", - vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString); + std::string extStr = ""; + if (vecType == kDouble) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"; + else if (vecType == kHalf) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"; + + sprintf(programSource, kernelCode, extStr.c_str(), vecNameString, + vecNameString, vecNameString, vecNameString, + get_explicit_type_name(vecType), vecNameString, vecNameString); //log_info("program: %s\n", programSource); programPtr = programSource; @@ -151,9 +157,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride; size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; - inBuffer = (void*)malloc(globalBufferSize); - outBuffer = (void*)malloc(globalBufferSize); - memset(outBuffer, 0, globalBufferSize); + std::vector inBuffer(globalBufferSize); + std::vector outBuffer(globalBufferSize); + memset(outBuffer.data(), 0, globalBufferSize); cl_int copiesPerWorkItemInt, copiesPerWorkgroup; copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; @@ -165,13 +171,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand( gRandomSeed ); - generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); - free_mtdata(d); d = NULL; + generate_random_data(vecType, + globalBufferSize / get_explicit_type_size(vecType), d, + inBuffer.data()); - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + inBuffer.data(), &error); test_error( error, "Unable to create input buffer" ); - 
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + outBuffer.data(), &error); test_error( error, "Unable to create output buffer" ); error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); @@ -192,17 +200,20 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu test_error( error, "Unable to queue kernel" ); // Read - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize, + outBuffer.data(), 0, NULL, NULL); test_error( error, "Unable to read results" ); // Verify size_t typeSize = get_explicit_type_size(vecType)* vecSize; for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride) { - if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, typeSize) != 0 ) + if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0) { - unsigned char * inchar = (unsigned char*)inBuffer + i; - unsigned char * outchar = (unsigned char*)outBuffer + i; + unsigned char *inchar = + static_cast(inBuffer.data()); + unsigned char *outchar = + static_cast(outBuffer.data()); char values[4096]; values[0] = 0; @@ -215,34 +226,35 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu sprintf(values + strlen( values), "%2x ", outchar[j]); sprintf(values + strlen(values), "]"); log_error("%s\n", values); - free(inBuffer); - free(outBuffer); return -1; } } - free(inBuffer); - free(outBuffer); - return 0; } int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int strideSizes[] = { 1, 3, 4, 
5, 0 }; + const std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; + const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + const unsigned int strideSizes[] = { 1, 3, 4, 5, 0 }; unsigned int size, typeIndex, stride; int errors = 0; - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) - { - if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) - continue; + bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++) + { if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) continue; + else if (vecType[typeIndex] == kDouble && !fp64Support) + continue; + else if (vecType[typeIndex] == kHalf && !fp16Support) + continue; for( size = 0; vecSizes[ size ] != 0; size++ ) { @@ -260,9 +272,6 @@ int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_co return 0; } - - - int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel ); -- cgit v1.2.3 From 38abfc7d24fdfaf7311810fcc4c5560b019757cf Mon Sep 17 00:00:00 2001 From: Zhaoyu Zhang Date: Tue, 30 May 2023 23:54:32 +0800 Subject: select: using clEnqueueReadBuffer rather than clEnqueueMapBuffer (#1712) * select: using clEnqueueReadBuffer rather than clEnqueueMapBuffer * Update code to be compatible with clang-format * update code again to be compatible with clang-format * update code again to comply with clang-format * update code again to be compatible with clang-format The clang-format tool is so weird, it's not even consistent between runs... 
--- test_conformance/select/test_select.cpp | 92 +++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 15 deletions(-) diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 7fa3bc08..b0cda09f 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -303,6 +303,10 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cl_mem dest = NULL; void *ref = NULL; void *sref = NULL; + void *src1_host = NULL; + void *src2_host = NULL; + void *cmp_host = NULL; + void *dest_host = NULL; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; size_t block_elements = BUFFER_SIZE / type_size[stype]; @@ -359,6 +363,30 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; } + src1_host = malloc(BUFFER_SIZE); + if (NULL == src1_host) + { + log_error("Error: could not allocate src1_host buffer\n"); + goto exit; + } + src2_host = malloc(BUFFER_SIZE); + if (NULL == src2_host) + { + log_error("Error: could not allocate src2_host buffer\n"); + goto exit; + } + cmp_host = malloc(BUFFER_SIZE); + if (NULL == cmp_host) + { + log_error("Error: could not allocate cmp_host buffer\n"); + goto exit; + } + dest_host = malloc(BUFFER_SIZE); + if (NULL == dest_host) + { + log_error("Error: could not allocate dest_host buffer\n"); + goto exit; + } // We block the test as we are running over the range of compare values // "block the test" means "break the test into blocks" @@ -387,13 +415,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c // Setup the input data to change for each block initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements); - // Create the reference result - Select sfunc = (cmptype == ctype[stype][0]) ? 
vrefSelects[stype][0] : vrefSelects[stype][1]; - (*sfunc)(ref, s1, s2, s3, block_elements); - - sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1]; - (*sfunc)(sref, s1, s2, s3, block_elements); - if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL ))) { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; } if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL ))) @@ -401,6 +422,40 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL ))) { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; } + // Create the reference result + err = clEnqueueReadBuffer(queue, src1, CL_TRUE, 0, BUFFER_SIZE, + src1_host, 0, NULL, NULL); + if (err) + { + log_error("Error: Reading buffer from src1 to src1_host failed\n"); + ++s_test_fail; + goto exit; + } + err = clEnqueueReadBuffer(queue, src2, CL_TRUE, 0, BUFFER_SIZE, + src2_host, 0, NULL, NULL); + if (err) + { + log_error("Error: Reading buffer from src2 to src2_host failed\n"); + ++s_test_fail; + goto exit; + } + err = clEnqueueReadBuffer(queue, cmp, CL_TRUE, 0, BUFFER_SIZE, cmp_host, + 0, NULL, NULL); + if (err) + { + log_error("Error: Reading buffer from cmp to cmp_host failed\n"); + ++s_test_fail; + goto exit; + } + + Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] + : vrefSelects[stype][1]; + (*sfunc)(ref, src1_host, src2_host, cmp_host, block_elements); + + sfunc = (cmptype == ctype[stype][0]) ? 
refSelects[stype][0] + : refSelects[stype][1]; + (*sfunc)(sref, src1_host, src2_host, cmp_host, block_elements); + for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { size_t vector_size = element_count[vecsize] * type_size[stype]; @@ -415,7 +470,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) )) { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - // Wipe destination void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; } @@ -429,18 +483,22 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c goto exit; } - d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_READ, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map dest # 2" ); ++s_test_fail; goto exit; } - - if ((*checkResults[stype])(d, vecsize == 0 ? sref : ref, block_elements, element_count[vecsize])!=0){ - log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i); + err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE, + dest_host, 0, NULL, NULL); + if (err) + { + log_error( + "Error: Reading buffer from dest to dest_host failed\n"); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ) + if ((*checkResults[stype])(dest_host, vecsize == 0 ? 
sref : ref, + block_elements, element_count[vecsize]) + != 0) { - log_error( "Error: Could not unmap dest" ); + log_error("vec_size:%d indx: 0x%16.16llx\n", + (int)element_count[vecsize], i); ++s_test_fail; goto exit; } @@ -459,6 +517,10 @@ exit: if( dest) clReleaseMemObject( dest ); if( ref ) free(ref ); if( sref ) free(sref ); + if (src1_host) free(src1_host); + if (src2_host) free(src2_host); + if (cmp_host) free(cmp_host); + if (dest_host) free(dest_host); for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) { clReleaseKernel(kernels[vecsize]); -- cgit v1.2.3 From 19f4fc3f3d69bea530182164dd061c7dfda166b0 Mon Sep 17 00:00:00 2001 From: Chris Gearing <109952055+MeyeChris@users.noreply.github.com> Date: Tue, 30 May 2023 18:05:18 +0200 Subject: Make extended_async_copy tests type agnostic (#1619) The latest version of the cl_khr_extended_async_copies extension uses element size rather than the element type as its base. This means it can be called with arbitrary and in particular non power of 2 sizes, such as 3 or 13. Update the test_async_copy2D and test_async_copy3D tests to make them element size based rather than type based. As well as this, run all tests that can fit into the memory of the target rather than presuming large elements cannot fit. Make some additional good practice changes in terms of const usage, declaring variables where they are used, and usage of iterators. The test coverage increases from 1224 cases to 1332 cases for the test_async_copy2D and test_async_copy3D cases. 
Ticket: #1579 Signed-off-by: Chris Gearing Co-authored-by: Chris Gearing --- test_conformance/basic/test_async_copy2D.cpp | 182 +++++++++++------------- test_conformance/basic/test_async_copy3D.cpp | 204 ++++++++++++--------------- 2 files changed, 171 insertions(+), 215 deletions(-) diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index bf3f1552..11ef84bd 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -27,17 +27,25 @@ static const char *async_global_to_local_kernel2D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, - __local %s *localBuffer, int numElementsPerLine, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, + __local VarSizeStruct *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem, int srcStride, int dstStride) { // Zero the local storage first for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } @@ -45,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, // try the copy barrier( CLK_LOCAL_MEM_FENCE ); event_t event = async_work_group_copy_2D2D(localBuffer, 0, src, - lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s), + lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0); // Wait for the copy to complete, then verify by 
manually copying to the dest @@ -63,16 +71,24 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, static const char *async_local_to_global_kernel2D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem, int srcStride, int dstStride) { // Zero the local storage first for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } @@ -90,36 +106,22 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca // Do this to verify all kernels are done copying to the local buffer before we try the copy barrier(CLK_LOCAL_MEM_FENCE); event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride, - localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, + localBuffer, 0, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0 ); wait_group_events(1, &event); }; )OpenCLC"; -int test_copy2D(cl_device_id deviceID, cl_context context, - cl_command_queue queue, const char *kernelCode, - ExplicitType vecType, int vecSize, int srcMargin, int dstMargin, - bool localIsDst) +int test_copy2D(const cl_device_id deviceID, const cl_context context, + const cl_command_queue queue, const char *const kernelCode, + 
const size_t elementSize, const int srcMargin, + const int dstMargin, const bool localIsDst) { int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - void *inBuffer, *outBuffer, *outBufferCopy; - MTdata d; - char vecNameString[64]; - vecNameString[0] = 0; - if (vecSize == 1) - sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); - else - sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), - vecSize); - size_t elementSize = get_explicit_type_size(vecType) * vecSize; - log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString, - srcMargin, dstMargin); + log_info("Testing %d byte element with srcMargin = %d, dstMargin = %d\n", + elementSize, srcMargin, dstMargin); cl_long max_local_mem_size; error = @@ -139,6 +141,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed."); + cl_long max_work_group_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_group_size), &max_work_group_size, + NULL); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed."); + if (max_alloc_size > max_global_mem_size / 2) max_alloc_size = max_global_mem_size / 2; @@ -149,20 +158,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed."); - char programSource[4096]; - programSource[0] = 0; - char *programPtr; + char programSource[4096] = { 0 }; + const char *programPtr = programSource; - sprintf(programSource, kernelCode, - vecType == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" - : "", - vecNameString, vecNameString, vecNameString, vecNameString, - get_explicit_type_name(vecType), vecNameString); + sprintf(programSource, kernelCode, elementSize); // log_info("program: %s\n", programSource); - programPtr = programSource; + + clProgramWrapper program; + clKernelWrapper kernel; error = create_single_kernel_helper(context, &program, &kernel, 1, - (const char **)&programPtr, "test_fn"); + &programPtr, "test_fn"); test_error(error, "Unable to create testing kernel"); size_t max_workgroup_size; @@ -188,9 +194,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context, const cl_int dstStride = numElementsPerLine + dstMargin; const cl_int srcStride = numElementsPerLine + srcMargin; - elementSize = - get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); - const size_t lineCopiesPerWorkItem = 13; const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize * (localIsDst ? dstStride : srcStride); @@ -208,7 +211,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context, if (maxLocalWorkgroupSize > max_workgroup_size) localWorkgroupSize = max_workgroup_size; - const size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcMargin) / srcStride; const size_t maxTotalLinesOut = @@ -231,9 +233,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context, const size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; - inBuffer = (void *)malloc(inBufferSize); - outBuffer = (void *)malloc(outBufferSize); - outBufferCopy = (void *)malloc(outBufferSize); + if ((localBufferSize / 4) > max_work_group_size) + { + log_info("Skipping due to resource requirements local:%db " + "max_work_group_size:%d\n", + localBufferSize, max_work_group_size); + return 0; + } + + void *const inBuffer = (void *)malloc(inBufferSize); + void *const outBuffer = (void *)malloc(outBufferSize); + void *const outBufferCopy = (void *)malloc(outBufferSize); const cl_int 
lineCopiesPerWorkItemInt = static_cast(lineCopiesPerWorkItem); @@ -250,18 +260,20 @@ int test_copy2D(cl_device_id deviceID, cl_context context, (int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup, lineCopiesPerWorkItemInt); + size_t threads[1], localThreads[1]; + threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand(gRandomSeed); - generate_random_data( - vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer); - generate_random_data( - vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer); + MTdata d = init_genrand(gRandomSeed); + generate_random_data(kChar, inBufferSize, d, inBuffer); + generate_random_data(kChar, outBufferSize, d, outBuffer); free_mtdata(d); d = NULL; memcpy(outBufferCopy, outBuffer, outBufferSize); + clMemWrapper streams[2]; + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error); test_error(error, "Unable to create input buffer"); @@ -301,8 +313,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context, // Verify int failuresPrinted = 0; - // Verify - size_t typeSize = get_explicit_type_size(vecType) * vecSize; + for (int i = 0; i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize; i += elementSize) @@ -313,13 +324,12 @@ int test_copy2D(cl_device_id deviceID, cl_context context, int inIdx = i * srcStride + j; int outIdx = i * dstStride + j; if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, - typeSize) + elementSize) != 0) { unsigned char *inchar = (unsigned char *)inBuffer + inIdx; unsigned char *outchar = (unsigned char *)outBuffer + outIdx; - char values[4096]; - values[0] = 0; + char values[4096] = { 0 }; if (failuresPrinted == 0) { @@ -382,16 +392,14 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, bool localIsDst) { - ExplicitType vecType[] = { - kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, - kULong, kFloat, 
kDouble, kNumExplicitTypes - }; + const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7, + 8, 13, 16, 32, 47, 64 }; // The margins below represent the number of elements between the end of // one line and the start of the next. The strides are equivalent to the // length of the line plus the chosen margin. - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcMargin, dstMargin; + // These have to be multipliers, because the margin must be a multiple of + // element size. + const unsigned int marginMultipliers[] = { 0, 10, 100 }; int errors = 0; @@ -399,55 +407,27 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, { log_info( "Device does not support extended async copies. Skipping test.\n"); - return 0; } - - for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + else { - if (vecType[typeIndex] == kDouble - && !is_extension_available(deviceID, "cl_khr_fp64")) - continue; - - if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) - && !gHasLong) - continue; - - for (size = 0; vecSizes[size] != 0; size++) + for (const unsigned int elemSize : elemSizes) { - if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] - <= 2) // small type + for (const unsigned int srcMarginMultiplier : marginMultipliers) { - for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - srcMargin++) + for (const unsigned int dstMarginMultiplier : marginMultipliers) { - for (dstMargin = 0; - dstMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - dstMargin++) + if (test_copy2D(deviceID, context, queue, kernelCode, + elemSize, srcMarginMultiplier * elemSize, + dstMarginMultiplier * elemSize, localIsDst)) { - if (test_copy2D(deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], - smallTypesMarginSizes[srcMargin], - smallTypesMarginSizes[dstMargin], - 
localIsDst)) - { - errors++; - } + errors++; } } } - // not a small type, check only zero stride - else if (test_copy2D(deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], 0, 0, - localIsDst)) - { - errors++; - } } } - if (errors) return -1; - return 0; + + return errors ? -1 : 0; } int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context, diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp index 5eb41ebc..aa22f3a2 100644 --- a/test_conformance/basic/test_async_copy3D.cpp +++ b/test_conformance/basic/test_async_copy3D.cpp @@ -27,9 +27,14 @@ static const char *async_global_to_local_kernel3D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer, int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, int dstLineStride, int srcPlaneStride, int dstPlaneStride ) { @@ -38,7 +43,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca for (int j = 0; j < numLines; j++) { for (int k = 0; k < numElementsPerLine; k++) { const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } } @@ -48,7 +55,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca event_t event = async_work_group_copy_3D3D(localBuffer, 0, src, planesCopiesPerWorkgroup * get_group_id(0) * srcPlaneStride, - sizeof(%s), 
(size_t)numElementsPerLine, (size_t)numLines, + sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0); @@ -69,9 +76,14 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca static const char *async_local_to_global_kernel3D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer, int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, int dstLineStride, int srcPlaneStride, int dstPlaneStride) { @@ -80,7 +92,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca for (int j = 0; j < numLines; j++) { for (int k = 0; k < numElementsPerLine; k++) { const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } } @@ -103,39 +117,26 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca event_t event = async_work_group_copy_3D3D(dst, planesCopiesPerWorkgroup * get_group_id(0) * dstPlaneStride, localBuffer, 0, - sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup, + sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0); wait_group_events(1, &event); } )OpenCLC"; -int test_copy3D(cl_device_id deviceID, cl_context context, - 
cl_command_queue queue, const char *kernelCode, - ExplicitType vecType, int vecSize, int srcLineMargin, - int dstLineMargin, int srcPlaneMargin, int dstPlaneMargin, - bool localIsDst) +int test_copy3D(const cl_device_id deviceID, const cl_context context, + const cl_command_queue queue, const char *const kernelCode, + const size_t elementSize, const int srcLineMargin, + const int dstLineMargin, const int srcPlaneMargin, + const int dstPlaneMargin, const bool localIsDst) { int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - void *inBuffer, *outBuffer, *outBufferCopy; - MTdata d; - char vecNameString[64]; - vecNameString[0] = 0; - if (vecSize == 1) - sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); - else - sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), - vecSize); - size_t elementSize = get_explicit_type_size(vecType) * vecSize; - log_info("Testing %s with srcLineMargin = %d, dstLineMargin = %d, " - "srcPlaneMargin = %d, dstPlaneMargin = %d\n", - vecNameString, srcLineMargin, dstLineMargin, srcPlaneMargin, - dstPlaneMargin); + log_info( + "Testing %d byte element with srcLineMargin = %d, dstLineMargin = %d, " + "srcPlaneMargin = %d, dstPlaneMargin = %d\n", + elementSize, srcLineMargin, dstLineMargin, srcPlaneMargin, + dstPlaneMargin); cl_long max_local_mem_size; error = @@ -165,20 +166,16 @@ int test_copy3D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed."); - char programSource[4096]; - programSource[0] = 0; - char *programPtr; + char programSource[4096] = { 0 }; + const char *programPtr = programSource; - sprintf(programSource, kernelCode, - vecType == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" - : "", - vecNameString, vecNameString, vecNameString, vecNameString, - get_explicit_type_name(vecType), vecNameString, vecNameString); + sprintf(programSource, kernelCode, elementSize); // log_info("program: %s\n", programSource); - programPtr = programSource; + clProgramWrapper program; + clKernelWrapper kernel; error = create_single_kernel_helper(context, &program, &kernel, 1, - (const char **)&programPtr, "test_fn"); + &programPtr, "test_fn"); test_error(error, "Unable to create testing kernel"); size_t max_workgroup_size; @@ -196,6 +193,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + cl_long max_work_group_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_group_size), &max_work_group_size, + NULL); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed."); + // Pick the minimum of the device and the kernel if (max_workgroup_size > max_local_workgroup_size[0]) max_workgroup_size = max_local_workgroup_size[0]; @@ -208,8 +212,6 @@ int test_copy3D(cl_device_id deviceID, cl_context context, const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin; const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin; - elementSize = - get_explicit_type_size(vecType) * ((vecSize == 3) ? 
4 : vecSize); const size_t planesCopiesPerWorkItem = 2; const size_t localStorageSpacePerWorkitem = elementSize * planesCopiesPerWorkItem @@ -251,9 +253,17 @@ int test_copy3D(cl_device_id deviceID, cl_context context, const size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; - inBuffer = (void *)malloc(inBufferSize); - outBuffer = (void *)malloc(outBufferSize); - outBufferCopy = (void *)malloc(outBufferSize); + if ((localBufferSize / 4) > max_work_group_size) + { + log_info("Skipping due to resource requirements local:%db " + "max_work_group_size:%d\n", + localBufferSize, max_work_group_size); + return 0; + } + + void *const inBuffer = (void *)malloc(inBufferSize); + void *const outBuffer = (void *)malloc(outBufferSize); + void *const outBufferCopy = (void *)malloc(outBufferSize); const cl_int planesCopiesPerWorkItemInt = static_cast(planesCopiesPerWorkItem); @@ -270,18 +280,20 @@ int test_copy3D(cl_device_id deviceID, cl_context context, (int)localBufferSize, (int)inBufferSize, (int)outBufferSize, planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt); + size_t threads[1], localThreads[1]; + threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand(gRandomSeed); - generate_random_data( - vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer); - generate_random_data( - vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer); + MTdata d = init_genrand(gRandomSeed); + generate_random_data(kChar, inBufferSize, d, inBuffer); + generate_random_data(kChar, outBufferSize, d, outBuffer); free_mtdata(d); d = NULL; memcpy(outBufferCopy, outBuffer, outBufferSize); + clMemWrapper streams[2]; + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error); test_error(error, "Unable to create input buffer"); @@ -327,8 +339,7 @@ int test_copy3D(cl_device_id deviceID, cl_context context, // Verify int failuresPrinted = 0; - // Verify - size_t typeSize = 
get_explicit_type_size(vecType) * vecSize; + for (int i = 0; i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize; i += elementSize) @@ -341,14 +352,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context, int inIdx = i * srcPlaneStride + j * srcLineStride + k; int outIdx = i * dstPlaneStride + j * dstLineStride + k; if (memcmp(((char *)inBuffer) + inIdx, - ((char *)outBuffer) + outIdx, typeSize) + ((char *)outBuffer) + outIdx, elementSize) != 0) { unsigned char *inchar = (unsigned char *)inBuffer + inIdx; unsigned char *outchar = (unsigned char *)outBuffer + outIdx; - char values[4096]; - values[0] = 0; + char values[4096] = { 0 }; if (failuresPrinted == 0) { @@ -439,17 +449,14 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, bool localIsDst) { - ExplicitType vecType[] = { - kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, - kULong, kFloat, kDouble, kNumExplicitTypes - }; + const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7, + 8, 13, 16, 32, 47, 64 }; // The margins below represent the number of elements between the end of - // one line or plane and the start of the next. The strides are equivalent - // to the size of the line or plane plus the chosen margin. - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin, - dstPlaneMargin; + // one line and the start of the next. The strides are equivalent to the + // size of the line or plane plus the chosen margin. + // These have to be multipliers, because the margin must be a multiple of + // element size. + const unsigned int marginMultipliers[] = { 0, 10, 100 }; int errors = 0; @@ -457,67 +464,36 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, { log_info( "Device does not support extended async copies. 
Skipping test.\n"); - return 0; } - - for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + else { - if (vecType[typeIndex] == kDouble - && !is_extension_available(deviceID, "cl_khr_fp64")) - continue; - - if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) - && !gHasLong) - continue; - - for (size = 0; vecSizes[size] != 0; size++) + for (const unsigned int elemSize : elemSizes) { - if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] - <= 2) // small type + for (const unsigned int srcLineMarginMultiplier : marginMultipliers) { - for (srcLineMargin = 0; - srcLineMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - srcLineMargin++) + for (const unsigned int dstLineMarginMultiplier : + marginMultipliers) { - for (dstLineMargin = 0; - dstLineMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - dstLineMargin++) + for (const unsigned int srcPlaneMarginMultiplier : + marginMultipliers) { - for (srcPlaneMargin = 0; - srcPlaneMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - srcPlaneMargin++) + for (const unsigned int dstPlaneMarginMultiplier : + marginMultipliers) { - for (dstPlaneMargin = 0; - dstPlaneMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - dstPlaneMargin++) + if (test_copy3D(deviceID, context, queue, + kernelCode, elemSize, + srcLineMarginMultiplier * elemSize, + dstLineMarginMultiplier * elemSize, + srcPlaneMarginMultiplier * elemSize, + dstPlaneMarginMultiplier * elemSize, + localIsDst)) { - if (test_copy3D( - deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], - smallTypesMarginSizes[srcLineMargin], - smallTypesMarginSizes[dstLineMargin], - smallTypesMarginSizes[srcPlaneMargin], - smallTypesMarginSizes[dstPlaneMargin], - localIsDst)) - { - errors++; - } + errors++; } } } } } - // not a small type, check only zero stride - else if (test_copy3D(deviceID, context, queue, kernelCode, 
- vecType[typeIndex], vecSizes[size], 0, 0, 0, 0, - localIsDst)) - { - errors++; - } } } if (errors) return -1; -- cgit v1.2.3 From abd556f7a2ad98d1b08b145b6fd5c5e813fda125 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 31 May 2023 10:02:54 +0100 Subject: relationals: add missing virtual destructor (#1739) `RelationalsFPTest` contains a vector of `RelTestBase` pointers to `RelTestParams` instances, so the base class destructor should be virtual to avoid undefined behaviour. Fixes https://github.com/KhronosGroup/OpenCL-CTS/issues/1731 Signed-off-by: Sven van Haastregt --- test_conformance/relationals/test_comparisons_fp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h index 7faca1c5..66c62c2d 100644 --- a/test_conformance/relationals/test_comparisons_fp.h +++ b/test_conformance/relationals/test_comparisons_fp.h @@ -32,6 +32,7 @@ template using VerifyFunc = bool (*)(const T &, const T &); struct RelTestBase { explicit RelTestBase(const ExplicitTypes &dt): dataType(dt) {} + virtual ~RelTestBase() = default; ExplicitTypes dataType; }; -- cgit v1.2.3 From ce1754981212e540ad5a5c4645139fca77f89b2b Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 2 Jun 2023 10:57:10 +0100 Subject: cmake: do not suppress -Wsometimes-uninitialized globally (#1741) Fix an instance of this warning in mem_host_flags. Only disable `-Wsometimes-uninitialized` for the SVM test, which does not compile cleanly with this warning enabled. Re-enable the warning for the other tests, so that it can catch any new occurrences. 
Signed-off-by: Sven van Haastregt --- CMakeLists.txt | 1 - test_conformance/SVM/CMakeLists.txt | 2 ++ test_conformance/mem_host_flags/checker.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fce58d8..8d56b64d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,7 +105,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" add_cxx_flag_if_supported(-Wall) # Suppress warnings that currently trigger on the code base. # This list should shrink over time when warnings are fixed. - add_cxx_flag_if_supported(-Wno-sometimes-uninitialized) add_cxx_flag_if_supported(-Wno-sign-compare) endif() add_cxx_flag_if_supported(-Wno-narrowing) diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt index 2d01a825..efa597d1 100644 --- a/test_conformance/SVM/CMakeLists.txt +++ b/test_conformance/SVM/CMakeLists.txt @@ -17,4 +17,6 @@ set(${MODULE_NAME}_SOURCES test_migrate.cpp ) +set_gnulike_module_compile_flags("-Wno-sometimes-uninitialized") + include(../CMakeCommon.txt) diff --git a/test_conformance/mem_host_flags/checker.h b/test_conformance/mem_host_flags/checker.h index 835f120b..0bb826f4 100644 --- a/test_conformance/mem_host_flags/checker.h +++ b/test_conformance/mem_host_flags/checker.h @@ -219,7 +219,7 @@ cl_int cBuffer_checker::SetupASSubBuffer(cl_mem_flags parent_buffer_flag) err = CL_SUCCESS; } - cl_mem_flags f; + cl_mem_flags f = 0; if (parent_buffer_flag & CL_MEM_HOST_READ_ONLY) f = CL_MEM_HOST_READ_ONLY; else if (parent_buffer_flag & CL_MEM_HOST_WRITE_ONLY) -- cgit v1.2.3 From 63a8cb6b9d3345feec7621c7e0b0a4ca21cf545a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 5 Jun 2023 11:45:25 +0100 Subject: non_uniform_work_group: fix unused-but-set variables (#1733) Remove the unused `nonRemainderGlobalSize` array. Inspect the result of the `clGetDeviceInfo` call. 
As this fixes all occurrences of this warning, remove the suppression flag from this test. Signed-off-by: Sven van Haastregt --- test_conformance/non_uniform_work_group/CMakeLists.txt | 2 -- .../non_uniform_work_group/TestNonUniformWorkGroup.cpp | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt index f78dd195..30c3a846 100644 --- a/test_conformance/non_uniform_work_group/CMakeLists.txt +++ b/test_conformance/non_uniform_work_group/CMakeLists.txt @@ -10,8 +10,6 @@ set(${MODULE_NAME}_SOURCES tools.cpp ) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") - include(../CMakeCommon.txt) # end of file # diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp index a4a6a744..44781ca8 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp @@ -448,13 +448,8 @@ void TestNonUniformWorkGroup::verifyData (DataContainerAttrib * reference, DataC } void TestNonUniformWorkGroup::calculateExpectedValues () { - size_t nonRemainderGlobalSize[MAX_DIMS]; size_t numberOfPossibleRegions[MAX_DIMS]; - nonRemainderGlobalSize[0] = _globalSize[0] - (_globalSize[0] % _enqueuedLocalSize[0]); - nonRemainderGlobalSize[1] = _globalSize[1] - (_globalSize[1] % _enqueuedLocalSize[1]); - nonRemainderGlobalSize[2] = _globalSize[2] - (_globalSize[2] % _enqueuedLocalSize[2]); - numberOfPossibleRegions[0] = (_globalSize[0]>1)?2:1; numberOfPossibleRegions[1] = (_globalSize[1]>1)?2:1; numberOfPossibleRegions[2] = (_globalSize[2]>1)?2:1; @@ -502,6 +497,11 @@ size_t TestNonUniformWorkGroup::getMaxLocalWorkgroupSize (const cl_device_id &de if (TestNonUniformWorkGroup::_maxLocalWorkgroupSize == 0) { err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, 
sizeof(TestNonUniformWorkGroup::_maxLocalWorkgroupSize), &TestNonUniformWorkGroup::_maxLocalWorkgroupSize, NULL); + if (err) + { + log_error("clGetDeviceInfo failed\n"); + return 0; + } } return TestNonUniformWorkGroup::_maxLocalWorkgroupSize; -- cgit v1.2.3 From c467391680ff31eee1853b7c02b42669a98ee4b6 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 6 Jun 2023 17:46:56 +0200 Subject: Added support for cl_khr_fp16 extension in test_async_copy from basic (issue #142, basic) (#1707) --- test_conformance/basic/test_async_copy.cpp | 69 +++++++++++++++++------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/test_conformance/basic/test_async_copy.cpp b/test_conformance/basic/test_async_copy.cpp index a537c8fe..bb529bce 100644 --- a/test_conformance/basic/test_async_copy.cpp +++ b/test_conformance/basic/test_async_copy.cpp @@ -20,8 +20,7 @@ #include #include #include - - +#include #include "procs.h" #include "harness/conversions.h" @@ -86,8 +85,7 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, clKernelWrapper kernel; clMemWrapper streams[ 2 ]; size_t threads[ 1 ], localThreads[ 1 ]; - void *inBuffer, *outBuffer; - MTdata d; + MTdataHolder d(gRandomSeed); char vecNameString[64]; vecNameString[0] = 0; if (vecSize == 1) sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); @@ -109,9 +107,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, char programSource[4096]; programSource[0]=0; char *programPtr; - sprintf(programSource, kernelCode, - vecType == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString); + std::string extStr = ""; + if (vecType == kDouble) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"; + else if (vecType == kHalf) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"; + + sprintf(programSource, kernelCode, extStr.c_str(), vecNameString, + vecNameString, vecNameString, vecNameString, + get_explicit_type_name(vecType), vecNameString, vecNameString); //log_info("program: %s\n", programSource); programPtr = programSource; @@ -150,9 +154,10 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize; size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; - inBuffer = (void*)malloc(globalBufferSize); - outBuffer = (void*)malloc(globalBufferSize); - memset(outBuffer, 0, globalBufferSize); + std::vector inBuffer(globalBufferSize); + std::vector outBuffer(globalBufferSize); + + outBuffer.assign(globalBufferSize, 0); cl_int copiesPerWorkItemInt, copiesPerWorkgroup; copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; @@ -164,13 +169,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand( gRandomSeed ); - generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); - free_mtdata(d); d = NULL; + generate_random_data(vecType, + globalBufferSize / get_explicit_type_size(vecType), d, + &inBuffer.front()); - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + &inBuffer.front(), &error); test_error( error, "Unable to create input buffer" ); - streams[ 1 ] = clCreateBuffer( context, 
CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + &outBuffer.front(), &error); test_error( error, "Unable to create output buffer" ); error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); @@ -189,16 +196,18 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, test_error( error, "Unable to queue kernel" ); // Read - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize, + &outBuffer.front(), 0, NULL, NULL); test_error( error, "Unable to read results" ); // Verify int failuresPrinted = 0; - if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 ) + if (memcmp(&inBuffer.front(), &outBuffer.front(), globalBufferSize) != 0) { size_t typeSize = get_explicit_type_size(vecType)* vecSize; - unsigned char * inchar = (unsigned char*)inBuffer; - unsigned char * outchar = (unsigned char*)outBuffer; + unsigned char *inchar = static_cast(&inBuffer.front()); + unsigned char *outchar = + static_cast(&outBuffer.front()); for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) { if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 ) { @@ -226,26 +235,29 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, } } - free(inBuffer); - free(outBuffer); - return failuresPrinted ? 
-1 : 0; } int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + const std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; unsigned int size, typeIndex; int errors = 0; - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) - { - if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) - continue; + bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++) + { if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) continue; + else if (vecType[typeIndex] == kDouble && !fp64Support) + continue; + else if (vecType[typeIndex] == kHalf && !fp16Support) + continue; for( size = 0; vecSizes[ size ] != 0; size++ ) { @@ -259,9 +271,6 @@ int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_qu return 0; } - - - int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel ); -- cgit v1.2.3 From b843b3bd8c064f0f3d966eaf17809dd9ea03b3a6 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 7 Jun 2023 10:51:38 +0100 Subject: relationals: fix missing includes (#1753) With GCC 13 some headers are no longer included transitively through C++ Standard Library headers. 
Signed-off-by: Sven van Haastregt --- test_conformance/relationals/test_comparisons_fp.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index 580b7422..c3d8f67a 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -14,6 +14,8 @@ // limitations under the License. // +#include +#include #include #include #include -- cgit v1.2.3 From 658a8b62fbfd01e15fcd45a85c0e48565addf3f6 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 7 Jun 2023 15:07:25 +0100 Subject: [CI] Build Vulkan loader with USE_GAS=ON (#1756) https://github.com/KhronosGroup/Vulkan-Loader/pull/1212 broke builds that set `USE_GAS=OFF`. Signed-off-by: Sven van Haastregt --- presubmit.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/presubmit.sh b/presubmit.sh index 605c10b0..10354abf 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -77,7 +77,6 @@ cmake .. -G Ninja \ -DBUILD_WSI_XLIB_SUPPORT=OFF \ -DBUILD_WSI_XCB_SUPPORT=OFF \ -DBUILD_WSI_WAYLAND_SUPPORT=OFF \ - -DUSE_GAS=OFF \ -C helper.cmake .. cmake --build . 
-j2 -- cgit v1.2.3 From 1011f8ea815fbfc0bcc3333490b583c977f36787 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Wed, 7 Jun 2023 16:53:12 +0200 Subject: fix async strided test outputing error during verify (#1754) This bug was introduced by https://github.com/KhronosGroup/OpenCL-CTS/pull/1711 Ref google/clspv#1127 --- test_conformance/basic/test_async_strided_copy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp index 4a848c0f..932e9b8c 100644 --- a/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/basic/test_async_strided_copy.cpp @@ -211,9 +211,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0) { unsigned char *inchar = - static_cast(inBuffer.data()); + static_cast(&inBuffer.at(i)); unsigned char *outchar = - static_cast(outBuffer.data()); + static_cast(&outBuffer.at(i)); char values[4096]; values[0] = 0; -- cgit v1.2.3 From c8061ab21ad2f3d6a0e058a56c98b5bb968acf41 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 8 Jun 2023 13:27:20 +0100 Subject: mem_host_flags: use size_t for element count (#1755) More recent GCC versions (e.g. 12.2, 13.1) report that the argument to `new[]` in the `Init` methods exceeds the maximum object size, seemingly related to the negative range of the widened `int`. Use an unsigned type to avoid the warning and propagate the signedness change to other uses of the `num_elements` member. 
Fixes https://github.com/KhronosGroup/OpenCL-CTS/issues/1582 Signed-off-by: Sven van Haastregt --- .../mem_host_flags/C_host_memory_block.h | 34 +++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h index 78692d17..0784c2c2 100644 --- a/test_conformance/mem_host_flags/C_host_memory_block.h +++ b/test_conformance/mem_host_flags/C_host_memory_block.h @@ -24,14 +24,14 @@ template class C_host_memory_block { public: - int num_elements; + size_t num_elements; int element_size; T *pData; C_host_memory_block(); ~C_host_memory_block(); - void Init(int num_elem, T &value); - void Init(int num_elem); + void Init(size_t num_elem, T &value); + void Init(size_t num_elem); void Set_to(T &val); void Set_to_zero(); bool Equal_to(T &val); @@ -40,7 +40,7 @@ public: bool Equal_rect(C_host_memory_block &another, size_t *host_origin, size_t *region, size_t host_row_pitch, size_t host_slice_pitch); - bool Equal(T *pData, int num_elements); + bool Equal(T *pData, size_t num_elements); bool Equal_rect_from_orig(C_host_memory_block &another, size_t *soffset, size_t *region, size_t host_row_pitch, @@ -63,20 +63,20 @@ template C_host_memory_block::~C_host_memory_block() num_elements = 0; } -template void C_host_memory_block::Init(int num_elem, T &value) +template void C_host_memory_block::Init(size_t num_elem, T &value) { if (pData != NULL) delete[] pData; pData = new T[num_elem]; - for (int i = 0; i < num_elem; i++) pData[i] = value; + for (size_t i = 0; i < num_elem; i++) pData[i] = value; num_elements = num_elem; } -template void C_host_memory_block::Init(int num_elem) +template void C_host_memory_block::Init(size_t num_elem) { if (pData != NULL) delete[] pData; pData = new T[num_elem]; - for (int i = 0; i < num_elem; i++) pData[i] = (T)i; + for (size_t i = 0; i < num_elem; i++) pData[i] = (T)i; num_elements = num_elem; } @@ -88,14 +88,14 @@ 
template void C_host_memory_block::Set_to_zero() template void C_host_memory_block::Set_to(T &val) { - for (int i = 0; i < num_elements; i++) pData[i] = val; + for (size_t i = 0; i < num_elements; i++) pData[i] = val; } template bool C_host_memory_block::Equal_to(T &val) { - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == val) count++; } @@ -106,9 +106,9 @@ template bool C_host_memory_block::Equal_to(T &val) template bool C_host_memory_block::Equal(C_host_memory_block &another) { - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == another.pData[i]) count++; } @@ -117,13 +117,13 @@ bool C_host_memory_block::Equal(C_host_memory_block &another) } template -bool C_host_memory_block::Equal(T *pIn_Data, int Innum_elements) +bool C_host_memory_block::Equal(T *pIn_Data, size_t Innum_elements) { if (this->num_elements != Innum_elements) return false; - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == pIn_Data[i]) count++; } @@ -134,7 +134,7 @@ bool C_host_memory_block::Equal(T *pIn_Data, int Innum_elements) template size_t C_host_memory_block::Count(T &val) { size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == val) count++; } -- cgit v1.2.3 From 475a37abbfa22a55fe47bf76d5c7904b3a37730a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 9 Jun 2023 11:25:20 +0100 Subject: [NFC] Do not use reserved names for include guards (#1737) Names that begin with an underscore followed by an uppercase letter are reserved for the C++ implementation. 
Signed-off-by: Sven van Haastregt --- test_common/harness/compat.h | 6 +++--- test_common/harness/crc32.h | 4 ++-- test_conformance/c11_atomics/common.h | 6 +++--- test_conformance/c11_atomics/host_atomics.h | 6 +++--- test_conformance/d3d10/harness.h | 4 ++-- .../extensions/cl_khr_command_buffer/basic_command_buffer.h | 6 +++--- .../cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h | 6 +++--- .../cl_khr_command_buffer_mutable_dispatch/procs.h | 6 +++--- .../extensions/cl_khr_command_buffer/command_buffer_test_base.h | 6 +++--- test_conformance/extensions/cl_khr_command_buffer/procs.h | 6 +++--- test_conformance/extensions/cl_khr_external_semaphore/procs.h | 6 +++--- .../images/kernel_read_write/test_cl_ext_image_buffer.hpp | 6 +++--- test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h | 7 +++---- test_conformance/non_uniform_work_group/tools.h | 6 +++--- test_conformance/pipes/kernels.h | 6 +++--- test_conformance/relationals/test_comparisons_fp.h | 6 +++--- 16 files changed, 46 insertions(+), 47 deletions(-) diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 4053b7ee..a42f2917 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _COMPAT_H_ -#define _COMPAT_H_ +#ifndef COMPAT_H_ +#define COMPAT_H_ #if defined(_WIN32) && defined(_MSC_VER) #include @@ -398,4 +398,4 @@ EXTERN_C int __builtin_clz(unsigned int pattern); #define sleep(sec) Sleep((sec)*1000) #endif -#endif // _COMPAT_H_ +#endif // COMPAT_H_ diff --git a/test_common/harness/crc32.h b/test_common/harness/crc32.h index 65ca15ee..69587011 100644 --- a/test_common/harness/crc32.h +++ b/test_common/harness/crc32.h @@ -15,8 +15,8 @@ Agreement or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient. 
******************************************************************/ -#ifndef _CRC32_H_ -#define _CRC32_H_ +#ifndef CRC32_H_ +#define CRC32_H_ #include #include diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 6c7d0b12..37c37e87 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _COMMON_H_ -#define _COMMON_H_ +#ifndef COMMON_H_ +#define COMMON_H_ #include "harness/testHarness.h" #include "harness/typeWrappers.h" @@ -1567,4 +1567,4 @@ int CBasicTest::ExecuteSingleTest( return 0; } -#endif //_COMMON_H_ +#endif // COMMON_H_ diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 6c4e783a..b865970f 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _HOST_ATOMICS_H_ -#define _HOST_ATOMICS_H_ +#ifndef HOST_ATOMICS_H_ +#define HOST_ATOMICS_H_ #include "harness/testHarness.h" @@ -247,4 +247,4 @@ CorrespondingType host_atomic_fetch_max(volatile AtomicType *a, CorrespondingTyp bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); -#endif //_HOST_ATOMICS_H_ +#endif // HOST_ATOMICS_H_ diff --git a/test_conformance/d3d10/harness.h b/test_conformance/d3d10/harness.h index 184e52cb..afeb4966 100644 --- a/test_conformance/d3d10/harness.h +++ b/test_conformance/d3d10/harness.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _HARNESS_H_ -#define _HARNESS_H_ +#ifndef HARNESS_H_ +#define HARNESS_H_ #define _CRT_SECURE_NO_WARNINGS diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h index b1d36024..44f4cc63 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _CL_KHR_BASIC_COMMAND_BUFFER_H -#define _CL_KHR_BASIC_COMMAND_BUFFER_H +#ifndef CL_KHR_BASIC_COMMAND_BUFFER_H +#define CL_KHR_BASIC_COMMAND_BUFFER_H #include "command_buffer_test_base.h" #include "harness/typeWrappers.h" @@ -99,4 +99,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context, return TEST_PASS; } -#endif // _CL_KHR_BASIC_COMMAND_BUFFER_H +#endif // CL_KHR_BASIC_COMMAND_BUFFER_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index 9056a00d..96669583 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef _CL_KHR_MUTABLE_COMMAND_BASIC_H -#define _CL_KHR_MUTABLE_COMMAND_BASIC_H +#ifndef CL_KHR_MUTABLE_COMMAND_BASIC_H +#define CL_KHR_MUTABLE_COMMAND_BASIC_H #include "../basic_command_buffer.h" #include "../command_buffer_test_base.h" @@ -104,4 +104,4 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest const size_t global_work_size = 4 * sizeof(cl_int); }; -#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H \ No newline at end of file +#endif // CL_KHR_MUTABLE_COMMAND_BASIC_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 08512cae..4b6dacb6 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H -#define _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +#ifndef CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H #include @@ -59,4 +59,4 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/ +#endif // CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h index 0fd2e4ec..48abe25d 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H -#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#ifndef CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#define CL_KHR_COMMAND_BUFFER_TEST_BASE_H #include #include "harness/deviceInfo.h" @@ -174,4 +174,4 @@ public: } -#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#endif // CL_KHR_COMMAND_BUFFER_TEST_BASE_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index 63e004a7..53a7d934 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H -#define _CL_KHR_COMMAND_BUFFER_PROCS_H +#ifndef CL_KHR_COMMAND_BUFFER_PROCS_H +#define CL_KHR_COMMAND_BUFFER_PROCS_H #include @@ -131,4 +131,4 @@ extern int test_event_info_reference_count(cl_device_id device, cl_command_queue queue, int num_elements); -#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/ +#endif // CL_KHR_COMMAND_BUFFER_PROCS_H diff --git a/test_conformance/extensions/cl_khr_external_semaphore/procs.h b/test_conformance/extensions/cl_khr_external_semaphore/procs.h index 753c8fe2..7e1c4caf 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/procs.h +++ b/test_conformance/extensions/cl_khr_external_semaphore/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H -#define _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H +#ifndef CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H +#define CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H #include @@ -79,4 +79,4 @@ extern int test_external_semaphores_invalid_command(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -#endif /* CL_KHR_EXTERNAL_SEMAPHORE */ +#endif // CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp index c6646330..56d15808 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp @@ -14,8 +14,8 @@ // limitations under the License. 
// -#ifndef _TEST_CL_EXT_IMAGE_BUFFER -#define _TEST_CL_EXT_IMAGE_BUFFER +#ifndef TEST_CL_EXT_IMAGE_BUFFER +#define TEST_CL_EXT_IMAGE_BUFFER #define TEST_IMAGE_SIZE 20 @@ -121,4 +121,4 @@ static inline void image_desc_init(cl_image_desc* desc, } } -#endif /* _TEST_CL_EXT_IMAGE_BUFFER */ \ No newline at end of file +#endif // TEST_CL_EXT_IMAGE_BUFFER diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h index 414d1004..f5846061 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _TESTNONUNIFORMWORKGROUP_H -#define _TESTNONUNIFORMWORKGROUP_H +#ifndef TESTNONUNIFORMWORKGROUP_H +#define TESTNONUNIFORMWORKGROUP_H #include "procs.h" #include @@ -147,5 +147,4 @@ private: unsigned int _overallCounter; }; -#endif // _TESTNONUNIFORMWORKGROUP_H - +#endif // TESTNONUNIFORMWORKGROUP_H diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h index 2e63c3dd..ba01fc99 100644 --- a/test_conformance/non_uniform_work_group/tools.h +++ b/test_conformance/non_uniform_work_group/tools.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _TOOLS_H -#define _TOOLS_H +#ifndef TOOLS_H +#define TOOLS_H #include "procs.h" #include @@ -106,4 +106,4 @@ namespace Error { }; } -#endif // _TOOLS_H +#endif // TOOLS_H diff --git a/test_conformance/pipes/kernels.h b/test_conformance/pipes/kernels.h index a2fb70c0..a897e5e8 100644 --- a/test_conformance/pipes/kernels.h +++ b/test_conformance/pipes/kernels.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _KERNELS_H_ -#define _KERNELS_H_ +#ifndef KERNELS_H_ +#define KERNELS_H_ static const char* pipe_readwrite_struct_kernel_code = { "typedef struct{\n" @@ -127,4 +127,4 @@ static const char* pipe_convenience_readwrite_struct_kernel_code = { " read_pipe(in_pipe, &dst[gid]);\n" "}\n" }; -#endif //_KERNELS_H_ +#endif // KERNELS_H_ diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h index 66c62c2d..3401163e 100644 --- a/test_conformance/relationals/test_comparisons_fp.h +++ b/test_conformance/relationals/test_comparisons_fp.h @@ -14,8 +14,8 @@ // limitations under the License. // -#ifndef _TEST_COMPARISONS_FP_H -#define _TEST_COMPARISONS_FP_H +#ifndef TEST_COMPARISONS_FP_H +#define TEST_COMPARISONS_FP_H #include #include @@ -225,4 +225,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context, return TEST_PASS; } -#endif // _TEST_COMPARISONS_FP_H +#endif // TEST_COMPARISONS_FP_H -- cgit v1.2.3 From 095091bc5755fb3a239f049a6a8ade1d82169fc6 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Jun 2023 08:39:22 +0200 Subject: Added cl_khr_fp16 extension support for test_vec_type_hint from basic (#1724) * Added cl_khr_fp16 extension support for test_vec_type_hint from basic (issue #142, basic) * Added correction to fix casting problem --- test_conformance/basic/test_vec_type_hint.cpp | 152 ++++++++++++++------------ 1 file changed, 85 insertions(+), 67 deletions(-) diff --git a/test_conformance/basic/test_vec_type_hint.cpp b/test_conformance/basic/test_vec_type_hint.cpp index 33168b13..0ba105db 100644 --- a/test_conformance/basic/test_vec_type_hint.cpp +++ b/test_conformance/basic/test_vec_type_hint.cpp @@ -13,28 +13,27 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" #include #include #include #include #include - +#include #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" - static const char *sample_kernel = { - "%s\n" // optional pragma string - "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = src[tid];\n" - "\n" - "}\n" + "%s\n" + "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global " + "int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" }; int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) @@ -42,66 +41,85 @@ int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_que int error; int vec_type_index, vec_size_index; - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - const char *size_names[] = {"", "2", "4", "8", "16"}; - char *program_source; - - program_source = (char*)malloc(sizeof(char)*4096); + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, + kLong, kULong, kFloat, kHalf, kDouble }; + const char *size_names[] = { "", "2", "4", "8", "16" }; + std::vector program_source(4096); + + for (vec_type_index = 0; + vec_type_index < sizeof(vecType) / sizeof(vecType[0]); vec_type_index++) + { + + if (vecType[vec_type_index] == kHalf + && !is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + continue; + } + else if (vecType[vec_type_index] == kDouble + && !is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info( + "Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + else if ((vecType[vec_type_index] == kLong + || vecType[vec_type_index] == kULong) + && !gHasLong) + { + log_info( + "Extension 
cl_khr_int64 not supported; skipping long tests.\n"); + continue; + } - for (vec_type_index=0; vec_type_index<10; vec_type_index++) { - if (vecType[vec_type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); - continue; + for (vec_size_index = 0; vec_size_index < 5; vec_size_index++) + { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + size_t global[] = { 1, 1, 1 }; + + log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + char extension[128] = { 0 }; + if (vecType[vec_type_index] == kDouble) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); + else if (vecType[vec_type_index] == kHalf) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + + sprintf(program_source.data(), sample_kernel, extension, + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + + const char *src = &program_source.front(); + error = create_single_kernel_helper(context, &program, &kernel, 1, + &src, "sample_test"); + test_error(error, "create_single_kernel_helper failed"); + + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, + 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); } - 
log_info("Testing doubles.\n"); - } - - if (vecType[vec_type_index] == kLong || vecType[vec_type_index] == kULong) - { - if (!gHasLong) - { - log_info("Extension cl_khr_int64 not supported; skipping long tests.\n"); - continue; - } - } - - for (vec_size_index=0; vec_size_index<5; vec_size_index++) { - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper in, out; - size_t global[] = {1,1,1}; - - log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - program_source[0] = '\0'; - sprintf(program_source, sample_kernel, - (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" ); - if( error != 0 ) - return error; - - in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - error = clSetKernelArg(kernel, 0, sizeof(in), &in); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 1, sizeof(out), &out); - test_error(error, "clSetKernelArg failed"); - - error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - error = clFinish(queue); - test_error(error, "clFinish failed"); - } } - free(program_source); - return 0; } -- cgit v1.2.3 From 16a75dc0af2e0c55d27a91ffefd0aa1b97b3f484 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Jun 2023 17:41:39 +0200 Subject: Added cl_khr_fp16 extension support for test_vector_creation from basic (#1728) * Added cl_khr_fp16 extension support for vector_creation test from basic * Added 
corrections related to vendor's review * Added protection to avoid similar creation cases * Added comment for recent correction * cosmetics * Corrected factor array to restore lost capacity of original test.. leaving only 16-sizes vector tests limited. --- test_conformance/basic/test_vector_creation.cpp | 489 ++++++++++++++---------- 1 file changed, 294 insertions(+), 195 deletions(-) diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index d9530b4e..801c72b1 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,48 +17,41 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include - - +#include #define DEBUG 0 #define DEPTH 16 // Limit the maximum code size for any given kernel. -#define MAX_CODE_SIZE (1024*32) - -const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1}; -const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"}; - -// Creates a kernel by enumerating all possible ways of building the vector out of vloads -// skip_to_results will skip results up to a given number. If the amount of code generated -// is greater than MAX_CODE_SIZE, this function will return the number of results used, -// which can then be used as the skip_to_result value to continue where it left off. 
-int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) { +#define MAX_CODE_SIZE (1024 * 32) + +static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 }; +static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 }; +static const char *size_names[] = { "", "2", "3", "4", "8", + "16", "!!a", "!!b", "!!c", "!!d" }; +static char extension[128] = { 0 }; + +// Creates a kernel by enumerating all possible ways of building the vector out +// of vloads skip_to_results will skip results up to a given number. If the +// amount of code generated is greater than MAX_CODE_SIZE, this function will +// return the number of results used, which can then be used as the +// skip_to_result value to continue where it left off. +int create_kernel(ExplicitType type, int output_size, char *program, + int *number_of_results, int skip_to_result) +{ int number_of_sizes; - switch (output_size) { - case 1: - number_of_sizes = 1; - break; - case 2: - number_of_sizes = 2; - break; - case 3: - number_of_sizes = 3; - break; - case 4: - number_of_sizes = 4; - break; - case 8: - number_of_sizes = 5; - break; - case 16: - number_of_sizes = 6; - break; - default: - log_error("Invalid size: %d\n", output_size); - return -1; + switch (output_size) + { + case 1: number_of_sizes = 1; break; + case 2: number_of_sizes = 2; break; + case 3: number_of_sizes = 3; break; + case 4: number_of_sizes = 4; break; + case 8: number_of_sizes = 5; break; + case 16: number_of_sizes = 6; break; + default: log_error("Invalid size: %d\n", output_size); return -1; } int total_results = 0; @@ -67,102 +60,125 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number int total_program_length = 0; int aborted_due_to_size = 0; - if (skip_to_result < 0) - skip_to_result = 0; + if (skip_to_result < 0) skip_to_result = 0; // The line of code for the vector creation char line[1024]; - // Keep track of what size vector we are using in each 
position so we can iterate through all fo them + // Keep track of what size vector we are using in each position so we can + // iterate through all fo them int pos[DEPTH]; int max_size = output_size; if (DEBUG > 1) log_info("max_size: %d\n", max_size); program[0] = '\0'; - sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n", - type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]); + sprintf(program, + "%s\n__kernel void test_vector_creation(__global %s *src, __global " + "%s%s *result) {\n", + extension, get_explicit_type_name(type), + get_explicit_type_name(type), + (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]); total_program_length += (int)strlen(program); - char storePrefix[ 128 ], storeSuffix[ 128 ]; + char storePrefix[128], storeSuffix[128]; - // Start out trying sizes 1,1,1,1,1... - for (int i=0; i 1) { + while (!done) + { + if (DEBUG > 1) + { log_info("pos size[] = ["); - for (int k=0; k 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); + if (DEBUG > 1) + log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); - // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations + // If they did not fit the required size exactly it is too long, so + // there is no point in checking any other combinations // of the sizes to the right. Prune them from the search. 
- if (size_so_far != max_size) { + if (size_so_far != max_size) + { // Zero all the sizes to the right - for (int k=vloads+1; k=0; d--) { + for (int d = vloads; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over then we are done done = 1; break; } - } else { + } + else + { break; } } - // Go on to the next size since this one (and all others "under" it) didn't fit + // Go on to the next size since this one (and all others "under" it) + // didn't fit continue; } // Generate the actual load line if we are building this part - line[0]= '\0'; - if (skip_to_result == 0 || total_results >= skip_to_result) { - if( number_of_sizes == 3 ) + line[0] = '\0'; + if (skip_to_result == 0 || total_results >= skip_to_result) + { + if (number_of_sizes == 3) { - sprintf( storePrefix, "vstore3( " ); - sprintf( storeSuffix, ", %d, result )", current_result ); + sprintf(storePrefix, "vstore3( "); + sprintf(storeSuffix, ", %d, result )", current_result); } else { - sprintf( storePrefix, "result[%d] = ", current_result ); - storeSuffix[ 0 ] = 0; + sprintf(storePrefix, "result[%d] = ", current_result); + storeSuffix[0] = 0; } - sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size); + sprintf(line, "\t%s(%s%d)(", storePrefix, + get_explicit_type_name(type), output_size); current_result++; int offset = 0; - for (int i=0; i MAX_CODE_SIZE) { + if (total_program_length > MAX_CODE_SIZE) + { aborted_due_to_size = 1; done = 1; } @@ -179,132 +196,194 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number if (DEBUG) log_info("line is: %s", line); - // If we did not use all of them, then we ignore any changes further to the right. - // We do this by causing those loops to skip on the next iteration. - if (vloads < DEPTH) { + // If we did not use all of them, then we ignore any changes further to + // the right. 
We do this by causing those loops to skip on the next + // iteration. + if (vloads < DEPTH) + { if (DEBUG > 1) log_info("done with this depth\n"); - for (int k=vloads; k=0; d--) { + for (int d = DEPTH - 1; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over at the far-left then we are done done = 1; break; } - } else { + } + else + { break; } } - if (done) - break; + if (done) break; // Continue until we are done. } - strcat(program, "}\n\n"); //log_info("%s\n", program); + strcat(program, "}\n\n"); // log_info("%s\n", program); total_program_length += 3; - if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n", - get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads); + if (DEBUG) + log_info( + "\t\t(Program for vector type %s%s contains %d vector creations, " + "of total program length %gkB, with a total of %d vloads.)\n", + get_explicit_type_name(type), size_names[number_of_sizes - 1], + total_results, total_program_length / 1024.0, total_vloads); *number_of_results = current_result; - if (aborted_due_to_size) - return total_results; + if (aborted_due_to_size) return total_results; return 0; } - - -int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_vector_creation(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16}; + const std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; + // should be in sync with global array size_names + const std::vector vecSizes = { 1, 2, 3, 4, 8, 16 }; 
- char *program_source; - int error; + int error = CL_SUCCESS; int total_errors = 0; + int number_of_results = 0; - cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - void *input_data_converted; - void *output_data; - - int number_of_results;; - - input_data_converted = malloc(sizeof(cl_double)*16); - program_source = (char*)malloc(sizeof(char)*1024*1024*4); + std::vector input_data_converted(sizeof(cl_double) * 16); + std::vector program_source(sizeof(char) * 1024 * 1024 * 4); + std::vector output_data; // Iterate over all the types - for (int type_index=0; type_index<10; type_index++) { - if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong))) + for (int type_index = 0; type_index < vecType.size(); type_index++) { - log_info("Long/ULong data type not supported on this device\n"); - continue; - } - - clMemWrapper input; - if (vecType[type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + if (!gHasLong + && ((vecType[type_index] == kLong) + || (vecType[type_index] == kULong))) + { + log_info("Long/ULong data type not supported on this device\n"); + continue; + } + else if (vecType[type_index] == kDouble) + { + if (!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double " + "tests.\n"); continue; } - log_info("Testing doubles.\n"); + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); } + else if (vecType[type_index] == kHalf) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + continue; + } + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + } + + log_info("Testing %s.\n", 
get_explicit_type_name(vecType[type_index])); // Convert the data to the right format for the test. - memset(input_data_converted, 0xff, sizeof(cl_double)*16); - if (vecType[type_index] != kDouble) { - for (int j=0; j<16; j++) { - convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j, - kInt, 0, kRoundToEven, vecType[type_index]); + memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16); + if (vecType[type_index] == kDouble) + { + const cl_double input_data_double[16] = { 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15 }; + memcpy(input_data_converted.data(), &input_data_double, + sizeof(cl_double) * 16); + } + else if (vecType[type_index] == kHalf) + { + cl_half *buf = + reinterpret_cast(input_data_converted.data()); + for (int j = 0; j < 16; j++) + buf[j] = cl_half_from_float(float(j), CL_HALF_RTE); + } + else + { + for (int j = 0; j < 16; j++) + { + convert_explicit_value( + &j, + ((char *)input_data_converted.data()) + + get_explicit_type_size(vecType[type_index]) * j, + kInt, 0, kRoundToEven, vecType[type_index]); } - } else { - memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16); } - input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16, - (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error); - if (error) { + clMemWrapper input = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + get_explicit_type_size(vecType[type_index]) * 16, + input_data_converted.data(), &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; continue; } // Iterate over all the vector sizes. 
- for (int size_index=1; size_index< 5; size_index++) { - size_t global[] = {1,1,1}; + for (int size_index = 1; size_index < vecSizes.size(); size_index++) + { + size_t global[] = { 1, 1, 1 }; int number_generated = -1; int previous_number_generated = 0; - log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]); - while (number_generated != 0) { + log_info("Testing %s%s...\n", + get_explicit_type_name(vecType[type_index]), + size_names[size_index]); + while (number_generated != 0) + { clMemWrapper output; clKernelWrapper kernel; clProgramWrapper program; - number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated); - if (number_generated != 0) { + number_generated = + create_kernel(vecType[type_index], vecSizes[size_index], + program_source.data(), &number_of_results, + number_generated); + if (number_generated != 0) + { if (previous_number_generated == 0) - log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0); - log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1); + log_info("Code size greater than %gkB; splitting test " + "into multiple kernels.\n", + MAX_CODE_SIZE / 1024.0); + log_info("\tExecuting vector permutations %d to %d...\n", + previous_number_generated, number_generated - 1); } - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation"); - if (error) { + char *src = program_source.data(); + error = create_single_kernel_helper(context, &program, &kernel, + 1, (const char **)&src, + "test_vector_creation"); + if (error) + { log_error("create_single_kernel_helper failed.\n"); total_errors++; break; } - output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - NULL, &error); - if (error) { + output = 
clCreateBuffer( + context, CL_MEM_WRITE_ONLY, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + NULL, &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; break; @@ -312,95 +391,115 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_q error = clSetKernelArg(kernel, 0, sizeof(input), &input); error |= clSetKernelArg(kernel, 1, sizeof(output), &output); - if (error) { + if (error) + { print_error(error, "clSetKernelArg failed"); total_errors++; break; } - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - if (error) { + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, + NULL, 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueNDRangeKernel failed"); total_errors++; break; } error = clFinish(queue); - if (error) { + if (error) + { print_error(error, "clFinish failed"); total_errors++; break; } - output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - if (output_data == NULL) { - log_error("Failed to allocate memory for output data.\n"); - total_errors++; - break; - } - memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - output_data, 0, NULL, NULL); - if (error) { + output_data.resize(number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + memset(output_data.data(), 0xff, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + error = clEnqueueReadBuffer( + queue, output, CL_TRUE, 0, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + output_data.data(), 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueReadBuffer 
failed"); total_errors++; - free(output_data); break; } // Check the results - char *res = (char *)output_data; - char *exp = (char *)input_data_converted; - for (int i=0; i Date: Fri, 16 Jun 2023 10:53:08 +0100 Subject: basic: fix unused-but-set variables (#1764) Remove the unused `numItems` variable. As this fixes all occurrences of this warning in test_basic, remove the suppression flag. Signed-off-by: Sven van Haastregt --- test_conformance/basic/CMakeLists.txt | 2 -- test_conformance/basic/test_work_item_functions.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index adf24bd8..c07d32b6 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -70,6 +70,4 @@ if(APPLE) list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp) endif(APPLE) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") - include(../CMakeCommon.txt) diff --git a/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/basic/test_work_item_functions.cpp index d95915cf..9683a834 100644 --- a/test_conformance/basic/test_work_item_functions.cpp +++ b/test_conformance/basic/test_work_item_functions.cpp @@ -91,7 +91,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma { for( int i = 0; i < NUM_TESTS; i++ ) { - size_t numItems = 1; for( size_t j = 0; j < dim; j++ ) { // All of our thread sizes should be within the max local sizes, since they're all <= 20 @@ -100,8 +99,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) ) localThreads[ j ]--; - numItems *= threads[ j ]; - // Hack for now: localThreads > 1 are iffy localThreads[ j ] = 1; } -- cgit v1.2.3 From 0e229b8f01afc9e16ca83234b656830c26f11215 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 17:42:57 +0200 Subject: Added cl_khr_fp16 
extension support for test_fpmath from basic (#1718) * Added half and double support for fpmath test from basic (issue #142, basic) * Cosmetic corrections due to code review * Removed unnecessary casting * Added corrections due to code review * Tuning range of input generation to avoid hitting infinity * Moved string helpers procedures due to request from test_commonfns PR #1695 --- test_common/harness/stringHelpers.h | 41 +++ test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/main.cpp | 37 ++- test_conformance/basic/procs.h | 10 +- test_conformance/basic/test_astype.cpp | 7 +- test_conformance/basic/test_fpmath.cpp | 386 +++++++++++++++++++++++++++ test_conformance/basic/test_fpmath_float.cpp | 196 -------------- test_conformance/basic/utils.h | 41 --- 8 files changed, 468 insertions(+), 252 deletions(-) create mode 100644 test_common/harness/stringHelpers.h create mode 100644 test_conformance/basic/test_fpmath.cpp delete mode 100644 test_conformance/basic/test_fpmath_float.cpp delete mode 100644 test_conformance/basic/utils.h diff --git a/test_common/harness/stringHelpers.h b/test_common/harness/stringHelpers.h new file mode 100644 index 00000000..3f6bf64d --- /dev/null +++ b/test_common/harness/stringHelpers.h @@ -0,0 +1,41 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef BASIC_UTILS_H +#define BASIC_UTILS_H + +#include +#include + +inline std::string concat_kernel(const char *sstr[], int num) +{ + std::string res; + for (int i = 0; i < num; i++) res += std::string(sstr[i]); + return res; +} + +template +inline std::string str_sprintf(const std::string &str, Args... args) +{ + int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) + 1; + if (str_size <= 0) throw std::runtime_error("Formatting error."); + size_t s = static_cast(str_size); + std::unique_ptr buffer(new char[s]); + std::snprintf(buffer.get(), s, str.c_str(), args...); + return std::string(buffer.get(), buffer.get() + s - 1); +} + +#endif // BASIC_UTIL_H diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index c07d32b6..c89a93cf 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -2,7 +2,7 @@ set(MODULE_NAME BASIC) set(${MODULE_NAME}_SOURCES main.cpp - test_fpmath_float.cpp + test_fpmath.cpp test_intmath.cpp test_hiloeo.cpp test_local.cpp test_pointercast.cpp test_if.cpp test_loop.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 86c3cec3..24262dbf 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -22,14 +22,15 @@ #include #include #include + +#include + #include "harness/testHarness.h" #include "procs.h" test_definition test_list[] = { ADD_TEST(hostptr), - ADD_TEST(fpmath_float), - ADD_TEST(fpmath_float2), - ADD_TEST(fpmath_float4), + ADD_TEST(fpmath), ADD_TEST(intmath_int), ADD_TEST(intmath_int2), ADD_TEST(intmath_int4), @@ -164,9 +165,35 @@ test_definition test_list[] = { }; const int test_num = ARRAY_SIZE( test_list ); +cl_half_rounding_mode halfRoundingMode = CL_HALF_RTE; + +test_status InitCL(cl_device_id device) +{ + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } + } + + return TEST_PASS; +} int main(int argc, const char *argv[]) { - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); } diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index c14340de..9cbc373a 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// + #include "harness/kernelHelpers.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -21,9 +22,8 @@ #include "harness/rounding_mode.h" extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 08a4cb85..45669a7c 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -14,6 +14,9 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/conversions.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" #include #include @@ -22,11 +25,7 @@ #include #include -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - #include "procs.h" -#include "utils.h" // clang-format off diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp new file mode 100644 index 00000000..6719e728 --- /dev/null +++ b/test_conformance/basic/test_fpmath.cpp @@ -0,0 +1,386 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" +#include "harness/rounding_mode.h" +#include "harness/stringHelpers.h" + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "procs.h" + +static const char *fp_kernel_code = R"( +%s +__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) +{ + int tid = get_global_id(0); + + dst[tid] = srcA[tid] OP srcB[tid]; +})"; + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +template double toDouble(T val) +{ + if (std::is_same::value) + return HTF(val); + else + return val; +} + +bool isHalfNan(cl_half v) +{ + // Extract FP16 exponent and mantissa + uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = v & 0x3FF; + + // NaN test + return (h_exp == 0x1F && h_mant != 0); +} + +cl_half half_plus(cl_half a, cl_half b) +{ + return HFF(std::plus()(HTF(a), HTF(b))); +} + +cl_half half_minus(cl_half a, cl_half b) +{ + return HFF(std::minus()(HTF(a), HTF(b))); +} + +cl_half half_mult(cl_half a, cl_half b) +{ + return HFF(std::multiplies()(HTF(a), HTF(b))); +} + +template struct TestDef +{ + const char op; + std::function ref; + std::string type_str; + size_t vec_size; +}; + +template +int verify_fp(std::vector (&input)[2], std::vector &output, + const TestDef &test) +{ + 
auto &inA = input[0]; + auto &inB = input[1]; + for (int i = 0; i < output.size(); i++) + { + bool nan_test = false; + + T r = test.ref(inA[i], inB[i]); + + if (std::is_same::value) + nan_test = !(isHalfNan(r) && isHalfNan(output[i])); + + if (r != output[i] && nan_test) + { + log_error("FP math test for type: %s, vec size: %zu, failed at " + "index %d, %a '%c' %a, expected %a, get %a\n", + test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]), + test.op, toDouble(inB[i]), toDouble(r), + toDouble(output[i])); + return -1; + } + } + + return 0; +} + +template void generate_random_inputs(std::vector (&input)[2]) +{ + RandomSeed seed(gRandomSeed); + + if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float(-MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + seed)); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } +} + +struct TypesIterator +{ + using TypeIter = std::tuple; + + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems) + : context(context), queue(queue), fpConfigHalf(0), fpConfigFloat(0), + num_elements(num_elems) + { + // typeid().name one day + type2name[sizeof(cl_half)] = "half"; + type2name[sizeof(cl_float)] = "float"; + type2name[sizeof(cl_double)] = "double"; + + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + 
fpConfigFloat = get_default_rounding_mode(deviceID); + + if (fp16Support) + fpConfigHalf = + get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG); + + for_each_elem(it); + } + + template int test_fpmath(TestDef &test) + { + constexpr size_t vecSizes[] = { 1, 2, 4, 8, 16 }; + cl_int err = CL_SUCCESS; + + std::ostringstream sstr; + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string program_source = + str_sprintf(std::string(fp_kernel_code), sstr.str().c_str()); + + for (unsigned i = 0; i < ARRAY_SIZE(vecSizes); i++) + { + test.vec_size = vecSizes[i]; + + std::ostringstream vecNameStr; + vecNameStr << test.type_str; + if (test.vec_size != 1) vecNameStr << test.vec_size; + + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t length = sizeof(T) * num_elements * test.vec_size; + + bool isRTZ = false; + RoundingMode oldMode = kDefaultRoundingMode; + + + // If we only support rtz mode + if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigHalf) + { + isRTZ = true; + oldMode = get_round(); + } + } + else if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigFloat) + { + isRTZ = true; + oldMode = get_round(); + } + } + + std::vector inputs[]{ + std::vector(test.vec_size * num_elements), + std::vector(test.vec_size * num_elements) + }; + std::vector output = + std::vector(test.vec_size * num_elements); + + generate_random_inputs(inputs); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, + NULL, &err); + test_error(err, "clCreateBuffer failed."); + } + for (int i = 0; i < ARRAY_SIZE(inputs); i++) + { + err = + clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, + inputs[i].data(), 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed."); + } + + std::string build_options = 
"-DTYPE="; + build_options.append(vecNameStr.str()) + .append(" -DOP=") + .append(1, test.op); + + const char *ptr = program_source.c_str(); + err = + create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_fp", build_options.c_str()); + + test_error(err, "create_single_kernel_helper failed"); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + err = + clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(err, "clSetKernelArgs failed."); + } + + size_t threads[] = { static_cast(num_elements) }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, + 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, + output.data(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed."); + + if (isRTZ) set_round(kRoundTowardZero, kfloat); + + err = verify_fp(inputs, output, test); + + if (isRTZ) set_round(oldMode, kfloat); + + test_error(err, "test verification failed"); + log_info("FP '%c' '%s' test passed\n", test.op, + vecNameStr.str().c_str()); + } + + return err; + } + + template int test_fpmath_common() + { + int err = TEST_PASS; + if (std::is_same::value) + { + TestDef tests[] = { { '+', half_plus, type2name[sizeof(T)] }, + { '-', half_minus, type2name[sizeof(T)] }, + { '*', half_mult, type2name[sizeof(T)] } }; + for (auto &test : tests) err |= test_fpmath(test); + } + else + { + TestDef tests[] = { + { '+', std::plus(), type2name[sizeof(T)] }, + { '-', std::minus(), type2name[sizeof(T)] }, + { '*', std::multiplies(), type2name[sizeof(T)] } + }; + for (auto &test : tests) err |= test_fpmath(test); + } + + return err; + } + + template bool skip_type() + { + if (std::is_same::value && !fp64Support) + return true; + else if (std::is_same::value && !fp16Support) + return true; + return false; + } + + template + void iterate_type(const Type &t) + { + bool doTest = !skip_type(); + + if (doTest) + { + if (test_fpmath_common()) + { 
+ throw std::runtime_error("test_fpmath_common failed\n"); + } + } + } + + template + inline typename std::enable_if::type + for_each_elem( + const std::tuple &) // Unused arguments are given no names. + {} + + template + inline typename std::enable_if < Cnt::type + for_each_elem(const std::tuple &t) + { + iterate_type(std::get(t)); + for_each_elem(t); + } + +protected: + TypeIter it; + + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + int num_elements; + std::map type2name; +}; + +int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp deleted file mode 100644 index fced0f4e..00000000 --- a/test_conformance/basic/test_fpmath_float.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "harness/compat.h" - -#include -#include -#include -#include -#include -#include "harness/rounding_mode.h" - -#include -#include -#include -#include - -#include "procs.h" - -struct TestDef -{ - const char op; - std::function ref; -}; - -static const char *fp_kernel_code = R"( -__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) -{ - int tid = get_global_id(0); - - dst[tid] = srcA[tid] OP srcB[tid]; -})"; - -static int verify_fp(std::vector (&input)[2], std::vector &output, - const TestDef &test) -{ - - auto &inA = input[0]; - auto &inB = input[1]; - for (int i = 0; i < output.size(); i++) - { - float r = test.ref(inA[i], inB[i]); - if (r != output[i]) - { - log_error("FP '%c' float test failed\n", test.op); - return -1; - } - } - - log_info("FP '%c' float test passed\n", test.op); - return 0; -} - - -void generate_random_inputs(std::vector (&input)[2]) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - for (auto &v : input) - { - std::generate(v.begin(), v.end(), random_generator); - } -} - -template -int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const std::string type_str, - const TestDef &test) -{ - clMemWrapper streams[3]; - clProgramWrapper program; - clKernelWrapper kernel; - - int err; - - size_t length = sizeof(cl_float) * num_elements * N; - - int isRTZ = 0; - RoundingMode oldMode = kDefaultRoundingMode; - - // If we only support rtz mode - if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) - { - isRTZ = 1; - oldMode = get_round(); - } - - - std::vector inputs[]{ std::vector(N * num_elements), - std::vector(N * num_elements) }; - std::vector output = std::vector(N * num_elements); - - generate_random_inputs(inputs); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - streams[i] = - clCreateBuffer(context, 
CL_MEM_READ_WRITE, length, NULL, &err); - test_error(err, "clCreateBuffer failed."); - } - for (int i = 0; i < ARRAY_SIZE(inputs); i++) - { - err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, - inputs[i].data(), 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed."); - } - - std::string build_options = "-DTYPE="; - build_options.append(type_str).append(" -DOP=").append(1, test.op); - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &fp_kernel_code, "test_fp", - build_options.c_str()); - - test_error(err, "create_single_kernel_helper failed"); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); - test_error(err, "clSetKernelArgs failed."); - } - - size_t threads[] = { static_cast(num_elements) }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, - NULL); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, - output.data(), 0, NULL, NULL); - test_error(err, "clEnqueueReadBuffer failed."); - - if (isRTZ) set_round(kRoundTowardZero, kfloat); - - err = verify_fp(inputs, output, test); - - if (isRTZ) set_round(oldMode, kfloat); - - return err; -} - - -template -int test_fpmath_common(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const std::string type_str) -{ - TestDef tests[] = { { '+', std::plus() }, - { '-', std::minus() }, - { '*', std::multiplies() } }; - int err = TEST_PASS; - - for (const auto &test : tests) - { - err |= test_fpmath(device, context, queue, num_elements, type_str, - test); - } - - return err; -} - -int test_fpmath_float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<1>(device, context, queue, num_elements, "float"); -} - -int test_fpmath_float2(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) 
-{ - return test_fpmath_common<2>(device, context, queue, num_elements, - "float2"); -} - -int test_fpmath_float4(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<4>(device, context, queue, num_elements, - "float4"); -} diff --git a/test_conformance/basic/utils.h b/test_conformance/basic/utils.h deleted file mode 100644 index 3f6bf64d..00000000 --- a/test_conformance/basic/utils.h +++ /dev/null @@ -1,41 +0,0 @@ -// -// Copyright (c) 2023 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#ifndef BASIC_UTILS_H -#define BASIC_UTILS_H - -#include -#include - -inline std::string concat_kernel(const char *sstr[], int num) -{ - std::string res; - for (int i = 0; i < num; i++) res += std::string(sstr[i]); - return res; -} - -template -inline std::string str_sprintf(const std::string &str, Args... args) -{ - int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) 
+ 1; - if (str_size <= 0) throw std::runtime_error("Formatting error."); - size_t s = static_cast(str_size); - std::unique_ptr buffer(new char[s]); - std::snprintf(buffer.get(), s, str.c_str(), args...); - return std::string(buffer.get(), buffer.get() + s - 1); -} - -#endif // BASIC_UTIL_H -- cgit v1.2.3 From df3ec8deecdb81661ee61c3c97ae63419b5f4822 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 17:44:45 +0200 Subject: Added cl_khr_fp16 extension support for test_int2fp from basic (#1742) * Added cl_khr_fp16 and cl_khr_fp64 support for float2int and int2float tests from basic * removed debug output * Replaced procedure to generate random half values in specific range (issue #142, basic) * Added cosmetic fixes due to code review comments * Moved string helper procedures due to request for test_commonfns PR #1695 --- test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/main.cpp | 6 +- test_conformance/basic/procs.h | 6 +- test_conformance/basic/test_int2float.cpp | 140 ------------- test_conformance/basic/test_int2fp.cpp | 324 ++++++++++++++++++++++++++++++ 5 files changed, 332 insertions(+), 146 deletions(-) delete mode 100644 test_conformance/basic/test_int2float.cpp create mode 100644 test_conformance/basic/test_int2fp.cpp diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index c89a93cf..47c1c980 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -11,7 +11,7 @@ set(${MODULE_NAME}_SOURCES test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp test_imagedim.cpp test_vloadstore.cpp - test_int2float.cpp + test_int2fp.cpp test_createkernelsinprogram.cpp test_hostptr.cpp test_explicit_s2v.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 24262dbf..d1901f95 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -59,8 +59,8 @@ test_definition test_list[] = { 
ADD_TEST(image_r8), ADD_TEST(barrier), ADD_TEST_VERSION(wg_barrier, Version(2, 0)), - ADD_TEST(int2float), - ADD_TEST(float2int), + ADD_TEST(int2fp), + ADD_TEST(fp2int), ADD_TEST(imagereadwrite), ADD_TEST(imagereadwrite3d), ADD_TEST(readimage3d), @@ -156,7 +156,7 @@ test_definition test_list[] = { ADD_TEST(simple_read_image_pitch), ADD_TEST(simple_write_image_pitch), -#if defined( __APPLE__ ) +#if defined(__APPLE__) ADD_TEST(queue_priority), #endif diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index 9cbc373a..b685ecd5 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -52,8 +52,10 @@ extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_comm extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int2fp(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_fp2int(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp deleted file mode 100644 
index c5afc244..00000000 --- a/test_conformance/basic/test_int2float.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include -#include - -#include -#include - -#include "procs.h" - -namespace { -const char *int2float_kernel_code = R"( -__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) -{ - int tid = get_global_id(0); - - dst[tid] = (TYPE_Y)src[tid]; - -})"; - -template const char *Type2str() { return ""; } -template <> const char *Type2str() { return "int"; } -template <> const char *Type2str() { return "float"; } - -template void generate_random_inputs(std::vector &v) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - std::generate(v.begin(), v.end(), random_generator); -} - -template bool equal_value(Tx a, Ty b) -{ - return a == (Tx)b; -} - -template -int verify_X2Y(std::vector input, std::vector output, - const char *test_name) -{ - - if (!std::equal(output.begin(), output.end(), input.begin(), - equal_value)) - { - log_error("%s test failed\n", test_name); - return -1; - } - - log_info("%s test passed\n", test_name); - return 0; -} -template -int test_X2Y(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const 
char *test_name) -{ - clMemWrapper streams[2]; - clProgramWrapper program; - clKernelWrapper kernel; - int err; - - - std::vector input(num_elements); - std::vector output(num_elements); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(Tx) * num_elements, nullptr, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(Ty) * num_elements, nullptr, &err); - test_error(err, "clCreateBuffer failed."); - - generate_random_inputs(input); - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, - sizeof(Tx) * num_elements, input.data(), 0, - nullptr, nullptr); - test_error(err, "clEnqueueWriteBuffer failed."); - - std::string build_options; - build_options.append("-DTYPE_X=").append(Type2str()); - build_options.append(" -DTYPE_Y=").append(Type2str()); - err = create_single_kernel_helper(context, &program, &kernel, 1, - &int2float_kernel_code, "test_X2Y", - build_options.c_str()); - test_error(err, "create_single_kernel_helper failed."); - - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - test_error(err, "clSetKernelArg failed."); - - size_t threads[] = { (size_t)num_elements }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0, - nullptr, nullptr); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, - sizeof(Ty) * num_elements, output.data(), 0, - nullptr, nullptr); - test_error(err, "clEnqueueReadBuffer failed."); - - err = verify_X2Y(input, output, test_name); - - return err; -} -} -int test_int2float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_X2Y(device, context, queue, num_elements, - "INT2FLOAT"); -} -int test_float2int(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_X2Y(device, context, queue, 
num_elements, - "FLOAT2INT"); -} diff --git a/test_conformance/basic/test_int2fp.cpp b/test_conformance/basic/test_int2fp.cpp new file mode 100644 index 00000000..8b1203a7 --- /dev/null +++ b/test_conformance/basic/test_int2fp.cpp @@ -0,0 +1,324 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "CL/cl_half.h" +#include "harness/compat.h" +#include "harness/errorHelpers.h" +#include "harness/stringHelpers.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "procs.h" + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +namespace { +const char *int2float_kernel_code = R"( +%s +__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) +{ + int tid = get_global_id(0); + + dst[tid] = (TYPE_Y)src[tid]; + +})"; + +template struct TypesIterator +{ + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems, const char *test_name) + : context(context), queue(queue), test_name(test_name), + num_elements(num_elems) + { + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + type2name[sizeof(cl_half)] = std::make_pair("half", "short"); + type2name[sizeof(cl_float)] = std::make_pair("float", "int"); + type2name[sizeof(cl_double)] = 
std::make_pair("double", "long"); + + std::tuple it; + for_each_elem(it); + } + + template void generate_random_inputs(std::vector &v) + { + RandomSeed seed(gRandomSeed); + + if (sizeof(T) == sizeof(cl_half)) + { + // Bound generated half values to 0x1.ffcp+14(32752.0) which is the + // largest cl_half value smaller than the max value of cl_short, + // 32767. + if (int2fp) + { + auto random_generator = [&seed]() { + return (cl_short)get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed)); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + else if (sizeof(T) == sizeof(cl_float)) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else if (sizeof(T) == sizeof(cl_double)) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + + template static bool equal_value(Tx a, Ty b) + { + return a == (Tx)b; + } + + static bool equal_value_from_half(cl_short a, cl_half b) + { + return a == (cl_short)HTF(b); + } + + static bool equal_value_to_half(cl_half a, cl_short b) + { + return a == HFF((float)b); + } + + + template + int verify_X2Y(std::vector input, std::vector output) + { + if (std::is_same::value + || std::is_same::value) + { + bool res = true; + if (int2fp) + res = std::equal(output.begin(), output.end(), input.begin(), + equal_value_to_half); + else + res = std::equal(output.begin(), output.end(), 
input.begin(), + equal_value_from_half); + + if (!res) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + else + { + if (!std::equal(output.begin(), output.end(), input.begin(), + equal_value)) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + + log_info("%s test passed\n", test_name.c_str()); + return 0; + } + + template int test_X2Y() + { + clMemWrapper streams[2]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; + + std::vector input(num_elements); + std::vector output(num_elements); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Tx) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Ty) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + + generate_random_inputs(input); + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(Tx) * num_elements, input.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + + std::string src_name = type2name[sizeof(Tx)].first; + std::string dst_name = type2name[sizeof(Tx)].second; + if (int2fp) std::swap(src_name, dst_name); + + std::string build_options; + build_options.append("-DTYPE_X=").append(src_name.c_str()); + build_options.append(" -DTYPE_Y=").append(dst_name.c_str()); + + std::string extension; + if (sizeof(Tx) == sizeof(cl_double)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (sizeof(Tx) == sizeof(cl_half)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string kernelSource = + str_sprintf(int2float_kernel_code, extension.c_str()); + const char *ptr = kernelSource.c_str(); + + err = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_X2Y", build_options.c_str()); + test_error(err, "create_single_kernel_helper failed."); + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + 
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + test_error(err, "clSetKernelArg failed."); + + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(Ty) * num_elements, output.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); + + err = verify_X2Y(input, output); + + return err; + } + + template bool skip_type() + { + if (std::is_same::value && !fp64Support) + return true; + else if (std::is_same::value && !fp16Support) + return true; + return false; + } + + template void iterate_type(const T &t) + { + bool doTest = !skip_type(); + + if (doTest) + { + typedef typename std::conditional< + (sizeof(T) == sizeof(std::int16_t)), std::int16_t, + typename std::conditional<(sizeof(T) == sizeof(std::int32_t)), + std::int32_t, + std::int64_t>::type>::type U; + if (int2fp) + { + if (test_X2Y()) + throw std::runtime_error("test_X2Y failed\n"); + } + else + { + if (test_X2Y()) + throw std::runtime_error("test_X2Y failed\n"); + } + } + } + + template + inline typename std::enable_if::type + for_each_elem( + const std::tuple &) // Unused arguments are given no names. 
+ {} + + template + inline typename std::enable_if < Cnt::type + for_each_elem(const std::tuple &t) + { + iterate_type(std::get(t)); + for_each_elem(t); + } + +protected: + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + std::map> type2name; + + std::string test_name; + int num_elements; +}; + +} + +int test_int2fp(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements, "INT2FP"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} + +int test_fp2int(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements, "FP2INT"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} -- cgit v1.2.3 From 50f9f063236394eea1edfab92bb4ebebd8c33b78 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 21 Jun 2023 15:19:21 +0100 Subject: test_common: fix -Wsign-compare warnings (#1759) In preparation of re-enabling `-Wsign-compare` globally, avoid mixing signed and unsigned integers in comparisons in test_common. 
Signed-off-by: Sven van Haastregt --- test_common/gl/helpers.cpp | 4 ++-- test_common/harness/testHarness.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index b9f95a94..1fb85035 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -1715,7 +1715,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < (size_t)width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; @@ -1733,7 +1733,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 95ea8163..3d743e71 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -835,9 +835,9 @@ void callTestFunctions(test_definition testList[], std::vector threads; test_harness_state state = { testList, resultTestList, deviceToUse, config }; - for (int i = 0; i < config.numWorkerThreads; i++) + for (unsigned i = 0; i < config.numWorkerThreads; i++) { - log_info("Spawning worker thread %i\n", i); + log_info("Spawning worker thread %u\n", i); threads.push_back(new std::thread(test_function_runner, &state)); } -- cgit v1.2.3 From 2e88013b34586c10fb8cc9eb0320e5587ce94785 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 22 Jun 2023 06:08:21 +0100 Subject: compiler: fix memory leak from unnecessary strdup (#1761) The result of the `strdup` was never freed. 
The string duplication isn't necessary, so remove it. Signed-off-by: Sven van Haastregt --- test_conformance/compiler/test_compile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index f3ee4312..d250bdd4 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -462,7 +462,7 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device header_names[i] = _strdup(buffer); sprintf(buffer, composite_kernel_extern_template, i); - const char* line = _strdup(buffer); + const char *line = buffer; error = create_single_kernel_helper_create_program(context, &headers[i], 1, &line); if( headers[i] == NULL || error != CL_SUCCESS ) { -- cgit v1.2.3 From 60f025a7da5ab2456ba41405e9fdf655ce948eac Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:40:35 +0200 Subject: Added cl_half support for test_select (#1617) * Added cl_half support for test_select (issue #142, select) * Added corrections due to code review + performance optimization + replaced C object with wrappers * minor fix * Corrected use of user event * Removed unnecessary user event --- test_conformance/select/test_select.cpp | 295 +++++------- test_conformance/select/test_select.h | 24 +- test_conformance/select/util_select.cpp | 779 +++++++++++++++++++------------- 3 files changed, 576 insertions(+), 522 deletions(-) diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index b0cda09f..8a0567c3 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -14,11 +14,14 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/typeWrappers.h" #include #include #include #include +#include + #if ! 
defined( _WIN32) #if defined(__APPLE__) #include @@ -66,6 +69,16 @@ static void printUsage( void ); #define BUFFER_SIZE (1024*1024) #define KPAGESIZE 4096 +#define test_error_count(errCode, msg) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + gFailCount++; \ + print_error(errCodeResult, msg); \ + return errCode; \ + } \ + } // When we indicate non wimpy mode, the types that are 32 bits value will // test their entire range and 64 bits test will test the 32 bit @@ -74,12 +87,6 @@ static void printUsage( void ); static bool s_wimpy_mode = false; static int s_wimpy_reduction_factor = 256; -// Tests are broken into the major test which is based on the -// src and cmp type and their corresponding vector types and -// sub tests which is for each individual test. The following -// tracks the subtests -int s_test_cnt = 0; - //----------------------------------------- // Static helper functions //----------------------------------------- @@ -237,6 +244,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont if (srctype == kdouble) strcpy( extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" ); + if (srctype == khalf) + strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + // create type name and testname switch( vec_len ) { @@ -288,25 +298,14 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont return program; } - #define VECTOR_SIZE_COUNT 6 static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) { int err = CL_SUCCESS; - int s_test_fail = 0; - MTdataHolder d; + MTdataHolder d(gRandomSeed); const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; - cl_mem src1 = NULL; - cl_mem src2 = NULL; - cl_mem cmp = NULL; - cl_mem dest = NULL; - void *ref = NULL; - void *sref = NULL; - void *src1_host = NULL; - void *src2_host = NULL; - void *cmp_host = NULL; - void *dest_host = NULL; + 
clMemWrapper src1, src2, cmp, dest; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; size_t block_elements = BUFFER_SIZE / type_size[stype]; @@ -315,16 +314,22 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c // It is more efficient to create the tests all at once since we // use the same test data on each of the vector sizes - int vecsize; - cl_program programs[VECTOR_SIZE_COUNT]; - cl_kernel kernels[VECTOR_SIZE_COUNT]; + clProgramWrapper programs[VECTOR_SIZE_COUNT]; + clKernelWrapper kernels[VECTOR_SIZE_COUNT]; - if(stype == kdouble && ! is_extension_available( device, "cl_khr_fp64" )) + if (stype == kdouble && !is_extension_available(device, "cl_khr_fp64")) { log_info("Skipping double because cl_khr_fp64 extension is not supported.\n"); return 0; } + if (stype == khalf && !is_extension_available(device, "cl_khr_fp16")) + { + log_info( + "Skipping half because cl_khr_fp16 extension is not supported.\n"); + return 0; + } + if (gIsEmbedded) { if (( stype == klong || stype == kulong ) && ! 
is_extension_available( device, "cles_khr_int64" )) @@ -340,54 +345,41 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c } } - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) - { - programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] ); - if (!programs[vecsize] || !kernels[vecsize]) { - ++s_test_fail; - ++s_test_cnt; - return -1; - } - } - - ref = malloc( BUFFER_SIZE ); - if( NULL == ref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } - sref = malloc( BUFFER_SIZE ); - if( NULL == sref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } src1 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src1 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src1 buffer\n"); src2 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src2 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src2 buffer\n"); cmp = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate cmp buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate cmp buffer\n"); dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate dest buffer\n"); - src1_host = malloc(BUFFER_SIZE); - if (NULL == src1_host) - { - log_error("Error: could not allocate src1_host buffer\n"); - goto exit; - } - src2_host = malloc(BUFFER_SIZE); - if (NULL == src2_host) - { - log_error("Error: could not allocate src2_host buffer\n"); - goto exit; - } - cmp_host = malloc(BUFFER_SIZE); - if (NULL == cmp_host) + for 
(int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { - log_error("Error: could not allocate cmp_host buffer\n"); - goto exit; - } - dest_host = malloc(BUFFER_SIZE); - if (NULL == dest_host) - { - log_error("Error: could not allocate dest_host buffer\n"); - goto exit; + programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, + cmptype, element_count[vecsize]); + if (!programs[vecsize] || !kernels[vecsize]) + { + return -1; + } + + err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); } + std::vector ref(BUFFER_SIZE); + std::vector sref(BUFFER_SIZE); + std::vector src1_host(BUFFER_SIZE); + std::vector src2_host(BUFFER_SIZE); + std::vector cmp_host(BUFFER_SIZE); + std::vector dest_host(BUFFER_SIZE); + // We block the test as we are running over the range of compare values // "block the test" means "break the test into blocks" if( type_size[stype] == 4 ) @@ -396,111 +388,63 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1); log_info("Testing..."); - d = MTdataHolder(gRandomSeed); uint64_t i; + for (i=0; i < blocks; i+=step) { - void *s1 = clEnqueueMapBuffer( queue, src1, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map src1" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s1, stype, d); - - void *s2 = clEnqueueMapBuffer( queue, src2, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, 
NULL, &err ); - if( err ){ log_error( "Error: Could not map src2" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s2, stype, d); - - void *s3 = clEnqueueMapBuffer( queue, cmp, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map cmp" ); goto exit; } - // Setup the input data to change for each block - initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements); - - if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src2\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; } - - // Create the reference result - err = clEnqueueReadBuffer(queue, src1, CL_TRUE, 0, BUFFER_SIZE, - src1_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src1 to src1_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, src2, CL_TRUE, 0, BUFFER_SIZE, - src2_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src2 to src2_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, cmp, CL_TRUE, 0, BUFFER_SIZE, cmp_host, - 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from cmp to cmp_host failed\n"); - ++s_test_fail; - goto exit; - } + initSrcBuffer(src1_host.data(), stype, d); + initSrcBuffer(src2_host.data(), stype, d); + initCmpBuffer(cmp_host.data(), cmptype, i * cmp_stride, block_elements); + + err = clEnqueueWriteBuffer(queue, src1, CL_FALSE, 0, BUFFER_SIZE, + src1_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write src1"); + + err = clEnqueueWriteBuffer(queue, src2, CL_FALSE, 0, BUFFER_SIZE, + src2_host.data(), 0, 
NULL, NULL); + test_error_count(err, "Error: Could not write src2"); + + err = clEnqueueWriteBuffer(queue, cmp, CL_FALSE, 0, BUFFER_SIZE, + cmp_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write cmp"); Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1]; - (*sfunc)(ref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(ref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1]; - (*sfunc)(sref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(sref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { size_t vector_size = element_count[vecsize] * type_size[stype]; size_t vector_count = (BUFFER_SIZE + vector_size - 1) / vector_size; - if((err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) )) - { log_error( "Error: Cannot set kernel arg dest! 
%d\n", err ); ++s_test_fail; goto exit; } - - // Wipe destination - void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; } - memset( d, -1, BUFFER_SIZE ); - if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ){ log_error( "Error: Could not unmap dest" ); ++s_test_fail; goto exit; } + const cl_int pattern = -1; + err = clEnqueueFillBuffer(queue, dest, &pattern, sizeof(cl_int), 0, + BUFFER_SIZE, 0, nullptr, nullptr); + test_error_count(err, "clEnqueueFillBuffer failed"); + err = clEnqueueNDRangeKernel(queue, kernels[vecsize], 1, NULL, &vector_count, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed errcode:%d\n", err); - ++s_test_fail; - goto exit; - } + test_error_count(err, "clEnqueueNDRangeKernel failed errcode\n"); err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE, - dest_host, 0, NULL, NULL); - if (err) - { - log_error( - "Error: Reading buffer from dest to dest_host failed\n"); - ++s_test_fail; - goto exit; - } + dest_host.data(), 0, NULL, NULL); + test_error_count( + err, "Error: Reading buffer from dest to dest_host failed\n"); - if ((*checkResults[stype])(dest_host, vecsize == 0 ? sref : ref, + if ((*checkResults[stype])(dest_host.data(), + vecsize == 0 ? 
sref.data() : ref.data(), block_elements, element_count[vecsize]) != 0) { log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i); - ++s_test_fail; - goto exit; + return TEST_FAIL; } } // for vecsize } // for i @@ -510,28 +454,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c else log_info(" Wimpy Passed\n\n"); -exit: - if( src1 ) clReleaseMemObject( src1 ); - if( src2 ) clReleaseMemObject( src2 ); - if( cmp ) clReleaseMemObject( cmp ); - if( dest) clReleaseMemObject( dest ); - if( ref ) free(ref ); - if( sref ) free(sref ); - if (src1_host) free(src1_host); - if (src2_host) free(src2_host); - if (cmp_host) free(cmp_host); - if (dest_host) free(dest_host); - - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) { - clReleaseKernel(kernels[vecsize]); - clReleaseProgram(programs[vecsize]); - } - ++s_test_cnt; - if (s_test_fail) - { - err = TEST_FAIL; - gFailCount++; - } return err; } @@ -567,6 +489,16 @@ int test_select_short_short(cl_device_id deviceID, cl_context context, cl_comman { return doTest(queue, context, kshort, kshort, deviceID); } +int test_select_half_ushort(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kushort, deviceID); +} +int test_select_half_short(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kshort, deviceID); +} int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return doTest(queue, context, kuint, kuint, deviceID); @@ -617,26 +549,17 @@ int test_select_double_long(cl_device_id deviceID, cl_context context, cl_comman } test_definition test_list[] = { - ADD_TEST( select_uchar_uchar ), - ADD_TEST( select_uchar_char ), - ADD_TEST( select_char_uchar ), - ADD_TEST( select_char_char ), - ADD_TEST( select_ushort_ushort ), - ADD_TEST( select_ushort_short ), - 
ADD_TEST( select_short_ushort ), - ADD_TEST( select_short_short ), - ADD_TEST( select_uint_uint ), - ADD_TEST( select_uint_int ), - ADD_TEST( select_int_uint ), - ADD_TEST( select_int_int ), - ADD_TEST( select_float_uint ), - ADD_TEST( select_float_int ), - ADD_TEST( select_ulong_ulong ), - ADD_TEST( select_ulong_long ), - ADD_TEST( select_long_ulong ), - ADD_TEST( select_long_long ), - ADD_TEST( select_double_ulong ), - ADD_TEST( select_double_long ), + ADD_TEST(select_uchar_uchar), ADD_TEST(select_uchar_char), + ADD_TEST(select_char_uchar), ADD_TEST(select_char_char), + ADD_TEST(select_ushort_ushort), ADD_TEST(select_ushort_short), + ADD_TEST(select_short_ushort), ADD_TEST(select_short_short), + ADD_TEST(select_half_ushort), ADD_TEST(select_half_short), + ADD_TEST(select_uint_uint), ADD_TEST(select_uint_int), + ADD_TEST(select_int_uint), ADD_TEST(select_int_int), + ADD_TEST(select_float_uint), ADD_TEST(select_float_int), + ADD_TEST(select_ulong_ulong), ADD_TEST(select_ulong_long), + ADD_TEST(select_long_ulong), ADD_TEST(select_long_long), + ADD_TEST(select_double_ulong), ADD_TEST(select_double_long), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/select/test_select.h b/test_conformance/select/test_select.h index c51ae13c..5cd78602 100644 --- a/test_conformance/select/test_select.h +++ b/test_conformance/select/test_select.h @@ -28,18 +28,20 @@ #endif // Defines the set of types we support (no support for double) -typedef enum { +typedef enum +{ kuchar = 0, kchar = 1, kushort = 2, kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kulong = 7, - klong = 8, - kdouble = 9, - kTypeCount // always goes last + khalf = 4, + kuint = 5, + kint = 6, + kfloat = 7, + kulong = 8, + klong = 9, + kdouble = 10, + kTypeCount // always goes last } Type; @@ -56,7 +58,8 @@ extern const size_t type_size[kTypeCount]; extern const Type ctype[kTypeCount][2]; // Reference functions for the primitive (non vector) type -typedef void (*Select)(void *dest, 
void *src1, void *src2, void *cmp, size_t c); +typedef void (*Select)(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t c); extern Select refSelects[kTypeCount][2]; // Reference functions for the primtive type but uses the vector @@ -64,7 +67,8 @@ extern Select refSelects[kTypeCount][2]; extern Select vrefSelects[kTypeCount][2]; // Check functions for each output type -typedef size_t (*CheckResults)(void *out1, void *out2, size_t count, size_t vectorSize); +typedef size_t (*CheckResults)(const void *const out1, const void *const out2, + size_t count, size_t vectorSize); extern CheckResults checkResults[kTypeCount]; // Helpful macros diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index f9641e99..b85f54a7 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" #include "harness/errorHelpers.h" #include @@ -25,29 +24,28 @@ //----------------------------------------- -const char *type_name[kTypeCount] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "ulong", "long", "double" }; +const char *type_name[kTypeCount] = { "uchar", "char", "ushort", "short", + "half", "uint", "int", "float", + "ulong", "long", "double" }; const size_t type_size[kTypeCount] = { - sizeof(cl_uchar), sizeof(cl_char), - sizeof(cl_ushort), sizeof(cl_short), - sizeof(cl_uint), sizeof(cl_int), - sizeof(cl_float), sizeof(cl_ulong), sizeof(cl_long), sizeof( cl_double ) }; + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_half), sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), + sizeof(cl_ulong), sizeof(cl_long), sizeof(cl_double) +}; const Type ctype[kTypeCount][2] = { - { kuchar, kchar }, // uchar - { kuchar, kchar }, // char - { kushort, kshort}, // ushort - { kushort, kshort}, // short - { kuint, kint }, // uint - { kuint, kint }, // int - { kuint, kint }, // float - { kulong, klong }, // ulong - { kulong, klong }, // long - { kulong, klong } // double + { kuchar, kchar }, // uchar + { kuchar, kchar }, // char + { kushort, kshort }, // ushort + { kushort, kshort }, // short + { kushort, kshort }, // half + { kuint, kint }, // uint + { kuint, kint }, // int + { kuint, kint }, // float + { kulong, klong }, // ulong + { kulong, klong }, // long + { kulong, klong } // double }; @@ -55,510 +53,594 @@ const Type ctype[kTypeCount][2] = { // Reference functions //----------------------------------------- -void refselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char 
*)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_short*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*)dest; - x = (cl_int*)src1; - y = (cl_int*)src2; - m = (cl_int*)cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void refselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*)dest; - x = (cl_uint*)src1; - y = (cl_uint*)src2; - m = (cl_int*)cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_int *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_long *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void vrefselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? 
y[i] : x[i]; } -void vrefselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? y[i] : x[i]; } -void vrefselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; } -void vrefselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*)src1; - y = (cl_ushort*)src2; - m = (cl_short*)cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? 
y[i] : x[i]; } -void vrefselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void vrefselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? 
y[i] : x[i]; } -void vrefselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? y[i] : x[i]; } -void vrefselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? 
y[i] : x[i]; } -void vrefselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? 
y[i] : x[i]; } -void vrefselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } -void vrefselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; } -void vrefselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; +} + +void vrefselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; +} + +void vrefselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? 
y[i] : x[i]; } -void vrefselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_ulong *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; } // Define refSelects -Select refSelects[kTypeCount][2] = { - { refselect_1u8u, refselect_1u8 }, // cl_uchar - { refselect_1i8u, refselect_1i8 }, // char +Select refSelects[kTypeCount][2] = { + { refselect_1u8u, refselect_1u8 }, // cl_uchar + { refselect_1i8u, refselect_1i8 }, // char { refselect_1u16u, refselect_1u16 }, // ushort { refselect_1i16u, refselect_1i16 }, // short + { refselect_hhu, refselect_hhi }, // half { refselect_1u32u, refselect_1u32 }, // uint { refselect_1i32u, refselect_1i32 }, // int - { refselect_ffu, refselect_ffi }, // float + { refselect_ffu, refselect_ffi }, // float { refselect_1u64u, refselect_1u64 }, // ulong { refselect_1i64u, refselect_1i64 }, // long - { refselect_ddu, refselect_ddi } // double + { refselect_ddu, refselect_ddi } // double }; // Define vrefSelects (vector refSelects) -Select vrefSelects[kTypeCount][2] = { - { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar - { vrefselect_1i8u, vrefselect_1i8 }, // char +Select vrefSelects[kTypeCount][2] = { + { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar + { vrefselect_1i8u, vrefselect_1i8 }, // char { vrefselect_1u16u, vrefselect_1u16 }, // ushort { vrefselect_1i16u, vrefselect_1i16 }, // short + { vrefselect_hhu, vrefselect_hhi }, // half { vrefselect_1u32u, vrefselect_1u32 }, // uint { vrefselect_1i32u, vrefselect_1i32 }, // int - { vrefselect_ffu, vrefselect_ffi }, // float + { vrefselect_ffu, vrefselect_ffi }, // float { vrefselect_1u64u, vrefselect_1u64 }, // ulong { vrefselect_1i64u, vrefselect_1i64 }, // long - { vrefselect_ddu, vrefselect_ddi } // double + { vrefselect_ddu, vrefselect_ddi } // double }; //----------------------------------------- // Check functions //----------------------------------------- -size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uchar *t = (const cl_uchar *) test; - const cl_uchar *c = (const cl_uchar *) correct; +size_t check_uchar(const void *const test, const void *const correct, + 
size_t count, size_t vector_size) +{ + const cl_uchar *const t = (const cl_uchar *)test; + const cl_uchar *const c = (const cl_uchar *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -576,9 +658,11 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { - const cl_char *t = (const cl_char *) test; - const cl_char *c = (const cl_char *) correct; +size_t check_char(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_char *const t = (const cl_char *)test; + const cl_char *const c = (const cl_char *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -597,9 +681,11 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ushort *t = (const cl_ushort *) test; - const cl_ushort *c = (const cl_ushort *) correct; +size_t check_ushort(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -618,9 +704,11 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_short(void *test, void *correct, size_t count, size_t vector_size) { - const cl_short *t = (const cl_short *) test; - const cl_short *c = (const cl_short *) correct; +size_t check_short(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_short *const t = (const cl_short *)test; + const cl_short *const c = (const cl_short *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -639,9 +727,11 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size) 
return 0; } -size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_uint(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -660,9 +750,11 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { - const cl_int *t = (const cl_int *) test; - const cl_int *c = (const cl_int *) correct; +size_t check_int(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_int *const t = (const cl_int *)test; + const cl_int *const c = (const cl_int *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -682,9 +774,11 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_ulong(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -703,9 +797,11 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { - const cl_long *t = (const cl_long *) test; - const cl_long *c = (const cl_long *) correct; +size_t check_long(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_long *const t = (const 
cl_long *)test; + const cl_long *const c = (const cl_long *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -724,9 +820,36 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_float( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_half(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; + size_t i; + + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) /* Allow nans to be binary different */ + if ((t[i] != c[i]) + && !(isnan(((cl_half *)correct)[i]) + && isnan(((cl_half *)test)[i]))) + { + log_error("\n(check_half) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%4.4x vs 0x%4.4x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } + + return 0; +} + +size_t check_float(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -746,9 +869,11 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size return 0; } -size_t check_double( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_double(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -770,5 +895,7 @@ size_t check_double( void *test, void *correct, size_t count, size_t 
vector_size } CheckResults checkResults[kTypeCount] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_ulong, check_long, check_double }; + check_uchar, check_char, check_ushort, check_short, + check_half, check_uint, check_int, check_float, + check_ulong, check_long, check_double +}; -- cgit v1.2.3 From 2495eca9fa89fcfadb3bcca7fda61b9f20b1f4e3 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:42:02 +0200 Subject: Added cl_khr_fp16 extension support for test_commonfns (#1695) * Added cl_khr_fp16 extension support for commonfns test (issue #142, commonfns) * Added missing header due to presubmit check * Corrected radians/degrees ulp calculations + cosmetic fixes * Corrected presubmit code format * Corrections related to code review * Moved string format helper to test_common in separate header * Added clang format for last commit * Corrections related to code review * Modified mix verification procedure for half type to only report max error * Removed redundant condition for logging mix verification * Corrected generator limits for half tests --- test_common/harness/stringHelpers.h | 6 +- test_conformance/commonfns/main.cpp | 43 ++++-- test_conformance/commonfns/test_base.h | 86 +++++++++--- test_conformance/commonfns/test_binary_fn.cpp | 74 +++++++--- test_conformance/commonfns/test_clamp.cpp | 83 ++++++++--- test_conformance/commonfns/test_mix.cpp | 120 ++++++++++------ test_conformance/commonfns/test_smoothstep.cpp | 123 ++++++++++------ test_conformance/commonfns/test_step.cpp | 61 +++++--- test_conformance/commonfns/test_unary_fn.cpp | 155 +++++++++++++-------- .../relationals/test_comparisons_fp.cpp | 33 +---- 10 files changed, 526 insertions(+), 258 deletions(-) diff --git a/test_common/harness/stringHelpers.h b/test_common/harness/stringHelpers.h index 3f6bf64d..a02624d6 100644 --- a/test_common/harness/stringHelpers.h +++ b/test_common/harness/stringHelpers.h @@ -14,8 +14,8 @@ // 
limitations under the License. // -#ifndef BASIC_UTILS_H -#define BASIC_UTILS_H +#ifndef STRING_HELPERS_H +#define STRING_HELPERS_H #include #include @@ -38,4 +38,4 @@ inline std::string str_sprintf(const std::string &str, Args... args) return std::string(buffer.get(), buffer.get() + s - 1); } -#endif // BASIC_UTIL_H +#endif // STRING_HELPERS_H diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp index 3e4b0b8e..645d3f70 100644 --- a/test_conformance/commonfns/main.cpp +++ b/test_conformance/commonfns/main.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,8 +18,10 @@ #include #include "procs.h" #include "test_base.h" +#include "harness/kernelHelpers.h" std::map BaseFunctionTest::type2name; +cl_half_rounding_mode BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount]; int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3}; @@ -45,17 +47,38 @@ test_definition test_list[] = { const int test_num = ARRAY_SIZE( test_list ); -int main(int argc, const char *argv[]) +test_status InitCL(cl_device_id device) { - initVecSizes(); - - if (BaseFunctionTest::type2name.empty()) + if (is_extension_available(device, "cl_khr_fp16")) { - BaseFunctionTest::type2name[sizeof(half)] = "half"; - BaseFunctionTest::type2name[sizeof(float)] = "float"; - BaseFunctionTest::type2name[sizeof(double)] = "double"; + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTZ; + } + else + { + 
log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } } - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return TEST_PASS; } +int main(int argc, const char *argv[]) +{ + initVecSizes(); + + BaseFunctionTest::type2name[sizeof(half)] = "half"; + BaseFunctionTest::type2name[sizeof(float)] = "float"; + BaseFunctionTest::type2name[sizeof(double)] = "double"; + + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); +} diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index 44291042..be36ed26 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -19,27 +19,23 @@ #include #include #include +#include #include #include -#include "harness/deviceInfo.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" - template using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const, const int num, const int vs, const int vp); - template using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num); - using half = cl_half; - struct BaseFunctionTest { BaseFunctionTest(cl_device_id device, cl_context context, @@ -61,9 +57,9 @@ struct BaseFunctionTest bool vecParam; static std::map type2name; + static cl_half_rounding_mode halfRoundingMode; }; - struct MinTest : BaseFunctionTest { MinTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -74,7 +70,6 @@ struct MinTest : BaseFunctionTest cl_int Run() override; }; - struct MaxTest : BaseFunctionTest { MaxTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -85,7 +80,6 @@ struct MaxTest : BaseFunctionTest cl_int Run() override; }; - struct ClampTest : BaseFunctionTest { ClampTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -96,7 +90,6 @@ struct ClampTest : BaseFunctionTest cl_int Run() override; }; - struct DegreesTest : BaseFunctionTest { DegreesTest(cl_device_id 
device, cl_context context, cl_command_queue queue, @@ -107,7 +100,6 @@ struct DegreesTest : BaseFunctionTest cl_int Run() override; }; - struct RadiansTest : BaseFunctionTest { RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -118,7 +110,6 @@ struct RadiansTest : BaseFunctionTest cl_int Run() override; }; - struct SignTest : BaseFunctionTest { SignTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -129,7 +120,6 @@ struct SignTest : BaseFunctionTest cl_int Run() override; }; - struct SmoothstepTest : BaseFunctionTest { SmoothstepTest(cl_device_id device, cl_context context, @@ -141,7 +131,6 @@ struct SmoothstepTest : BaseFunctionTest cl_int Run() override; }; - struct StepTest : BaseFunctionTest { StepTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -152,7 +141,6 @@ struct StepTest : BaseFunctionTest cl_int Run() override; }; - struct MixTest : BaseFunctionTest { MixTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -163,19 +151,71 @@ struct MixTest : BaseFunctionTest cl_int Run() override; }; +template float UlpFn(const T &val, const double &r) +{ + if (std::is_same::value) + { + return Ulp_Error_Half(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error_Double(val, r); + } + else + { + log_error("UlpFn: unsupported data type\n"); + } + + return -1.f; // wrong val +} + +template inline double conv_to_dbl(const T &val) +{ + if (std::is_same::value) + return (double)cl_half_to_float(val); + else + return (double)val; +} -template -std::string string_format(const std::string &format, Args... args) +template inline double conv_to_flt(const T &val) { - int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) 
+ 1; - if (sformat <= 0) - throw std::runtime_error("string_format: string processing error."); - auto format_size = static_cast(sformat); - std::unique_ptr buffer(new char[format_size]); - std::snprintf(buffer.get(), format_size, format.c_str(), args...); - return std::string(buffer.get(), buffer.get() + format_size - 1); + if (std::is_same::value) + return (float)cl_half_to_float(val); + else + return (float)val; } +template inline half conv_to_half(const T &val) +{ + if (std::is_floating_point::value) + return cl_half_from_float(val, BaseFunctionTest::halfRoundingMode); + return 0; +} + +template bool isfinite_fp(const T &v) +{ + if (std::is_same::value) + { + // Extract FP16 exponent and mantissa + uint16_t h_exp = (((half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((half)v) & 0x3FF; + + // !Inf test + return !(h_exp == 0x1F && h_mant == 0); + } + else + { +#if !defined(_WIN32) + return std::isfinite(v); +#else + return isfinite(v); +#endif + } +} template int MakeAndRunTest(cl_device_id device, cl_context context, diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index 1eb12f73..a6c75647 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -22,6 +22,7 @@ #include "harness/deviceInfo.h" #include "harness/typeWrappers.h" +#include "harness/stringHelpers.h" #include "procs.h" #include "test_base.h" @@ -53,7 +54,6 @@ const char *binary_fn_code_pattern_v3_scalar = " vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n" "}\n"; - template int test_binary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -105,6 +105,16 @@ int test_binary_fn(cl_device_id device, cl_context context, input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -125,22 +135,22 @@ int test_binary_fn(cl_device_id device, cl_context context, { std::string str = binary_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } else { std::string str = binary_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } } else { // do regular std::string str = binary_fn_code_pattern; - kernelSource = string_format( + kernelSource = str_sprintf( str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], tname.c_str(), vecSecParam ? 
vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], fnName.c_str()); @@ -203,13 +213,20 @@ int max_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] < y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -227,13 +244,20 @@ int min_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] > y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. 
" + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -246,6 +270,13 @@ int min_verify(const T* const x, const T* const y, const T* const out, cl_int MaxTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + max_verify); + test_error(error, "MaxTest::Run failed"); + } error = test_binary_fn(device, context, queue, num_elems, fnName.c_str(), vecParam, max_verify); @@ -265,6 +296,13 @@ cl_int MaxTest::Run() cl_int MinTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + min_verify); + test_error(error, "MinTest::Run failed"); + } error = test_binary_fn(device, context, queue, num_elems, fnName.c_str(), vecParam, min_verify); diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp index 0e96fb60..1bf40677 100644 --- a/test_conformance/commonfns/test_clamp.cpp +++ b/test_conformance/commonfns/test_clamp.cpp @@ -26,12 +26,10 @@ #include "procs.h" #include "test_base.h" - #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 #endif - #define CLAMP_KERNEL(type) \ const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ "__kernel void test_clamp(__global " #type " *x, __global " #type \ @@ -64,6 +62,14 @@ "vload3(tid,maxval)), tid, dst);\n" \ "}\n"; +#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" +CLAMP_KERNEL(half) +CLAMP_KERNEL_V(half, 2) +CLAMP_KERNEL_V(half, 4) +CLAMP_KERNEL_V(half, 8) +CLAMP_KERNEL_V(half, 16) +CLAMP_KERNEL_V3(half, 3) +#undef EMIT_PRAGMA_DIRECTIVE #define EMIT_PRAGMA_DIRECTIVE " " CLAMP_KERNEL(float) @@ -83,6 +89,10 @@ CLAMP_KERNEL_V(double, 16) CLAMP_KERNEL_V3(double, 3) #undef EMIT_PRAGMA_DIRECTIVE +const 
char *clamp_half_codes[] = { + clamp_half_kernel_code, clamp_half2_kernel_code, clamp_half4_kernel_code, + clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code +}; const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, @@ -96,21 +106,42 @@ const char *clamp_double_codes[] = { namespace { - template int verify_clamp(const T *const x, const T *const minval, const T *const maxval, const T *const outptr, int n) { - T t; - for (int i = 0; i < n; i++) + if (std::is_same::value) + { + float t; + for (int i = 0; i < n; i++) + { + t = std::min( + std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])), + cl_half_to_float(maxval[i])); + if (t != cl_half_to_float(outptr[i])) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", + i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]), + cl_half_to_float(maxval[i]), t, + cl_half_to_float(outptr[i])); + return -1; + } + } + } + else { - t = std::min(std::max(x[i], minval[i]), maxval[i]); - if (t != outptr[i]) + T t; + for (int i = 0; i < n; i++) { - log_error( - "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, - x[i], minval[i], maxval[i], t, outptr[i]); - return -1; + t = std::min(std::max(x[i], minval[i]), maxval[i]); + if (t != outptr[i]) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. 
%a\n", + i, x[i], minval[i], maxval[i], t, outptr[i]); + return -1; + } } } @@ -118,7 +149,6 @@ int verify_clamp(const T *const x, const T *const minval, const T *const maxval, } } - template int test_clamp_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) @@ -169,6 +199,17 @@ int test_clamp_fn(cl_device_id device, cl_context context, input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + for (j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[2][j] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[1][j]), fval, d)); + } + } for (i = 0; i < 3; i++) { @@ -194,9 +235,16 @@ int test_clamp_fn(cl_device_id device, cl_context context, "test_clamp"); test_error(err, "Unable to create kernel"); } + else if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_half_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } - log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", - i, g_arrVecSizes[i], i); + log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n", + tname.c_str(), i, g_arrVecSizes[i], i); fflush(stdout); for (j = 0; j < 4; j++) @@ -239,10 +287,14 @@ int test_clamp_fn(cl_device_id device, cl_context context, return err; } - cl_int ClampTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); + } error = test_clamp_fn(device, context, queue, num_elems); test_error(error, "ClampTest::Run failed"); @@ -256,7 +308,6 @@ cl_int ClampTest::Run() return error; } - int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git 
a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 92c10100..2a06e43d 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -18,6 +18,8 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" @@ -52,33 +54,42 @@ const char *mix_fn_code_pattern_v3_scalar = " vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n" "}\n"; - #define MAX_ERR 1e-3 namespace { - template int verify_mix(const T *const inptrX, const T *const inptrY, const T *const inptrA, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r; - float delta = 0.0f; + double r, o; + float delta = 0.f, max_delta = 0.f; int i; if (vecParam) { for (i = 0; i < n * veclen; i++) { - r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]); - delta = fabs(double(r - outptr[i])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[i]) + + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i])) + * conv_to_dbl(inptrA[i])); + + o = conv_to_dbl(outptr[i]); + delta = fabs(double(r - o)) / r; + if (!std::is_same::value) + { + if (delta > MAX_ERR) + { + log_error("%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + return -1; + } + } + else { - log_error( - "%d) verification error: mix(%a, %a, %a) = *%a vs. 
%a\n", i, - inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); - return -1; + max_delta = std::max(max_delta, delta); } } } @@ -90,25 +101,40 @@ int verify_mix(const T *const inptrX, const T *const inptrY, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[i]); - delta = fabs(double(r - outptr[vi])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[vi]) + + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi])) + * conv_to_dbl(inptrA[i])); + delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r; + if (!std::is_same::value) { - log_error("{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, - outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); + return -1; + } + } + else + { + max_delta = std::max(max_delta, delta); } } } } + // due to the fact that accuracy of mix for cl_khr_fp16 is implementation + // defined this test only reports maximum error without testing maximum + // error threshold + if (std::is_same::value) + log_error("mix half verification result, max delta: %a\n", max_delta); + return 0; } } // namespace - template int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -120,7 +146,7 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, std::vector kernels; int err, i; - MTdataHolder d = MTdataHolder(gRandomSeed); + MTdataHolder d(gRandomSeed); assert(BaseFunctionTest::type2name.find(sizeof(T)) != BaseFunctionTest::type2name.end()); @@ -142,19 +168,32 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, test_error(err, "clCreateBuffer failed"); } - for (i = 0; i < num_elements; i++) - { - input_ptr[0][i] = (T)genrand_real1(d); - input_ptr[1][i] = 
(T)genrand_real1(d); - input_ptr[2][i] = (T)genrand_real1(d); - } - std::string pragma_str; if (std::is_same::value) { pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; } + if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[1][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[2][i] = conv_to_half((float)genrand_real1(d)); + } + } + else + { + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = (T)genrand_real1(d); + input_ptr[1][i] = (T)genrand_real1(d); + input_ptr[2][i] = (T)genrand_real1(d); + } + } + for (i = 0; i < 3; i++) { err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, @@ -164,7 +203,6 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, } char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; - for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; @@ -174,15 +212,15 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, { std::string str = mix_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = mix_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -190,10 +228,10 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, // regular path std::string str = mix_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i], - tname.c_str(), vecParam ? 
vecSizeNames[i] : "", - tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i], + tname.c_str(), vecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -242,10 +280,14 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, return err; } - cl_int MixTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); + } error = test_mix_fn(device, context, queue, num_elems, vecParam); test_error(error, "MixTest::Run failed"); @@ -260,7 +302,6 @@ cl_int MixTest::Run() return error; } - int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -268,7 +309,6 @@ int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index 31948d3f..5afc2d0f 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -18,10 +18,11 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *smoothstep_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s " @@ -53,38 +54,43 @@ const char *smoothstep_fn_code_pattern_v3_scalar = " vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - #define MAX_ERR (1e-5f) namespace { - template int verify_smoothstep(const T *const edge0, const T *const edge1, const T *const x, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r, t; - float 
delta = 0; + double r, t; + float delta = 0, max_delta = 0; if (vecParam) { for (int i = 0; i < n * veclen; i++) { - t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[i]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[i])); + if (!std::is_same::value) { - log_error("%d) verification error: smoothstep(%a, %a, %a) = " - "*%a vs. %a\n", - i, x[i], edge0[i], edge1[i], r, outptr[i]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "%d) verification error: smoothstep(%a, %a, %a) = " + "*%a vs. %a\n", + i, x[i], edge0[i], edge1[i], r, outptr[i]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } else @@ -95,32 +101,48 @@ int verify_smoothstep(const T *const edge0, const T *const edge1, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[vi]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[vi])); + + if (!std::is_same::value) { - log_error("{%d, element %d}) verification error: " - "smoothstep(%a, %a, %a) = *%a vs. %a\n", - ii, j, x[vi], edge0[i], edge1[i], r, outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error("{%d, element %d}) verification error: " + "smoothstep(%a, %a, %a) = *%a vs. 
%a\n", + ii, j, x[vi], edge0[i], edge1[i], r, + outptr[vi]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } } + + // due to the fact that accuracy of smoothstep for cl_khr_fp16 is + // implementation defined this test only reports maximum error without + // testing maximum error threshold + if (std::is_same::value) + log_error("smoothstep half verification result, max delta: %a\n", + max_delta); + return 0; } } - template int test_smoothstep_fn(cl_device_id device, cl_context context, - cl_command_queue queue, int n_elems, bool vecParam) + cl_command_queue queue, const int n_elems, + const bool vecParam) { clMemWrapper streams[4]; std::vector input_ptr[3], output_ptr; @@ -170,6 +192,17 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d)); + input_ptr[1][i] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d)); + input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d)); + } + } for (i = 0; i < 3; i++) { @@ -179,7 +212,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, test_error(err, "Unable to write input buffer"); } - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; for (i = 0; i < kTotalVecCount; i++) { @@ -190,15 +223,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, { std::string str = smoothstep_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = smoothstep_fn_code_pattern_v3_scalar; 
kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -206,11 +239,12 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, // regular path std::string str = smoothstep_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } + const char *programPtr = kernelSource.c_str(); err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1, @@ -259,10 +293,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, return err; } - cl_int SmoothstepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_smoothstep_fn(device, context, queue, num_elems, + vecParam); + test_error(error, "SmoothstepTest::Run failed"); + } error = test_smoothstep_fn(device, context, queue, num_elems, vecParam); @@ -278,7 +317,6 @@ cl_int SmoothstepTest::Run() return error; } - int test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -286,7 +324,6 @@ int test_smoothstep(cl_device_id device, cl_context context, "smoothstep", true); } - int test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index dc91766e..1cfa96ea 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -18,10 +18,11 @@ #include #include +#include 
"harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *step_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *edge, " "__global %s%s *x, __global %s%s *dst)\n" @@ -48,7 +49,6 @@ const char *step_fn_code_pattern_v3_scalar = " vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - namespace { template @@ -62,8 +62,8 @@ int verify_step(const T *const inptrA, const T *const inptrB, { for (int i = 0; i < n * veclen; i++) { - r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0; - if (r != outptr[i]) return -1; + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[i])) ? 0.0 : 1.0; + if (r != conv_to_dbl(outptr[i])) return -1; } } else @@ -73,24 +73,31 @@ int verify_step(const T *const inptrA, const T *const inptrB, int ii = i / veclen; for (int j = 0; j < veclen && i < n; ++j, ++i) { - r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f; - if (r != outptr[i]) + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[ii])) ? 0.0f + : 1.0f; + if (r != conv_to_dbl(outptr[i])) { - log_error("Failure @ {%d, element %d}: step(%a,%a) -> *%a " - "vs %a\n", - ii, j, inptrA[ii], inptrB[i], r, outptr[i]); + if (std::is_same::value) + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, conv_to_flt(inptrA[ii]), + conv_to_flt(inptrB[i]), r, conv_to_flt(outptr[i])); + else + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, inptrA[ii], inptrB[i], r, outptr[i]); return -1; } } } } - return 0; } } - template int test_step_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -140,6 +147,16 @@ int test_step_fn(cl_device_id device, cl_context context, input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = 
conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][i] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -160,15 +177,15 @@ int test_step_fn(cl_device_id device, cl_context context, { std::string str = step_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } else { std::string str = step_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } } else @@ -176,9 +193,9 @@ int test_step_fn(cl_device_id device, cl_context context, // regular path std::string str = step_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? 
vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -229,10 +246,14 @@ int test_step_fn(cl_device_id device, cl_context context, return err; } - cl_int StepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_step_fn(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); + } error = test_step_fn(device, context, queue, num_elems, vecParam); test_error(error, "StepTest::Run failed"); @@ -247,7 +268,6 @@ cl_int StepTest::Run() return error; } - int test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -255,7 +275,6 @@ int test_step(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index fed4389d..91b5c215 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -21,6 +21,7 @@ #include #include "harness/deviceInfo.h" +#include "harness/stringHelpers.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -30,7 +31,6 @@ #define M_PI 3.14159265358979323846264338327950288 #endif - // clang-format off const char *unary_fn_code_pattern = "%s\n" /* optional pragma */ @@ -51,23 +51,10 @@ const char *unary_fn_code_pattern_v3 = "}\n"; // clang-format on - #define MAX_ERR 2.0f namespace { - -template float UlpFn(const T &val, const double &r) -{ - if (std::is_same::value) - return Ulp_Error_Double(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); -} - - template int verify_degrees(const T *const inptr, const T *const outptr, int n) { @@ -77,7 +64,11 @@ int verify_degrees(const T *const inptr, 
const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (180.0 / M_PI) * inptr[i]; + r = (180.0 / M_PI) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; error = UlpFn(outptr[i], r); @@ -88,21 +79,32 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same::value) + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template int verify_radians(const T *const inptr, const T *const outptr, int n) { @@ -112,8 +114,14 @@ int verify_radians(const T *const inptr, const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (M_PI / 180.0) * inptr[i]; - error = Ulp_Error(outptr[i], r); + r = (M_PI / 180.0) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; + + error = UlpFn(outptr[i], r); + if (fabsf(error) > max_error) { max_error = error; @@ -121,41 +129,51 @@ int verify_radians(const T *const inptr, const T *const 
outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same::value) + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template int verify_sign(const T *const inptr, const T *const outptr, int n) { - T r = 0; + double r = 0; for (int i = 0; i < n; i++) { - if (inptr[i] > 0.0f) + if (conv_to_dbl(inptr[i]) > 0.0f) r = 1.0; - else if (inptr[i] < 0.0f) + else if (conv_to_dbl(inptr[i]) < 0.0f) r = -1.0; else r = 0.0; - if (r != outptr[i]) return -1; + if (r != conv_to_dbl(outptr[i])) return -1; } return 0; } } - template int test_unary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -207,33 +225,38 @@ int test_unary_fn(cl_device_id device, cl_context context, get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d); } } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = conv_to_half(get_random_float( + (float)(-10000.f * M_PI), (float)(10000.f * M_PI), d)); + } + } err = clEnqueueWriteBuffer(queue, streams[0], true, 0, 
sizeof(T) * num_elements, &input_ptr.front(), 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + test_error(err, "clEnqueueWriteBuffer failed\n"); for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; if (i >= kVectorSizeCount) { std::string str = unary_fn_code_pattern_v3; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), fnName.c_str()); } else { std::string str = unary_fn_code_pattern; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), - vecSizeNames[i], fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), + vecSizeNames[i], fnName.c_str()); } /* Create kernels */ @@ -290,11 +313,18 @@ int test_unary_fn(cl_device_id device, cl_context context, return err; } - cl_int DegreesTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_degrees); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); test_error(error, "DegreesTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -307,11 +337,18 @@ cl_int DegreesTest::Run() return error; } - cl_int RadiansTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_radians); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, 
context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); test_error(error, "RadiansTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -324,11 +361,18 @@ cl_int RadiansTest::Run() return error; } - cl_int SignTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_sign); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); test_error(error, "SignTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -341,7 +385,6 @@ cl_int SignTest::Run() return error; } - int test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -349,7 +392,6 @@ int test_degrees(cl_device_id device, cl_context context, "degrees"); } - int test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -357,7 +399,6 @@ int test_radians(cl_device_id device, cl_context context, "radians"); } - int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index c3d8f67a..73ff3dd9 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -22,6 +22,8 @@ #include #include +#include "harness/stringHelpers.h" + #include #include "test_comparisons_fp.h" @@ -83,29 +85,6 @@ extension, // clang-format on -std::string concat_kernel(const char* sstr[], int num) -{ - std::string res; - for (int i = 0; i < num; i++) res += 
std::string(sstr[i]); - return res; -} - -template -std::string string_format(const std::string& format, Args... args) -{ - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) - + 1; // Extra space for '\0' - if (size_s <= 0) - { - throw std::runtime_error("Error during formatting."); - } - auto size = static_cast(size_s); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - return std::string(buf.get(), - buf.get() + size - 1); // We don't want the '\0' inside -} - template bool verify(const T& A, const T& B) { return F()(A, B); @@ -226,14 +205,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKerPat_3, sizeof(equivTestKerPat_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKerPatLessGreater_3, sizeof(equivTestKerPatLessGreater_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } else @@ -243,14 +222,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKernPat, sizeof(equivTestKernPat) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKernPatLessGreater, sizeof(equivTestKernPatLessGreater) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } -- cgit v1.2.3 From 43c244f01de00e4d3beb63c4b9167eccfbdeaf77 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:42:56 +0200 Subject: Added cl_khr_fp16 extension support for test vector_times_scalar from spirv_new (#1757) * Added cl_khr_fp16 support for vector_times_scalar from spirv_new 
(issue #142, spirv_new) * Logging correction --- .../spirv_asm/vector_times_scalar_half.spvasm32 | 46 ++++++++++++++++++++ .../spirv_asm/vector_times_scalar_half.spvasm64 | 50 ++++++++++++++++++++++ .../spirv_new/test_op_vector_times_scalar.cpp | 14 ++++++ test_conformance/spirv_new/types.hpp | 4 ++ 4 files changed, 114 insertions(+) create mode 100644 test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 create mode 100644 test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 new file mode 100644 index 00000000..6fda7d8f --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 25 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void 
None %15 + %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %16 = OpLabel + %17 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %18 = OpCompositeExtract %uint %17 0 + %19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %18 + %20 = OpLoad %v4half %19 Aligned 8 + %21 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %18 + %22 = OpLoad %half %21 Aligned 2 + %23 = OpVectorTimesScalar %v4half %20 %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %18 + OpStore %24 %23 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 new file mode 100644 index 00000000..fa2d5221 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 28 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer 
CrossWorkgroup %v4half + %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %16 + %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %21 + %23 = OpLoad %v4half %22 Aligned 8 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %21 + %25 = OpLoad %half %24 Aligned 2 + %26 = OpVectorTimesScalar %v4half %23 %25 + %27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %21 + OpStore %27 %26 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp index 0859668c..0be4e8b7 100644 --- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp +++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp @@ -17,6 +17,8 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos #include #include +using half = cl_half; + template int test_vector_times_scalar(cl_device_id deviceID, cl_context context, @@ -32,6 +34,16 @@ int test_vector_times_scalar(cl_device_id deviceID, } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; int num = (int)h_lhs.size(); size_t lhs_bytes = num * sizeof(Tv); @@ -171,5 +183,7 @@ int test_vector_times_scalar(cl_device_id deviceID, lhs, rhs); \ } + 
TEST_VECTOR_TIMES_SCALAR(float, 4) TEST_VECTOR_TIMES_SCALAR(double, 4) +TEST_VECTOR_TIMES_SCALAR(half, 4) diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp index e7fceba0..27a45c5b 100644 --- a/test_conformance/spirv_new/types.hpp +++ b/test_conformance/spirv_new/types.hpp @@ -43,6 +43,8 @@ VEC_NOT_EQ_FUNC(cl_float, 2) VEC_NOT_EQ_FUNC(cl_float, 4) VEC_NOT_EQ_FUNC(cl_double, 2) VEC_NOT_EQ_FUNC(cl_double, 4) +VEC_NOT_EQ_FUNC(cl_half, 2) +VEC_NOT_EQ_FUNC(cl_half, 4) template bool isNotEqual(const T &lhs, const T &rhs) @@ -109,6 +111,8 @@ GENRAND_REAL_FUNC(cl_float, 2) GENRAND_REAL_FUNC(cl_float, 4) GENRAND_REAL_FUNC(cl_double, 2) GENRAND_REAL_FUNC(cl_double, 4) +GENRAND_REAL_FUNC(cl_half, 2) +GENRAND_REAL_FUNC(cl_half, 4) template<> inline cl_half genrandReal(RandomSeed &seed) { -- cgit v1.2.3 From 73ead9da04c2983288799effaaa12dbd02ae321d Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:43:44 +0200 Subject: Added cl_khr_fp16 extension support for test_op_negate from spirv_new (#1762) * Added cl_khr_fp16 extension support for test_op_negate from spirv_new (issue #142) * Added clang format fix --- .../spirv_new/spirv_asm/op_neg_half.spvasm32 | 35 +++++++++++++++++++ .../spirv_new/spirv_asm/op_neg_half.spvasm64 | 39 +++++++++++++++++++++ test_conformance/spirv_new/test_op_negate.cpp | 40 +++++++++++++--------- test_conformance/spirv_new/types.hpp | 2 ++ 4 files changed, 100 insertions(+), 16 deletions(-) create mode 100644 test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 create mode 100644 test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 new file mode 100644 index 00000000..49127187 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 @@ -0,0 +1,35 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 17 +; 
Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %11 = OpLabel + %12 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %13 = OpCompositeExtract %uint %12 0 + %14 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %13 + %15 = OpLoad %half %14 + %16 = OpFNegate %half %15 + OpStore %14 %16 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 new file mode 100644 index 00000000..9c7e3d6d --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 @@ -0,0 +1,39 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 20 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + 
%ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half + %ulong_32 = OpConstant %ulong 32 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %12 = OpLabel + %13 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %14 = OpCompositeExtract %ulong %13 0 + %15 = OpShiftLeftLogical %ulong %14 %ulong_32 + %16 = OpShiftRightArithmetic %ulong %15 %ulong_32 + %17 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %16 + %18 = OpLoad %half %17 + %19 = OpFNegate %half %18 + OpStore %17 %19 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp index e3dc1f34..5009be93 100644 --- a/test_conformance/spirv_new/test_op_negate.cpp +++ b/test_conformance/spirv_new/test_op_negate.cpp @@ -32,6 +32,15 @@ int test_negation(cl_device_id deviceID, return 0; } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } cl_int err = CL_SUCCESS; int num = (int)h_in.size(); @@ -73,29 +82,28 @@ int test_negation(cl_device_id deviceID, return 0; } -#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ - TEST_SPIRV_FUNC(OP##_##TYPE) \ - { \ - int num = 1 << 20; \ - std::vector in(num); \ - RandomSeed seed(gRandomSeed); \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand(seed); \ - } \ - return test_negation(deviceID, \ - context, \ - queue, \ - #TYPE, \ - #OP, \ - in, FUNC); \ - } \ +#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ + TEST_SPIRV_FUNC(OP##_##TYPE) \ + { \ + int num = 1 << 20; \ + std::vector in(num); \ + RandomSeed 
seed(gRandomSeed); \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand(seed); \ + } \ + return test_negation(deviceID, context, queue, #TYPE, #OP, in, \ + FUNC); \ + } +#define TEST_NEG_HALF TEST_NEGATION(half, cl_half, op_neg, negOpHalf) #define TEST_NEG(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_neg, negOp) #define TEST_NOT(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_not, notOp) #define TEST_NEG_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_neg, (negOpVec)) #define TEST_NOT_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_not, (notOpVec)) +TEST_NEG_HALF TEST_NEG(float) TEST_NEG(double) TEST_NEG(int) diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp index 27a45c5b..939e6fa8 100644 --- a/test_conformance/spirv_new/types.hpp +++ b/test_conformance/spirv_new/types.hpp @@ -161,6 +161,8 @@ Tv negOp(Tv in) return -in; } +inline cl_half negOpHalf(cl_half v) { return v ^ 0x8000; } + template Tv notOp(Tv in) { -- cgit v1.2.3 From fee6d6bb6643f7f5e2b6dab46486c903e2a71680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 27 Jun 2023 17:47:24 +0200 Subject: Command buffer re-enqueue testing. (#1738) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Command buffer re-enqueue testing. Signed-off-by: Paweł Jastrzębski * Remove reenqueue tests and add reenqueue to existing tests. Signed-off-by: Paweł Jastrzębski * Add re-enqueue for copy and barrier tests. Signed-off-by: Paweł Jastrzębski * Fix review comments. Applied review comments for command buffer re-enqueue testing: - Add second clEnqueueCommandBufferKHR for all tests - Reinitialise memory before second enqueue of command buffers - Add different patterns for second enqueue of command buffers Signed-off-by: Paweł Jastrzębski * Fix verification patterns for second enqueue tests. Signed-off-by: Paweł Jastrzębski * Reinitialise output memory for second command buffer re-enqueue. 
Signed-off-by: Paweł Jastrzębski * Fix cast for conversion from 'const cl_char' to 'const cl_uint. Signed-off-by: Paweł Jastrzębski * Fix compilation error for MSVC. Signed-off-by: Paweł Jastrzębski * Reinitialise in_mem and out_mem with zero. Signed-off-by: Paweł Jastrzębski * Fix cast for conversion from 'const cl_int' to 'const cl_uint'. Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../cl_khr_command_buffer/basic_command_buffer.cpp | 25 ++- .../command_buffer_test_barrier.cpp | 36 +++- .../command_buffer_test_copy.cpp | 196 ++++++++++++++++++--- .../command_buffer_test_fill.cpp | 68 +++++-- 4 files changed, 277 insertions(+), 48 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp index 43734da0..6c02f9f7 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -201,14 +201,33 @@ struct BasicEnqueueTest : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(num_elements); + std::vector output_data_1(num_elements); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + const cl_int new_pattern = 12; + error = clEnqueueFillBuffer(queue, in_mem, &new_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR 
failed"); + + std::vector output_data_2(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(new_pattern, output_data_2[i], i); } return CL_SUCCESS; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp index d73fc9ce..82ff16f0 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp @@ -70,15 +70,42 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(num_elements); + std::vector output_data_1(num_elements); error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, - data_size(), output_data.data(), 1, &event, - nullptr); + data_size(), output_data_1.data(), 1, + &event, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = + clEnqueueFillBuffer(queue, in_mem, &zero_pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = + clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector 
output_data_2(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data_2.data(), 1, + &event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data_2[i], i); } return CL_SUCCESS; @@ -106,6 +133,7 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest } const cl_int pattern = 0x16; + const cl_int zero_pattern = 0x0; clCommandQueueWrapper out_of_order_queue; clCommandBufferWrapper out_of_order_command_buffer; clEventWrapper event; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp index 102ae761..7a1f0e6d 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp @@ -38,7 +38,7 @@ struct CopyImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillImageKHR(command_buffer, nullptr, src_image, - fill_color, origin, region, 0, + fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -56,13 +56,38 @@ struct CopyImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); - error = clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, - 0, output_data.data(), 0, nullptr, nullptr); + std::vector output_data_1(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = 
clEnqueueFillImage(queue, src_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueFillImage(queue, dst_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -97,8 +122,12 @@ struct CopyImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x05; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x05; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x1; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper src_image; clMemWrapper dst_image; @@ -111,7 +140,7 @@ struct CopyBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -127,20 +156,45 @@ struct CopyBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size()); + std::vector 
output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const cl_char pattern = 0x14; + const cl_char pattern_1 = 0x14; + const cl_char pattern_2 = 0x28; }; struct CopyBufferToImageKHR : public BasicCommandBufferTest @@ -150,7 +204,7 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, buffer, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, buffer, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -168,15 +222,40 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector 
output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadImage failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillImage(queue, image, &fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -211,7 +290,14 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x11; + const cl_char pattern_1 = 0x11; + const cl_char pattern_2 = 0x22; + + const cl_uint fill_color_2[4] = { static_cast(pattern_2), + static_cast(pattern_2), + static_cast(pattern_2), + static_cast(pattern_2) }; + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper buffer; @@ -225,7 +311,7 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = - 
clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -243,16 +329,39 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_1[i], i); + } + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -287,8 +396,12 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x12; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x12; + 
const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x24; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -302,7 +415,7 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -319,14 +432,38 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - 
CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -353,7 +490,8 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x13; + const cl_char pattern_1 = 0x13; + const cl_char pattern_2 = 0x26; clMemWrapper in_mem; clMemWrapper out_mem; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp index 88e97a27..0ba8055a 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp @@ -35,7 +35,7 @@ struct FillImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = - clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -47,14 +47,34 @@ struct FillImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + 
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -85,8 +105,12 @@ struct FillImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x10; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x10; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x20; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -99,7 +123,7 @@ struct FillBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -111,20 +135,40 @@ struct FillBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size()); + std::vector output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue 
of command buffer */ + + clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), 0, + data_size(), 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const char pattern = 0x15; + const char pattern_1 = 0x15; + const char pattern_2 = 0x30; }; }; -- cgit v1.2.3 From 56974a58585b8c66d9beddccd984990e45ca0ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 27 Jun 2023 17:54:14 +0200 Subject: Add global offset tests for cl_khr_command_buffer_mutable_dispatch. (#1743) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add global offset tests for cl_khr_command_buffer_mutable_dispatch. Signed-off-by: Paweł Jastrzębski * Add kernel with observable output. We should check that there's some observable output from the kernel as a result of the change to global work offset, not just that clGetMutableCommandInfoKHR has been updated. E.g we could call get_global_offset() inside of the kernel, write something to a buffer based on that, and read the buffer after the command-buffer enqueue has finished. Signed-off-by: Paweł Jastrzębski * Fix review comments. 
Applied review comments for mutable dispatch global offset test: - clFinish to ensure command-buffer has finished executing for calling clUpdateMutableCommandsKHR - Change variable and constant names for global offset - Remove redundant return CL_SUCCESS Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Fix skip conditions - Remove obsolete variable - Replace a variable with a constant Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Remove explicit base class call - Fix constant magic number Signed-off-by: Paweł Jastrzębski * Fix constant magic number. Signed-off-by: Paweł Jastrzębski * Fix clang-format. Signed-off-by: Paweł Jastrzębski * Fix condition for result check. Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../CMakeLists.txt | 1 + .../main.cpp | 1 + .../mutable_command_basic.h | 63 ++++++-- .../mutable_command_global_offset.cpp | 179 +++++++++++++++++++++ .../mutable_command_info.cpp | 60 +++---- .../cl_khr_command_buffer_mutable_dispatch/procs.h | 6 +- 6 files changed, 267 insertions(+), 43 deletions(-) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index e0625833..80214609 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_global_offset.cpp ../basic_command_buffer.cpp ) diff --git 
a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 97075792..b53914dc 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,7 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_global_offset), }; int main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index 96669583..c88c14d1 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -19,6 +19,17 @@ #include "../basic_command_buffer.h" #include "../command_buffer_test_base.h" +// If it is supported get the addresses of all the APIs here. 
+#define GET_EXTENSION_ADDRESS(FUNC) \ + FUNC = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } + struct BasicMutableCommandBufferTest : BasicCommandBufferTest { BasicMutableCommandBufferTest(cl_device_id device, cl_context context, @@ -84,24 +95,52 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest &platform, nullptr); test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); - // If it is supported get the addresses of all the APIs here. -#define GET_EXTENSION_ADDRESS(FUNC) \ - FUNC = reinterpret_cast( \ - clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ - if (FUNC == nullptr) \ - { \ - log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ - " with " #FUNC "\n"); \ - return TEST_FAIL; \ + GET_EXTENSION_ADDRESS(clUpdateMutableCommandsKHR); + + return CL_SUCCESS; } + + clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; + + const char* kernelString = "__kernel void empty() {}"; + const size_t global_work_size = 4 * 16; +}; + +struct InfoMutableCommandBufferTest : BasicMutableCommandBufferTest +{ + InfoMutableCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + cl_int error = init_extension_functions(); + test_error(error, "Unable to initialise extension functions"); + + return CL_SUCCESS; + } + + cl_int init_extension_functions() + { + BasicCommandBufferTest::init_extension_functions(); + + cl_platform_id platform; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + 
GET_EXTENSION_ADDRESS(clGetMutableCommandInfoKHR); return CL_SUCCESS; } clGetMutableCommandInfoKHR_fn clGetMutableCommandInfoKHR = nullptr; - const char* kernelString = "__kernel void empty() {}"; - const size_t global_work_size = 4 * sizeof(cl_int); }; -#endif // CL_KHR_MUTABLE_COMMAND_BASIC_H +#undef GET_EXTENSION_ADDRESS + +#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp new file mode 100644 index 00000000..70e1d9b1 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp @@ -0,0 +1,179 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include "typeWrappers.h" +#include "procs.h" +#include "testHarness.h" +#include "imageHelpers.h" +#include +#include +#include +#include +#include +#include +#include "mutable_command_basic.h" + +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR + +struct MutableDispatchGlobalOffset : InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchGlobalOffset(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *global_offset_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_global_offset(0); + })"; + + cl_int error = + create_single_kernel_helper(context, &program, &kernel, 1, + &global_offset_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, nullptr, 0, nullptr, nullptr, &command); + test_error(error, 
"clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + &update_global_offset /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR, + sizeof(info_global_offset), &info_global_offset, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_global_offset != update_global_offset) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i < update_global_offset && 0 != resultData[i]) + { + log_error("Data failed to verify: 
update_global_offset != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i >= update_global_offset + && update_global_offset != resultData[i]) + { + log_error("Data failed to verify: update_global_offset != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + return CL_SUCCESS; + } + + size_t info_global_offset = 0; + const size_t update_global_offset = 3; + const size_t sizeToAllocate = + (global_work_size + update_global_offset) * sizeof(cl_int); + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_global_offset(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp index cc425a4d..a8ed325a 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -42,13 +42,13 @@ // CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR // CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR -struct InfoDeviceQuery : public BasicMutableCommandBufferTest +struct InfoDeviceQuery : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoDeviceQuery(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -71,12 +71,12 @@ struct InfoDeviceQuery : public 
BasicMutableCommandBufferTest } }; -struct InfoBuffer : public BasicMutableCommandBufferTest +struct InfoBuffer : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoBuffer(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -108,13 +108,13 @@ struct InfoBuffer : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct PropertiesArray : public BasicMutableCommandBufferTest +struct PropertiesArray : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; PropertiesArray(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -154,12 +154,12 @@ struct PropertiesArray : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Kernel : public BasicMutableCommandBufferTest +struct Kernel : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Kernel(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -195,12 +195,12 @@ struct Kernel : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Dimensions : public BasicMutableCommandBufferTest +struct Dimensions : public InfoMutableCommandBufferTest { - using 
BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Dimensions(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -234,12 +234,12 @@ struct Dimensions : public BasicMutableCommandBufferTest const size_t dimensions = 3; }; -struct InfoType : public BasicMutableCommandBufferTest +struct InfoType : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoType(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -271,12 +271,12 @@ struct InfoType : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoQueue : public BasicMutableCommandBufferTest +struct InfoQueue : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoQueue(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -308,13 +308,13 @@ struct InfoQueue : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest +struct InfoGlobalWorkOffset : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkOffset(cl_device_id device, cl_context context, cl_command_queue queue) - : 
BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -346,13 +346,13 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest size_t test_global_work_offset = 0; }; -struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest +struct InfoGlobalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -383,13 +383,13 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest size_t test_global_work_size = 0; }; -struct InfoLocalWorkSize : public BasicMutableCommandBufferTest +struct InfoLocalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoLocalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 4b6dacb6..588bdc81 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,4 +59,8 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -#endif // 
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +extern int test_mutable_dispatch_global_offset(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/ -- cgit v1.2.3 From 75aca34e600a9ac0fbee524404a2ac7cf4d37801 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 28 Jun 2023 08:13:15 +0100 Subject: Test CL_COMMAND_BUFFER_CONTEXT_KHR (#1697) Test coverage for spec PR https://github.com/KhronosGroup/OpenCL-Docs/pull/899 which introduces a new cl_khr_command_buffer query for the cl_context --- .../command_buffer_get_command_buffer_info.cpp | 54 ++++++++++++++++++++++ .../extensions/cl_khr_command_buffer/main.cpp | 1 + .../extensions/cl_khr_command_buffer/procs.h | 2 + 3 files changed, 57 insertions(+) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index d46b2888..1ada904d 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -26,6 +26,7 @@ enum class CombufInfoTestMode CITM_REF_COUNT, CITM_STATE, CITM_PROP_ARRAY, + CITM_CONTEXT, }; namespace { @@ -38,6 +39,7 @@ namespace { // -test case for CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query // -test case for CL_COMMAND_BUFFER_STATE_KHR query // -test case for CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query +// -test case for CL_COMMAND_BUFFER_CONTEXT_KHR query template struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest @@ -70,6 +72,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest error = RunPropArrayInfoTest(); test_error(error, "RunPropArrayInfoTest failed"); break; + case CombufInfoTestMode::CITM_CONTEXT: + error = RunContextInfoTest(); + test_error(error, "RunContextInfoTest 
failed"); + break; } return CL_SUCCESS; @@ -323,6 +329,46 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest return TEST_FAIL; } + cl_int RunContextInfoTest() + { + cl_int error = TEST_PASS; + + // record command buffers + error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + size_t ret_value_size = 0; + error = clGetCommandBufferInfoKHR(command_buffer, + CL_COMMAND_BUFFER_CONTEXT_KHR, 0, + nullptr, &ret_value_size); + test_error(error, "clGetCommandBufferInfoKHR failed"); + + test_assert_error( + ret_value_size == sizeof(cl_context), + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + cl_context ret_context = nullptr; + error = clGetCommandBufferInfoKHR( + command_buffer, CL_COMMAND_BUFFER_CONTEXT_KHR, sizeof(cl_context), + &ret_context, nullptr); + test_error(error, "clGetCommandBufferInfoKHR failed"); + test_assert_error( + ret_context != nullptr, + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + cl_context expected_context = nullptr; + error = + clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), + &expected_context, nullptr); + test_error(error, "clGetCommandQueueInfo failed"); + + test_assert_error( + ret_context == expected_context, + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + return TEST_PASS; + } + const cl_int pattern = 0xE; }; @@ -360,3 +406,11 @@ int test_info_prop_array(cl_device_id device, cl_context context, CommandBufferGetCommandBufferInfo>( device, context, queue, num_elements); } + +int test_info_context(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest< + CommandBufferGetCommandBufferInfo>( + device, context, queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp index 4eefc8ab..35622827 100644 --- 
a/test_conformance/extensions/cl_khr_command_buffer/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -26,6 +26,7 @@ test_definition test_list[] = { ADD_TEST(info_ref_count), ADD_TEST(info_state), ADD_TEST(info_prop_array), + ADD_TEST(info_context), ADD_TEST(basic_profiling), ADD_TEST(simultaneous_profiling), ADD_TEST(regular_wait_for_command_buffer), diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index 53a7d934..5c4e67fe 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -41,6 +41,8 @@ extern int test_info_state(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_info_prop_array(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_info_context(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_basic_set_kernel_arg(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_pending_set_kernel_arg(cl_device_id device, cl_context context, -- cgit v1.2.3 From 729cd8b7a94de09589d7703e59d266ab3eed8cdd Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 28 Jun 2023 09:34:07 +0100 Subject: [NFC] device_execution: use raw string literals for block kernels (#1767) Modernize by using raw string literals, which makes the kernel sources easier to read/extract. 
Signed-off-by: Sven van Haastregt --- .../device_execution/enqueue_block.cpp | 1061 ++++++++++---------- 1 file changed, 519 insertions(+), 542 deletions(-) diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp index 29a6cec1..4ddd1db7 100644 --- a/test_conformance/device_execution/enqueue_block.cpp +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -27,561 +27,538 @@ #ifdef CL_VERSION_2_0 extern int gWimpyMode; -static const char* enqueue_simple_block[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_simple_block(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; -static const char* enqueue_block_with_local_arg1[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_local_arg1(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = 
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; +// clang-format off +static const char* enqueue_simple_block[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } -static const char* enqueue_block_with_local_arg2[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp1[i] = mul * 7 - 21;" - NL, " tmp2[i].x = (float)(mul * 7 - 21);" - NL, " tmp2[i].y = (float)(mul * 7 - 21);" - NL, " tmp2[i].z = (float)(mul * 7 - 21);" - NL, " tmp2[i].w = (float)(mul * 7 - 21);" - NL, "" - NL, " res[tid] += tmp1[i];" - NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_local_arg2(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)" - NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + kernel void enqueue_simple_block(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_wait_list[] = -{ - NL, "#define BLOCK_SUBMITTED 1" - NL, "#define BLOCK_COMPLETED 2" - NL, "#define 
CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_block_with_wait_list(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; -static const char* enqueue_block_with_wait_list_and_local_arg[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define BLOCK_STARTED 3" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " res[tid] = BLOCK_STARTED;" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)" - NL, "{" - NL, 
" int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, " - NL, " ^(__local void* buf) {" - NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);" - NL, " }, LOCAL_MEM_SIZE*sizeof(int));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_block_get_kernel_work_group_size[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = 
get_kernel_work_group_size(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" -}; +static const char* enqueue_block_with_local_arg1[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" -}; + void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + res[tid] += 2; + } -static const char* enqueue_block_capture_event_profiling_info_after_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong 
value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)" - NL, "{" - NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, " release_event(evt);" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "}" - NL -}; + kernel void enqueue_block_with_local_arg1(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_capture_event_profiling_info_before_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const ulong *value, __global int* res)" - NL, "{" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, "}" - NL, "" - NL, "kernel void 
enqueue_block_capture_event_profiling_info_before_execution(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t block_evt2;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt1);" - NL, " release_event(block_evt2);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); }; -static const char* enqueue_block_with_barrier[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = mul * 7 -21;" - NL, "}" - NL, "" - NL, "void loop_fn(size_t tid, int n, __global int* res)" - NL, "{" - NL, " while(n > 0)" - NL, " {" - NL, " barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = 0;" - NL, " --n;" - NL, " }" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_barrier(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " queue_t def_q = 
get_default_queue();" - NL, " res[tid] = -1;" - NL, " size_t n = 256;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " ndrange_t ndrange = ndrange_1D(n);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_marker_with_block_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_block_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " 
if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt1);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; +static const char* enqueue_block_with_local_arg2[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_marker_with_user_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_user_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " uint multiplier = 7;" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t marker_evt;" - NL, " clk_event_t block_evt;" - NL, "" - NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, " - NL, " ^{" - NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; + void block_fn_local_arg1(size_t tid, int mul, 
__global int* res, __local int* tmp1, __local float4* tmp2) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp1[i] = mul * 7 - 21; + tmp2[i].x = (float)(mul * 7 - 21); + tmp2[i].y = (float)(mul * 7 - 21); + tmp2[i].z = (float)(mul * 7 - 21); + tmp2[i].w = (float)(mul * 7 - 21); + + res[tid] += tmp1[i]; + res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w); + } + res[tid] += 2; + } -static const char* enqueue_marker_with_mixed_events[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t mix_ev[2];" - NL, " mix_ev[0] = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1]," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[1]);" - NL, " release_event(marker_evt);" - NL, " release_event(mix_ev[0]);" - NL, "}" - NL -}; 
+ kernel void enqueue_block_with_local_arg2(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_mixed_events[] = -{ - NL, "kernel void enqueue_block_with_mixed_events(__global int* res)" - NL, "{" - NL, " int enq_res;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t mix_ev[3];" - NL, " mix_ev[0] = create_user_event();" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " res[tid] = -2;" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[0]);" - NL, " release_event(mix_ev[1]);" - NL, " release_event(mix_ev[2]);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2) + { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); }; + + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_with_wait_list[] = { R"( + #define BLOCK_SUBMITTED 1 + #define BLOCK_COMPLETED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_block_with_wait_list(__global int* res) + { + size_t 
tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_with_wait_list_and_local_arg[] = { R"( + #define LOCAL_MEM_SIZE 10 + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define BLOCK_STARTED 3 + #define CHECK_SUCCESS 0 + + void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp) + { + res[tid] = BLOCK_STARTED; + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + if (res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED; + } + + kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^(__local void* buf) { + block_fn_local_arg(tid, multiplier, res, (__local int*)buf); + }, LOCAL_MEM_SIZE*sizeof(int)); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + 
retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_get_kernel_work_group_size[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_work_group_size(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_work_group_size(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = 
ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_after_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const clk_event_t evt, __global int* res) + { + capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + release_event(evt); + } + + kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res) + { + size_t tid = get_global_id(0); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_before_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const ulong *value, __global int* res) + { + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + } + + kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res) + { + int 
multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + clk_event_t block_evt2; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + set_user_event_status(user_evt, CL_COMPLETE); + + void (^checkBlock) (void) = ^{ check_res(tid, &value, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + release_event(user_evt); + release_event(block_evt1); + release_event(block_evt2); + } +)" }; + +static const char* enqueue_block_with_barrier[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + if (mul > 0) barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = mul * 7 -21; + } + + void loop_fn(size_t tid, int n, __global int* res) + { + while (n > 0) + { + barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = 0; + --n; + } + } + + kernel void enqueue_block_with_barrier(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + queue_t def_q = get_default_queue(); + res[tid] = -1; + size_t n = 256; + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + ndrange_t ndrange = ndrange_1D(n); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* 
enqueue_marker_with_block_event[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_block_event(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + clk_event_t block_evt1; + clk_event_t marker_evt; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; } + + enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(block_evt1); + release_event(marker_evt); + release_event(user_evt); + } +)" }; + +static const char* enqueue_marker_with_user_event[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_user_event(__global int* res) + { + size_t tid = get_global_id(0); + uint multiplier = 7; + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + clk_event_t marker_evt; + clk_event_t block_evt; + + int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, 
ndrange, 1, &marker_evt, &block_evt, + ^{ + if (res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS; + }); + + //check block is not started + if (res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(block_evt); + release_event(marker_evt); + release_event(user_evt); + } +)" }; + +static const char* enqueue_marker_with_mixed_events[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_mixed_events(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t mix_ev[2]; + mix_ev[0] = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; } + + clk_event_t marker_evt; + + enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(mix_ev[0], CL_COMPLETE); + + release_event(mix_ev[1]); + release_event(marker_evt); + release_event(mix_ev[0]); + } +)" }; + +static const char* enqueue_block_with_mixed_events[] = { R"( + kernel void enqueue_block_with_mixed_events(__global int* res) + { + int enq_res; + size_t tid = get_global_id(0); + clk_event_t mix_ev[3]; + mix_ev[0] = create_user_event(); + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + res[tid] = -2; + + enq_res = 
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; }); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; }); + if (enq_res != CLK_SUCCESS) { res[tid] = -4; return; } + + set_user_event_status(mix_ev[0], CL_COMPLETE); + + release_event(mix_ev[0]); + release_event(mix_ev[1]); + release_event(mix_ev[2]); + } +)" }; +// clang-format on static const kernel_src sources_enqueue_block[] = { -- cgit v1.2.3 From 845ec694bbc333a563de33e5cce8e541a7b8b910 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 29 Jun 2023 12:20:15 +0100 Subject: workgroups: fix -Wsign-compare warnings (#1774) In preparation of re-enabling -Wsign-compare globally, fix some instances of this warning. Signed-off-by: Sven van Haastregt --- test_conformance/workgroups/test_wg_all.cpp | 3 +-- test_conformance/workgroups/test_wg_any.cpp | 3 +-- test_conformance/workgroups/test_wg_broadcast.cpp | 11 ++++------- .../workgroups/test_wg_suggested_local_work_size.cpp | 4 ++-- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp index 41abd124..f9b574e4 100644 --- a/test_conformance/workgroups/test_wg_all.cpp +++ b/test_conformance/workgroups/test_wg_all.cpp @@ -75,7 +75,6 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < 
(num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp index e0242cfb..f7ff899a 100644 --- a/test_conformance/workgroups/test_wg_any.cpp +++ b/test_conformance/workgroups/test_wg_any.cpp @@ -75,7 +75,6 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < (num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp index e24ac7b9..a4cb0c6f 100644 --- a/test_conformance/workgroups/test_wg_broadcast.cpp +++ b/test_conformance/workgroups/test_wg_broadcast.cpp @@ -70,7 +70,7 @@ verify_wg_broadcast_1D(float *inptr, float *outptr, size_t n, size_t wg_size) for (i=0,group_id=0; i wg_size ? wg_size : (n-i); + size_t local_size = (n - i) > wg_size ? wg_size : (n - i); float broadcast_result = inptr[i + (group_id % local_size)]; for (j=0; j Date: Fri, 30 Jun 2023 11:22:43 +0100 Subject: workgroups: fix program/kernel object leak (#1775) `create_single_kernel_helper` is called in a loop, overwriting the objects contained in the wrapper classes. The wrapper class is not aware of this, as the overwriting happens through its `operator&`. Move the wrapper objects into the loop, so that the contained objects get released as soon as the program and kernel objects are no longer needed. 
Signed-off-by: Sven van Haastregt --- test_conformance/workgroups/test_wg_suggested_local_work_size.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp index 989f1dfd..a31fca63 100644 --- a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -221,8 +221,6 @@ int do_test_work_group_suggested_local_size( bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr, cl_ulong max_local_mem_size, size_t global_work_offset[], num_dims dim) { - clProgramWrapper scan_program; - clKernelWrapper scan_kernel; int err; size_t test_values[] = { 1, 1, 1 }; std::string kernel_names[6] = { @@ -244,6 +242,8 @@ int do_test_work_group_suggested_local_size( for (int kernel_num = 0; kernel_num < 6; kernel_num++) { if (max_local_mem_size < local_mem_size[kernel_num]) continue; + clProgramWrapper scan_program; + clKernelWrapper scan_kernel; // Create the kernel err = create_single_kernel_helper( context, &scan_program, &scan_kernel, 1, -- cgit v1.2.3 From 9e8430a6a69b4c2f2c714137a68e460ae8f14515 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 3 Jul 2023 10:07:32 +0100 Subject: [NFC] clang-format basic/test_enqueue_map.cpp (#1777) Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_enqueue_map.cpp | 308 +++++++++++++++------------- 1 file changed, 168 insertions(+), 140 deletions(-) diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp index d28f7e41..6b650c0d 100644 --- a/test_conformance/basic/test_enqueue_map.cpp +++ b/test_conformance/basic/test_enqueue_map.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -26,6 +26,7 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" +// clang-format off const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR, CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, @@ -33,93 +34,104 @@ const cl_mem_flags flag_set[] = { CL_MEM_COPY_HOST_PTR, 0 }; -const char* flag_set_names[] = { + +const char *flag_set_names[] = { "CL_MEM_ALLOC_HOST_PTR", "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", "CL_MEM_USE_HOST_PTR", "CL_MEM_COPY_HOST_PTR", "0" }; +// clang-format on -int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; - const size_t bufferSize = 256*256; - MTdataHolder d{gRandomSeed}; + const size_t bufferSize = 256 * 256; + MTdataHolder d{ gRandomSeed }; BufferOwningPtr hostPtrData{ malloc(bufferSize) }; BufferOwningPtr referenceData{ malloc(bufferSize) }; - BufferOwningPtr finalData{malloc(bufferSize)}; + BufferOwningPtr finalData{ malloc(bufferSize) }; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) + for (int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); generate_random_data(kChar, (unsigned int)bufferSize, d, hostPtrData); memcpy(referenceData, hostPtrData, bufferSize); void *hostPtr = nullptr; cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); if (hasHostPtr) hostPtr = hostPtrData; - memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); - test_error( error, "Unable to create 
testing buffer" ); + memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); if (!hasHostPtr) { error = - clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, - hostPtrData, 0, NULL, NULL); - test_error( error, "clEnqueueWriteBuffer failed"); + clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + hostPtrData, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); } - for( int i = 0; i < 128; i++ ) + for (int i = 0; i < 128; i++) { - size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d ); - size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d ); - - cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, length, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapBuffer call failed" ); - log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length ); - return -1; - } - - // Write into the region - for( size_t j = 0; j < length; j++ ) - { - cl_char spin = (cl_char)genrand_int32( d ); - - // Test read AND write in one swipe - cl_char value = mappedRegion[ j ]; - value = spin - value; - mappedRegion[ j ] = value; - - // Also update the initial data array - value = referenceData[offset + j]; - value = spin - value; - referenceData[offset + j] = value; - } - - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); + size_t offset = (size_t)random_in_range(0, (int)bufferSize - 1, d); + size_t length = + (size_t)random_in_range(1, (int)(bufferSize - offset), d); + + cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + length, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapBuffer call failed"); + log_error("\tOffset: %d Length: 
%d\n", (int)offset, + (int)length); + return -1; + } + + // Write into the region + for (size_t j = 0; j < length; j++) + { + cl_char spin = (cl_char)genrand_int32(d); + + // Test read AND write in one swipe + cl_char value = mappedRegion[j]; + value = spin - value; + mappedRegion[j] = value; + + // Also update the initial data array + value = referenceData[offset + j]; + value = spin - value; + referenceData[offset + j] = value; + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); } - // Final validation: read actual values of buffer and compare against our reference - error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, bufferSize, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + // Final validation: read actual values of buffer and compare against + // our reference + error = clEnqueueReadBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); - for( size_t q = 0; q < bufferSize; q++ ) + for (size_t q = 0; q < bufferSize; q++) { if (referenceData[q] != finalData[q]) { log_error( - "ERROR: Sample %d did not validate! Got %d, expected %d\n", - (int)q, (int)finalData[q], (int)referenceData[q]); + "ERROR: Sample %d did not validate! 
Got %d, expected %d\n", + (int)q, (int)finalData[q], (int)referenceData[q]); return -1; } } @@ -128,112 +140,128 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_comman return 0; } -int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 }; const size_t imageSize = 256; const size_t imageDataSize = imageSize * imageSize * 4 * sizeof(cl_uint); - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) BufferOwningPtr hostPtrData{ malloc(imageDataSize) }; BufferOwningPtr referenceData{ malloc(imageDataSize) }; - BufferOwningPtr finalData{malloc(imageDataSize)}; - - MTdataHolder d{gRandomSeed}; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { - clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - - generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d, - hostPtrData); - memcpy(referenceData, hostPtrData, imageDataSize); - - cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); - void *hostPtr = nullptr; - if (hasHostPtr) hostPtr = hostPtrData; - memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, - imageSize, imageSize, 0, hostPtr, &error ); - test_error( error, "Unable to create testing buffer" ); - - if (!hasHostPtr) { - size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1}; - error = - clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, - 0, 0, hostPtrData, 0, NULL, NULL); - test_error( error, "Unable to write to testing buffer" ); - } - - for( int i = 0; i < 128; i++ ) + BufferOwningPtr finalData{ malloc(imageDataSize) }; + + MTdataHolder 
d{ gRandomSeed }; + for (int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); + + generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), + d, hostPtrData); + memcpy(referenceData, hostPtrData, imageDataSize); + + cl_mem_flags flags = flag_set[src_flag_id]; + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); + void *hostPtr = nullptr; + if (hasHostPtr) hostPtr = hostPtrData; + memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, + imageSize, imageSize, 0, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); - size_t offset[3], region[3]; - size_t rowPitch; - - offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d ); - offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d ); - offset[ 2 ] = 0; - region[ 2 ] = 1; - cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, region, &rowPitch, NULL, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapImage call failed" ); - log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] ); - return -1; - } - - // Write into the region - cl_uint *mappedPtr = mappedRegion; - for( size_t y = 0; y < region[ 1 ]; y++ ) - { - for( size_t x = 0; x < region[ 0 ] * 4; x++ ) + if (!hasHostPtr) { - cl_int spin = (cl_int)random_in_range( 16, 1024, d ); - - cl_int value; - // Test read AND write in one swipe - value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ]; - value = spin - value; - mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value; - - // Also update the initial 
data array - value = - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x]; - value = spin - value; - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x] = - value; + size_t write_origin[3] = { 0, 0, 0 }, + write_region[3] = { imageSize, imageSize, 1 }; + error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, + write_region, 0, 0, hostPtrData, 0, + NULL, NULL); + test_error(error, "Unable to write to testing buffer"); } - } - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); - } + for (int i = 0; i < 128; i++) + { - // Final validation: read actual values of buffer and compare against our reference - size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 }; - error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + size_t offset[3], region[3]; + size_t rowPitch; + + offset[0] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[0] = + (size_t)random_in_range(1, (int)(imageSize - offset[0] - 1), d); + offset[1] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[1] = + (size_t)random_in_range(1, (int)(imageSize - offset[1] - 1), d); + offset[2] = 0; + region[2] = 1; + cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + region, &rowPitch, NULL, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapImage call failed"); + log_error("\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], + (int)offset[1], (int)region[0], (int)region[1]); + return -1; + } - for( size_t q = 0; q < imageSize * imageSize * 4; q++ ) - { - if (referenceData[q] != finalData[q]) + // Write into the region + cl_uint *mappedPtr = mappedRegion; + for (size_t y = 0; y < region[1]; y++) + { + for (size_t x = 0; x < 
region[0] * 4; x++) + { + cl_int spin = (cl_int)random_in_range(16, 1024, d); + + cl_int value; + // Test read AND write in one swipe + value = mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x]; + value = spin - value; + mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x] = value; + + // Also update the initial data array + value = + referenceData[((offset[1] + y) * imageSize + offset[0]) + * 4 + + x]; + value = spin - value; + referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + + x] = value; + } + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); + } + + // Final validation: read actual values of buffer and compare against + // our reference + size_t finalOrigin[3] = { 0, 0, 0 }, + finalRegion[3] = { imageSize, imageSize, 1 }; + error = clEnqueueReadImage(queue, memObject, CL_TRUE, finalOrigin, + finalRegion, 0, 0, finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); + + for (size_t q = 0; q < imageSize * imageSize * 4; q++) { - log_error("ERROR: Sample %d (coord %d,%d) did not validate! Got " - "%d, expected %d\n", - (int)q, (int)((q / 4) % imageSize), - (int)((q / 4) / imageSize), (int)finalData[q], - (int)referenceData[q]); - return -1; + if (referenceData[q] != finalData[q]) + { + log_error( + "ERROR: Sample %d (coord %d,%d) did not validate! Got " + "%d, expected %d\n", + (int)q, (int)((q / 4) % imageSize), + (int)((q / 4) / imageSize), (int)finalData[q], + (int)referenceData[q]); + return -1; + } } - } - } // cl_mem_flags + } // cl_mem_flags return 0; } - -- cgit v1.2.3 From 1e2d3522cf7df7020aa9d12ad3d11f7b03187355 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 11 Jul 2023 17:48:41 +0200 Subject: Added cl_khr_fp16 extension support for test_op_vector_insert from spirv_new (#1766) * Added cl_khr_fp16 extension support for test_op_vector_insert from spirv_new, work in progress. 
* Added cl_khr_fp16 extension support for test_op_vector_insert from spirv_new (issue #142) --- .../spirv_asm/vector_half8_insert.spvasm32 | 43 ++++++++++++++++++ .../spirv_asm/vector_half8_insert.spvasm64 | 48 ++++++++++++++++++++ .../spirv_new/test_op_vector_insert.cpp | 52 ++++++++++++++-------- test_conformance/spirv_new/types.hpp | 1 + 4 files changed, 125 insertions(+), 19 deletions(-) create mode 100644 test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 create mode 100644 test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 new file mode 100644 index 00000000..27812938 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 @@ -0,0 +1,43 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 23 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %13 + %in = 
OpFunctionParameter %_ptr_CrossWorkgroup_half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %14 = OpFunctionParameter %uint + %15 = OpLabel + %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %17 = OpCompositeExtract %uint %16 0 + %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %17 + %19 = OpLoad %half %18 + %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %17 + %21 = OpLoad %v8half %20 + %22 = OpVectorInsertDynamic %v8half %21 %19 %14 + OpStore %20 %22 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 new file mode 100644 index 00000000..f140fc25 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 27 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input 
+ %1 = OpFunction %void None %15 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %16 = OpFunctionParameter %uint + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %21 + %23 = OpLoad %half %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %21 + %25 = OpLoad %v8half %24 + %26 = OpVectorInsertDynamic %v8half %25 %23 %16 + OpStore %24 %26 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_vector_insert.cpp b/test_conformance/spirv_new/test_op_vector_insert.cpp index 0749c14a..62fc78cb 100644 --- a/test_conformance/spirv_new/test_op_vector_insert.cpp +++ b/test_conformance/spirv_new/test_op_vector_insert.cpp @@ -25,6 +25,17 @@ int test_insert(cl_device_id deviceID, cl_context context, return 0; } } + + if (std::string(name).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; clProgramWrapper prog; err = get_program_with_il(prog, deviceID, context, name); @@ -94,27 +105,30 @@ int test_insert(cl_device_id deviceID, cl_context context, return 0; } -#define TEST_VECTOR_INSERT(TYPE, N) \ - TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \ - { \ - typedef cl_##TYPE##N Tv; \ - typedef cl_##TYPE Ts; \ - const int num = 1 << 20; \ - std::vector in(num); \ - const char *name = "vector_" #TYPE #N "_insert"; \ - \ - RandomSeed seed(gRandomSeed); \ - \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand(seed); \ - } \ - \ - return test_insert(deviceID, \ - context, queue, \ - name, \ - in, N); \ +#define TEST_VECTOR_INSERT(TYPE, N) \ + TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \ + { \ 
+ if (sizeof(cl_##TYPE) == 2) \ + { \ + PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \ + } \ + typedef cl_##TYPE##N Tv; \ + typedef cl_##TYPE Ts; \ + const int num = 1 << 20; \ + std::vector in(num); \ + const char *name = "vector_" #TYPE #N "_insert"; \ + \ + RandomSeed seed(gRandomSeed); \ + \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand(seed); \ + } \ + \ + return test_insert(deviceID, context, queue, name, in, N); \ } +TEST_VECTOR_INSERT(half, 8) TEST_VECTOR_INSERT(int, 4) TEST_VECTOR_INSERT(float, 4) TEST_VECTOR_INSERT(long, 2) diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp index 939e6fa8..728b2445 100644 --- a/test_conformance/spirv_new/types.hpp +++ b/test_conformance/spirv_new/types.hpp @@ -113,6 +113,7 @@ GENRAND_REAL_FUNC(cl_double, 2) GENRAND_REAL_FUNC(cl_double, 4) GENRAND_REAL_FUNC(cl_half, 2) GENRAND_REAL_FUNC(cl_half, 4) +GENRAND_REAL_FUNC(cl_half, 8) template<> inline cl_half genrandReal(RandomSeed &seed) { -- cgit v1.2.3 From 40b3eb3f7fb68ab55337c820961ec13c8c081258 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 11 Jul 2023 17:49:44 +0200 Subject: Added cl_khr_fp16 extension support for test_op_vector_extract from spirv_new (#1765) * Added cl_khr_fp16 extension support for test_op_vector_extract from spirv_new (issue #142) * Added missing condition to test available cl_khr_fp16 extension --- .../spirv_asm/vector_half8_extract.spvasm32 | 42 +++++++++++++++++ .../spirv_asm/vector_half8_extract.spvasm64 | 47 +++++++++++++++++++ .../spirv_new/test_op_vector_extract.cpp | 52 ++++++++++++++-------- 3 files changed, 122 insertions(+), 19 deletions(-) create mode 100644 test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 create mode 100644 test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 new file mode 100644 index 
00000000..985b5262 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 22 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %13 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half + %14 = OpFunctionParameter %uint + %15 = OpLabel + %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %17 = OpCompositeExtract %uint %16 0 + %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %17 + %19 = OpLoad %v8half %18 + %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %17 + %21 = OpVectorExtractDynamic %half %19 %14 + OpStore %20 %21 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 new file mode 100644 index
b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 26 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %15 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half + %16 = OpFunctionParameter %uint + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %21 + %23 = OpLoad %v8half %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %21 + %25 = OpVectorExtractDynamic %half %23 %16 + OpStore %24 %25 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_vector_extract.cpp 
b/test_conformance/spirv_new/test_op_vector_extract.cpp index fe1f8253..f77aa7a2 100644 --- a/test_conformance/spirv_new/test_op_vector_extract.cpp +++ b/test_conformance/spirv_new/test_op_vector_extract.cpp @@ -25,6 +25,17 @@ int test_extract(cl_device_id deviceID, cl_context context, return 0; } } + + if (std::string(name).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; clProgramWrapper prog; @@ -76,27 +87,30 @@ int test_extract(cl_device_id deviceID, cl_context context, return 0; } -#define TEST_VECTOR_EXTRACT(TYPE, N) \ - TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \ - { \ - typedef cl_##TYPE##N Tv; \ - typedef cl_##TYPE Ts; \ - const int num = 1 << 20; \ - std::vector in(num); \ - const char *name = "vector_" #TYPE #N "_extract"; \ - \ - RandomSeed seed(gRandomSeed); \ - \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand(seed); \ - } \ - \ - return test_extract(deviceID, \ - context, queue, \ - name, \ - in, N); \ +#define TEST_VECTOR_EXTRACT(TYPE, N) \ + TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \ + { \ + if (sizeof(cl_##TYPE) == 2) \ + { \ + PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \ + } \ + typedef cl_##TYPE##N Tv; \ + typedef cl_##TYPE Ts; \ + const int num = 1 << 20; \ + std::vector in(num); \ + const char *name = "vector_" #TYPE #N "_extract"; \ + \ + RandomSeed seed(gRandomSeed); \ + \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand(seed); \ + } \ + \ + return test_extract(deviceID, context, queue, name, in, N); \ } +TEST_VECTOR_EXTRACT(half, 8) TEST_VECTOR_EXTRACT(int, 4) TEST_VECTOR_EXTRACT(float, 4) TEST_VECTOR_EXTRACT(long, 2) -- cgit v1.2.3 From 3a1388a2b435e18eb4e8d0002125d19f71660857 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 11 Jul 2023 17:51:05 +0200 Subject: Added cl_khr_fp16 extension support for test_vloadstore from basic (#1734) * 
Added cl_khr_fp16 support for test_vloadstore from basic (issue #142, basic) * Moved string helper procedures due to request from test_commonfns PR #1695 * restored original test sizes * Corrected invalid initialization of reference buffer --- test_conformance/basic/test_vloadstore.cpp | 800 +++++++++++++---------------- 1 file changed, 358 insertions(+), 442 deletions(-) diff --git a/test_conformance/basic/test_vloadstore.cpp b/test_conformance/basic/test_vloadstore.cpp index e137f9e7..d34ecbf9 100644 --- a/test_conformance/basic/test_vloadstore.cpp +++ b/test_conformance/basic/test_vloadstore.cpp @@ -13,52 +13,129 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - +#include #include #include #include #include #include #include +#include +#include #include "procs.h" #include "harness/conversions.h" -#include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" // Outputs debug information for stores #define DEBUG 0 // Forces stores/loads to be done with offsets = tid #define LINEAR_OFFSETS 0 #define NUM_LOADS 512 - -static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +char pragma_str[128] = { 0 }; +char mem_type[64] = { 0 }; +char store_str[128] = { 0 }; +char load_str[128] = { 0 }; + +extern cl_half_rounding_mode halfRoundingMode; + +// clang-format off +static const char *store_pattern= "results[ tid ] = tmp;\n"; +static const char *store_patternV3 = "results[3*tid] = tmp.s0; results[3*tid+1] = tmp.s1; results[3*tid+2] = tmp.s2;\n"; +static const char *load_pattern = "sSharedStorage[ i ] = src[ i ];\n"; +static const char *load_patternV3 = "sSharedStorage[3*i] = src[ 3*i]; sSharedStorage[3*i+1] = src[3*i+1]; sSharedStorage[3*i+2] = src[3*i+2];\n"; +static 
const char *kernel_pattern[] = { +pragma_str, +"#define STYPE %s\n" +"__kernel void test_fn( ", mem_type, " STYPE *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (", mem_type, " STYPE *) src ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n" +}; + +const char *pattern_local [] = { +pragma_str, +"__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" int lid = get_local_id( 0 );\n" +"\n" +" if( lid == 0 )\n" +" {\n" +" for( int i = 0; i < %d; i++ ) {\n" +" ", load_str, +" }\n" +" }\n" +// Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all +// threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be +// updated on all threads at that point +" barrier( CLK_LOCAL_MEM_FENCE );\n" +"\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n" }; + +const char *pattern_priv [] = { +pragma_str, +// Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means +// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test +"#define PRIV_TYPE %s\n" +"#define PRIV_SIZE %d\n" +"__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" +" int tid = get_global_id( 0 );\n" +"\n" +" for( int i = 0; i < PRIV_SIZE; i++ )\n" +" sPrivateStorage[ i ] = src[ i ];\n" +// Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for +// anybody else to sync up +"\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n"}; +// clang-format on #pragma mark -------------------- vload harness -------------------------- -typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ); +typedef void (*create_program_fn)(std::string &, size_t, ExplicitType, size_t, + size_t); +typedef int (*test_fn)(cl_device_id, cl_context, cl_command_queue, ExplicitType, + unsigned int, create_program_fn, size_t); -int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, - create_vload_program_fn createFn, size_t bufferSize, MTdata d ) +int test_vload(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType type, unsigned int vecSize, + create_program_fn createFn, size_t bufferSize) { - int error; - clProgramWrapper program; clKernelWrapper kernel; clMemWrapper streams[ 4 ]; + MTdataHolder d(gRandomSeed); const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS; if (DEBUG) bufferSize = (bufferSize < 128) ? 
bufferSize : 128; size_t threads[ 1 ], localThreads[ 1 ]; clProtectedArray inBuffer( bufferSize ); - char programSrc[ 10240 ]; cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ]; size_t numElements, typeSize, i; unsigned int outVectorSize; + pragma_str[0] = '\0'; + if (type == kDouble) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); + else if (type == kHalf) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); typeSize = get_explicit_type_size( type ); numElements = bufferSize / ( typeSize * vecSize ); @@ -83,25 +160,19 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, outVectorSize = vecSize; // Declare output buffers now -#if !(defined(_WIN32) && defined(_MSC_VER)) - char outBuffer[ numLoads * typeSize * outVectorSize ]; - char referenceBuffer[ numLoads * typeSize * vecSize ]; -#else - char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char)); - char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char)); -#endif + std::vector outBuffer(numLoads * typeSize * outVectorSize); + std::vector referenceBuffer(numLoads * typeSize * vecSize); // Create the program - - + std::string programSrc; createFn( programSrc, numElements, type, vecSize, outVectorSize); // Create our kernel - const char *ptr = programSrc; - - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + const char *ptr = programSrc.c_str(); + cl_int error = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "test_fn"); test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str()); // Get the number of args to differentiate the kernels with local storage. 
(They have 5) cl_uint numArgs; @@ -115,7 +186,9 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, test_error( error, "Unable to create kernel stream" ); streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error ); test_error( error, "Unable to create kernel stream" ); - streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error ); + streams[3] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numLoads * typeSize * outVectorSize, + (void *)outBuffer.data(), &error); test_error( error, "Unable to create kernel stream" ); // Set parameters and run @@ -145,28 +218,32 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, test_error( error, "Unable to exec kernel" ); // Get the results - error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, + numLoads * typeSize * outVectorSize + * sizeof(cl_char), + (void *)outBuffer.data(), 0, NULL, NULL); test_error( error, "Unable to read results" ); - // Create the reference results - memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char)); + referenceBuffer.assign(numLoads * typeSize * vecSize, 0); for( i = 0; i < numLoads; i++ ) { - memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize, - typeSize * vecSize ); + memcpy(&referenceBuffer[i * typeSize * vecSize], + ((char *)(void *)inBuffer) + + ((offsets[i] * vecSize) + alignmentOffsets[i]) * typeSize, + typeSize * vecSize); } // Validate the results now - char *expected = referenceBuffer; - char *actual = outBuffer; + char *expected = referenceBuffer.data(); + char *actual = outBuffer.data(); char *in = (char *)(void 
*)inBuffer; if (DEBUG) { log_info("Memory contents:\n"); + char inString[1024]; + char expectedString[1024], actualString[1024]; for (i=0; i +int test_vset(cl_device_id device, cl_context context, cl_command_queue queue, + create_program_fn createFn, size_t bufferSize) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 }; const char *size_names[] = { "2", "3", "4", "8", "16"}; - unsigned int typeIdx, sizeIdx; int error = 0; - MTdata mtData = init_genrand( gRandomSeed ); log_info("Testing with buffer size of %d.\n", (int)bufferSize); - for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ ) - { + bool hasDouble = is_extension_available(device, "cl_khr_fp64"); + bool hasHalf = is_extension_available(device, "cl_khr_fp16"); - if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) + for (unsigned typeIdx = 0; typeIdx < vecType.size(); typeIdx++) + { + if (vecType[typeIdx] == kDouble && !hasDouble) continue; - - if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong ) + else if (vecType[typeIdx] == kHalf && !hasHalf) + continue; + else if ((vecType[typeIdx] == kLong || vecType[typeIdx] == kULong) + && !gHasLong) continue; - for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) + for (unsigned sizeIdx = 0; vecSizes[sizeIdx] != 0; sizeIdx++) { log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]); - int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData ); + int error_this_type = + test_func_ptr(device, context, queue, vecType[typeIdx], + vecSizes[sizeIdx], createFn, bufferSize); if (error_this_type) { error += error_this_type; log_error("Failure; 
skipping further sizes for this type."); @@ -233,125 +317,59 @@ int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queu } } } - - free_mtdata(mtData); - return error; } #pragma mark -------------------- vload test cases -------------------------- -void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_global_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(mem_type, sizeof(mem_type), "__global"); + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, type == kDouble ? 
doubleExtensionPragma : "", - "", - typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, - (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(), + typeName, (int)inVectorSize, (int)inVectorSize); } int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) { - return test_vloadset( device, context, queue, create_global_load_code, 10240 ); + return test_vset(device, context, queue, + create_global_load_code, 10240); } - -void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_local_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - //" __local %s%d sSharedStorage[ %d ];\n" - "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " int lid = get_local_id( 0 );\n" - "\n" - " if( lid == 0 )\n" - " {\n" - " for( int i = 0; i < %d; i++ )\n" - " sSharedStorage[ i ] = src[ i ];\n" - " }\n" - // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all - // threads are caught up (including the first one with the copy) before any proceed, i.e. 
the shared storage should be - // updated on all threads at that point - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - //" __local %s%d sSharedStorage[ %d ];\n" - "__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " int lid = get_local_id( 0 );\n" - "\n" - " if( lid == 0 )\n" - " {\n" - " for( int i = 0; i < %d; i++ ) {\n" - " sSharedStorage[ 3*i ] = src[ 3*i ];\n" - " sSharedStorage[ 3*i +1] = src[ 3*i +1];\n" - " sSharedStorage[ 3*i +2] = src[ 3*i +2];\n" - " }\n" - " }\n" - // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all - // threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be - // updated on all threads at that point - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid +1] = tmp.s1;\n" - " results[ 3*tid +2] = tmp.s2;\n" - "}\n"; - + std::snprintf(store_str, sizeof(store_str), store_patternV3); + std::snprintf(load_str, sizeof(load_str), load_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, /*(int)inBufferSize,*/ - typeName, typeName, - (int)inBufferSize, - typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? 
doubleExtensionPragma : "", - "", - typeName, (int)inVectorSize, /*(int)inBufferSize,*/ - typeName, (int)inVectorSize, typeName, (int)outVectorSize, - (int)inBufferSize, - typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + std::string inTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); + std::snprintf(load_str, sizeof(load_str), load_pattern); } + + std::string kernel_src = concat_kernel( + pattern_local, sizeof(pattern_local) / sizeof(pattern_local[0])); + destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), inTypeName.c_str(), + outTypeName.c_str(), (int)inBufferSize, typeName, + (int)inVectorSize, (int)inVectorSize, typeName); } int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -360,53 +378,34 @@ int test_vload_local(cl_device_id device, cl_context context, cl_command_queue q cl_ulong localSize; int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); test_error( error, "Unable to get max size of local memory buffer" ); - if( localSize > 10240 ) - localSize = 10240; + if (localSize > 10240) localSize = 10240; if (localSize > 4096) localSize -= 2048; else localSize /= 2; - return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize ); + return test_vset(device, context, queue, create_local_load_code, + (size_t)localSize); } - -void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_constant_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint 
*offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(mem_type, sizeof(mem_type), "__constant"); + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, typeName, - typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? 
doubleExtensionPragma : "", - "", - typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, - (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(), + typeName, (int)inVectorSize, (int)inVectorSize); } int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -415,109 +414,71 @@ int test_vload_constant(cl_device_id device, cl_context context, cl_command_queu cl_ulong maxSize; int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL ); test_error( error, "Unable to get max size of constant memory buffer" ); - if( maxSize > 10240 ) - maxSize = 10240; + if (maxSize > 10240) maxSize = 10240; if (maxSize > 4096) maxSize -= 2048; else maxSize /= 2; - return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize ); + return test_vset(device, context, queue, + create_constant_load_code, (size_t)maxSize); } - -void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_private_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s%d\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < %d; i++ )\n" - " sPrivateStorage[ i ] = src[ i ];\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < PRIV_SIZE; i++ )\n" - " {\n" - " sPrivateStorage[ i ] = src[ i ];\n" - " }\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize ==3) { - sprintf( destBuffer, patternV3, - 
type == kDouble ? doubleExtensionPragma : "", - "", - typeName, 3*((int)inBufferSize), - typeName, typeName, - typeName ); - // log_info("Src is \"\n%s\n\"\n", destBuffer); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, (int)outVectorSize, - (int)inBufferSize, - typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + std::string inTypeName = typeName; + int bufSize = (int)inBufferSize * 3; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize); + bufSize = (int)inBufferSize; + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + pattern_priv, sizeof(pattern_priv) / sizeof(pattern_priv[0])); + destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), bufSize, + inTypeName.c_str(), outTypeName.c_str(), typeName, + (int)inVectorSize, (int)inVectorSize, typeName); } int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) { // We have no idea how much actual private storage is available, so just pick a reasonable value, // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vloadset( device, context, queue, create_private_load_code, 256 ); + return test_vset(device, context, queue, + create_private_load_code, 256); } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #pragma mark -------------------- vstore harness -------------------------- -typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ); - -int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, 
unsigned int vecSize, - create_vstore_program_fn createFn, size_t bufferSize, MTdata d ) +int test_vstore(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType type, unsigned int vecSize, + create_program_fn createFn, size_t bufferSize) { - int error; - clProgramWrapper program; clKernelWrapper kernel; clMemWrapper streams[ 3 ]; + MTdataHolder d(gRandomSeed); size_t threads[ 1 ], localThreads[ 1 ]; - size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS; + pragma_str[0] = '\0'; + if (type == kDouble) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); + else if (type == kHalf) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; @@ -534,39 +495,22 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue } if (DEBUG) log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); -#if !(defined(_WIN32) && defined(_MSC_VER)) - cl_uint offsets[ numStores ]; -#else - cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint)); -#endif - char programSrc[ 10240 ]; - size_t i; - -#if !(defined(_WIN32) && defined(_MSC_VER)) - char inBuffer[ numStores * typeSize * vecSize ]; -#else - char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char)); -#endif + + std::vector offsets(numStores); + std::vector inBuffer(numStores * typeSize * vecSize); + clProtectedArray outBuffer( numElements * typeSize * vecSize ); -#if !(defined(_WIN32) && defined(_MSC_VER)) - char referenceBuffer[ numElements * typeSize * vecSize ]; -#else - char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char)); -#endif + std::vector referenceBuffer(numElements * typeSize * vecSize); // Create some random input 
data and random offsets to load from - generate_random_data( type, numStores * vecSize, d, (void *)inBuffer ); + generate_random_data(type, numStores * vecSize, d, (void *)inBuffer.data()); // Note: make sure no two offsets are the same, otherwise the output would depend on // the order that threads ran in, and that would be next to impossible to verify -#if !(defined(_WIN32) && defined(_MSC_VER)) - char flags[ numElements ]; -#else - char* flags = (char*)_malloca( numElements * sizeof(char)); -#endif - - memset( flags, 0, numElements * sizeof(char) ); - for( i = 0; i < numStores; i++ ) + std::vector flags(numElements); + flags.assign(flags.size(), 0); + + for (size_t i = 0; i < numStores; i++) { do { @@ -579,13 +523,15 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n"); - createFn( programSrc, numElements, type, vecSize ); + std::string programSrc; + createFn(programSrc, numElements, type, vecSize, vecSize); // Create our kernel - const char *ptr = programSrc; - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + const char *ptr = programSrc.c_str(); + cl_int error = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "test_fn"); test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str()); // Get the number of args to differentiate the kernels with local storage. 
(They have 5) cl_uint numArgs; @@ -593,9 +539,14 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue test_error( error, "clGetKernelInfo failed"); // Set up parameters - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error ); + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numStores * typeSize * vecSize * sizeof(cl_char), + (void *)inBuffer.data(), &error); test_error( error, "Unable to create kernel stream" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error ); + streams[1] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numStores * sizeof(cl_uint), offsets.data(), &error); test_error( error, "Unable to create kernel stream" ); streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error ); test_error( error, "Unable to create kernel stream" ); @@ -606,7 +557,7 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue // We need to set the size of the local storage error = clSetKernelArg(kernel, 0, bufferSize, NULL); test_error( error, "clSetKernelArg for buffer failed"); - for( i = 0; i < 3; i++ ) + for (size_t i = 0; i < 3; i++) { error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); test_error( error, "Unable to set kernel argument" ); @@ -615,11 +566,10 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue else { // No local storage - for( i = 0; i < 3; i++ ) + for (size_t i = 0; i < 3; i++) { error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); - if (error) - log_info("%s\n", programSrc); + if (error) log_info("%s\n", programSrc.c_str()); test_error( error, "Unable to set kernel argument" ); } } @@ -654,25 +604,26 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue 
queue error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); test_error( error, "Unable to read results" ); - // Create the reference results - memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) ); - for( i = 0; i < numStores; i++ ) + referenceBuffer.assign(referenceBuffer.size(), 0); + for (size_t i = 0; i < numStores; i++) { - memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize ); + memcpy(&referenceBuffer[((offsets[i] * vecSize) + addressOffset) + * typeSize], + &inBuffer[i * typeSize * vecSize], typeSize * vecSize); } // Validate the results now - char *expected = referenceBuffer; + char *expected = referenceBuffer.data(); char *actual = (char *)(void *)outBuffer; if (DEBUG) { log_info("Memory contents:\n"); - for (i=0; i(device, context, queue, + create_global_store_code, 10240); } - -void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +void create_local_store_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t /*unused*/) { - const char *pattern = - "%s" - "\n" - "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + // clang-format off + const char *pattern[] = { + pragma_str, + "#define LOC_TYPE %s\n" + "#define LOC_VTYPE %s%d\n" + "__kernel void test_fn(__local LOC_VTYPE *sSharedStorage, __global LOC_VTYPE *srcValues, __global uint *offsets, __global LOC_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. 
- " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n" + " sSharedStorage[ offsets[tid] ] = (LOC_VTYPE)(LOC_TYPE)0;\n" " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n" " barrier( CLK_LOCAL_MEM_FENCE );\n" "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local LOC_TYPE *)sSharedStorage ) + alignmentOffset );\n" "\n" // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy @@ -830,20 +748,20 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " int i;\n" - " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " __local LOC_TYPE *sp = (__local LOC_TYPE*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" + " __global LOC_TYPE *dp = (__global LOC_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n" " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n" }; - const char *patternV3 = - "%s" - "\n" - "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n" + const char *patternV3 [] = { + pragma_str, + "#define LOC_TYPE %s\n" + "__kernel void test_fn(__local LOC_TYPE *sSharedStorage, __global LOC_TYPE *srcValues, __global uint *offsets, __global LOC_TYPE *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have 
garbage otherwise. - " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n" + " sSharedStorage[ 3*offsets[tid] ] = (LOC_TYPE)0;\n" " sSharedStorage[ 3*offsets[tid] +1 ] = \n" " sSharedStorage[ 3*offsets[tid] ];\n" " sSharedStorage[ 3*offsets[tid] +2 ] = \n" @@ -865,30 +783,26 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " int i;\n" - " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" + " __local LOC_TYPE *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" + " __global LOC_TYPE *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" " for( i = 0; i < 3; i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n" }; + // clang-format on const char *typeName = get_explicit_type_name(type); if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, - typeName, - typeName, typeName, - typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? 
doubleExtensionPragma : "", - typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, - typeName, typeName, typeName ); + std::string kernel_src = + concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0])); + destBuffer = str_sprintf(kernel_src, typeName); + } + else + { + std::string kernel_src = + concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, typeName, + (int)inVectorSize, (int)inVectorSize); } - // log_info(destBuffer); } int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -897,81 +811,82 @@ int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue cl_ulong localSize; int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); test_error( error, "Unable to get max size of local memory buffer" ); - if( localSize > 10240 ) - localSize = 10240; + if (localSize > 10240) localSize = 10240; if (localSize > 4096) localSize -= 2048; else localSize /= 2; - return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize ); + return test_vset(device, context, queue, + create_local_store_code, (size_t)localSize); } - -void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +void create_private_store_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t /*unused*/) { - const char *pattern = - "%s" + // clang-format off + const char *pattern [] = { + pragma_str, + "#define PRIV_TYPE %s\n" + "#define PRIV_VTYPE %s%d\n" // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test "\n" - "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + "__kernel void test_fn( __global PRIV_VTYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" - " __private %s%d sPrivateStorage[ %d ];\n" - " int tid = get_global_id( 0 );\n" + " __private PRIV_VTYPE sPrivateStorage[ %d ];\n" + " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s%d)(%s)0;\n" + " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n" "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n" "\n" // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " uint i;\n" - " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " __private PRIV_TYPE *sp = (__private PRIV_TYPE*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" + " __global PRIV_TYPE *dp = (__global PRIV_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n" " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; - + "}\n"}; - const char *patternV3 = - "%s" + const char *patternV3 [] = { + pragma_str, + "#define PRIV_TYPE %s\n" + "#define PRIV_VTYPE %s3\n" // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test "\n" - "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n" + "__kernel void test_fn( __global PRIV_TYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" - " __private %s3 sPrivateStorage[ %d ];\n" // keep this %d - " int tid = get_global_id( 0 );\n" + " __private PRIV_VTYPE sPrivateStorage[ %d ];\n" // keep this %d + " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s3)(%s)0;\n" + " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n" "\n" - - " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" - "\n" - // Note: we only copy the relevant portion of our local storage over to the dest buffer, because - // otherwise, local threads would be overwriting results from other local threads + " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n" " uint i;\n" - " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" - " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" + " __private PRIV_TYPE *sp = ((__private PRIV_TYPE*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" + " __global PRIV_TYPE *dp = ((__global PRIV_TYPE*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" " for( i = 0; i < 3; i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n"}; + // clang-format on const char *typeName = get_explicit_type_name(type); if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? 
doubleExtensionPragma : "", - typeName, typeName, - typeName, (int)inBufferSize, - typeName, typeName, - typeName, typeName, typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, typeName, typeName, typeName ); + std::string kernel_src = + concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0])); + destBuffer = + str_sprintf(kernel_src, typeName, typeName, (int)inBufferSize); + } + else + { + std::string kernel_src = + concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0])); + destBuffer = + str_sprintf(kernel_src, typeName, typeName, (int)inVectorSize, + (int)inBufferSize, (int)inVectorSize); } } @@ -979,7 +894,8 @@ int test_vstore_private(cl_device_id device, cl_context context, cl_command_queu { // We have no idea how much actual private storage is available, so just pick a reasonable value, // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vstoreset( device, context, queue, create_private_store_code, 256 ); + return test_vset(device, context, queue, + create_private_store_code, 256); } -- cgit v1.2.3 From 2686b9e2c194b8c68ace95953a77d2f41d513d6f Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 11 Jul 2023 17:52:25 +0200 Subject: Modernization of conversions test (#1719) * Modernization of conversions test, preparation to handle cl_khr_fp16 extension * Added missing virtual descructor * Added corrections due to code review * More separators removed * Fixed clang format * Added multiple corrections related to code review * Corrected missing implicit test lost after modernization corrections * Corrected single, selected test to limit number of unnecessary operations --- .../conversions/basic_test_conversions.cpp | 3306 
++++++++------------ .../conversions/basic_test_conversions.h | 348 ++- .../conversions/conversions_data_info.h | 781 +++++ test_conformance/conversions/fplib.h | 5 + test_conformance/conversions/test_conversions.cpp | 1332 +------- 5 files changed, 2451 insertions(+), 3321 deletions(-) create mode 100644 test_conformance/conversions/conversions_data_info.h diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index dfb32279..43fb449b 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -15,2243 +15,1495 @@ // #include "harness/testHarness.h" #include "harness/compat.h" +#include "harness/ThreadPool.h" -#include "basic_test_conversions.h" -#include -#include +#if defined(__APPLE__) +#include +#include +#endif -#include "harness/mt19937.h" +#if defined(__linux__) +#include +#include +#include +#endif +#if defined(__linux__) +#include +#include +#endif -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) -#include "fplib.h" +#if defined(__MINGW32__) +#include #endif -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) -/* Rounding modes and saturation for use with qcom 64 bit to float conversion library */ - bool qcom_sat; - roundingMode qcom_rm; +#include +#include +#if !defined(_WIN32) +#include +#include #endif +#include -static inline cl_ulong random64( MTdata d ); +#include -#if defined (_WIN32) - #include - #include +#include +#include + +#include "basic_test_conversions.h" + +#if defined(_WIN32) +#include +#include #else // !_WIN32 -#if defined (__SSE__ ) - #include +#if defined(__SSE__) +#include #endif -#if defined (__SSE2__ ) - 
#include +#if defined(__SSE2__) +#include #endif #endif // _WIN32 -const char *gTypeNames[ kTypeCount ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; - -const char *gRoundingModeNames[ kRoundingModeCount ] = { - "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gSaturationNames[ 2 ] = { "", "_sat" }; - -size_t gTypeSizes[ kTypeCount ] = { - sizeof( cl_uchar ), sizeof( cl_char ), - sizeof( cl_ushort ), sizeof( cl_short ), - sizeof( cl_uint ), sizeof( cl_int ), - sizeof( cl_float ), sizeof( cl_double ), - sizeof( cl_ulong ), sizeof( cl_long ), - }; - -long lrintf_clamped( float f ); -long lrintf_clamped( float f ) -{ - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; +cl_context gContext = NULL; +cl_command_queue gQueue = NULL; +int gStartTestNumber = -1; +int gEndTestNumber = 0; +#if defined(__APPLE__) +int gTimeResults = 1; +#else +int gTimeResults = 0; +#endif +int gReportAverageTimes = 0; +void *gIn = NULL; +void *gRef = NULL; +void *gAllowZ = NULL; +void *gOut[kCallStyleCount] = { NULL }; +cl_mem gInBuffer; +cl_mem gOutBuffers[kCallStyleCount]; +size_t gComputeDevices = 0; +uint32_t gDeviceFrequency = 0; +int gWimpyMode = 0; +int gWimpyReductionFactor = 128; +int gSkipTesting = 0; +int gForceFTZ = 0; +int gIsRTZ = 0; +uint32_t gSimdSize = 1; +int gHasDouble = 0; +int gTestDouble = 1; +const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; +int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; +int gMinVectorSize = 0; +int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]); +MTdata gMTdata; +const char **argList = NULL; +int argCount = 0; + + +double SubtractTime(uint64_t endTime, uint64_t startTime); + + +// clang-format off +// for readability sake keep this section unformatted + +std::vector DataInitInfo::specialValuesUInt = { + uint32_t(INT_MIN), uint32_t(INT_MIN + 1), uint32_t(INT_MIN + 2), + uint32_t(-(1 << 30) - 
3), uint32_t(-(1 << 30) - 2), uint32_t(-(1 << 30) - 1), uint32_t(-(1 << 30)), + uint32_t(-(1 << 30) + 1), uint32_t(-(1 << 30) + 2), uint32_t(-(1 << 30) + 3), + uint32_t(-(1 << 24) - 3), uint32_t(-(1 << 24) - 2),uint32_t(-(1 << 24) - 1), + uint32_t(-(1 << 24)), uint32_t(-(1 << 24) + 1), uint32_t(-(1 << 24) + 2), uint32_t(-(1 << 24) + 3), + uint32_t(-(1 << 23) - 3), uint32_t(-(1 << 23) - 2),uint32_t(-(1 << 23) - 1), + uint32_t(-(1 << 23)), uint32_t(-(1 << 23) + 1), uint32_t(-(1 << 23) + 2), uint32_t(-(1 << 23) + 3), + uint32_t(-(1 << 22) - 3), uint32_t(-(1 << 22) - 2),uint32_t(-(1 << 22) - 1), + uint32_t(-(1 << 22)), uint32_t(-(1 << 22) + 1), uint32_t(-(1 << 22) + 2), uint32_t(-(1 << 22) + 3), + uint32_t(-(1 << 21) - 3), uint32_t(-(1 << 21) - 2),uint32_t(-(1 << 21) - 1), + uint32_t(-(1 << 21)), uint32_t(-(1 << 21) + 1), uint32_t(-(1 << 21) + 2), uint32_t(-(1 << 21) + 3), + uint32_t(-(1 << 16) - 3), uint32_t(-(1 << 16) - 2),uint32_t(-(1 << 16) - 1), + uint32_t(-(1 << 16)), uint32_t(-(1 << 16) + 1), uint32_t(-(1 << 16) + 2), uint32_t(-(1 << 16) + 3), + uint32_t(-(1 << 15) - 3), uint32_t(-(1 << 15) - 2),uint32_t(-(1 << 15) - 1), + uint32_t(-(1 << 15)), uint32_t(-(1 << 15) + 1), uint32_t(-(1 << 15) + 2), uint32_t(-(1 << 15) + 3), + uint32_t(-(1 << 8) - 3), uint32_t(-(1 << 8) - 2),uint32_t(-(1 << 8) - 1), + uint32_t(-(1 << 8)), uint32_t(-(1 << 8) + 1), uint32_t(-(1 << 8) + 2), uint32_t(-(1 << 8) + 3), + uint32_t(-(1 << 7) - 3), uint32_t(-(1 << 7) - 2),uint32_t(-(1 << 7) - 1), + uint32_t(-(1 << 7)), uint32_t(-(1 << 7) + 1), uint32_t(-(1 << 7) + 2), uint32_t(-(1 << 7) + 3), + uint32_t(-4), uint32_t(-3), uint32_t(-2), uint32_t(-1), 0, 1, 2, 3, 4, + (1 << 7) - 3,(1 << 7) - 2,(1 << 7) - 1, (1 << 7), (1 << 7) + 1, (1 << 7) + 2, (1 << 7) + 3, + (1 << 8) - 3,(1 << 8) - 2,(1 << 8) - 1, (1 << 8), (1 << 8) + 1, (1 << 8) + 2, (1 << 8) + 3, + (1 << 15) - 3,(1 << 15) - 2,(1 << 15) - 1, (1 << 15), (1 << 15) + 1, (1 << 15) + 2, (1 << 15) + 3, + (1 << 16) - 3,(1 << 16) - 2,(1 << 16) - 1, 
(1 << 16), (1 << 16) + 1, (1 << 16) + 2, (1 << 16) + 3, + (1 << 21) - 3,(1 << 21) - 2,(1 << 21) - 1, (1 << 21), (1 << 21) + 1, (1 << 21) + 2, (1 << 21) + 3, + (1 << 22) - 3,(1 << 22) - 2,(1 << 22) - 1, (1 << 22), (1 << 22) + 1, (1 << 22) + 2, (1 << 22) + 3, + (1 << 23) - 3,(1 << 23) - 2,(1 << 23) - 1, (1 << 23), (1 << 23) + 1, (1 << 23) + 2, (1 << 23) + 3, + (1 << 24) - 3,(1 << 24) - 2,(1 << 24) - 1, (1 << 24), (1 << 24) + 1, (1 << 24) + 2, (1 << 24) + 3, + (1 << 30) - 3,(1 << 30) - 2,(1 << 30) - 1, (1 << 30), (1 << 30) + 1, (1 << 30) + 2, (1 << 30) + 3, + INT_MAX - 3, INT_MAX - 2, INT_MAX - 1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above + UINT_MAX - 3, UINT_MAX - 2, UINT_MAX - 1, UINT_MAX +}; - if( f >= -(float) LONG_MIN ) - return LONG_MAX; +std::vector DataInitInfo::specialValuesFloat = { + -NAN, -INFINITY, -FLT_MAX, + MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), + MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), + MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), + -1000.f, -100.f, -4.0f, -3.5f, -3.0f, + MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, + MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, + MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, + MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, + MAKE_HEX_FLOAT(-0x1.fffffep-3f, 
-0x1fffffeL, -27), MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, + MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), + MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), + MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), + MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), + MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), + MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, +NAN, +INFINITY, +FLT_MAX, + MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), + MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), + MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), + +1000.f, +100.f, +4.0f, +3.5f, +3.0f, + MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), +2.0f, + MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), + MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), + MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), + MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), 
MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), + MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), + MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), + MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), + MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; - if( f <= (float) LONG_MIN ) - return LONG_MIN; +// A table of more difficult cases to get right +std::vector DataInitInfo::specialValuesDouble = { + -NAN, -INFINITY, -DBL_MAX, + MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8), + MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), + MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), + MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, 
-0x1fffffffffffffLL, -22), + -1000., -100., -4.0, -3.5, -3.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), + -DBL_MIN, + MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), + -0.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), 
MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), + MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), + MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), + +1000., +100., +4.0, +3.5, +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), + +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), + +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), + +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), + +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, 
-1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), 
MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), + MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), + MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), + MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), + MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), + MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), + MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), + MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), + MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), + MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), + MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), + MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), +}; +// clang-format on - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) - { - volatile float x = f; - float magicVal = magic[ f < 0 ]; 
- -#if defined( __SSE__ ) || defined (_WIN32) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); + +// Windows (since long double got deprecated) sets the x87 to 53-bit precision +// (that's x87 default state). This causes problems with the tests that +// convert long and ulong to float and double or otherwise deal with values +// that need more precision than 53-bit. So, set the x87 to 64-bit precision. +static inline void Force64BitFPUPrecision(void) +{ +#if __MINGW32__ + // The usual method is to use _controlfp as follows: + // #include + // _controlfp(_PC_64, _MCW_PC); + // + // _controlfp is available on MinGW32 but not on MinGW64. Instead of having + // divergent code just use inline assembly which works for both. + unsigned short int orig_cw = 0; + unsigned short int new_cw = 0; + __asm__ __volatile__("fstcw %0" : "=m"(orig_cw)); + new_cw = orig_cw | 0x0300; // set precision to 64-bit + __asm__ __volatile__("fldcw %0" ::"m"(new_cw)); #else - x += magicVal; - x -= magicVal; + /* Implement for other platforms if needed */ #endif - f = x; +} + + +template +int CalcRefValsPat::check_result(void *test, uint32_t count, + int vectorSize) +{ + const cl_uchar *a = (const cl_uchar *)gAllowZ; + + if (std::is_integral::value) + { // char/uchar/short/ushort/int/uint/long/ulong + const OutType *t = (const OutType *)test; + const OutType *c = (const OutType *)gRef; + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (OutType)0)) + { + size_t s = sizeof(OutType) * 2; + std::stringstream sstr; + sstr << "\nError for vector size %d found at 0x%8.8x: *0x%" + << s << "." << s << "x vs 0x%" << s << "." 
<< s << "x\n"; + vlog(sstr.str().c_str(), vectorSize, i, c[i], t[i]); + return i + 1; + } + } + else if (std::is_same::value) + { + // cast to integral - from original test + const cl_uint *t = (const cl_uint *)test; + const cl_uint *c = (const cl_uint *)gRef; + + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fffffffU) > 0x7f800000U + && (c[i] & 0x7fffffffU) > 0x7f800000U) + && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x80000000U))) + { + vlog( + "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]); + return i + 1; + } + } + else + { + const cl_ulong *t = (const cl_ulong *)test; + const cl_ulong *c = (const cl_ulong *)gRef; + + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL + && (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL) + && !(a[i] != (cl_uchar)0 + && t[i] == (c[i] & 0x8000000000000000ULL))) + { + vlog( + "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]); + return i + 1; + } } - return (long) f; + return 0; } -long long llrintf_clamped( float f ); -long long llrintf_clamped( float f ) + +cl_uint RoundUpToNextPowerOfTwo(cl_uint x) { - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + if (0 == (x & (x - 1))) return x; - if( f >= -(float) LLONG_MIN ) - return LLONG_MAX; + while (x & (x - 1)) x &= x - 1; + + return x + x; +} - if( f <= (float) LLONG_MIN ) - return LLONG_MIN; - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) ) +cl_int CustomConversionsTest::Run() +{ + int startMinVectorSize = gMinVectorSize; + Type inType, outType; + RoundingMode round; + SaturationMode sat; + + for (int i = 0; i < argCount; 
i++) { - volatile float x = f; - float magicVal = magic[ f < 0 ]; -#if defined( __SSE__ ) || defined (_WIN32) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + if (conv_test::GetTestCase(argList[i], &outType, &inType, &sat, &round)) + { + vlog_error("\n\t\t**** ERROR: Unable to parse function name " + "%s. Skipping.... *****\n\n", + argList[i]); + continue; + } + + // skip double if we don't have it + if (!gTestDouble && (inType == kdouble || outType == kdouble)) + { + if (gHasDouble) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + vlog("\t\tcl_khr_fp64 enabled, but double testing turned " + "off.\n"); + } + continue; + } + + // skip longs on embedded + if (!gHasLong + && (inType == klong || outType == klong || inType == kulong + || outType == kulong)) + { + continue; + } + + // Skip the implicit converts if the rounding mode is not default or + // test is saturated + if (0 == startMinVectorSize) + { + if (sat || round != kDefaultRoundingMode) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } + + IterOverSelectedTypes iter(typeIterator, *this, inType, outType, round, + sat); + + iter.Run(); + + if (gFailCount) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + } } - return (long long) f; + return gFailCount; } -long lrint_clamped( double f ); -long lrint_clamped( double f ) + +ConversionsTest::ConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : context(context), device(device), queue(queue), num_elements(0), + typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), 
cl_short(0), + cl_uint(0), cl_int(0), cl_float(0), cl_double(0), + cl_ulong(0), cl_long(0) }) +{} + + +cl_int ConversionsTest::Run() { - static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + IterOverTypes iter(typeIterator, *this); - if( sizeof( long ) > 4 ) + iter.Run(); + + return gFailCount; +} + + +cl_int ConversionsTest::SetUp(int elements) +{ + num_elements = elements; + return CL_SUCCESS; +} + + +template +void ConversionsTest::TestTypesConversion(const Type &inType, + const Type &outType, int &testNumber, + int startMinVectorSize) +{ + SaturationMode sat; + RoundingMode round; + int error; + + // skip longs on embedded + if (!gHasLong + && (inType == klong || outType == klong || inType == kulong + || outType == kulong)) { - if( f >= -(double) LONG_MIN ) - return LONG_MAX; + return; } - else + + for (sat = (SaturationMode)0; sat < kSaturationModeCount; + sat = (SaturationMode)(sat + 1)) { - if( f >= LONG_MAX ) - return LONG_MAX; - } + // skip illegal saturated conversions to float type + if (kSaturated == sat && (outType == kfloat || outType == kdouble)) + { + continue; + } + + for (round = (RoundingMode)0; round < kRoundingModeCount; + round = (RoundingMode)(round + 1)) + { + if (++testNumber < gStartTestNumber) + { + continue; + } + else + { + if (gEndTestNumber > 0 && testNumber >= gEndTestNumber) return; + } - if( f <= (double) LONG_MIN ) - return LONG_MIN; + vlog("%d) Testing convert_%sn%s%s( %sn ):\n", testNumber, + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); - // Round fractional values to integer in round towards nearest mode - if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) - { - volatile double x = f; - double magicVal = magic[ f < 0 ]; -#if defined( __SSE2__ ) || defined (_MSC_VER) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128d v = _mm_set_sd( x ); - __m128d m = _mm_set_sd( magicVal ); 
- v = _mm_add_sd( v, m ); - v = _mm_sub_sd( v, m ); - _mm_store_sd( (double*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; - } + // skip double if we don't have it + if (!gTestDouble && (inType == kdouble || outType == kdouble)) + { + if (gHasDouble) + { + vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " + "FAILED ** \n", + testNumber, gTypeNames[outType], + gSaturationNames[sat], gRoundingModeNames[round], + gTypeNames[inType]); + vlog("\t\tcl_khr_fp64 enabled, but double " + "testing turned off.\n"); + } + continue; + } + + // Skip the implicit converts if the rounding mode is + // not default or test is saturated + if (0 == startMinVectorSize) + { + if (sat || round != kDefaultRoundingMode) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } - return (long) f; + if ((error = DoTest(outType, inType, sat, round))) + { + vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " + "FAILED ** \n", + testNumber, gTypeNames[outType], + gSaturationNames[sat], gRoundingModeNames[round], + gTypeNames[inType]); + } + } + } } -long long llrint_clamped( double f ); -long long llrint_clamped( double f ) + +template +int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, + RoundingMode round) { - static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; +#ifdef __APPLE__ + cl_ulong wall_start = mach_absolute_time(); +#endif + + uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); + cl_uint threads = GetThreadCount(); - if( f >= -(double) LLONG_MIN ) - return LLONG_MAX; + DataInitInfo info = { 0, 0, outType, inType, sat, round, threads }; + DataInfoSpec init_info(info); + WriteInputBufferInfo writeInputBufferInfo; + int vectorSize; + int error = 0; + uint64_t i; - if( f <= (double) LLONG_MIN ) - return LLONG_MIN; + gTestCount++; + size_t blockCount = + BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); + size_t step = blockCount; - // Round fractional values to integer in 
round towards nearest mode - if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) + for (i = 0; i < threads; i++) { - volatile double x = f; - double magicVal = magic[ f < 0 ]; -#if defined( __SSE2__ ) || defined (_MSC_VER) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128d v = _mm_set_sd( x ); - __m128d m = _mm_set_sd( magicVal ); - v = _mm_add_sd( v, m ); - v = _mm_sub_sd( v, m ); - _mm_store_sd( (double*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + init_info.mdv.emplace_back(MTdataHolder(gRandomSeed)); } - return (long long) f; -} - + writeInputBufferInfo.outType = outType; + writeInputBufferInfo.inType = inType; -/* - Names created as: + writeInputBufferInfo.calcInfo.resize(gMaxVectorSize); + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + writeInputBufferInfo.calcInfo[vectorSize].reset( + new CalcRefValsPat()); + writeInputBufferInfo.calcInfo[vectorSize]->program = + conv_test::MakeProgram( + outType, inType, sat, round, vectorSize, + &writeInputBufferInfo.calcInfo[vectorSize]->kernel); + if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->program) + { + gFailCount++; + return -1; + } + if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->kernel) + { + gFailCount++; + vlog_error("\t\tFAILED -- Failed to create kernel.\n"); + return -2; + } - #include + writeInputBufferInfo.calcInfo[vectorSize]->parent = + &writeInputBufferInfo; + writeInputBufferInfo.calcInfo[vectorSize]->vectorSize = vectorSize; + writeInputBufferInfo.calcInfo[vectorSize]->result = -1; + } - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; + if (gSkipTesting) return error; - int main( void ) + // Patch up rounding mode if default is RTZ + // We leave the part above in default rounding mode so that the right kernel + // is compiled. 
+ if (std::is_same::value) { + if (round == kDefaultRoundingMode && gIsRTZ) + init_info.round = round = kRoundTowardZero; + } - int i,j; + // Figure out how many elements are in a work block + // we handle 64-bit types a bit differently. + if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) - { - if( j == i ) - continue; + if (!gWimpyMode && gIsEmbedded) + step = blockCount * EMBEDDED_REDUCTION_FACTOR; - vlog( "void %s2%s( void *, void *);\n", names[i], names[j] ); - } + if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor; + vlog("Testing... "); + fflush(stdout); + for (i = 0; i < (uint64_t)lastCase; i += step) + { + if (0 == (i & ((lastCase >> 3) - 1))) + { + vlog("."); + fflush(stdout); + } - return 0; - } -*/ - -static float my_fabsf( float x ); -static double my_fabs( double x ); - - - -static void uchar2char( void *, void *); -static void uchar2ushort( void *, void *); -static void uchar2short( void *, void *); -static void uchar2uint( void *, void *); -static void uchar2int( void *, void *); -static void uchar2float( void *, void *); -static void uchar2double( void *, void *); -static void uchar2ulong( void *, void *); -static void uchar2long( void *, void *); -static void char2uchar( void *, void *); -static void char2ushort( void *, void *); -static void char2short( void *, void *); -static void char2uint( void *, void *); -static void char2int( void *, void *); -static void char2float( void *, void *); -static void char2double( void *, void *); -static void char2ulong( void *, void *); -static void char2long( void *, void *); -static void ushort2uchar( void *, void *); -static void ushort2char( void *, void *); -static void ushort2short( void *, void *); -static void ushort2uint( void *, void *); -static void ushort2int( void *, void *); -static void ushort2float( void *, void *); -static void 
ushort2double( void *, void *); -static void ushort2ulong( void *, void *); -static void ushort2long( void *, void *); -static void short2uchar( void *, void *); -static void short2char( void *, void *); -static void short2ushort( void *, void *); -static void short2uint( void *, void *); -static void short2int( void *, void *); -static void short2float( void *, void *); -static void short2double( void *, void *); -static void short2ulong( void *, void *); -static void short2long( void *, void *); -static void uint2uchar( void *, void *); -static void uint2char( void *, void *); -static void uint2ushort( void *, void *); -static void uint2short( void *, void *); -static void uint2int( void *, void *); -static void uint2float( void *, void *); -static void uint2double( void *, void *); -static void uint2ulong( void *, void *); -static void uint2long( void *, void *); -static void int2uchar( void *, void *); -static void int2char( void *, void *); -static void int2ushort( void *, void *); -static void int2short( void *, void *); -static void int2uint( void *, void *); -static void int2float( void *, void *); -static void int2double( void *, void *); -static void int2ulong( void *, void *); -static void int2long( void *, void *); -static void float2uchar( void *, void *); -static void float2char( void *, void *); -static void float2ushort( void *, void *); -static void float2short( void *, void *); -static void float2uint( void *, void *); -static void float2int( void *, void *); -static void float2double( void *, void *); -static void float2ulong( void *, void *); -static void float2long( void *, void *); -static void double2uchar( void *, void *); -static void double2char( void *, void *); -static void double2ushort( void *, void *); -static void double2short( void *, void *); -static void double2uint( void *, void *); -static void double2int( void *, void *); -static void double2float( void *, void *); -static void double2ulong( void *, void *); -static void 
double2long( void *, void *); -static void ulong2uchar( void *, void *); -static void ulong2char( void *, void *); -static void ulong2ushort( void *, void *); -static void ulong2short( void *, void *); -static void ulong2uint( void *, void *); -static void ulong2int( void *, void *); -static void ulong2float( void *, void *); -static void ulong2double( void *, void *); -static void ulong2long( void *, void *); -static void long2uchar( void *, void *); -static void long2char( void *, void *); -static void long2ushort( void *, void *); -static void long2short( void *, void *); -static void long2uint( void *, void *); -static void long2int( void *, void *); -static void long2float( void *, void *); -static void long2double( void *, void *); -static void long2ulong( void *, void *); - -/* - Conversion list created as - - #include - - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - - int main( void ) - { + cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); + writeInputBufferInfo.count = count; - int i,j; + // Crate a user event to represent the status of the reference value + // computation completion + writeInputBufferInfo.calcReferenceValues = + clCreateUserEvent(gContext, &error); + if (error || NULL == writeInputBufferInfo.calcReferenceValues) + { + vlog_error("ERROR: Unable to create user event. (%d)\n", error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + // retain for consumption by MapOutputBufferComplete + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) { - vlog( "{ " ); - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + if ((error = + clRetainEvent(writeInputBufferInfo.calcReferenceValues))) { - if( j == i ) - vlog( " NULL, " ); - else - { - char s[64]; - sprintf( s, "%s2%s,", names[j], names[i] ); - vlog( "%15s ", s ); - } + vlog_error("ERROR: Unable to retain user event. 
(%d)\n", error); + gFailCount++; + return error; } - vlog( "},\n" ); } - return 0; - } - - */ -/* -Convert gConversions[kTypeCount][kTypeCount] = { -{ NULL, char2uchar, ushort2uchar, short2uchar, uint2uchar, int2uchar, float2uchar, double2uchar, ulong2uchar, long2uchar, }, -{ uchar2char, NULL, ushort2char, short2char, uint2char, int2char, float2char, double2char, ulong2char, long2char, }, -{ uchar2ushort, char2ushort, NULL, short2ushort, uint2ushort, int2ushort, float2ushort, double2ushort, ulong2ushort, long2ushort, }, -{ uchar2short, char2short, ushort2short, NULL, uint2short, int2short, float2short, double2short, ulong2short, long2short, }, -{ uchar2uint, char2uint, ushort2uint, short2uint, NULL, int2uint, float2uint, double2uint, ulong2uint, long2uint, }, -{ uchar2int, char2int, ushort2int, short2int, uint2int, NULL, float2int, double2int, ulong2int, long2int, }, -{ uchar2float, char2float, ushort2float, short2float, uint2float, int2float, NULL, double2float, ulong2float, long2float, }, -{ uchar2double, char2double, ushort2double, short2double, uint2double, int2double, float2double, NULL, ulong2double, long2double, }, -{ uchar2ulong, char2ulong, ushort2ulong, short2ulong, uint2ulong, int2ulong, float2ulong, double2ulong, NULL, long2ulong, }, -{ uchar2long, char2long, ushort2long, short2long, uint2long, int2long, float2long, double2long, ulong2long, NULL, } }; -*/ - -static void uchar2char_sat( void *, void *); -static void uchar2ushort_sat( void *, void *); -static void uchar2short_sat( void *, void *); -static void uchar2uint_sat( void *, void *); -static void uchar2int_sat( void *, void *); -static void uchar2float_sat( void *, void *); -static void uchar2double_sat( void *, void *); -static void uchar2ulong_sat( void *, void *); -static void uchar2long_sat( void *, void *); -static void char2uchar_sat( void *, void *); -static void char2ushort_sat( void *, void *); -static void char2short_sat( void *, void *); -static void char2uint_sat( void *, void *); 
-static void char2int_sat( void *, void *); -static void char2float_sat( void *, void *); -static void char2double_sat( void *, void *); -static void char2ulong_sat( void *, void *); -static void char2long_sat( void *, void *); -static void ushort2uchar_sat( void *, void *); -static void ushort2char_sat( void *, void *); -static void ushort2short_sat( void *, void *); -static void ushort2uint_sat( void *, void *); -static void ushort2int_sat( void *, void *); -static void ushort2float_sat( void *, void *); -static void ushort2double_sat( void *, void *); -static void ushort2ulong_sat( void *, void *); -static void ushort2long_sat( void *, void *); -static void short2uchar_sat( void *, void *); -static void short2char_sat( void *, void *); -static void short2ushort_sat( void *, void *); -static void short2uint_sat( void *, void *); -static void short2int_sat( void *, void *); -static void short2float_sat( void *, void *); -static void short2double_sat( void *, void *); -static void short2ulong_sat( void *, void *); -static void short2long_sat( void *, void *); -static void uint2uchar_sat( void *, void *); -static void uint2char_sat( void *, void *); -static void uint2ushort_sat( void *, void *); -static void uint2short_sat( void *, void *); -static void uint2int_sat( void *, void *); -static void uint2float_sat( void *, void *); -static void uint2double_sat( void *, void *); -static void uint2ulong_sat( void *, void *); -static void uint2long_sat( void *, void *); -static void int2uchar_sat( void *, void *); -static void int2char_sat( void *, void *); -static void int2ushort_sat( void *, void *); -static void int2short_sat( void *, void *); -static void int2uint_sat( void *, void *); -static void int2float_sat( void *, void *); -static void int2double_sat( void *, void *); -static void int2ulong_sat( void *, void *); -static void int2long_sat( void *, void *); -static void float2uchar_sat( void *, void *); -static void float2char_sat( void *, void *); -static void 
float2ushort_sat( void *, void *); -static void float2short_sat( void *, void *); -static void float2uint_sat( void *, void *); -static void float2int_sat( void *, void *); -static void float2double_sat( void *, void *); -static void float2ulong_sat( void *, void *); -static void float2long_sat( void *, void *); -static void double2uchar_sat( void *, void *); -static void double2char_sat( void *, void *); -static void double2ushort_sat( void *, void *); -static void double2short_sat( void *, void *); -static void double2uint_sat( void *, void *); -static void double2int_sat( void *, void *); -static void double2float_sat( void *, void *); -static void double2ulong_sat( void *, void *); -static void double2long_sat( void *, void *); -static void ulong2uchar_sat( void *, void *); -static void ulong2char_sat( void *, void *); -static void ulong2ushort_sat( void *, void *); -static void ulong2short_sat( void *, void *); -static void ulong2uint_sat( void *, void *); -static void ulong2int_sat( void *, void *); -static void ulong2float_sat( void *, void *); -static void ulong2double_sat( void *, void *); -static void ulong2long_sat( void *, void *); -static void long2uchar_sat( void *, void *); -static void long2char_sat( void *, void *); -static void long2ushort_sat( void *, void *); -static void long2short_sat( void *, void *); -static void long2uint_sat( void *, void *); -static void long2int_sat( void *, void *); -static void long2float_sat( void *, void *); -static void long2double_sat( void *, void *); -static void long2ulong_sat( void *, void *); -/* - #include - - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - - int main( void ) - { + // Crate a user event to represent when the callbacks are done verifying + // correctness + writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error); + if (error || NULL == writeInputBufferInfo.doneBarrier) + { + vlog_error("ERROR: Unable to create 
user event for barrier. (%d)\n", + error); + gFailCount++; + return error; + } - int i,j; + // retain for use by the callback that calls this + if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier))) + { + vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n", + error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + // Call this in a multithreaded manner + cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2; + init_info.start = i; + init_info.size = count / chunks; + if (init_info.size < 16384) { - vlog( "{ " ); - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + chunks = RoundUpToNextPowerOfTwo(threads); + init_info.size = count / chunks; + if (init_info.size < 16384) { - if( j == i ) - vlog( " NULL, " ); - else - { - char s[64]; - sprintf( s, "%s2%s_sat,", names[j], names[i] ); - vlog( "%18s ", s ); - } + init_info.size = count; + chunks = 1; } - vlog( "},\n" ); } - return 0; - } + ThreadPool_Do(conv_test::InitData, chunks, &init_info); -Convert gSaturatedConversions[kTypeCount][kTypeCount] = { -{ NULL, char2uchar_sat, ushort2uchar_sat, short2uchar_sat, uint2uchar_sat, int2uchar_sat, float2uchar_sat, double2uchar_sat, ulong2uchar_sat, long2uchar_sat, }, -{ uchar2char_sat, NULL, ushort2char_sat, short2char_sat, uint2char_sat, int2char_sat, float2char_sat, double2char_sat, ulong2char_sat, long2char_sat, }, -{ uchar2ushort_sat, char2ushort_sat, NULL, short2ushort_sat, uint2ushort_sat, int2ushort_sat, float2ushort_sat, double2ushort_sat, ulong2ushort_sat, long2ushort_sat, }, -{ uchar2short_sat, char2short_sat, ushort2short_sat, NULL, uint2short_sat, int2short_sat, float2short_sat, double2short_sat, ulong2short_sat, long2short_sat, }, -{ uchar2uint_sat, char2uint_sat, ushort2uint_sat, short2uint_sat, NULL, int2uint_sat, float2uint_sat, double2uint_sat, ulong2uint_sat, long2uint_sat, }, -{ uchar2int_sat, char2int_sat, ushort2int_sat, short2int_sat, uint2int_sat, NULL, float2int_sat, 
double2int_sat, ulong2int_sat, long2int_sat, }, -{ uchar2float_sat, char2float_sat, ushort2float_sat, short2float_sat, uint2float_sat, int2float_sat, NULL, double2float_sat, ulong2float_sat, long2float_sat, }, -{ uchar2double_sat, char2double_sat, ushort2double_sat, short2double_sat, uint2double_sat, int2double_sat, float2double_sat, NULL, ulong2double_sat, long2double_sat, }, -{ uchar2ulong_sat, char2ulong_sat, ushort2ulong_sat, short2ulong_sat, uint2ulong_sat, int2ulong_sat, float2ulong_sat, double2ulong_sat, NULL, long2ulong_sat, }, -{ uchar2long_sat, char2long_sat, ushort2long_sat, short2long_sat, uint2long_sat, int2long_sat, float2long_sat, double2long_sat, ulong2long_sat, NULL, } -}; -*/ + // Copy the results to the device + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, + count * gTypeSizes[inType], gIn, 0, + NULL, NULL))) + { + vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error); + gFailCount++; + return error; + } -/* - #include + // Call completion callback for the write, which will enqueue the rest + // of the work. 
+ conv_test::WriteInputBufferComplete((void *)&writeInputBufferInfo); - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - const char *types[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "llong" }; + // Make sure the work is actually running, so we don't deadlock + if ((error = clFlush(gQueue))) + { + vlog_error("clFlush failed with error %d\n", error); + gFailCount++; + return error; + } - int main( void ) - { + ThreadPool_Do(conv_test::PrepareReference, chunks, &init_info); + + // signal we are done calculating the reference results + if ((error = clSetUserEventStatus( + writeInputBufferInfo.calcReferenceValues, CL_COMPLETE))) + { + vlog_error( + "Error: Failed to set user event status to CL_COMPLETE: %d\n", + error); + gFailCount++; + return error; + } - int i,j; + // Wait for the event callbacks to finish verifying correctness. + if ((error = clWaitForEvents( + 1, (cl_event *)&writeInputBufferInfo.doneBarrier))) + { + vlog_error("Error: Failed to wait for barrier: %d\n", error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) - { - if( j == i ) - continue; + if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues))) + { + vlog_error("Error: Failed to release calcReferenceValues: %d\n", + error); + gFailCount++; + return error; + } - switch( i ) + if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier))) + { + vlog_error("Error: Failed to release done barrier: %d\n", error); + gFailCount++; + return error; + } + + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) + { + if ((error = writeInputBufferInfo.calcInfo[vectorSize]->result)) + { + switch (inType) { - case 6: //float - if( j == 7 ) - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], 
types[j], types[i] ); - else - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) my_rintf(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + case kuchar: + case kchar: + vlog("Input value: 0x%2.2x ", + ((unsigned char *)gIn)[error - 1]); + break; + case kushort: + case kshort: + vlog("Input value: 0x%4.4x ", + ((unsigned short *)gIn)[error - 1]); + break; + case kuint: + case kint: + vlog("Input value: 0x%8.8x ", + ((unsigned int *)gIn)[error - 1]); break; - case 7: //double - if( j == 6 ) - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); - else - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) rint(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + case kfloat: + vlog("Input value: %a ", ((float *)gIn)[error - 1]); + break; + case kulong: + case klong: + vlog("Input value: 0x%16.16llx ", + ((unsigned long long *)gIn)[error - 1]); + break; + case kdouble: + vlog("Input value: %a ", ((double *)gIn)[error - 1]); break; default: - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) - ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); + vlog_error("Internal error at %s: %d\n", __FILE__, + __LINE__); + abort(); break; } - } + // tell the user which conversion it was. 
+ if (0 == vectorSize) + vlog(" (implicit scalar conversion from %s to %s)\n", + gTypeNames[inType], gTypeNames[outType]); + else + vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], + sizeNames[vectorSize], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType], + sizeNames[vectorSize]); - return 0; + gFailCount++; + return error; + } + } } -*/ -float my_fabsf( float x ) -{ - union{ cl_uint u; float f; }u; - u.f = x; - u.u &= 0x7fffffff; - return u.f; -} + log_info("done.\n"); -double my_fabs( double x ) -{ - union{ cl_ulong u; double f; }u; - u.f = x; - u.u &= 0x7fffffffffffffffULL; - return u.f; -} + if (gTimeResults) + { + // Kick off tests for the various vector lengths + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) + { + size_t workItemCount = blockCount / vectorSizes[vectorSize]; + if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4) + workItemCount /= + 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]); + + double sum = 0.0; + double bestTime = INFINITY; + cl_uint k; + for (k = 0; k < PERF_LOOP_COUNT; k++) + { + uint64_t startTime = conv_test::GetTime(); + if ((error = conv_test::RunKernel( + writeInputBufferInfo.calcInfo[vectorSize]->kernel, + gInBuffer, gOutBuffers[vectorSize], workItemCount))) + { + gFailCount++; + return error; + } -static float my_rintf( float f ); -static float my_rintf( float f ) -{ - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + // Make sure OpenCL is done + if ((error = clFinish(gQueue))) + { + vlog_error("Error %d at clFinish\n", error); + return error; + } - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) - { - volatile float x = f; - float magicVal = magic[ f < 0 ]; - -#if defined( __SSE__ ) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( 
magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + uint64_t endTime = conv_test::GetTime(); + double time = SubtractTime(endTime, startTime); + sum += time; + if (time < bestTime) bestTime = time; + } + + if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double)gDeviceFrequency + * gComputeDevices * gSimdSize * 1e6 + / (workItemCount * vectorSizes[vectorSize]); + if (0 == vectorSize) + vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", + "implicit convert %s -> %s", gTypeNames[inType], + gTypeNames[outType]); + else + vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", + "convert_%s%s%s%s( %s%s )", gTypeNames[outType], + sizeNames[vectorSize], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType], + sizeNames[vectorSize]); + } } - return f; -} + if (gWimpyMode) + vlog("\tWimp pass"); + else + vlog("\tpassed"); -static void uchar2char( void *out, void *in){ ((char*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2short( void *out, void *in){ ((short*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2int( void *out, void *in){ ((int*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2float( void *out, void *in) -{ - cl_uchar l = ((cl_uchar*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uchar2double( void *out, void *in) -{ - cl_uchar l = ((cl_uchar*) in)[0]; - ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uchar2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } -static void char2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_char*) in)[0]; } -static void char2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_char*) in)[0]; } -static void char2short( void *out, void *in){ ((short*) out)[0] = ((cl_char*) in)[0]; } -static void char2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_char*) in)[0]; } -static void char2int( void *out, void *in){ ((int*) out)[0] = ((cl_char*) in)[0]; } -static void char2float( void *out, void *in) -{ - cl_char l = ((cl_char*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void char2double( void *out, void *in) -{ - cl_char l = ((cl_char*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void char2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_char*) in)[0]; } -static void char2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } -static void ushort2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2char( void *out, void *in){ ((char*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2short( void *out, void *in){ ((short*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2int( void *out, void *in){ ((int*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2float( void *out, void *in) -{ - cl_ushort l = ((cl_ushort*) in)[0]; - ((float*) out)[0] = (l == 0 ? 
0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void ushort2double( void *out, void *in) -{ - cl_ushort l = ((cl_ushort*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void ushort2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } -static void short2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_short*) in)[0]; } -static void short2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_short*) in)[0]; } -static void short2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_short*) in)[0]; } -static void short2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_short*) in)[0]; } -static void short2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } -static void short2float( void *out, void *in) -{ - cl_short l = ((cl_short*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void short2double( void *out, void *in) -{ - cl_short l = ((cl_short*) in)[0]; - ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void short2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_short*) in)[0]; } -static void short2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } -static void uint2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2short( void *out, void *in){ ((short*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2float( void *out, void *in) -{ - // Use volatile to prevent optimization by Clang compiler - volatile cl_uint l = ((cl_uint *)in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uint2double( void *out, void *in) -{ - cl_uint l = ((cl_uint*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uint2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } -static void int2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_int*) in)[0]; } -static void int2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_int*) in)[0]; } -static void int2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_int*) in)[0]; } -static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) in)[0]; } -static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; } -static void int2float( void *out, void *in) -{ - // Use volatile to prevent optimization by Clang compiler - volatile cl_int l = ((cl_int *)in)[0]; - ((float*) out)[0] = (l == 0 ? 
0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void int2double( void *out, void *in) -{ - cl_int l = ((cl_int*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void int2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_int*) in)[0]; } -static void int2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } -static void float2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2char( void *out, void *in){ ((cl_char*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2short( void *out, void *in){ ((cl_short*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2uint( void *out, void *in){ ((cl_uint*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2int( void *out, void *in){ ((cl_int*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2double( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } -static void float2ulong( void *out, void *in) -{ -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) - // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. - // However, fistp stores it as a signed int, and some of the test values won't - // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 - // is that these end up silently (at least by default settings) clamped to - // the max lowest ulong. 
- cl_float x = my_rintf(((cl_float *)in)[0]); - if (x >= 9223372036854775808.0f) { - x -= 9223372036854775808.0f; - ((cl_ulong*) out)[0] = x; - ((cl_ulong*) out)[0] += 9223372036854775808ULL; - } else { - ((cl_ulong*) out)[0] = x; - } -#else - ((cl_ulong*) out)[0] = my_rintf(((cl_float*) in)[0]); +#ifdef __APPLE__ + // record the run time + vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start)); #endif -} + vlog("\n\n"); + fflush(stdout); -static void float2long( void *out, void *in){ ((cl_long*) out)[0] = llrint_clamped( ((cl_float*) in)[0] ); } -static void double2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2char( void *out, void *in){ ((cl_char*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2short( void *out, void *in){ ((cl_short*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) rint(((cl_double*) in)[0]); } -static void double2int( void *out, void *in){ ((cl_int*) out)[0] = (int) rint(((cl_double*) in)[0]); } -static void double2float( void *out, void *in){ ((cl_float*) out)[0] = (float) ((cl_double*) in)[0]; } -static void double2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = (cl_ulong) rint(((cl_double*) in)[0]); } -static void double2long( void *out, void *in){ ((cl_long*) out)[0] = (cl_long) rint(((cl_double*) in)[0]); } -static void ulong2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_ulong*) in)[0]; } -static void ulong2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_ulong*) in)[0]; } -static void ulong2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_ulong*) in)[0]; } -static void ulong2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short)((cl_ulong*) in)[0]; } -static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = 
(cl_uint) ((cl_ulong*) in)[0]; } -static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; } -static void ulong2float( void *out, void *in) -{ -#if defined(_MSC_VER) && defined(_M_X64) - cl_ulong l = ((cl_ulong*) in)[0]; - float result; - cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; - _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); - ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result)); -#else - cl_ulong l = ((cl_ulong*) in)[0]; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point emulation - * code despite which -mfloat-abi setting it is. But the emulation code in - * libgcc.a has only one rounding mode (round to nearest even in this case) - * and ignores the user rounding mode setting in hardware. - * As a result setting rounding modes in hardware won't give correct - * rounding results for type covert from 64-bit integer to float using GCC - * for ARM compiler so for testing different rounding modes, we need to use - * alternative reference function. ARM64 does have an instruction, however - * we cannot guarantee the compiler will use it. On all ARM architechures - * use emulation to calculate reference.*/ - ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm); -#else - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -#endif + return error; } -static void ulong2double( void *out, void *in) -{ -#if defined(_MSC_VER) - cl_ulong l = ((cl_ulong*) in)[0]; - double result; - cl_long sl = ((cl_long)l < 0) ? 
(cl_long)((l >> 1) | (l & 1)) : (cl_long)l; -#if defined(_M_X64) - _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl)); -#else - result = sl; +#if !defined(__APPLE__) +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); #endif - ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result)); -#else - // Use volatile to prevent optimization by Clang compiler - volatile cl_ulong l = ((cl_ulong *)in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -} -static void ulong2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ulong*) in)[0]; } -static void long2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_long*) in)[0]; } -static void long2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_long*) in)[0]; } -static void long2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_long*) in)[0]; } -static void long2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short) ((cl_long*) in)[0]; } -static void long2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_long*) in)[0]; } -static void long2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_long*) in)[0]; } -static void long2float( void *out, void *in) -{ -#if defined(_MSC_VER) && defined(_M_X64) - cl_long l = ((cl_long*) in)[0]; - float result; - _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l)); - ((float*) out)[0] = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#else - cl_long l = ((cl_long*) in)[0]; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point emulation - * code despite which -mfloat-abi setting it is. 
But the emulation code in - * libgcc.a has only one rounding mode (round to nearest even in this case) - * and ignores the user rounding mode setting in hardware. - * As a result setting rounding modes in hardware won't give correct - * rounding results for type covert from 64-bit integer to float using GCC - * for ARM compiler so for testing different rounding modes, we need to use - * alternative reference function. ARM64 does have an instruction, however - * we cannot guarantee the compiler will use it. On all ARM architechures - * use emulation to calculate reference.*/ - ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm)); +#if defined(_MSC_VER) +/* function is defined in "compat.h" */ #else - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -#endif -} -static void long2double( void *out, void *in) +double SubtractTime(uint64_t endTime, uint64_t startTime) { -#if defined(_MSC_VER) && defined(_M_X64) - cl_long l = ((cl_long*) in)[0]; - double result; + uint64_t diff = endTime - startTime; + static double conversion = 0.0; - _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l)); - ((double*) out)[0] = (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 + if (0.0 == conversion) + { +#if defined(__APPLE__) + mach_timebase_info_data_t info = { 0, 0 }; + kern_return_t err = mach_timebase_info(&info); + if (0 == err) + conversion = 1e-9 * (double)info.numer / (double)info.denom; #else - cl_long l = ((cl_long*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 + // This function consumes output from GetTime() above, and converts the + // time to secionds. +#warning need accurate ticks to seconds conversion factor here. Times are invalid. #endif -} -static void long2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_long*) in)[0]; } - -#define CLAMP( _lo, _x, _hi ) ( (_x) < (_lo) ? 
(_lo) : ((_x) > (_hi) ? (_hi) : (_x))) - -// Done by hand -static void uchar2char_sat( void *out, void *in){ cl_uchar c = ((cl_uchar*) in)[0]; ((cl_char*) out)[0] = c > 0x7f ? 0x7f : c; } -static void uchar2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uchar*) in)[0]); } // my_fabs workaround for -static void uchar2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uchar*) in)[0]); } // my_fabs workaround for -static void uchar2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } -static void char2uchar_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uchar*) out)[0] = c < 0 ? 0 : c; } -static void char2ushort_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ushort*) out)[0] = c < 0 ? 0 : c; } -static void char2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_char*) in)[0]; } -static void char2uint_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uint*) out)[0] = c < 0 ? 0 : c; } -static void char2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_char*) in)[0]; } -static void char2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_char*) in)[0]; } -static void char2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_char*) in)[0]; } -static void char2ulong_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ulong*) out)[0] = c < 0 ? 
0 : c; } -static void char2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } -static void ushort2uchar_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_uchar*) out)[0] = u > 0xff ? 0xFF : u; } -static void ushort2char_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_char*) out)[0] = u > 0x7f ? 0x7F : u; } -static void ushort2short_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_short*) out)[0] = u > 0x7fff ? 0x7fFF : u; } -static void ushort2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf((cl_float)((cl_ushort*) in)[0]); } // my_fabs workaround for -static void ushort2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_ushort*) in)[0]); } // my_fabs workaround for -static void ushort2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } -static void short2uchar_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, s, CL_UCHAR_MAX ); } -static void short2char_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, s, CL_CHAR_MAX ); } -static void short2ushort_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ushort*) out)[0] = s < 0 ? 0 : s; } -static void short2uint_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uint*) out)[0] = s < 0 ? 
0 : s; } -static void short2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } -static void short2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_short*) in)[0]; } -static void short2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_short*) in)[0]; } -static void short2ulong_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ulong*) out)[0] = s < 0 ? 0 : s; } -static void short2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } -static void uint2uchar_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX); } -static void uint2char_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } -static void uint2ushort_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX); } -static void uint2short_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX); } -static void uint2int_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_int*) out)[0] = CLAMP( 0, u, CL_INT_MAX); } -static void uint2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uint*) in)[0] ); } // my_fabs workaround for -static void uint2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uint*) in)[0]); } // my_fabs workaround for -static void uint2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } -static void int2uchar_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, i, CL_UCHAR_MAX); } -static void int2char_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, i, CL_CHAR_MAX); } -static void int2ushort_sat( void *out, void 
*in){ cl_int i = ((cl_int*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, i, CL_USHRT_MAX); } -static void int2short_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, i, CL_SHRT_MAX); } -static void int2uint_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uint*) out)[0] = CLAMP( 0, i, CL_INT_MAX); } -static void int2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_int*) in)[0]; } -static void int2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_int*) in)[0]; } -static void int2ulong_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ulong*) out)[0] = i < 0 ? 0 : i; } -static void int2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } -static void float2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_UCHAR_MAX ); } -static void float2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_CHAR_MAX); } -static void float2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_USHRT_MAX ); } -static void float2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_SHRT_MAX ); } -static void float2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrintf_clamped(((cl_float*) in)[0]), CL_UINT_MAX ); } -static void float2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_INT_MAX ); } -static void float2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } -static void float2ulong_sat( void *out, void *in) -{ -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) - // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. 
- // However, fistp stores it as a signed int, and some of the test values won't - // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 - // is that these end up silently (at least by default settings) clamped to - // the max lowest ulong. - cl_float x = my_rintf(((cl_float *)in)[0]); - if (x >= 18446744073709551616.0f) { // 2^64 - ((cl_ulong*) out)[0] = 0xFFFFFFFFFFFFFFFFULL; - } else if (x < 0) { - ((cl_ulong*) out)[0] = 0; - } else if (x >= 9223372036854775808.0f) { // 2^63 - x -= 9223372036854775808.0f; - ((cl_ulong*) out)[0] = x; - ((cl_ulong*) out)[0] += 9223372036854775808ULL; - } else { - ((cl_ulong*) out)[0] = x; } -#else - float f = my_rintf(((float*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; -#endif + + // strictly speaking we should also be subtracting out timer latency here + return conversion * (double)diff; } -// The final cast used to be (cl_ulong) f, but on Linux (RHEL5 at least) -// if f = -1.0f, then (cl_ulong) f = 0xffffffff, which clearly isn't right. -// Switching it to (cl_long) f seems to fix that. -static void float2long_sat( void *out, void *in){ float f = my_rintf(((float*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 
0x8000000000000000LL : (cl_long) f; } -static void double2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_UCHAR_MAX ); } -static void double2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrint_clamped(((cl_double*) in)[0]), CL_CHAR_MAX); } -static void double2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_USHRT_MAX ); } -static void double2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_SHRT_MAX ); } -static void double2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrint_clamped(((cl_double*) in)[0]), CL_UINT_MAX ); } -static void double2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_INT_MAX ); } -static void double2float_sat( void *out, void *in){ ((cl_float*) out)[0] = (cl_float) ((double*) in)[0]; } -static void double2ulong_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; } -static void double2long_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 
0x8000000000000000LL : (cl_long) f; } -static void ulong2uchar_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } -static void ulong2char_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } -static void ulong2ushort_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } -static void ulong2short_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX ); } -static void ulong2uint_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); } -static void ulong2int_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_int*) out)[0] = (cl_int) CLAMP( 0, u, CL_INT_MAX ); } -static void ulong2float_sat( void *out, void *in){ ((float*) out)[0] = my_fabsf((float) ((cl_ulong*) in)[0]); } // my_fabs workaround for -static void ulong2double_sat( void *out, void *in){ ((double*) out)[0] = my_fabs( ((cl_ulong*) in)[0]); } // my_fabs workaround for -static void ulong2long_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_long*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } -static void long2uchar_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } -static void long2char_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, u, CL_CHAR_MAX ); } -static void long2ushort_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } -static void long2short_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, u, CL_SHRT_MAX ); } -static void long2uint_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX 
); } -static void long2int_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_int*) out)[0] = (int) CLAMP( CL_INT_MIN, u, CL_INT_MAX ); } -static void long2float_sat( void *out, void *in){ ((float*) out)[0] = (float) ((cl_long*) in)[0]; } -static void long2double_sat( void *out, void *in){ ((double*) out)[0] = ((cl_long*) in)[0]; } -static void long2ulong_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ulong*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } - -/* -#include - -char *ground[] = { "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gTypeNames[ ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; - - -int main( void ) +#endif + +//////////////////////////////////////////////////////////////////////////////// + +static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) { - int i, j; + cl_uint i; + for (i = 0; i < count; ++i) + allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); +} - for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) - for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) - { - vlog( "float clampf_%s%s( float );\n", gTypeNames[i], ground[j] ); - vlog( "double clampd_%s%s( double );\n", gTypeNames[i], ground[j] ); - } - return 0; +void MapResultValuesComplete(const std::unique_ptr &ptr); -} -*/ - - -float clampf_uchar( float ); -double clampd_uchar( double ); -float clampf_uchar_rte( float ); -double clampd_uchar_rte( double ); -float clampf_uchar_rtp( float ); -double clampd_uchar_rtp( double ); -float clampf_uchar_rtn( float ); -double clampd_uchar_rtn( double ); -float clampf_uchar_rtz( float ); -double clampd_uchar_rtz( double ); -float clampf_char( float ); -double clampd_char( double ); -float clampf_char_rte( float ); -double clampd_char_rte( double ); -float clampf_char_rtp( float ); -double clampd_char_rtp( double ); -float clampf_char_rtn( float ); -double clampd_char_rtn( double ); -float clampf_char_rtz( float 
); -double clampd_char_rtz( double ); -float clampf_ushort( float ); -double clampd_ushort( double ); -float clampf_ushort_rte( float ); -double clampd_ushort_rte( double ); -float clampf_ushort_rtp( float ); -double clampd_ushort_rtp( double ); -float clampf_ushort_rtn( float ); -double clampd_ushort_rtn( double ); -float clampf_ushort_rtz( float ); -double clampd_ushort_rtz( double ); -float clampf_short( float ); -double clampd_short( double ); -float clampf_short_rte( float ); -double clampd_short_rte( double ); -float clampf_short_rtp( float ); -double clampd_short_rtp( double ); -float clampf_short_rtn( float ); -double clampd_short_rtn( double ); -float clampf_short_rtz( float ); -double clampd_short_rtz( double ); -float clampf_uint( float ); -double clampd_uint( double ); -float clampf_uint_rte( float ); -double clampd_uint_rte( double ); -float clampf_uint_rtp( float ); -double clampd_uint_rtp( double ); -float clampf_uint_rtn( float ); -double clampd_uint_rtn( double ); -float clampf_uint_rtz( float ); -double clampd_uint_rtz( double ); -float clampf_int( float ); -double clampd_int( double ); -float clampf_int_rte( float ); -double clampd_int_rte( double ); -float clampf_int_rtp( float ); -double clampd_int_rtp( double ); -float clampf_int_rtn( float ); -double clampd_int_rtn( double ); -float clampf_int_rtz( float ); -double clampd_int_rtz( double ); -float clampf_float( float ); -double clampd_float( double ); -float clampf_float_rte( float ); -double clampd_float_rte( double ); -float clampf_float_rtp( float ); -double clampd_float_rtp( double ); -float clampf_float_rtn( float ); -double clampd_float_rtn( double ); -float clampf_float_rtz( float ); -double clampd_float_rtz( double ); -float clampf_double( float ); -double clampd_double( double ); -float clampf_double_rte( float ); -double clampd_double_rte( double ); -float clampf_double_rtp( float ); -double clampd_double_rtp( double ); -float clampf_double_rtn( float ); -double clampd_double_rtn( 
double ); -float clampf_double_rtz( float ); -double clampd_double_rtz( double ); -float clampf_ulong( float ); -double clampd_ulong( double ); -float clampf_ulong_rte( float ); -double clampd_ulong_rte( double ); -float clampf_ulong_rtp( float ); -double clampd_ulong_rtp( double ); -float clampf_ulong_rtn( float ); -double clampd_ulong_rtn( double ); -float clampf_ulong_rtz( float ); -double clampd_ulong_rtz( double ); -float clampf_long( float ); -double clampd_long( double ); -float clampf_long_rte( float ); -double clampd_long_rte( double ); -float clampf_long_rtp( float ); -double clampd_long_rtp( double ); -float clampf_long_rtn( float ); -double clampd_long_rtn( double ); -float clampf_long_rtz( float ); -double clampd_long_rtz( double ); - -/* -#include - -char *ground[] = { "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gTypeNames[ ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; - - -int main( void ) -{ - int i, j; +void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, + void *data); - for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) +// Note: May be called reentrantly +void MapResultValuesComplete(const std::unique_ptr &info) +{ + cl_int status; + // CalcRefValsBase *info = (CalcRefValsBase *)data; + cl_event calcReferenceValues = info->parent->calcReferenceValues; + + // we know that the map is done, wait for the main thread to finish + // calculating the reference values + if ((status = + clSetEventCallback(calcReferenceValues, CL_COMPLETE, + CalcReferenceValuesComplete, (void *)&info))) { - vlog( "{\t" ); - for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) - vlog( "clampf_%s%s,\t", gTypeNames[i], ground[j] ); + vlog_error("ERROR: clSetEventCallback failed in " + "MapResultValuesComplete with status: %d\n", + status); + gFailCount++; // not thread safe -- being lazy here + } - vlog( "\t},\n" ); + // this thread no longer needs 
its reference to info->calcReferenceValues, + // so release it + if ((status = clReleaseEvent(calcReferenceValues))) + { + vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed " + "with status: %d\n", + status); + gFailCount++; // not thread safe -- being lazy here } - return 0; + // no need to flush since we didn't enqueue anything + // e was already released by WriteInputBufferComplete. It should be + // destroyed automatically soon after we exit. } -*/ -clampf gClampFloat[ kTypeCount ][kRoundingModeCount] = { - { clampf_uchar, clampf_uchar_rte, clampf_uchar_rtp, clampf_uchar_rtn, clampf_uchar_rtz, }, - { clampf_char, clampf_char_rte, clampf_char_rtp, clampf_char_rtn, clampf_char_rtz, }, - { clampf_ushort, clampf_ushort_rte, clampf_ushort_rtp, clampf_ushort_rtn, clampf_ushort_rtz, }, - { clampf_short, clampf_short_rte, clampf_short_rtp, clampf_short_rtn, clampf_short_rtz, }, - { clampf_uint, clampf_uint_rte, clampf_uint_rtp, clampf_uint_rtn, clampf_uint_rtz, }, - { clampf_int, clampf_int_rte, clampf_int_rtp, clampf_int_rtn, clampf_int_rtz, }, - { clampf_float, clampf_float_rte, clampf_float_rtp, clampf_float_rtn, clampf_float_rtz, }, - { clampf_double, clampf_double_rte, clampf_double_rtp, clampf_double_rtn, clampf_double_rtz, }, - { clampf_ulong, clampf_ulong_rte, clampf_ulong_rtp, clampf_ulong_rtn, clampf_ulong_rtz, }, - { clampf_long, clampf_long_rte, clampf_long_rtp, clampf_long_rtn, clampf_long_rtz, } -}; - -clampd gClampDouble[ kTypeCount ][kRoundingModeCount] = { - { clampd_uchar, clampd_uchar_rte, clampd_uchar_rtp, clampd_uchar_rtn, clampd_uchar_rtz, }, - { clampd_char, clampd_char_rte, clampd_char_rtp, clampd_char_rtn, clampd_char_rtz, }, - { clampd_ushort, clampd_ushort_rte, clampd_ushort_rtp, clampd_ushort_rtn, clampd_ushort_rtz, }, - { clampd_short, clampd_short_rte, clampd_short_rtp, clampd_short_rtn, clampd_short_rtz, }, - { clampd_uint, clampd_uint_rte, clampd_uint_rtp, clampd_uint_rtn, clampd_uint_rtz, }, - { clampd_int, 
clampd_int_rte, clampd_int_rtp, clampd_int_rtn, clampd_int_rtz, }, - { clampd_float, clampd_float_rte, clampd_float_rtp, clampd_float_rtn, clampd_float_rtz, }, - { clampd_double, clampd_double_rte, clampd_double_rtp, clampd_double_rtn, clampd_double_rtz, }, - { clampd_ulong, clampd_ulong_rte, clampd_ulong_rtp, clampd_ulong_rtn, clampd_ulong_rtz, }, - { clampd_long, clampd_long_rte, clampd_long_rtp, clampd_long_rtn, clampd_long_rtz, } -}; -#if defined (_WIN32) -#define __attribute__(X) -#endif -static inline float fclamp( float lo, float v, float hi ) __attribute__ ((always_inline)); -static inline double dclamp( double lo, double v, double hi ) __attribute__ ((always_inline)); - -static inline float fclamp( float lo, float v, float hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; } -static inline double dclamp( double lo, double v, double hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; } - -// Clamp unsaturated inputs into range so we don't get test errors: -float clampf_uchar( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); } -double clampd_uchar( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); } -float clampf_uchar_rte( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); } -double clampd_uchar_rte( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); } -float clampf_uchar_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 255.0f ); } -double clampd_uchar_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 255.0 ); } -float clampf_uchar_rtn( float f ) { return fclamp( -0.0f, f, 256.0f - 128.0f * FLT_EPSILON); } -double clampd_uchar_rtn( double f ) { return dclamp( -0.0, f, 256.0 - 128.0 * DBL_EPSILON); } -float clampf_uchar_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 256.0f - 128.0f * FLT_EPSILON); } -double clampd_uchar_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 256.0 - 128.0f * DBL_EPSILON); } - -float clampf_char( float f ) { 
return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); } -double clampd_char( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); } -float clampf_char_rte( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); } -double clampd_char_rte( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); } -float clampf_char_rtp( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 127.f ); } -double clampd_char_rtp( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 127. ); } -float clampf_char_rtn( float f ) { return fclamp( -128.0f, f, 128.f - 64.0f*FLT_EPSILON ); } -double clampd_char_rtn( double f ) { return dclamp( -128.0, f, 128. - 64.0*DBL_EPSILON ); } -float clampf_char_rtz( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 128.f - 64.0f*FLT_EPSILON ); } -double clampd_char_rtz( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 128. - 64.0*DBL_EPSILON ); } - -float clampf_ushort( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); } -double clampd_ushort( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); } -float clampf_ushort_rte( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); } -double clampd_ushort_rte( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); } -float clampf_ushort_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65535.0f ); } -double clampd_ushort_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65535.0 ); } -float clampf_ushort_rtn( float f ) { return fclamp( -0.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); } -double clampd_ushort_rtn( double f ) { return dclamp( -0.0, f, 65536.0 - 32768.0 * DBL_EPSILON); } -float clampf_ushort_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); } -double clampd_ushort_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65536.0 - 32768.0f * DBL_EPSILON); } - -float clampf_short( 
float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); } -double clampd_short( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); } -float clampf_short_rte( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); } -double clampd_short_rte( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); } -float clampf_short_rtp( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32767.f ); } -double clampd_short_rtp( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32767. ); } -float clampf_short_rtn( float f ) { return fclamp( -32768.0f, f, 32768.f - 16384.0f*FLT_EPSILON ); } -double clampd_short_rtn( double f ) { return dclamp( -32768.0, f, 32768. - 16384.0*DBL_EPSILON ); } -float clampf_short_rtz( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32768.f - 16384.0f*FLT_EPSILON ); } -double clampd_short_rtz( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32768. 
- 16384.0*DBL_EPSILON ); } - -float clampf_uint( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } -float clampf_uint_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint_rte( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } -float clampf_uint_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, CL_UINT_MAX ); } -float clampf_uint_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } -double clampd_uint_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) ); } -float clampf_uint_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } -double clampd_uint_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)); } - -float clampf_int( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rte( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rte( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rtp( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtp( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 
0x1LL, 31), f, CL_INT_MAX ); } -float clampf_int_rtn( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtn( double f ) { return dclamp( INT_MIN, f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rtz( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtz( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } - -float clampf_float( float f ){ return f; } -double clampd_float( double f ){ return f; } -float clampf_float_rte( float f ){ return f; } -double clampd_float_rte( double f ){ return f; } -float clampf_float_rtp( float f ){ return f; } -double clampd_float_rtp( double f ){ return f; } -float clampf_float_rtn( float f ){ return f; } -double clampd_float_rtn( double f ){ return f; } -float clampf_float_rtz( float f ){ return f; } -double clampd_float_rtz( double f ){ return f; } - -float clampf_double( float f ){ return f; } -double clampd_double( double f ){ return f; } -float clampf_double_rte( float f ){ return f; } -double clampd_double_rte( double f ){ return f; } -float clampf_double_rtp( float f ){ return f; } -double clampd_double_rtp( double f ){ return f; } -float clampf_double_rtn( float f ){ return f; } -double clampd_double_rtn( double f ){ return f; } -float clampf_double_rtz( float f ){ return f; } -double clampd_double_rtz( double f ){ return f; } - -float clampf_ulong( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rte( double f ) { return dclamp( -0.5, f, 
MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } - -float clampf_long( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rte( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rte( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtp( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtp( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtn( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtn( double f ) { return dclamp( 
MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtz( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtz( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } - -#pragma mark - - -int alwaysPass( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int alwaysFail( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_uchar( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_char( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_ushort( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_short( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_uint( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_int( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_ulong( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_long( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_float( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_double( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); - -void init_uchar( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_char( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_ushort( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_short( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_uint( void *dest, 
SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_int( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_float( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_double( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_ulong( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_long( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); - -InitDataFunc gInitFunctions[ kTypeCount ] = { - init_uchar, init_char, - init_ushort, init_short, - init_uint, init_int, - init_float, init_double, - init_ulong, init_long - }; - - -CheckResults gCheckResults[ kTypeCount ] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_double, check_ulong, check_long - }; -#if !defined (__APPLE__) -#define UNUSED -#else -#define UNUSED __attribute__((unused)) -#endif +void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, + void *data) +{ + std::unique_ptr &info = + *(std::unique_ptr *)data; + + cl_uint vectorSize = info->vectorSize; + cl_uint count = info->parent->count; + Type outType = + info->parent->outType; // the data type of the conversion result + Type inType = info->parent->inType; // the data type of the conversion input + size_t j; + cl_int error; + cl_event doneBarrier = info->parent->doneBarrier; + + // report spurious error condition + if (CL_SUCCESS != status) + { + vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! 
(%d)\n", + status); + gFailCount++; // lazy about thread safety here + return; + } -int alwaysPass( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize){ return 0; } -int alwaysFail( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize ){ return -1; } + // Now we know that both results have been mapped back from the device, and + // the main thread is done calculating the reference results. It is now time + // to check the results. -int check_uchar( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_uchar *t = (const cl_uchar*)test; - const cl_uchar *c = (const cl_uchar*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + // verify results + void *mapped = info->p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uchar)0)) + // Patch up NaNs conversions to integer to zero -- these can be converted to + // any integer + if (outType != kfloat && outType != kdouble) + { + if (inType == kfloat) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + float *inp = (float *)gIn; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)mapped + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } } - - return 0; -} - -int check_char( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_char *t = (const cl_char*)test; - const cl_char *c = (const cl_char*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_char)0)) + if (inType == kdouble) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + double *inp = (double *)gIn; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + 
memset((char *)mapped + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } } + } + else if (inType == kfloat || inType == kdouble) + { // outtype and intype is float or double. NaN conversions for float <-> + // double can be any NaN + if (inType == kfloat && outType == kdouble) + { + float *inp = (float *)gIn; + double *outp = (double *)mapped; + for (j = 0; j < count; j++) + { + if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; + } + } + if (inType == kdouble && outType == kfloat) + { + double *inp = (double *)gIn; + float *outp = (float *)mapped; + for (j = 0; j < count; j++) + { + if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; + } + } + } - return 0; -} + if (memcmp(mapped, gRef, count * gTypeSizes[outType])) + info->result = + info->check_result(mapped, count, vectorSizes[vectorSize]); + else + info->result = 0; -int check_ushort( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ushort *t = (const cl_ushort*)test; - const cl_ushort *c = (const cl_ushort*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + // Fill the output buffer with junk and release it + { + cl_uint pattern = 0xffffdead; + memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]); + if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize], + mapped, 0, NULL, NULL))) + { + vlog_error("ERROR: clEnqueueUnmapMemObject failed in " + "CalcReferenceValuesComplete (%d)\n", + error); + gFailCount++; + } + } - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ushort)0)) + if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1)) + { + if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + vlog_error("ERROR: clSetUserEventStatus failed in " + "CalcReferenceValuesComplete (err: %d). 
We're probably " + "going to deadlock.\n", + status); + gFailCount++; + return; } - return 0; + if ((status = clReleaseEvent(doneBarrier))) + { + vlog_error("ERROR: clReleaseEvent failed in " + "CalcReferenceValuesComplete (err: %d).\n", + status); + gFailCount++; + return; + } + } + // e was already released by WriteInputBufferComplete. It should be + // destroyed automatically soon after all the calls to + // CalcReferenceValuesComplete exit. } -int check_short( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_short *t = (const cl_short*)test; - const cl_short *c = (const cl_short*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; +// - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_short)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } +namespace conv_test { - return 0; -} +//////////////////////////////////////////////////////////////////////////////// -int check_uint( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) { - const cl_uint *t = (const cl_uint*)test; - const cl_uint *c = (const cl_uint*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + DataInitBase *info = (DataInitBase *)p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uint)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } + info->init(job_id, thread_id); - return 0; + return CL_SUCCESS; } -int check_int( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +//////////////////////////////////////////////////////////////////////////////// + +cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) { - const cl_int *t = (const cl_int*)test; - const cl_int 
*c = (const cl_int*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + DataInitBase *info = (DataInitBase *)p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_int)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } + cl_uint count = info->size; + Type inType = info->inType; + Type outType = info->outType; + RoundingMode round = info->round; + size_t j; - return 0; -} + Force64BitFPUPrecision(); -int check_ulong( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ulong *t = (const cl_ulong*)test; - const cl_ulong *c = (const cl_ulong*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType]; + void *a = (cl_uchar *)gAllowZ + job_id * count; + void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ulong)0)) + + if (outType != inType) + { + // create the reference while we wait +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) + /* ARM VFP doesn't have hardware instruction for converting from 64-bit + * integer to float types, hence GCC ARM uses the floating-point + * emulation code despite which -mfloat-abi setting it is. But the + * emulation code in libgcc.a has only one rounding mode (round to + * nearest even in this case) and ignores the user rounding mode setting + * in hardware. As a result setting rounding modes in hardware won't + * give correct rounding results for type covert from 64-bit integer to + * float using GCC for ARM compiler so for testing different rounding + * modes, we need to use alternative reference function. ARM64 does have + * an instruction, however we cannot guarantee the compiler will use it. 
+ * On all ARM architechures use emulation to calculate reference.*/ + switch (round) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); - return i + 1; + /* conversions to floating-point type use the current rounding mode. + * The only default floating-point rounding mode supported is round + * to nearest even i.e the current rounding mode will be _rte for + * floating-point types. */ + case kDefaultRoundingMode: qcom_rm = qcomRTE; break; + case kRoundToNearestEven: qcom_rm = qcomRTE; break; + case kRoundUp: qcom_rm = qcomRTP; break; + case kRoundDown: qcom_rm = qcomRTN; break; + case kRoundTowardZero: qcom_rm = qcomRTZ; break; + default: + vlog_error("ERROR: undefined rounding mode %d\n", round); + break; } + qcom_sat = info->sat; +#endif - return 0; -} + RoundingMode oldRound = set_round(round, outType); -int check_long( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_long *t = (const cl_long*)test; - const cl_long *c = (const cl_long*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + if (info->sat) + info->conv_array_sat(d, s, count); + else + info->conv_array(d, s, count); - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_long)0)) + set_round(oldRound, outType); + + // Decide if we allow a zero result in addition to the correctly rounded + // one + memset(a, 0, count); + if (gForceFTZ) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); - return i + 1; + if (inType == kfloat || outType == kfloat) + setAllowZ((uint8_t *)a, (uint32_t *)s, count); } + } + else + { + // Copy the input to the reference + memcpy(d, s, info->size * gTypeSizes[inType]); + } - return 0; -} - -int check_float( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_uint *t = (const cl_uint*)test; - const cl_uint *c = (const 
cl_uint*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if (t[i] != c[i] && - // Allow nan's to be binary different - !((t[i] & 0x7fffffffU) > 0x7f800000U && - (c[i] & 0x7fffffffU) > 0x7f800000U) && - !(a[i] != (cl_uchar)0 && - t[i] == (c[i] & 0x80000000U))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", - vectorSize, i, ((float*)correct)[i], ((float*)test)[i] ); - return i + 1; + // Patch up NaNs conversions to integer to zero -- these can be converted to + // any integer + if (info->outType != kfloat && info->outType != kdouble) + { + if (inType == kfloat) + { + float *inp = (float *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)d + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } + } + if (inType == kdouble) + { + double *inp = (double *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)d + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } + } + } + else if (inType == kfloat || inType == kdouble) + { // outtype and intype is float or double. 
NaN conversions for float <-> + // double can be any NaN + if (inType == kfloat && outType == kdouble) + { + float *inp = (float *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) ((double *)d)[j] = NAN; + } } + if (inType == kdouble && outType == kfloat) + { + double *inp = (double *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) ((float *)d)[j] = NAN; + } + } + } - return 0; + return CL_SUCCESS; } -int check_double( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ulong *t = (const cl_ulong*)test; - const cl_ulong *c = (const cl_ulong*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if (t[i] != c[i] && - // Allow nan's to be binary different - !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL && - (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL) && - !(a[i] != (cl_uchar)0 && - t[i] == (c[i] & 0x8000000000000000ULL))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", - vectorSize, i, ((double*)correct)[i], ((double*)test)[i] ); - return i + 1; - } +//////////////////////////////////////////////////////////////////////////////// +uint64_t GetTime(void) +{ +#if defined(__APPLE__) + return mach_absolute_time(); +#elif defined(_MSC_VER) + return ReadTime(); +#else + // mach_absolute_time is a high precision timer with precision < 1 + // microsecond. +#warning need accurate clock here. Times are invalid. 
return 0; +#endif } +//////////////////////////////////////////////////////////////////////////////// -void init_uchar( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) +// Note: not called reentrantly +void WriteInputBufferComplete(void *data) { - cl_uchar *o = (cl_uchar *)out; - int i; + cl_int status; + WriteInputBufferInfo *info = (WriteInputBufferInfo *)data; + cl_uint count = info->count; + int vectorSize; - for( i = 0; i < count; i++ ) - o[i] = start++; -} + info->barrierCount = gMaxVectorSize - gMinVectorSize; -void init_char( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) -{ - char *o = (char *)out; - int i; + // now that we know that the write buffer is complete, enqueue callbacks to + // wait for the main thread to finish calculating the reference results. + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + size_t workItemCount = + (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]); - for( i = 0; i < count; i++ ) - o[i] = start++; -} + if ((status = conv_test::RunKernel(info->calcInfo[vectorSize]->kernel, + gInBuffer, gOutBuffers[vectorSize], + workItemCount))) + { + gFailCount++; + return; + } -void init_ushort( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) -{ - cl_ushort *o = (cl_ushort *)out; - int i; + info->calcInfo[vectorSize]->p = clEnqueueMapBuffer( + gQueue, gOutBuffers[vectorSize], CL_TRUE, + CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0, + NULL, NULL, &status); + { + if (status) + { + vlog_error("ERROR: WriteInputBufferComplete calback failed " + "with status: %d\n", + status); + gFailCount++; + return; + } + } + } - for( i = 0; i < count; i++ ) - o[i] = start++; -} + for (vectorSize = gMinVectorSize; vectorSize < 
gMaxVectorSize; vectorSize++) + { + MapResultValuesComplete(info->calcInfo[vectorSize]); + } -void init_short( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, UNUSED Type destType, uint64_t start, int count, MTdata UNUSED d ) -{ - short *o = (short *)out; - int i; + // Make sure the work starts moving -- otherwise we may deadlock + if ((status = clFlush(gQueue))) + { + vlog_error( + "ERROR: WriteInputBufferComplete calback failed with status: %d\n", + status); + gFailCount++; + return; + } - for( i = 0; i < count; i++ ) - o[i] = start++; + // e was already released by the main thread. It should be destroyed + // automatically soon after we exit. } -void init_uint( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) +//////////////////////////////////////////////////////////////////////////////// + +cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, + RoundingMode round, int vectorSize, cl_kernel *outKernel) { - static const unsigned int specialValuesUInt[] = { - INT_MIN, INT_MIN + 1, INT_MIN + 2, - -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, - -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, - -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, - -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, - -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, - -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, - -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, - -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, - -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, - -4, -3, -2, -1, 0, 1, 2, 3, 4, - (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, - (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, 
(1<<8)+2, (1<<8)+3, - (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, - (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, - (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, - (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, - (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, - (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, - (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, - INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above - UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX - }; - - cl_uint *o = (cl_uint *)out; - int i; + cl_program program; + char testName[256]; + int error = 0; - for( i = 0; i < count; i++) { - if( gIsEmbedded ) - o[i] = (cl_uint) genrand_int32(d); - else - o[i] = (cl_uint)i + start; - } + std::ostringstream source; + if (outType == kdouble || inType == kdouble) + source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - if( 0 == start ) + // Create the program. This is a bit complicated because we are trying to + // avoid byte and short stores. + if (0 == vectorSize) { - size_t tableSize = sizeof( specialValuesUInt ); - if( sizeof( cl_uint) * count < tableSize ) - tableSize = sizeof( cl_uint) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesUInt, tableSize ); + // Create the type names. 
+ char inName[32]; + char outName[32]; + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + sprintf(testName, "test_implicit_%s_%s", outName, inName); + + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = src[i];\n"; + source << "}\n"; + + vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], + gTypeNames[outType]); + fflush(stdout); } -} + else + { + int vectorSizetmp = vectorSizes[vectorSize]; -void init_int( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) -{ - static const unsigned int specialValuesInt[] = { - INT_MIN, INT_MIN + 1, INT_MIN + 2, - -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, - -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, - -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, - -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, - -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, - -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, - -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, - -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, - -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, - -4, -3, -2, -1, 0, 1, 2, 3, 4, - (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, - (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3, - (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, - (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, - (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, - 
(1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, - (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, - (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, - (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, - INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above - UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX - }; - - int *o = (int *)out; - int i; + // Create the type names. + char convertString[128]; + char inName[32]; + char outName[32]; + switch (vectorSizetmp) + { + case 1: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", + outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + case 3: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), + "convert_%s3%s%s", outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s3", convertString, inName); + vlog("Building %s( %s3 ) test\n", convertString, inName); + break; + default: + snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], + vectorSizetmp); + snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], + vectorSizetmp); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", + outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + } + fflush(stdout); - for( i = 0; i < count; i++ ) { - if( gIsEmbedded ) { - o[i] = (int) genrand_int32(d); - } - else { - o[i] = (int) i + start; - } + if (vectorSizetmp == 
3) + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " if( i + 1 < get_global_size(0))\n"; + source << " vstore3( " << convertString + << "( vload3( i, src)), i, dest );\n"; + source << " else\n"; + source << " {\n"; + source << " " << inName << "3 in;\n"; + source << " " << outName << "3 out;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " in.y = src[3*i+1];\n"; + source << " in.x = src[3*i];\n"; + source << " out = " << convertString << "( in ); \n"; + source << " dest[3*i] = out.x;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " dest[3*i+1] = out.y;\n"; + source << " }\n"; + source << "}\n"; + } + else + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = " << convertString << "( src[i] );\n"; + source << "}\n"; + } } + *outKernel = NULL; - if( 0 == start ) + const char *flags = NULL; + if (gForceFTZ) flags = "-cl-denorms-are-zero"; + + // build it + std::string sourceString = source.str(); + const char *programSource = sourceString.c_str(); + error = create_single_kernel_helper(gContext, &program, outKernel, 1, + &programSource, testName, flags); + if (error) { - size_t tableSize = sizeof( specialValuesInt ); - if( sizeof( int) * count < tableSize ) - tableSize = sizeof( int) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesInt, tableSize ); + vlog_error("Failed to build kernel/program (err = %d).\n", error); + return NULL; } + + return program; } -void init_float( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) +// + +int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount) { - static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, 
MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), 
MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f - }; - - cl_uint *o = (cl_uint *)out; - int i; + // The global dimensions are just the blockCount to execute since we haven't + // set up multiple queues for multiple devices. 
+ int error; - for( i = 0; i < count; i++ ) { - if( gIsEmbedded ) - o[i] = (cl_uint) genrand_int32(d); - else - o[i] = (cl_uint) i + start; - } + error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf); + error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf); - if( 0 == start ) + if (error) { - size_t tableSize = sizeof( specialValuesFloat ); - if( sizeof( float) * count < tableSize ) - tableSize = sizeof( float) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesFloat, tableSize ); + vlog_error("FAILED -- could not set kernel args (%d)\n", error); + return error; } - if( kUnsaturated == sat ) + if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, + NULL, 0, NULL, NULL))) { - clampf func = gClampFloat[ destType ][round]; - float *f = (float *)out; - - for( i = 0; i < count; i++ ) - f[i] = func( f[i] ); + vlog_error("FAILED -- could not execute kernel (%d)\n", error); + return error; } -} - -// used to convert a bucket of bits into a search pattern through double -static inline double DoubleFromUInt32( uint32_t bits ); -static inline double DoubleFromUInt32( uint32_t bits ) -{ - union{ uint64_t u; double d;} u; - // split 0x89abcdef to 0x89abc00000000def - u.u = bits & 0xfffU; - u.u |= (uint64_t) (bits & ~0xfffU) << 32; - - // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s - u.u -= (bits & 0x800U) << 1; - - // return result - return u.d; + return 0; } -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8), - MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), MAKE_HEX_DOUBLE(-0x1.80000000000001p63, 
-0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), - MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), - MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, 
-1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), - MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, 
MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, - - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), 
MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), - MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), - MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), - 
MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), - MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), -}; - -void init_double( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata UNUSED d ) +int GetTestCase(const char *name, Type *outType, Type *inType, + SaturationMode *sat, RoundingMode *round) { - double *o = (double*)out; int i; - for( i = 0; i < count; i++ ) - { - uint64_t z = i + start; - o[i] = DoubleFromUInt32( (uint32_t) z ^ (uint32_t) (z >> 32)); - } + // Find the return type + for (i = 0; i < kTypeCount; i++) + if (name == strstr(name, gTypeNames[i])) + { + *outType = (Type)i; + name += strlen(gTypeNames[i]); - if( 0 == start ) - { - size_t tableSize = sizeof( specialValuesDouble ); - if( sizeof( cl_double) * count < tableSize ) - tableSize = sizeof( cl_double) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesDouble, tableSize ); - } + break; + } - if( 0 == sat ) - { - clampd func = gClampDouble[ destType ][round]; + if (i == kTypeCount) return -1; - for( i = 0; i < count; i++ ) - o[i] = func( o[i] ); - } -} + // Check to see if _sat appears next + *sat = (SaturationMode)0; + for (i = 1; i < kSaturationModeCount; i++) + if (name == strstr(name, gSaturationNames[i])) + { + *sat = (SaturationMode)i; + name += strlen(gSaturationNames[i]); + break; + } -cl_ulong random64( MTdata d ) -{ - return (cl_ulong) 
genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); -} + *round = (RoundingMode)0; + for (i = 1; i < kRoundingModeCount; i++) + if (name == strstr(name, gRoundingModeNames[i])) + { + *round = (RoundingMode)i; + name += strlen(gRoundingModeNames[i]); + break; + } -void init_ulong( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) -{ - cl_ulong *o = (cl_ulong *)out; - cl_ulong i, j, k; + if (*name != '_') return -2; + name++; - i = 0; - if( start == 0 ) - { - //Try various powers of two - for( j = 0; j < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[j] = (cl_ulong) 1 << j; - i = j; - - // try the complement of those - for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[i++] = ~((cl_ulong) 1 << j); - - //Try various negative powers of two - for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[i++] = (cl_ulong) 0xFFFFFFFFFFFFFFFEULL << j; - - //try various powers of two plus 1, shifted by various amounts - for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) - for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) - o[i++] = (((cl_ulong) 1 << j) + 1) << k; - - //try various powers of two minus 1 - for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) - for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) - o[i++] = (((cl_ulong) 1 << j) - 1) << k; - - // Other patterns - cl_ulong pattern[] = { 0x3333333333333333ULL, 0x5555555555555555ULL, 0x9999999999999999ULL, 0x6666666666666666ULL, 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL }; - cl_ulong mask[] = { 0xffffffffffffffffULL, 0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL, 0xffffffff00000000ULL }; - for( j = 0; i < (cl_ulong) count && j < sizeof(pattern) / sizeof( pattern[0]); j++ ) - for( k = 0; i + 2 <= (cl_ulong) count && k < sizeof(mask) / sizeof( mask[0]); k++ ) - { - o[i++] = pattern[j] & mask[k]; - o[i++] = pattern[j] & ~mask[k]; 
- } - } + for (i = 0; i < kTypeCount; i++) + if (name == strstr(name, gTypeNames[i])) + { + *inType = (Type)i; + name += strlen(gTypeNames[i]); - for( ; i < (cl_ulong) count; i++ ) - o[i] = random64(d); -} + break; + } -void init_long( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) -{ - init_ulong( out, sat, round, destType, start, count, d ); -} + if (i == kTypeCount) return -3; -// ====== - -void uchar2uchar_many( void *out, void *in, size_t n); -void uchar2uchar_sat_many( void *out, void *in, size_t n); -void char2uchar_many( void *out, void *in, size_t n); -void char2uchar_sat_many( void *out, void *in, size_t n); -void ushort2uchar_many( void *out, void *in, size_t n); -void ushort2uchar_sat_many( void *out, void *in, size_t n); -void short2uchar_many( void *out, void *in, size_t n); -void short2uchar_sat_many( void *out, void *in, size_t n); -void uint2uchar_many( void *out, void *in, size_t n); -void uint2uchar_sat_many( void *out, void *in, size_t n); -void int2uchar_many( void *out, void *in, size_t n); -void int2uchar_sat_many( void *out, void *in, size_t n); -void float2uchar_many( void *out, void *in, size_t n); -void float2uchar_sat_many( void *out, void *in, size_t n); -void double2uchar_many( void *out, void *in, size_t n); -void double2uchar_sat_many( void *out, void *in, size_t n); -void ulong2uchar_many( void *out, void *in, size_t n); -void ulong2uchar_sat_many( void *out, void *in, size_t n); -void long2uchar_many( void *out, void *in, size_t n); -void long2uchar_sat_many( void *out, void *in, size_t n); -void uchar2char_many( void *out, void *in, size_t n); -void uchar2char_sat_many( void *out, void *in, size_t n); -void char2char_many( void *out, void *in, size_t n); -void char2char_sat_many( void *out, void *in, size_t n); -void ushort2char_many( void *out, void *in, size_t n); -void ushort2char_sat_many( void *out, void *in, size_t n); -void short2char_many( void *out, void *in, size_t 
n); -void short2char_sat_many( void *out, void *in, size_t n); -void uint2char_many( void *out, void *in, size_t n); -void uint2char_sat_many( void *out, void *in, size_t n); -void int2char_many( void *out, void *in, size_t n); -void int2char_sat_many( void *out, void *in, size_t n); -void float2char_many( void *out, void *in, size_t n); -void float2char_sat_many( void *out, void *in, size_t n); -void double2char_many( void *out, void *in, size_t n); -void double2char_sat_many( void *out, void *in, size_t n); -void ulong2char_many( void *out, void *in, size_t n); -void ulong2char_sat_many( void *out, void *in, size_t n); -void long2char_many( void *out, void *in, size_t n); -void long2char_sat_many( void *out, void *in, size_t n); -void uchar2ushort_many( void *out, void *in, size_t n); -void uchar2ushort_sat_many( void *out, void *in, size_t n); -void char2ushort_many( void *out, void *in, size_t n); -void char2ushort_sat_many( void *out, void *in, size_t n); -void ushort2ushort_many( void *out, void *in, size_t n); -void ushort2ushort_sat_many( void *out, void *in, size_t n); -void short2ushort_many( void *out, void *in, size_t n); -void short2ushort_sat_many( void *out, void *in, size_t n); -void uint2ushort_many( void *out, void *in, size_t n); -void uint2ushort_sat_many( void *out, void *in, size_t n); -void int2ushort_many( void *out, void *in, size_t n); -void int2ushort_sat_many( void *out, void *in, size_t n); -void float2ushort_many( void *out, void *in, size_t n); -void float2ushort_sat_many( void *out, void *in, size_t n); -void double2ushort_many( void *out, void *in, size_t n); -void double2ushort_sat_many( void *out, void *in, size_t n); -void ulong2ushort_many( void *out, void *in, size_t n); -void ulong2ushort_sat_many( void *out, void *in, size_t n); -void long2ushort_many( void *out, void *in, size_t n); -void long2ushort_sat_many( void *out, void *in, size_t n); -void uchar2short_many( void *out, void *in, size_t n); -void uchar2short_sat_many( 
void *out, void *in, size_t n); -void char2short_many( void *out, void *in, size_t n); -void char2short_sat_many( void *out, void *in, size_t n); -void ushort2short_many( void *out, void *in, size_t n); -void ushort2short_sat_many( void *out, void *in, size_t n); -void short2short_many( void *out, void *in, size_t n); -void short2short_sat_many( void *out, void *in, size_t n); -void uint2short_many( void *out, void *in, size_t n); -void uint2short_sat_many( void *out, void *in, size_t n); -void int2short_many( void *out, void *in, size_t n); -void int2short_sat_many( void *out, void *in, size_t n); -void float2short_many( void *out, void *in, size_t n); -void float2short_sat_many( void *out, void *in, size_t n); -void double2short_many( void *out, void *in, size_t n); -void double2short_sat_many( void *out, void *in, size_t n); -void ulong2short_many( void *out, void *in, size_t n); -void ulong2short_sat_many( void *out, void *in, size_t n); -void long2short_many( void *out, void *in, size_t n); -void long2short_sat_many( void *out, void *in, size_t n); -void uchar2uint_many( void *out, void *in, size_t n); -void uchar2uint_sat_many( void *out, void *in, size_t n); -void char2uint_many( void *out, void *in, size_t n); -void char2uint_sat_many( void *out, void *in, size_t n); -void ushort2uint_many( void *out, void *in, size_t n); -void ushort2uint_sat_many( void *out, void *in, size_t n); -void short2uint_many( void *out, void *in, size_t n); -void short2uint_sat_many( void *out, void *in, size_t n); -void uint2uint_many( void *out, void *in, size_t n); -void uint2uint_sat_many( void *out, void *in, size_t n); -void int2uint_many( void *out, void *in, size_t n); -void int2uint_sat_many( void *out, void *in, size_t n); -void float2uint_many( void *out, void *in, size_t n); -void float2uint_sat_many( void *out, void *in, size_t n); -void double2uint_many( void *out, void *in, size_t n); -void double2uint_sat_many( void *out, void *in, size_t n); -void 
ulong2uint_many( void *out, void *in, size_t n); -void ulong2uint_sat_many( void *out, void *in, size_t n); -void long2uint_many( void *out, void *in, size_t n); -void long2uint_sat_many( void *out, void *in, size_t n); -void uchar2int_many( void *out, void *in, size_t n); -void uchar2int_sat_many( void *out, void *in, size_t n); -void char2int_many( void *out, void *in, size_t n); -void char2int_sat_many( void *out, void *in, size_t n); -void ushort2int_many( void *out, void *in, size_t n); -void ushort2int_sat_many( void *out, void *in, size_t n); -void short2int_many( void *out, void *in, size_t n); -void short2int_sat_many( void *out, void *in, size_t n); -void uint2int_many( void *out, void *in, size_t n); -void uint2int_sat_many( void *out, void *in, size_t n); -void int2int_many( void *out, void *in, size_t n); -void int2int_sat_many( void *out, void *in, size_t n); -void float2int_many( void *out, void *in, size_t n); -void float2int_sat_many( void *out, void *in, size_t n); -void double2int_many( void *out, void *in, size_t n); -void double2int_sat_many( void *out, void *in, size_t n); -void ulong2int_many( void *out, void *in, size_t n); -void ulong2int_sat_many( void *out, void *in, size_t n); -void long2int_many( void *out, void *in, size_t n); -void long2int_sat_many( void *out, void *in, size_t n); -void uchar2float_many( void *out, void *in, size_t n); -void uchar2float_sat_many( void *out, void *in, size_t n); -void char2float_many( void *out, void *in, size_t n); -void char2float_sat_many( void *out, void *in, size_t n); -void ushort2float_many( void *out, void *in, size_t n); -void ushort2float_sat_many( void *out, void *in, size_t n); -void short2float_many( void *out, void *in, size_t n); -void short2float_sat_many( void *out, void *in, size_t n); -void uint2float_many( void *out, void *in, size_t n); -void uint2float_sat_many( void *out, void *in, size_t n); -void int2float_many( void *out, void *in, size_t n); -void int2float_sat_many( void 
*out, void *in, size_t n); -void float2float_many( void *out, void *in, size_t n); -void float2float_sat_many( void *out, void *in, size_t n); -void double2float_many( void *out, void *in, size_t n); -void double2float_sat_many( void *out, void *in, size_t n); -void ulong2float_many( void *out, void *in, size_t n); -void ulong2float_sat_many( void *out, void *in, size_t n); -void long2float_many( void *out, void *in, size_t n); -void long2float_sat_many( void *out, void *in, size_t n); -void uchar2double_many( void *out, void *in, size_t n); -void uchar2double_sat_many( void *out, void *in, size_t n); -void char2double_many( void *out, void *in, size_t n); -void char2double_sat_many( void *out, void *in, size_t n); -void ushort2double_many( void *out, void *in, size_t n); -void ushort2double_sat_many( void *out, void *in, size_t n); -void short2double_many( void *out, void *in, size_t n); -void short2double_sat_many( void *out, void *in, size_t n); -void uint2double_many( void *out, void *in, size_t n); -void uint2double_sat_many( void *out, void *in, size_t n); -void int2double_many( void *out, void *in, size_t n); -void int2double_sat_many( void *out, void *in, size_t n); -void float2double_many( void *out, void *in, size_t n); -void float2double_sat_many( void *out, void *in, size_t n); -void double2double_many( void *out, void *in, size_t n); -void double2double_sat_many( void *out, void *in, size_t n); -void ulong2double_many( void *out, void *in, size_t n); -void ulong2double_sat_many( void *out, void *in, size_t n); -void long2double_many( void *out, void *in, size_t n); -void long2double_sat_many( void *out, void *in, size_t n); -void uchar2ulong_many( void *out, void *in, size_t n); -void uchar2ulong_sat_many( void *out, void *in, size_t n); -void char2ulong_many( void *out, void *in, size_t n); -void char2ulong_sat_many( void *out, void *in, size_t n); -void ushort2ulong_many( void *out, void *in, size_t n); -void ushort2ulong_sat_many( void *out, void 
*in, size_t n); -void short2ulong_many( void *out, void *in, size_t n); -void short2ulong_sat_many( void *out, void *in, size_t n); -void uint2ulong_many( void *out, void *in, size_t n); -void uint2ulong_sat_many( void *out, void *in, size_t n); -void int2ulong_many( void *out, void *in, size_t n); -void int2ulong_sat_many( void *out, void *in, size_t n); -void float2ulong_many( void *out, void *in, size_t n); -void float2ulong_sat_many( void *out, void *in, size_t n); -void double2ulong_many( void *out, void *in, size_t n); -void double2ulong_sat_many( void *out, void *in, size_t n); -void ulong2ulong_many( void *out, void *in, size_t n); -void ulong2ulong_sat_many( void *out, void *in, size_t n); -void long2ulong_many( void *out, void *in, size_t n); -void long2ulong_sat_many( void *out, void *in, size_t n); -void uchar2long_many( void *out, void *in, size_t n); -void uchar2long_sat_many( void *out, void *in, size_t n); -void char2long_many( void *out, void *in, size_t n); -void char2long_sat_many( void *out, void *in, size_t n); -void ushort2long_many( void *out, void *in, size_t n); -void ushort2long_sat_many( void *out, void *in, size_t n); -void short2long_many( void *out, void *in, size_t n); -void short2long_sat_many( void *out, void *in, size_t n); -void uint2long_many( void *out, void *in, size_t n); -void uint2long_sat_many( void *out, void *in, size_t n); -void int2long_many( void *out, void *in, size_t n); -void int2long_sat_many( void *out, void *in, size_t n); -void float2long_many( void *out, void *in, size_t n); -void float2long_sat_many( void *out, void *in, size_t n); -void double2long_many( void *out, void *in, size_t n); -void double2long_sat_many( void *out, void *in, size_t n); -void ulong2long_many( void *out, void *in, size_t n); -void ulong2long_sat_many( void *out, void *in, size_t n); -void long2long_many( void *out, void *in, size_t n); -void long2long_sat_many( void *out, void *in, size_t n); - -void uchar2uchar_many( void *out, void 
*in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } -void uchar2uchar_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } -void char2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} -void char2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} -void ushort2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} -void short2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} -void short2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} -void uint2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} -void uint2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} -void int2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} -void int2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} -void float2uchar_many( void 
*out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} -void float2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} -void double2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} -void double2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} -void ulong2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} -void long2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} -void long2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} -void uchar2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} -void char2char_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } -void char2char_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } -void 
ushort2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} -void short2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} -void short2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} -void uint2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} -void uint2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} -void int2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} -void int2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} -void float2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} -void float2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} -void double2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} -void double2char_sat_many( void *out, void *in, 
size_t n){size_t i; for( i = 0; i < n; i++){ double2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} -void ulong2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} -void long2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} -void long2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} -void uchar2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} -void char2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} -void char2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} -void ushort2ushort_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } -void ushort2ushort_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } -void short2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} -void 
short2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} -void uint2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} -void uint2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} -void int2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} -void int2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} -void float2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} -void float2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} -void double2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} -void double2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} -void ulong2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * 
sizeof(cl_ulong)); }} -void long2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} -void long2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} -void uchar2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} -void char2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }} -void char2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }} -void ushort2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} -void short2short_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } -void short2short_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } -void uint2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} -void uint2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short_sat( (char*) out + i * 
sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} -void int2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} -void int2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} -void float2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} -void float2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} -void double2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }} -void double2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }} -void ulong2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} -void long2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} -void long2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} -void uchar2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint( (char*) out + i * sizeof(cl_uint), 
(char*) in + i * sizeof(cl_uchar)); }} -void uchar2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }} -void char2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} -void char2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} -void ushort2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }} -void short2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} -void short2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} -void uint2uint_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } -void uint2uint_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } -void int2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} -void int2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} -void float2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * 
sizeof(cl_float)); }} -void float2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }} -void double2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} -void double2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} -void ulong2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }} -void long2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} -void long2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} -void uchar2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} -void char2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} -void char2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} -void ushort2int_many( 
void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} -void short2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} -void short2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} -void uint2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} -void uint2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} -void int2int_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } -void int2int_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } -void float2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} -void float2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} -void double2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} -void double2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} -void ulong2int_many( void *out, void *in, size_t n){size_t i; 
for( i = 0; i < n; i++){ ulong2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} -void long2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} -void long2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} -void uchar2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} -void char2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} -void char2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} -void ushort2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} -void short2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} -void short2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ 
short2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} -void uint2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} -void uint2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} -void int2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} -void int2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} -void float2float_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } -void float2float_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } -void double2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} -void double2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} -void ulong2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} -void long2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} -void long2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < 
n; i++){ long2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} -void uchar2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} -void char2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} -void char2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} -void ushort2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} -void short2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} -void short2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} -void uint2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} -void uint2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} -void int2double_many( void *out, void *in, size_t 
n){size_t i; for( i = 0; i < n; i++){ int2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} -void int2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} -void float2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} -void float2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} -void double2double_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } -void double2double_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } -void ulong2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} -void long2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} -void long2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} -void uchar2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} -void 
char2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} -void char2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} -void ushort2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} -void short2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} -void short2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} -void uint2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} -void uint2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} -void int2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} -void int2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} -void float2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} -void 
float2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} -void double2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} -void double2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} -void ulong2ulong_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } -void ulong2ulong_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } -void long2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }} -void long2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }} -void uchar2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} -void char2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} -void char2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} -void ushort2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} 
-void ushort2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} -void short2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} -void short2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} -void uint2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} -void uint2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} -void int2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} -void int2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} -void float2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} -void float2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} -void double2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} -void double2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} -void ulong2long_many( void *out, 
void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} -void long2long_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } -void long2long_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } - -Convert gSaturatedConversions[kTypeCount][kTypeCount] = { - { uchar2uchar_sat_many, char2uchar_sat_many, ushort2uchar_sat_many, short2uchar_sat_many, uint2uchar_sat_many, int2uchar_sat_many, float2uchar_sat_many, double2uchar_sat_many, ulong2uchar_sat_many, long2uchar_sat_many, }, - { uchar2char_sat_many, char2char_sat_many, ushort2char_sat_many, short2char_sat_many, uint2char_sat_many, int2char_sat_many, float2char_sat_many, double2char_sat_many, ulong2char_sat_many, long2char_sat_many, }, - { uchar2ushort_sat_many, char2ushort_sat_many, ushort2ushort_sat_many, short2ushort_sat_many, uint2ushort_sat_many, int2ushort_sat_many, float2ushort_sat_many, double2ushort_sat_many, ulong2ushort_sat_many, long2ushort_sat_many, }, - { uchar2short_sat_many, char2short_sat_many, ushort2short_sat_many, short2short_sat_many, uint2short_sat_many, int2short_sat_many, float2short_sat_many, double2short_sat_many, ulong2short_sat_many, long2short_sat_many, }, - { uchar2uint_sat_many, char2uint_sat_many, ushort2uint_sat_many, short2uint_sat_many, uint2uint_sat_many, int2uint_sat_many, float2uint_sat_many, double2uint_sat_many, ulong2uint_sat_many, long2uint_sat_many, }, - { uchar2int_sat_many, char2int_sat_many, ushort2int_sat_many, short2int_sat_many, uint2int_sat_many, int2int_sat_many, float2int_sat_many, double2int_sat_many, ulong2int_sat_many,long2int_sat_many, }, - { uchar2float_sat_many, char2float_sat_many, ushort2float_sat_many, short2float_sat_many, 
uint2float_sat_many, int2float_sat_many, float2float_sat_many, double2float_sat_many, ulong2float_sat_many, long2float_sat_many, }, - { uchar2double_sat_many, char2double_sat_many, ushort2double_sat_many, short2double_sat_many, uint2double_sat_many, int2double_sat_many, float2double_sat_many, double2double_sat_many, ulong2double_sat_many, long2double_sat_many, }, - { uchar2ulong_sat_many, char2ulong_sat_many, ushort2ulong_sat_many, short2ulong_sat_many, uint2ulong_sat_many, int2ulong_sat_many, float2ulong_sat_many, double2ulong_sat_many, ulong2ulong_sat_many, long2ulong_sat_many, }, - { uchar2long_sat_many, char2long_sat_many, ushort2long_sat_many, short2long_sat_many, uint2long_sat_many, int2long_sat_many, float2long_sat_many, double2long_sat_many, ulong2long_sat_many, long2long_sat_many, }, -}; + if (*name != '\0') return -4; -Convert gConversions[kTypeCount][kTypeCount] = { - { uchar2uchar_many, char2uchar_many, ushort2uchar_many, short2uchar_many, uint2uchar_many, int2uchar_many, float2uchar_many, double2uchar_many, ulong2uchar_many, long2uchar_many, }, - { uchar2char_many, char2char_many, ushort2char_many, short2char_many, uint2char_many, int2char_many, float2char_many, double2char_many, ulong2char_many, long2char_many, }, - { uchar2ushort_many, char2ushort_many, ushort2ushort_many, short2ushort_many, uint2ushort_many, int2ushort_many, float2ushort_many, double2ushort_many, ulong2ushort_many, long2ushort_many, }, - { uchar2short_many, char2short_many, ushort2short_many, short2short_many, uint2short_many, int2short_many, float2short_many, double2short_many, ulong2short_many, long2short_many, }, - { uchar2uint_many, char2uint_many, ushort2uint_many, short2uint_many, uint2uint_many, int2uint_many, float2uint_many, double2uint_many, ulong2uint_many, long2uint_many, }, - { uchar2int_many, char2int_many, ushort2int_many, short2int_many, uint2int_many, int2int_many, float2int_many, double2int_many, ulong2int_many, long2int_many, }, - { uchar2float_many, 
char2float_many, ushort2float_many, short2float_many, uint2float_many, int2float_many, float2float_many, double2float_many, ulong2float_many, long2float_many, }, - { uchar2double_many, char2double_many, ushort2double_many, short2double_many, uint2double_many, int2double_many, float2double_many, double2double_many, ulong2double_many, long2double_many, }, - { uchar2ulong_many, char2ulong_many, ushort2ulong_many, short2ulong_many, uint2ulong_many, int2ulong_many, float2ulong_many, double2ulong_many, ulong2ulong_many, long2ulong_many, }, - { uchar2long_many, char2long_many, ushort2long_many, short2long_many, uint2long_many, int2long_many, float2long_many, double2long_many, ulong2long_many, long2long_many, }, -}; + return 0; +} + +} // namespace conv_test diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index ab887afd..2314ee74 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -16,8 +16,6 @@ #ifndef BASIC_TEST_CONVERSIONS_H #define BASIC_TEST_CONVERSIONS_H -#include "harness/compat.h" - #if !defined(_WIN32) #include #endif @@ -33,22 +31,23 @@ #endif #include "harness/mt19937.h" +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" -typedef void (*Convert)( void *dest, void *src, size_t ); +#include +#include +#include -#define kVectorSizeCount 6 -#define kMaxVectorSize 16 +#include "conversions_data_info.h" -typedef enum -{ - kUnsaturated = 0, - kSaturated, +#define kVectorSizeCount 6 +#define kMaxVectorSize 16 +#define kPageSize 4096 - kSaturationModeCount -}SaturationMode; +#define BUFFER_SIZE (1024 * 1024) +#define EMBEDDED_REDUCTION_FACTOR 16 +#define PERF_LOOP_COUNT 100 -extern Convert gConversions[kTypeCount][kTypeCount]; // [dest format][source format] -extern Convert gSaturatedConversions[kTypeCount][kTypeCount]; // [dest format][source format] extern const char *gTypeNames[ kTypeCount ]; extern const char *gRoundingModeNames[ kRoundingModeCount ]; // { "", "_rte", "_rtp", "_rtn", "_rtz" } extern const char *gSaturationNames[ kSaturationModeCount ]; // { "", "_sat" } @@ -68,5 +67,324 @@ extern InitDataFunc gInitFunctions[ kTypeCount ]; typedef int (*CheckResults)( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); extern CheckResults gCheckResults[ kTypeCount ]; +#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */) + +extern MTdata gMTdata; +extern cl_command_queue gQueue; +extern cl_context gContext; +extern cl_mem gInBuffer; +extern cl_mem gOutBuffers[]; +extern int gHasDouble; +extern int gTestDouble; +extern int gWimpyMode; +extern int gWimpyReductionFactor; +extern int gSkipTesting; +extern int gMinVectorSize; +extern int gMaxVectorSize; +extern int gForceFTZ; +extern int gTimeResults; +extern int gReportAverageTimes; +extern int gStartTestNumber; +extern int gEndTestNumber; +extern int gIsRTZ; +extern void *gIn; +extern 
void *gRef; +extern void *gAllowZ; +extern void *gOut[]; + +extern const char **argList; +extern int argCount; + +extern const char *sizeNames[]; +extern int vectorSizes[]; + +extern size_t gComputeDevices; +extern uint32_t gDeviceFrequency; + +namespace conv_test { + +cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, + RoundingMode round, int vectorSize, + cl_kernel *outKernel); + +int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount); + +int GetTestCase(const char *name, Type *outType, Type *inType, + SaturationMode *sat, RoundingMode *round); + +cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p); +cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p); +uint64_t GetTime(void); + +void WriteInputBufferComplete(void *); +void *FlushToZero(void); +void UnFlushToZero(void *); +} + +struct CalcRefValsBase +{ + virtual ~CalcRefValsBase() = default; + virtual int check_result(void *, uint32_t, int) { return 0; } + + // pointer back to the parent WriteInputBufferInfo struct + struct WriteInputBufferInfo *parent; + clKernelWrapper kernel; // the kernel for this vector size + clProgramWrapper program; // the program for this vector size + cl_uint vectorSize; // the vector size for this callback chain + void *p; // the pointer to mapped result data for this vector size + cl_int result; +}; + +template +struct CalcRefValsPat : CalcRefValsBase +{ + int check_result(void *, uint32_t, int) override; +}; + +struct WriteInputBufferInfo +{ + WriteInputBufferInfo() + : calcReferenceValues(nullptr), doneBarrier(nullptr), count(0), + outType(kuchar), inType(kuchar), barrierCount(0) + {} + + volatile cl_event + calcReferenceValues; // user event which signals when main thread is + // done calculating reference values + volatile cl_event + doneBarrier; // user event which signals when worker threads are done + cl_uint count; // the number of elements in the array + Type outType; // the data type of the conversion result 
+ Type inType; // the data type of the conversion input + volatile int barrierCount; + + std::vector> calcInfo; +}; + +// Must be aligned with Type enums! +using TypeIter = std::tuple; + +// Helper test fixture for constructing OpenCL objects used in testing +// a variety of simple command-buffer enqueue scenarios. +struct ConversionsTest +{ + virtual ~ConversionsTest() = default; + + ConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue); + + cl_int SetUp(int elements); + + // Test body returning an OpenCL error code + cl_int Run(); + + template + int DoTest(Type outType, Type inType, SaturationMode sat, + RoundingMode round); + + template + void TestTypesConversion(const Type &inType, const Type &outType, int &tn, + const int smvs); + +protected: + cl_context context; + cl_device_id device; + cl_command_queue queue; + + size_t num_elements; + + TypeIter typeIterator; +}; + +struct CustomConversionsTest : ConversionsTest +{ + CustomConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : ConversionsTest(device, context, queue) + {} + + cl_int Run(); +}; + +template +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + T test_fixture(device, context, queue); + + cl_int error = test_fixture.SetUp(num_elements); + test_error_ret(error, "Error in test initialization", TEST_FAIL); + + return test_fixture.Run(); +} + +struct TestType +{ + template bool testType(Type in) + { + switch (in) + { + default: return false; + case kuchar: return std::is_same::value; + case kchar: return std::is_same::value; + case kushort: return std::is_same::value; + case kshort: return std::is_same::value; + case kuint: return std::is_same::value; + case kint: return std::is_same::value; + case kfloat: return std::is_same::value; + case kdouble: return std::is_same::value; + case kulong: return std::is_same::value; + case klong: return std::is_same::value; + } + } +}; + +// 
Helper structures to iterate over all tuple attributes of different types +struct IterOverTypes : public TestType +{ + IterOverTypes(const TypeIter &typeIter, ConversionsTest &test) + : inType((Type)0), outType((Type)0), typeIter(typeIter), test(test), + testNumber(-1), startMinVectorSize(gMinVectorSize) + {} + + void Run() { for_each_out_elem(typeIter); } + +protected: + template + void iterate_out_type(const OutType &t) + { + for_each_in_elem<0, Out, OutType>(typeIter); + outType = (Type)(outType + 1); + inType = (Type)0; + } + + template + void iterate_in_type(const InType &t) + { + if (!testType(inType)) vlog_error("Unexpected data type!\n"); + + if (!testType(outType)) vlog_error("Unexpected data type!\n"); + + // run the conversions + test.TestTypesConversion(inType, outType, testNumber, + startMinVectorSize); + inType = (Type)(inType + 1); + } + + template + inline typename std::enable_if::type + for_each_out_elem( + const std::tuple &) // Unused arguments are given no names. + {} + + template + inline typename std::enable_if < Out::type + for_each_out_elem(const std::tuple &t) + { + iterate_out_type(std::get(t)); + for_each_out_elem(t); + } + + template + inline typename std::enable_if::type + for_each_in_elem( + const std::tuple &) // Unused arguments are given no names. 
+ {} + + template + inline typename std::enable_if < In::type + for_each_in_elem(const std::tuple &t) + { + iterate_in_type(std::get(t)); + for_each_in_elem(t); + } + +protected: + Type inType; + Type outType; + const TypeIter &typeIter; + ConversionsTest &test; + int testNumber; + int startMinVectorSize; +}; + + +// Helper structures to select type 2 type conversion test case +struct IterOverSelectedTypes : public TestType +{ + IterOverSelectedTypes(const TypeIter &typeIter, ConversionsTest &test, + const Type in, const Type out, + const RoundingMode round, const SaturationMode sat) + : inType(in), outType(out), rounding(round), saturation(sat), + typeIter(typeIter), test(test), testNumber(-1), + startMinVectorSize(gMinVectorSize) + {} + + void Run() { for_each_out_elem(typeIter); } + +protected: + template + void iterate_out_type(const OutType &t) + { + for_each_in_elem<0, Out, OutType>(typeIter); + } + + template + void iterate_in_type(const InType &t) + { + if (testType(inType) && testType(outType)) + { + // run selected conversion + // testing of the result will happen afterwards + test.DoTest(outType, inType, saturation, rounding); + } + } + + template + inline typename std::enable_if::type + for_each_out_elem(const std::tuple &) + {} + + template + inline typename std::enable_if < Out::type + for_each_out_elem(const std::tuple &t) + { + iterate_out_type(std::get(t)); + for_each_out_elem(t); + } + + template + inline typename std::enable_if::type + for_each_in_elem(const std::tuple &) + {} + + template + inline typename std::enable_if < In::type + for_each_in_elem(const std::tuple &t) + { + iterate_in_type(std::get(t)); + for_each_in_elem(t); + } + +protected: + Type inType; + Type outType; + RoundingMode rounding; + SaturationMode saturation; + + const TypeIter &typeIter; + ConversionsTest &test; + int testNumber; + int startMinVectorSize; +}; + + #endif /* BASIC_TEST_CONVERSIONS_H */ diff --git a/test_conformance/conversions/conversions_data_info.h 
b/test_conformance/conversions/conversions_data_info.h new file mode 100644 index 00000000..b02773b1 --- /dev/null +++ b/test_conformance/conversions/conversions_data_info.h @@ -0,0 +1,781 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef CONVERSIONS_DATA_INFO_H +#define CONVERSIONS_DATA_INFO_H + +#if defined(__APPLE__) +#include +#else +#include +#endif + +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +#include "fplib.h" +extern bool qcom_sat; +extern roundingMode qcom_rm; +#endif + +#include "harness/mt19937.h" +#include "harness/rounding_mode.h" + +#include + +#if defined(__linux__) +#include +#include +#endif + +extern size_t gTypeSizes[kTypeCount]; +extern void *gIn; + + +typedef enum +{ + kUnsaturated = 0, + kSaturated, + + kSaturationModeCount +} SaturationMode; + +struct DataInitInfo +{ + cl_ulong start; + cl_uint size; + Type outType; + Type inType; + SaturationMode sat; + RoundingMode round; + cl_uint threads; + + static std::vector specialValuesUInt; + static std::vector specialValuesFloat; + static std::vector specialValuesDouble; +}; + +struct DataInitBase : public DataInitInfo +{ + virtual ~DataInitBase() = default; + + explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {} + virtual void conv_array(void *out, void *in, size_t n) {} + virtual void conv_array_sat(void *out, void *in, size_t n) {} + virtual void init(const cl_uint &, const cl_uint 
&) {} +}; + +template +struct DataInfoSpec : public DataInitBase +{ + explicit DataInfoSpec(const DataInitInfo &agg); + + // helpers + float round_to_int(float f); + long long round_to_int_and_clamp(double d); + + OutType absolute(const OutType &x); + + // actual conversion of reference values + void conv(OutType *out, InType *in); + void conv_sat(OutType *out, InType *in); + + // min/max ranges for output type of data + std::pair ranges; + + // matrix of clamping ranges for each rounding type + std::vector> clamp_ranges; + + std::vector mdv; + + void conv_array(void *out, void *in, size_t n) override + { + for (size_t i = 0; i < n; i++) + conv(&((OutType *)out)[i], &((InType *)in)[i]); + } + + void conv_array_sat(void *out, void *in, size_t n) override + { + for (size_t i = 0; i < n; i++) + conv_sat(&((OutType *)out)[i], &((InType *)in)[i]); + } + + void init(const cl_uint &, const cl_uint &) override; + InType clamp(const InType &); + inline float fclamp(float lo, float v, float hi) + { + v = v < lo ? lo : v; + return v < hi ? v : hi; + } + + inline double dclamp(double lo, double v, double hi) + { + v = v < lo ? lo : v; + return v < hi ? 
v : hi; + } +}; + +template +DataInfoSpec::DataInfoSpec(const DataInitInfo &agg) + : DataInitBase(agg), mdv(0) +{ + if (std::is_same::value) + ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_UCHAR_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_USHRT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_UINT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_INT_MIN, CL_INT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_ULONG_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX); + + InType outMin = ((InType)ranges.first); + InType outMax = ((InType)ranges.second); + + // clang-format off + // for readability sake keep this section unformatted + if (std::is_floating_point::value) + { // from float/double + InType eps = std::is_same::value ? 
(InType) FLT_EPSILON : (InType) DBL_EPSILON; + if (std::is_integral::value) + { // to char/uchar/short/ushort/int/uint/long/ulong + if (sizeof(OutType)<=sizeof(cl_short)) + { // to char/uchar/short/ushort + clamp_ranges= + {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, + {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, outMax-1.f}, + {outMin-0.0f, outMax - outMax * 0.5f * eps }, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, outMax - outMax * 0.5f * eps}}; + } + else if (std::is_same::value) + { // from float + if (std::is_same::value) + { // to uint + clamp_ranges= + { {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) }, + {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}}; + } + else if (std::is_same::value) + { // to int + clamp_ranges= + { {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) }, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}}; + } + else if (std::is_same::value) + { // to ulong + clamp_ranges= + {{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) }, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}}; + } + else if (std::is_same::value) + { // to long + clamp_ranges= + { {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + 
{MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}}; + } + } + else + { // from double + if (std::is_same::value) + { // to uint + clamp_ranges= + { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps}, + {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps}, + {outMin-1.0f+0.5f*eps, outMax}, + {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) }, + {outMin-1.0f+0.5f*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)}}; + } + else if (std::is_same::value) + { // to int + clamp_ranges= + { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}, + {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}, + {outMin-1.0f+outMax*eps, outMax}, + {outMin-0.0f, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps }, + {outMin-1.0f+outMax*eps, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}}; + } + else if (std::is_same::value) + { // to ulong + clamp_ranges= + {{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) }, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}}; + } + else if (std::is_same::value) + { // to long + clamp_ranges= + { {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, 
-0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}}; + } + } + } + } + // clang-format on +} + +template +float DataInfoSpec::round_to_int(float f) +{ + static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23), + -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) }; + + // Round fractional values to integer in round towards nearest mode + if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23)) + { + volatile float x = f; + float magicVal = magic[f < 0]; + +#if defined(__SSE__) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this + // incorrectly + __m128 v = _mm_set_ss(x); + __m128 m = _mm_set_ss(magicVal); + v = _mm_add_ss(v, m); + v = _mm_sub_ss(v, m); + _mm_store_ss((float *)&x, v); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + return f; +} + +template +long long DataInfoSpec::round_to_int_and_clamp(double f) +{ + static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), + MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + + if (f >= -(double)LLONG_MIN) return LLONG_MAX; + + if (f <= (double)LLONG_MIN) return LLONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52)) + { + volatile double x = f; + double magicVal = magic[f < 0]; +#if defined(__SSE2__) || defined(_MSC_VER) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this + // incorrectly + __m128d v = _mm_set_sd(x); + __m128d m = _mm_set_sd(magicVal); + v = _mm_add_sd(v, m); + v = _mm_sub_sd(v, m); + _mm_store_sd((double *)&x, v); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + return (long long)f; +} + +template +OutType 
DataInfoSpec::absolute(const OutType &x) +{ + union { + cl_uint u; + OutType f; + } u; + u.f = x; + if (std::is_same::value) + u.u &= 0x7fffffff; + else if (std::is_same::value) + u.u &= 0x7fffffffffffffffULL; + else + log_error("Unexpected argument type of DataInfoSpec::absolute"); + + return u.f; +} + +template +void DataInfoSpec::conv(OutType *out, InType *in) +{ + if (std::is_same::value) + { + cl_float inVal = *in; + + if (std::is_floating_point::value) + { + *out = (OutType)inVal; + } + else if (std::is_same::value) + { +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a + // 64-bit int. However, fistp stores it as a signed int, and some of + // the test values won't fit into a signed int. (These test values + // are >= 2^63.) The result on VS2005 is that these end up silently + // (at least by default settings) clamped to the max lowest ulong. + cl_float x = round_to_int(inVal); + if (x >= 9223372036854775808.0f) + { + x -= 9223372036854775808.0f; + ((cl_ulong *)out)[0] = x; + ((cl_ulong *)out)[0] += 9223372036854775808ULL; + } + else + { + ((cl_ulong *)out)[0] = x; + } +#else + *out = round_to_int(inVal); +#endif + } + else if (std::is_same::value) + { + *out = round_to_int_and_clamp(inVal); + } + else + *out = round_to_int(inVal); + } + else if (std::is_same::value) + { + if (std::is_same::value) + *out = (OutType)*in; + else + *out = rint(*in); + } + else if (std::is_same::value + || std::is_same::value) + { + if (std::is_same::value) + { +#if defined(_MSC_VER) + cl_ulong l = ((cl_ulong *)in)[0]; + double result; + + if (std::is_same::value) + { + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) + : (cl_long)l; +#if defined(_M_X64) + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl)); +#else + result = sl; +#endif + ((double *)out)[0] = + (l == 0 ? 0.0 : (((cl_long)l < 0) ? 
result * 2.0 : result)); + } + else + { + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l)); + ((double *)out)[0] = + (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 + } +#else + *out = (*in == 0 ? 0.0 : (OutType)*in); +#endif + } + else if (std::is_same::value) + { + cl_float outVal = 0.f; + +#if defined(_MSC_VER) && defined(_M_X64) + cl_ulong l = ((cl_ulong *)in)[0]; + float result; + if (std::is_same::value) + { + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) + : (cl_long)l; + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); + outVal = (l == 0 ? 0.0f + : (((cl_long)l < 0) ? result * 2.0f : result)); + } + else + { + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l)); + outVal = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, + // 0's always convert to +0.0 + } +#else + InType l = ((InType *)in)[0]; +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) + /* ARM VFP doesn't have hardware instruction for converting from + * 64-bit integer to float types, hence GCC ARM uses the + * floating-point emulation code despite which -mfloat-abi setting + * it is. But the emulation code in libgcc.a has only one rounding + * mode (round to nearest even in this case) and ignores the user + * rounding mode setting in hardware. As a result setting rounding + * modes in hardware won't give correct rounding results for type + * covert from 64-bit integer to float using GCC for ARM compiler so + * for testing different rounding modes, we need to use alternative + * reference function. ARM64 does have an instruction, however we + * cannot guarantee the compiler will use it. On all ARM + * architechures use emulation to calculate reference.*/ + if (std::is_same::value) + outVal = qcom_u64_2_f32(l, qcom_sat, qcom_rm); + else + outVal = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm)); +#else + outVal = (l == 0 ? 
0.0f : (float)l); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 +#endif +#endif + + *out = outVal; + } + else + { + *out = (OutType)*in; + } + } + else + { + if (std::is_same::value) + *out = (*in == 0 ? 0.f : *in); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 + else if (std::is_same::value) + *out = (*in == 0 ? 0.0 : *in); + else + *out = (OutType)*in; + } +} + +#define CLAMP(_lo, _x, _hi) \ + ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x))) + +template +void DataInfoSpec::conv_sat(OutType *out, InType *in) +{ + if (std::is_floating_point::value) + { + if (std::is_floating_point::value) + { // in float/double, out float/double + *out = (OutType)(*in); + } + else if ((std::is_same::value) + && std::is_same::value) + { + cl_float x = round_to_int(*in); + +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a + // 64-bit int. However, fistp stores it as a signed int, and some of + // the test values won't fit into a signed int. (These test values + // are >= 2^63.) The result on VS2005 is that these end up silently + // (at least by default settings) clamped to the max lowest ulong. + if (x >= 18446744073709551616.0f) + { // 2^64 + *out = 0xFFFFFFFFFFFFFFFFULL; + } + else if (x < 0) + { + *out = 0; + } + else if (x >= 9223372036854775808.0f) + { // 2^63 + x -= 9223372036854775808.0f; + *out = x; + *out += 9223372036854775808ULL; + } + else + { + *out = x; + } +#else + *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) + ? 0xFFFFFFFFFFFFFFFFULL + : x < 0 ? 0 : (OutType)x; +#endif + } + else if ((std::is_same::value) + && std::is_same::value) + { + cl_float f = round_to_int(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) + ? 0x7FFFFFFFFFFFFFFFULL + : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) + ? 
0x8000000000000000LL + : (OutType)f; + } + else if (std::is_same::value + && std::is_same::value) + { + InType f = rint(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) + ? 0xFFFFFFFFFFFFFFFFULL + : f < 0 ? 0 : (OutType)f; + } + else if (std::is_same::value + && std::is_same::value) + { + InType f = rint(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) + ? 0x7FFFFFFFFFFFFFFFULL + : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) + ? 0x8000000000000000LL + : (OutType)f; + } + else + { // in float/double, out char/uchar/short/ushort/int/uint + *out = + CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second); + } + } + else if (std::is_integral::value + && std::is_integral::value) + { + { + if ((std::is_signed::value + && std::is_signed::value) + || (!std::is_signed::value + && !std::is_signed::value)) + { + if (sizeof(InType) <= sizeof(OutType)) + { + *out = (OutType)*in; + } + else + { + *out = CLAMP(ranges.first, *in, ranges.second); + } + } + else + { // mixed signed/unsigned types + if (sizeof(InType) < sizeof(OutType)) + { + *out = (!std::is_signed::value) + ? (OutType)*in + : CLAMP(0, *in, ranges.second); // *in < 0 ? 0 : *in + } + else + { // bigger/equal mixed signed/unsigned types - always clamp + *out = CLAMP(0, *in, ranges.second); + } + } + } + } + else + { // InType integral, OutType floating + *out = std::is_signed::value ? 
(OutType)*in + : absolute((OutType)*in); + } +} + +template +void DataInfoSpec::init(const cl_uint &job_id, + const cl_uint &thread_id) +{ + uint64_t ulStart = start; + void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType]; + + if (std::is_integral::value) + { + InType *o = (InType *)pIn; + if (sizeof(InType) <= sizeof(cl_short)) + { // char/uchar/ushort/short + for (int i = 0; i < size; i++) o[i] = ulStart++; + } + else if (sizeof(InType) <= sizeof(cl_int)) + { // int/uint + int i = 0; + if (gIsEmbedded) + for (i = 0; i < size; i++) + o[i] = (InType)genrand_int32(mdv[thread_id]); + else + for (i = 0; i < size; i++) o[i] = (InType)i + ulStart; + + if (0 == ulStart) + { + size_t tableSize = specialValuesUInt.size() + * sizeof(decltype(specialValuesUInt)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesUInt.front(), + tableSize); + } + } + else + { // long/ulong + cl_ulong *o = (cl_ulong *)pIn; + cl_ulong i, j, k; + + i = 0; + if (ulStart == 0) + { + // Try various powers of two + for (j = 0; j < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[j] = (cl_ulong)1 << j; + i = j; + + // try the complement of those + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[i++] = ~((cl_ulong)1 << j); + + // Try various negative powers of two + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[i++] = (cl_ulong)0xFFFFFFFFFFFFFFFEULL << j; + + // try various powers of two plus 1, shifted by various amounts + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + for (k = 0; + i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j; + k++) + o[i++] = (((cl_ulong)1 << j) + 1) << k; + + // try various powers of two minus 1 + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + for (k = 0; + i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j; + k++) + o[i++] = (((cl_ulong)1 << j) - 1) << k; + + // Other patterns + cl_ulong 
pattern[] = { + 0x3333333333333333ULL, 0x5555555555555555ULL, + 0x9999999999999999ULL, 0x6666666666666666ULL, + 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL + }; + cl_ulong mask[] = { 0xffffffffffffffffULL, + 0xff00ff00ff00ff00ULL, + 0xffff0000ffff0000ULL, + 0xffffffff00000000ULL }; + for (j = 0; i < (cl_ulong)size + && j < sizeof(pattern) / sizeof(pattern[0]); + j++) + for (k = 0; i + 2 <= (cl_ulong)size + && k < sizeof(mask) / sizeof(mask[0]); + k++) + { + o[i++] = pattern[j] & mask[k]; + o[i++] = pattern[j] & ~mask[k]; + } + } + + auto &md = mdv[thread_id]; + for (; i < (cl_ulong)size; i++) + o[i] = (cl_ulong)genrand_int32(md) + | ((cl_ulong)genrand_int32(md) << 32); + } + } // integrals + else if (std::is_same::value) + { + cl_uint *o = (cl_uint *)pIn; + int i; + + if (gIsEmbedded) + for (i = 0; i < size; i++) + o[i] = (cl_uint)genrand_int32(mdv[thread_id]); + else + for (i = 0; i < size; i++) o[i] = (cl_uint)i + ulStart; + + if (0 == ulStart) + { + size_t tableSize = specialValuesFloat.size() + * sizeof(decltype(specialValuesFloat)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesFloat.front(), + tableSize); + } + + if (kUnsaturated == sat) + { + InType *f = (InType *)pIn; + for (i = 0; i < size; i++) f[i] = clamp(f[i]); + } + } + else if (std::is_same::value) + { + InType *o = (InType *)pIn; + int i = 0; + + union { + uint64_t u; + InType d; + } u; + + for (i = 0; i < size; i++) + { + uint64_t z = i + ulStart; + + uint32_t bits = ((uint32_t)z ^ (uint32_t)(z >> 32)); + // split 0x89abcdef to 0x89abc00000000def + u.u = bits & 0xfffU; + u.u |= (uint64_t)(bits & ~0xfffU) << 32; + // sign extend the leading bit of def segment as sign bit so that + // the middle region consists of either all 1s or 0s + u.u -= (bits & 0x800U) << 1; + o[i] = u.d; + } + + if (0 == ulStart) + { + size_t tableSize = specialValuesDouble.size() + * 
sizeof(decltype(specialValuesDouble)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesDouble.front(), + tableSize); + } + + if (0 == sat) + for (i = 0; i < size; i++) o[i] = clamp(o[i]); + } +} + +template +InType DataInfoSpec::clamp(const InType &in) +{ + if (std::is_integral::value) + { + if (std::is_same::value) + { + return fclamp(clamp_ranges[round].first, in, + clamp_ranges[round].second); + } + else if (std::is_same::value) + { + return dclamp(clamp_ranges[round].first, in, + clamp_ranges[round].second); + } + } + return in; +} + +#endif /* CONVERSIONS_DATA_INFO_H */ diff --git a/test_conformance/conversions/fplib.h b/test_conformance/conversions/fplib.h index 534550a3..c69b1e89 100644 --- a/test_conformance/conversions/fplib.h +++ b/test_conformance/conversions/fplib.h @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. // +#ifndef CONVERSIONS_FPLIB_H +#define CONVERSIONS_FPLIB_H + #include #include @@ -28,3 +31,5 @@ typedef enum float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd); float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd); + +#endif diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 2ee05463..a8be2098 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -13,12 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" -#include "harness/rounding_mode.h" #include "harness/ThreadPool.h" #include "harness/testHarness.h" -#include "harness/kernelHelpers.h" #include "harness/parseParameters.h" +#include "harness/mt19937.h" + #if defined(__APPLE__) #include #endif @@ -33,7 +32,6 @@ #include #endif -#include "mingw_compat.h" #if defined(__MINGW32__) #include #endif @@ -49,278 +47,73 @@ #include #include +#include +#include #include "Sleep.h" -#include "basic_test_conversions.h" - -#if (defined(_WIN32) && defined(_MSC_VER)) -// need for _controlfp_s and rouinding modes in RoundingMode -#include "harness/testHarness.h" -#endif - -#pragma mark - -#pragma mark globals - -#define BUFFER_SIZE (1024 * 1024) -#define kPageSize 4096 -#define EMBEDDED_REDUCTION_FACTOR 16 -#define PERF_LOOP_COUNT 100 -#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */) +#include "basic_test_conversions.h" +#include +#include #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) #include "fplib.h" -extern bool qcom_sat; -extern roundingMode qcom_rm; #endif -const char **argList = NULL; -int argCount = 0; -cl_context gContext = NULL; -cl_command_queue gQueue = NULL; -char appName[64] = "ctest"; -int gStartTestNumber = -1; -int gEndTestNumber = 0; -#if defined(__APPLE__) -int gTimeResults = 1; -#else -int gTimeResults = 0; +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +/* Rounding modes and saturation for use with qcom 64 bit to float conversion + * library */ +bool qcom_sat; +roundingMode qcom_rm; #endif -int gReportAverageTimes = 0; -void *gIn = NULL; -void *gRef = NULL; -void *gAllowZ = NULL; -void *gOut[kCallStyleCount] = { NULL }; -cl_mem gInBuffer; -cl_mem gOutBuffers[kCallStyleCount]; -size_t gComputeDevices = 0; -uint32_t gDeviceFrequency = 0; -int gWimpyMode = 0; -int gWimpyReductionFactor = 128; -int gSkipTesting = 0; -int gForceFTZ = 0; -int gMultithread = 1; -int gIsRTZ = 0; -uint32_t gSimdSize = 1; -int gHasDouble 
= 0; -int gTestDouble = 1; -const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; -const int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; -int gMinVectorSize = 0; -int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]); -static MTdata gMTdata; - -#pragma mark - -#pragma mark Declarations + static int ParseArgs(int argc, const char **argv); static void PrintUsage(void); test_status InitCL(cl_device_id device); -static int GetTestCase(const char *name, Type *outType, Type *inType, - SaturationMode *sat, RoundingMode *round); -static int DoTest(cl_device_id device, Type outType, Type inType, - SaturationMode sat, RoundingMode round, MTdata d); -static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, - RoundingMode round, int vectorSize, - cl_kernel *outKernel); -static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, - size_t blockCount); - -void *FlushToZero(void); -void UnFlushToZero(void *); - -// Windows (since long double got deprecated) sets the x87 to 53-bit precision -// (that's x87 default state). This causes problems with the tests that -// convert long and ulong to float and double or otherwise deal with values -// that need more precision than 53-bit. So, set the x87 to 64-bit precision. -static inline void Force64BitFPUPrecision(void) -{ -#if __MINGW32__ - // The usual method is to use _controlfp as follows: - // #include - // _controlfp(_PC_64, _MCW_PC); - // - // _controlfp is available on MinGW32 but not on MinGW64. Instead of having - // divergent code just use inline assembly which works for both. 
- unsigned short int orig_cw = 0; - unsigned short int new_cw = 0; - __asm__ __volatile__("fstcw %0" : "=m"(orig_cw)); - new_cw = orig_cw | 0x0300; // set precision to 64-bit - __asm__ __volatile__("fldcw %0" ::"m"(new_cw)); -#else - /* Implement for other platforms if needed */ -#endif -} -int test_conversions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - int error, i, testNumber = -1; - int startMinVectorSize = gMinVectorSize; - Type inType, outType; - RoundingMode round; - SaturationMode sat; - if (argCount) - { - for (i = 0; i < argCount; i++) - { - if (GetTestCase(argList[i], &outType, &inType, &sat, &round)) - { - vlog_error("\n\t\t**** ERROR: Unable to parse function name " - "%s. Skipping.... *****\n\n", - argList[i]); - continue; - } +const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short", + "uint", "int", "float", "double", + "ulong", "long" }; - // skip double if we don't have it - if (!gTestDouble && (inType == kdouble || outType == kdouble)) - { - if (gHasDouble) - { - vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", - gTypeNames[outType], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double testing turned " - "off.\n"); - } +const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp", + "_rtn", "_rtz" }; - continue; - } +const char *gSaturationNames[2] = { "", "_sat" }; - // skip longs on embedded - if (!gHasLong - && (inType == klong || outType == klong || inType == kulong - || outType == kulong)) - { - continue; - } +size_t gTypeSizes[kTypeCount] = { + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), sizeof(cl_double), + sizeof(cl_ulong), sizeof(cl_long), +}; - // Skip the implicit converts if the rounding mode is not default or - // test is saturated - if (0 == startMinVectorSize) - { - if (sat || round != kDefaultRoundingMode) 
- gMinVectorSize = 1; - else - gMinVectorSize = 0; - } +char appName[64] = "ctest"; +int gMultithread = 1; - if ((error = DoTest(device, outType, inType, sat, round, gMTdata))) - { - vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", - gTypeNames[outType], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType]); - } - } + +int test_conversions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (argCount) + { + return MakeAndRunTest(device, context, queue, + num_elements); } else { - for (outType = (Type)0; outType < kTypeCount; - outType = (Type)(outType + 1)) - { - for (inType = (Type)0; inType < kTypeCount; - inType = (Type)(inType + 1)) - { - // skip longs on embedded - if (!gHasLong - && (inType == klong || outType == klong || inType == kulong - || outType == kulong)) - { - continue; - } - - for (sat = (SaturationMode)0; sat < kSaturationModeCount; - sat = (SaturationMode)(sat + 1)) - { - // skip illegal saturated conversions to float type - if (kSaturated == sat - && (outType == kfloat || outType == kdouble)) - { - continue; - } - - for (round = (RoundingMode)0; round < kRoundingModeCount; - round = (RoundingMode)(round + 1)) - { - if (++testNumber < gStartTestNumber) - { - // vlog( "%d) skipping convert_%sn%s%s( %sn - // )\n", testNumber, gTypeNames[ outType ], - // gSaturationNames[ sat ], - // gRoundingModeNames[round], gTypeNames[inType] - // ); - continue; - } - else - { - if (gEndTestNumber > 0 - && testNumber >= gEndTestNumber) - { - goto exit; - } - } - - vlog("%d) Testing convert_%sn%s%s( %sn ):\n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], gRoundingModeNames[round], - gTypeNames[inType]); - - // skip double if we don't have it - if (!gTestDouble - && (inType == kdouble || outType == kdouble)) - { - if (gHasDouble) - { - vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " - "FAILED ** \n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], - 
gRoundingModeNames[round], - gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double " - "testing turned off.\n"); - } - continue; - } - - // Skip the implicit converts if the rounding mode is - // not default or test is saturated - if (0 == startMinVectorSize) - { - if (sat || round != kDefaultRoundingMode) - gMinVectorSize = 1; - else - gMinVectorSize = 0; - } - - if ((error = DoTest(device, outType, inType, sat, round, - gMTdata))) - { - vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " - "FAILED ** \n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], - gRoundingModeNames[round], - gTypeNames[inType]); - } - } - } - } - } + return MakeAndRunTest(device, context, queue, + num_elements); } - -exit: - return gFailCount; } + test_definition test_list[] = { ADD_TEST(conversions), }; const int test_num = ARRAY_SIZE(test_list); -#pragma mark - int main(int argc, const char **argv) { @@ -378,8 +171,6 @@ int main(int argc, const char **argv) return ret; } -#pragma mark - -#pragma mark setup static int ParseArgs(int argc, const char **argv) { @@ -509,7 +300,7 @@ static int ParseArgs(int argc, const char **argv) gWimpyMode = 1; } - vlog( "\n" ); + vlog("\n"); PrintArch(); @@ -526,6 +317,7 @@ static int ParseArgs(int argc, const char **argv) return 0; } + static void PrintUsage(void) { int i; @@ -564,63 +356,6 @@ static void PrintUsage(void) } -static int GetTestCase(const char *name, Type *outType, Type *inType, - SaturationMode *sat, RoundingMode *round) -{ - int i; - - // Find the return type - for (i = 0; i < kTypeCount; i++) - if (name == strstr(name, gTypeNames[i])) - { - *outType = (Type)i; - name += strlen(gTypeNames[i]); - - break; - } - - if (i == kTypeCount) return -1; - - // Check to see if _sat appears next - *sat = (SaturationMode)0; - for (i = 1; i < kSaturationModeCount; i++) - if (name == strstr(name, gSaturationNames[i])) - { - *sat = (SaturationMode)i; - name += strlen(gSaturationNames[i]); - break; - } - - *round = (RoundingMode)0; - 
for (i = 1; i < kRoundingModeCount; i++) - if (name == strstr(name, gRoundingModeNames[i])) - { - *round = (RoundingMode)i; - name += strlen(gRoundingModeNames[i]); - break; - } - - if (*name != '_') return -2; - name++; - - for (i = 0; i < kTypeCount; i++) - if (name == strstr(name, gTypeNames[i])) - { - *inType = (Type)i; - name += strlen(gTypeNames[i]); - - break; - } - - if (i == kTypeCount) return -3; - - if (*name != '\0') return -4; - - return 0; -} - -#pragma mark - -#pragma mark OpenCL test_status InitCL(cl_device_id device) { @@ -678,6 +413,20 @@ test_status InitCL(cl_device_id device) } gTestDouble &= gHasDouble; + // detect whether profile of the device is embedded + char profile[1024] = ""; + if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), + profile, NULL))) + { + vlog_error("clGetDeviceInfo failed. (%d)\n", error); + return TEST_FAIL; + } + else if (strstr(profile, "EMBEDDED_PROFILE")) + { + gIsEmbedded = 1; + if (!is_extension_available(device, "cles_khr_int64")) gHasLong = 0; + } + gContext = clCreateContext(NULL, 1, &device, notify_callback, NULL, &error); if (NULL == gContext || error) { @@ -726,10 +475,8 @@ test_status InitCL(cl_device_id device) } } - gMTdata = init_genrand(gRandomSeed); - char c[1024]; static const char *no_yes[] = { "NO", "YES" }; vlog("\nCompute Device info:\n"); @@ -760,977 +507,4 @@ test_status InitCL(cl_device_id device) return TEST_PASS; } -static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, - size_t blockCount) -{ - // The global dimensions are just the blockCount to execute since we haven't - // set up multiple queues for multiple devices. 
- int error; - - error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf); - error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf); - - if (error) - { - vlog_error("FAILED -- could not set kernel args (%d)\n", error); - return error; - } - - if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, - NULL, 0, NULL, NULL))) - { - vlog_error("FAILED -- could not execute kernel (%d)\n", error); - return error; - } - - return 0; -} - -#if defined(__APPLE__) -#include -#endif - -uint64_t GetTime(void); -uint64_t GetTime(void) -{ -#if defined(__APPLE__) - return mach_absolute_time(); -#elif defined(_MSC_VER) - return ReadTime(); -#else - // mach_absolute_time is a high precision timer with precision < 1 - // microsecond. -#warning need accurate clock here. Times are invalid. - return 0; -#endif -} - - -#if defined(_MSC_VER) -/* function is defined in "compat.h" */ -#else -double SubtractTime(uint64_t endTime, uint64_t startTime); -double SubtractTime(uint64_t endTime, uint64_t startTime) -{ - uint64_t diff = endTime - startTime; - static double conversion = 0.0; - - if (0.0 == conversion) - { -#if defined(__APPLE__) - mach_timebase_info_data_t info = { 0, 0 }; - kern_return_t err = mach_timebase_info(&info); - if (0 == err) - conversion = 1e-9 * (double)info.numer / (double)info.denom; -#else - // This function consumes output from GetTime() above, and converts the - // time to secionds. -#warning need accurate ticks to seconds conversion factor here. Times are invalid. 
-#endif - } - - // strictly speaking we should also be subtracting out timer latency here - return conversion * (double)diff; -} -#endif - -typedef struct CalcReferenceValuesInfo -{ - struct WriteInputBufferInfo - *parent; // pointer back to the parent WriteInputBufferInfo struct - cl_kernel kernel; // the kernel for this vector size - cl_program program; // the program for this vector size - cl_uint vectorSize; // the vector size for this callback chain - void *p; // the pointer to mapped result data for this vector size - cl_int result; -} CalcReferenceValuesInfo; - -typedef struct WriteInputBufferInfo -{ - volatile cl_event - calcReferenceValues; // user event which signals when main thread is - // done calculating reference values - volatile cl_event - doneBarrier; // user event which signals when worker threads are done - cl_uint count; // the number of elements in the array - Type outType; // the data type of the conversion result - Type inType; // the data type of the conversion input - volatile int barrierCount; - CalcReferenceValuesInfo calcInfo[kCallStyleCount]; -} WriteInputBufferInfo; - -cl_uint RoundUpToNextPowerOfTwo(cl_uint x); -cl_uint RoundUpToNextPowerOfTwo(cl_uint x) -{ - if (0 == (x & (x - 1))) return x; - - while (x & (x - 1)) x &= x - 1; - - return x + x; -} - -void WriteInputBufferComplete(void *); - -typedef struct DataInitInfo -{ - cl_ulong start; - cl_uint size; - Type outType; - Type inType; - SaturationMode sat; - RoundingMode round; - MTdata *d; -} DataInitInfo; - -cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p); -cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) -{ - DataInitInfo *info = (DataInitInfo *)p; - - gInitFunctions[info->inType]( - (char *)gIn + job_id * info->size * gTypeSizes[info->inType], info->sat, - info->round, info->outType, info->start + job_id * info->size, - info->size, info->d[thread_id]); - return CL_SUCCESS; -} - -static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) -{ - 
cl_uint i; - for (i = 0; i < count; ++i) - allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); -} - -cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p); -cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) -{ - DataInitInfo *info = (DataInitInfo *)p; - cl_uint count = info->size; - Type inType = info->inType; - Type outType = info->outType; - RoundingMode round = info->round; - size_t j; - - Force64BitFPUPrecision(); - - void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType]; - void *a = (cl_uchar *)gAllowZ + job_id * count; - void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; - if (outType != inType) - { - // create the reference while we wait - Convert f = gConversions[outType][inType]; - if (info->sat) f = gSaturatedConversions[outType][inType]; - -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point - * emulation code despite which -mfloat-abi setting it is. But the - * emulation code in libgcc.a has only one rounding mode (round to - * nearest even in this case) and ignores the user rounding mode setting - * in hardware. As a result setting rounding modes in hardware won't - * give correct rounding results for type covert from 64-bit integer to - * float using GCC for ARM compiler so for testing different rounding - * modes, we need to use alternative reference function. ARM64 does have - * an instruction, however we cannot guarantee the compiler will use it. - * On all ARM architechures use emulation to calculate reference.*/ - switch (round) - { - /* conversions to floating-point type use the current rounding mode. - * The only default floating-point rounding mode supported is round - * to nearest even i.e the current rounding mode will be _rte for - * floating-point types. 
*/ - case kDefaultRoundingMode: qcom_rm = qcomRTE; break; - case kRoundToNearestEven: qcom_rm = qcomRTE; break; - case kRoundUp: qcom_rm = qcomRTP; break; - case kRoundDown: qcom_rm = qcomRTN; break; - case kRoundTowardZero: qcom_rm = qcomRTZ; break; - default: - vlog_error("ERROR: undefined rounding mode %d\n", round); - break; - } - qcom_sat = info->sat; -#endif - - RoundingMode oldRound = set_round(round, outType); - f(d, s, count); - set_round(oldRound, outType); - - // Decide if we allow a zero result in addition to the correctly rounded - // one - memset(a, 0, count); - if (gForceFTZ) - { - if (inType == kfloat) setAllowZ((uint8_t *)a, (uint32_t *)s, count); - if (outType == kfloat) - setAllowZ((uint8_t *)a, (uint32_t *)d, count); - } - } - else - { - // Copy the input to the reference - memcpy(d, s, info->size * gTypeSizes[inType]); - } - - // Patch up NaNs conversions to integer to zero -- these can be converted to - // any integer - if (info->outType != kfloat && info->outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. 
NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((double *)d)[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((float *)d)[j] = NAN; - } - } - } - - return CL_SUCCESS; -} - -static int DoTest(cl_device_id device, Type outType, Type inType, - SaturationMode sat, RoundingMode round, MTdata d) -{ -#ifdef __APPLE__ - cl_ulong wall_start = mach_absolute_time(); -#endif - - DataInitInfo init_info = { 0, 0, outType, inType, sat, round, NULL }; - WriteInputBufferInfo writeInputBufferInfo; - int vectorSize; - int error = 0; - cl_uint threads = GetThreadCount(); - uint64_t i; - - gTestCount++; - size_t blockCount = - BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); - size_t step = blockCount; - uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); - - memset(&writeInputBufferInfo, 0, sizeof(writeInputBufferInfo)); - init_info.d = (MTdata *)malloc(threads * sizeof(MTdata)); - if (NULL == init_info.d) - { - vlog_error( - "ERROR: Unable to allocate storage for random number generator!\n"); - return -1; - } - for (i = 0; i < threads; i++) - { - init_info.d[i] = init_genrand(genrand_int32(d)); - if (NULL == init_info.d[i]) - { - vlog_error("ERROR: Unable to allocate storage for random number " - "generator!\n"); - return -1; - } - } - - writeInputBufferInfo.outType = outType; - writeInputBufferInfo.inType = inType; - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - writeInputBufferInfo.calcInfo[vectorSize].program = - MakeProgram(outType, inType, sat, round, vectorSize, - &writeInputBufferInfo.calcInfo[vectorSize].kernel); - if (NULL == writeInputBufferInfo.calcInfo[vectorSize].program) - { - gFailCount++; - return -1; - } - if (NULL == writeInputBufferInfo.calcInfo[vectorSize].kernel) - 
{ - gFailCount++; - vlog_error("\t\tFAILED -- Failed to create kernel.\n"); - return -2; - } - - writeInputBufferInfo.calcInfo[vectorSize].parent = - &writeInputBufferInfo; - writeInputBufferInfo.calcInfo[vectorSize].vectorSize = vectorSize; - writeInputBufferInfo.calcInfo[vectorSize].result = -1; - } - - if (gSkipTesting) goto exit; - - // Patch up rounding mode if default is RTZ - // We leave the part above in default rounding mode so that the right kernel - // is compiled. - if (round == kDefaultRoundingMode && gIsRTZ && (outType == kfloat)) - init_info.round = round = kRoundTowardZero; - - // Figure out how many elements are in a work block - - // we handle 64-bit types a bit differently. - if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; - - if (!gWimpyMode && gIsEmbedded) - step = blockCount * EMBEDDED_REDUCTION_FACTOR; - - if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor; - vlog("Testing... "); - fflush(stdout); - for (i = 0; i < (uint64_t)lastCase; i += step) - { - - if (0 == (i & ((lastCase >> 3) - 1))) - { - vlog("."); - fflush(stdout); - } - - cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); - writeInputBufferInfo.count = count; - - // Crate a user event to represent the status of the reference value - // computation completion - writeInputBufferInfo.calcReferenceValues = - clCreateUserEvent(gContext, &error); - if (error || NULL == writeInputBufferInfo.calcReferenceValues) - { - vlog_error("ERROR: Unable to create user event. (%d)\n", error); - gFailCount++; - goto exit; - } - - // retain for consumption by MapOutputBufferComplete - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - if ((error = - clRetainEvent(writeInputBufferInfo.calcReferenceValues))) - { - vlog_error("ERROR: Unable to retain user event. 
(%d)\n", error); - gFailCount++; - goto exit; - } - } - - // Crate a user event to represent when the callbacks are done verifying - // correctness - writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error); - if (error || NULL == writeInputBufferInfo.calcReferenceValues) - { - vlog_error("ERROR: Unable to create user event for barrier. (%d)\n", - error); - gFailCount++; - goto exit; - } - - // retain for use by the callback that calls this - if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier))) - { - vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n", - error); - gFailCount++; - goto exit; - } - - // Call this in a multithreaded manner - // gInitFunctions[ inType ]( gIn, sat, round, outType, i, count, d - // ); - cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2; - init_info.start = i; - init_info.size = count / chunks; - if (init_info.size < 16384) - { - chunks = RoundUpToNextPowerOfTwo(threads); - init_info.size = count / chunks; - if (init_info.size < 16384) - { - init_info.size = count; - chunks = 1; - } - } - ThreadPool_Do(InitData, chunks, &init_info); - - // Copy the results to the device - if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, - count * gTypeSizes[inType], gIn, 0, - NULL, NULL))) - { - vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error); - gFailCount++; - goto exit; - } - - // Call completion callback for the write, which will enqueue the rest - // of the work. 
- WriteInputBufferComplete((void *)&writeInputBufferInfo); - - // Make sure the work is actually running, so we don't deadlock - if ((error = clFlush(gQueue))) - { - vlog_error("clFlush failed with error %d\n", error); - gFailCount++; - goto exit; - } - - ThreadPool_Do(PrepareReference, chunks, &init_info); - - // signal we are done calculating the reference results - if ((error = clSetUserEventStatus( - writeInputBufferInfo.calcReferenceValues, CL_COMPLETE))) - { - vlog_error( - "Error: Failed to set user event status to CL_COMPLETE: %d\n", - error); - gFailCount++; - goto exit; - } - - // Wait for the event callbacks to finish verifying correctness. - if ((error = clWaitForEvents( - 1, (cl_event *)&writeInputBufferInfo.doneBarrier))) - { - vlog_error("Error: Failed to wait for barrier: %d\n", error); - gFailCount++; - goto exit; - } - - if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues))) - { - vlog_error("Error: Failed to release calcReferenceValues: %d\n", - error); - gFailCount++; - goto exit; - } - - if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier))) - { - vlog_error("Error: Failed to release done barrier: %d\n", error); - gFailCount++; - goto exit; - } - - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - if ((error = writeInputBufferInfo.calcInfo[vectorSize].result)) - { - switch (inType) - { - case kuchar: - case kchar: - vlog("Input value: 0x%2.2x ", - ((unsigned char *)gIn)[error - 1]); - break; - case kushort: - case kshort: - vlog("Input value: 0x%4.4x ", - ((unsigned short *)gIn)[error - 1]); - break; - case kuint: - case kint: - vlog("Input value: 0x%8.8x ", - ((unsigned int *)gIn)[error - 1]); - break; - case kfloat: - vlog("Input value: %a ", ((float *)gIn)[error - 1]); - break; - break; - case kulong: - case klong: - vlog("Input value: 0x%16.16llx ", - ((unsigned long long *)gIn)[error - 1]); - break; - case kdouble: - vlog("Input value: %a ", ((double *)gIn)[error - 1]); - 
break; - default: - vlog_error("Internal error at %s: %d\n", __FILE__, - __LINE__); - abort(); - break; - } - - // tell the user which conversion it was. - if (0 == vectorSize) - vlog(" (implicit scalar conversion from %s to %s)\n", - gTypeNames[inType], gTypeNames[outType]); - else - vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], - sizeNames[vectorSize], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType], - sizeNames[vectorSize]); - - gFailCount++; - goto exit; - } - } - } - - log_info("done.\n"); - - if (gTimeResults) - { - // Kick off tests for the various vector lengths - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - size_t workItemCount = blockCount / vectorSizes[vectorSize]; - if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4) - workItemCount /= - 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]); - - double sum = 0.0; - double bestTime = INFINITY; - cl_uint k; - for (k = 0; k < PERF_LOOP_COUNT; k++) - { - uint64_t startTime = GetTime(); - if ((error = RunKernel( - writeInputBufferInfo.calcInfo[vectorSize].kernel, - gInBuffer, gOutBuffers[vectorSize], workItemCount))) - { - gFailCount++; - goto exit; - } - - // Make sure OpenCL is done - if ((error = clFinish(gQueue))) - { - vlog_error("Error %d at clFinish\n", error); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime(endTime, startTime); - sum += time; - if (time < bestTime) bestTime = time; - } - - if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double)gDeviceFrequency - * gComputeDevices * gSimdSize * 1e6 - / (workItemCount * vectorSizes[vectorSize]); - if (0 == vectorSize) - vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", - "implicit convert %s -> %s", gTypeNames[inType], - gTypeNames[outType]); - else - vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", - "convert_%s%s%s%s( %s%s )", gTypeNames[outType], - sizeNames[vectorSize], 
gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType], - sizeNames[vectorSize]); - } - } - - if (gWimpyMode) - vlog("\tWimp pass"); - else - vlog("\tpassed"); - -#ifdef __APPLE__ - // record the run time - vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start)); -#endif - vlog("\n\n"); - fflush(stdout); - - -exit: - // clean up - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - clReleaseProgram(writeInputBufferInfo.calcInfo[vectorSize].program); - clReleaseKernel(writeInputBufferInfo.calcInfo[vectorSize].kernel); - } - - if (init_info.d) - { - for (i = 0; i < threads; i++) free_mtdata(init_info.d[i]); - free(init_info.d); - } - - return error; -} - -void MapResultValuesComplete(void *data); - -// Note: not called reentrantly -void WriteInputBufferComplete(void *data) -{ - cl_int status; - WriteInputBufferInfo *info = (WriteInputBufferInfo *)data; - cl_uint count = info->count; - int vectorSize; - - info->barrierCount = gMaxVectorSize - gMinVectorSize; - - // now that we know that the write buffer is complete, enqueue callbacks to - // wait for the main thread to finish calculating the reference results. 
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - size_t workItemCount = - (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]); - - if ((status = RunKernel(info->calcInfo[vectorSize].kernel, gInBuffer, - gOutBuffers[vectorSize], workItemCount))) - { - gFailCount++; - return; - } - - info->calcInfo[vectorSize].p = clEnqueueMapBuffer( - gQueue, gOutBuffers[vectorSize], CL_TRUE, - CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0, - NULL, NULL, &status); - { - if (status) - { - vlog_error("ERROR: WriteInputBufferComplete calback failed " - "with status: %d\n", - status); - gFailCount++; - return; - } - } - } - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - MapResultValuesComplete(info->calcInfo + vectorSize); - } - - // Make sure the work starts moving -- otherwise we may deadlock - if ((status = clFlush(gQueue))) - { - vlog_error( - "ERROR: WriteInputBufferComplete calback failed with status: %d\n", - status); - gFailCount++; - return; - } - - // e was already released by the main thread. It should be destroyed - // automatically soon after we exit. 
-} - -void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, - void *data); - -// Note: May be called reentrantly -void MapResultValuesComplete(void *data) -{ - cl_int status; - CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data; - cl_event calcReferenceValues = info->parent->calcReferenceValues; - - // we know that the map is done, wait for the main thread to finish - // calculating the reference values - if ((status = clSetEventCallback(calcReferenceValues, CL_COMPLETE, - CalcReferenceValuesComplete, data))) - { - vlog_error("ERROR: clSetEventCallback failed in " - "MapResultValuesComplete with status: %d\n", - status); - gFailCount++; // not thread safe -- being lazy here - } - - // this thread no longer needs its reference to info->calcReferenceValues, - // so release it - if ((status = clReleaseEvent(calcReferenceValues))) - { - vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed " - "with status: %d\n", - status); - gFailCount++; // not thread safe -- being lazy here - } - - // no need to flush since we didn't enqueue anything - - // e was already released by WriteInputBufferComplete. It should be - // destroyed automatically soon after we exit. -} - - -void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, - void *data) -{ - CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data; - cl_uint vectorSize = info->vectorSize; - cl_uint count = info->parent->count; - Type outType = - info->parent->outType; // the data type of the conversion result - Type inType = info->parent->inType; // the data type of the conversion input - size_t j; - cl_int error; - cl_event doneBarrier = info->parent->doneBarrier; - - // report spurious error condition - if (CL_SUCCESS != status) - { - vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! 
(%d)\n", - status); - gFailCount++; // lazy about thread safety here - return; - } - - // Now we know that both results have been mapped back from the device, and - // the main thread is done calculating the reference results. It is now time - // to check the results. - - // verify results - void *mapped = info->p; - - // Patch up NaNs conversions to integer to zero -- these can be converted to - // any integer - if (outType != kfloat && outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)gIn; - double *outp = (double *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)gIn; - float *outp = (float *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - } - - if (memcmp(mapped, gRef, count * gTypeSizes[outType])) - info->result = gCheckResults[outType](mapped, gRef, gAllowZ, count, - vectorSizes[vectorSize]); - else - info->result = 0; - - // Fill the output buffer with junk and release it - { - cl_uint pattern = 0xffffdead; - memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]); - if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize], - mapped, 0, NULL, NULL))) - { - vlog_error("ERROR: clEnqueueUnmapMemObject failed in " - "CalcReferenceValuesComplete (%d)\n", - error); - 
gFailCount++; - } - } - - if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1)) - { - if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE))) - { - vlog_error("ERROR: clSetUserEventStatus failed in " - "CalcReferenceValuesComplete (err: %d). We're probably " - "going to deadlock.\n", - status); - gFailCount++; - return; - } - - if ((status = clReleaseEvent(doneBarrier))) - { - vlog_error("ERROR: clReleaseEvent failed in " - "CalcReferenceValuesComplete (err: %d).\n", - status); - gFailCount++; - return; - } - } - // e was already released by WriteInputBufferComplete. It should be - // destroyed automatically soon after all the calls to - // CalcReferenceValuesComplete exit. -} - -static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, - RoundingMode round, int vectorSize, - cl_kernel *outKernel) -{ - cl_program program; - char testName[256]; - int error = 0; - - std::ostringstream source; - if (outType == kdouble || inType == kdouble) - source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - - // Create the program. This is a bit complicated because we are trying to - // avoid byte and short stores. - if (0 == vectorSize) - { - // Create the type names. - char inName[32]; - char outName[32]; - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - sprintf(testName, "test_implicit_%s_%s", outName, inName); - - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " dest[i] = src[i];\n"; - source << "}\n"; - - vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], - gTypeNames[outType]); - fflush(stdout); - } - else - { - int vectorSizetmp = vectorSizes[vectorSize]; - - // Create the type names. 
- char convertString[128]; - char inName[32]; - char outName[32]; - switch (vectorSizetmp) - { - case 1: - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - snprintf(convertString, sizeof(convertString), "convert_%s%s%s", - outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s", convertString, inName); - vlog("Building %s( %s ) test\n", convertString, inName); - break; - case 3: - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - snprintf(convertString, sizeof(convertString), - "convert_%s3%s%s", outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s3", convertString, inName); - vlog("Building %s( %s3 ) test\n", convertString, inName); - break; - default: - snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], - vectorSizetmp); - snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], - vectorSizetmp); - snprintf(convertString, sizeof(convertString), "convert_%s%s%s", - outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s", convertString, inName); - vlog("Building %s( %s ) test\n", convertString, inName); - break; - } - fflush(stdout); - - if (vectorSizetmp == 3) - { - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " if( i + 1 < get_global_size(0))\n"; - source << " vstore3( " << convertString - << "( vload3( i, src)), i, dest );\n"; - source << " else\n"; - source << " {\n"; - source << " " << inName << "3 in;\n"; - source << " " << outName << "3 out;\n"; - source << " if( 0 == (i & 1) )\n"; - source << " in.y = src[3*i+1];\n"; - source << " in.x = src[3*i];\n"; - source << " out = " << convertString << "( in ); \n"; - source << " dest[3*i] = 
out.x;\n"; - source << " if( 0 == (i & 1) )\n"; - source << " dest[3*i+1] = out.y;\n"; - source << " }\n"; - source << "}\n"; - } - else - { - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " dest[i] = " << convertString << "( src[i] );\n"; - source << "}\n"; - } - } - *outKernel = NULL; - - const char *flags = NULL; - if (gForceFTZ) flags = "-cl-denorms-are-zero"; - - // build it - std::string sourceString = source.str(); - const char *programSource = sourceString.c_str(); - error = create_single_kernel_helper(gContext, &program, outKernel, 1, - &programSource, testName, flags); - if (error) - { - vlog_error("Failed to build kernel/program (err = %d).\n", error); - clReleaseProgram(program); - return NULL; - } - - return program; -} -- cgit v1.2.3 From 1ab4b26821406e2dc7a6d80fb33105e4e85ae43e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 11 Jul 2023 17:55:37 +0200 Subject: Add tests for external sharing not dependent on semaphores. (#1648) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add tests for external sharing not dependent on semaphores. Additional external sharing tests that use fences instead of semaphores. Signed-off-by: Paweł Jastrzębski * Fix clang-format Signed-off-by: Paweł Jastrzębski * Apply changes for review. Apply changes for review: - Make VkFence + clFinish a synchronization option to existing tests instead of creating a separate test that uses fence. Signed-off-by: Paweł Jastrzębski * Fix build break. Signed-off-by: Paweł Jastrzębski * Fix resource release conditions. Signed-off-by: Paweł Jastrzębski * Fix fence usage. Fixed following fence issues: - Add missing link to command buffer - Add fence reset before wait Signed-off-by: Paweł Jastrzębski * Add Vulkan wrapper for fence. 
Signed-off-by: Paweł Jastrzębski * Rework fence reset. Signed-off-by: Paweł Jastrzębski * Change synchronisation mechanisms. Changes made: - wait for fence with clFinish - queue submit with wait for fence Signed-off-by: Paweł Jastrzębski * Replace clFinish with vkWaitForFences. Replaced clFinish with vkWaitForFences in Vulkan execution context. Signed-off-by: Paweł Jastrzębski * Replace remaining clFinish with vkWaitForFences. Replaced remaining clFinish with vkWaitForFences in Vulkan execution context. Signed-off-by: Paweł Jastrzębski * Fix review comments for synchronisation simplification. Signed-off-by: Paweł Jastrzębski * Fix review comments for synchronisation simplification for remaining tests. Signed-off-by: Paweł Jastrzębski * Fix condition check. Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../common/vulkan_wrapper/vulkan_wrapper.cpp | 47 ++++ .../common/vulkan_wrapper/vulkan_wrapper.hpp | 18 +- test_conformance/vulkan/main.cpp | 53 +++- .../vulkan/test_vulkan_interop_buffer.cpp | 299 ++++++++++++++++----- 4 files changed, 339 insertions(+), 78 deletions(-) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp index 3ce4af6b..21d8f226 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp @@ -604,6 +604,37 @@ VulkanQueue &VulkanDevice::getQueue(const VulkanQueueFamily &queueFamily, VulkanDevice::operator VkDevice() const { return m_vkDevice; } +//////////////////////////////// +// VulkanFence implementation // +//////////////////////////////// + +VulkanFence::VulkanFence(const VulkanDevice &vkDevice) +{ + + device = vkDevice; + + VkFenceCreateInfo fenceInfo{}; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceInfo.pNext = nullptr; + fenceInfo.flags = 0; + + VkResult vkStatus = vkCreateFence(device, &fenceInfo, nullptr, &fence); + + if (vkStatus != 
VK_SUCCESS) + { + throw std::runtime_error("Error: Failed create fence."); + } +} + +VulkanFence::~VulkanFence() { vkDestroyFence(device, fence, nullptr); } + +void VulkanFence::reset() { vkResetFences(device, 1, &fence); } + +void VulkanFence::wait() +{ + vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX); +} + //////////////////////////////// // VulkanQueue implementation // //////////////////////////////// @@ -615,6 +646,22 @@ VulkanQueue::VulkanQueue(VkQueue vkQueue): m_vkQueue(vkQueue) {} VulkanQueue::~VulkanQueue() {} +void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer, + const std::shared_ptr &vkFence) +{ + VulkanCommandBufferList commandBufferList; + commandBufferList.add(commandBuffer); + + VkSubmitInfo vkSubmitInfo = {}; + vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + vkSubmitInfo.pNext = NULL; + vkSubmitInfo.waitSemaphoreCount = (uint32_t)0; + vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size(); + vkSubmitInfo.pCommandBuffers = commandBufferList(); + + vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, vkFence->fence); +} + void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList, const VulkanCommandBufferList &commandBufferList, const VulkanSemaphoreList &signalSemaphoreList) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp index 37925ee4..af478219 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp @@ -21,6 +21,7 @@ #include "vulkan_wrapper_types.hpp" #include "vulkan_list_map.hpp" #include "vulkan_api_list.hpp" +#include class VulkanInstance { friend const VulkanInstance &getVulkanInstance(); @@ -145,6 +146,20 @@ public: operator VkDevice() const; }; +class VulkanFence { + friend class VulkanQueue; + +protected: + VkFence fence; + VkDevice device; + +public: + VulkanFence(const VulkanDevice &device); + virtual ~VulkanFence(); + void 
reset(); + void wait(); +}; + class VulkanQueue { friend class VulkanDevice; @@ -157,6 +172,8 @@ protected: public: const VulkanQueueFamily &getQueueFamily(); + void submit(const VulkanCommandBuffer &commandBuffer, + const std::shared_ptr &fence); void submit(const VulkanSemaphoreList &waitSemaphoreList, const VulkanCommandBufferList &commandBufferList, const VulkanSemaphoreList &signalSemaphoreList); @@ -569,7 +586,6 @@ public: operator VkSemaphore() const; }; - #define VK_FUNC_DECL(name) extern "C" PFN_##name _##name; VK_FUNC_LIST #if defined(_WIN32) || defined(_WIN64) diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp index 5901420a..3d7b30e7 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -52,7 +52,8 @@ static void params_reset() } extern int test_buffer_common(cl_device_id device_, cl_context context_, - cl_command_queue queue_, int numElements_); + cl_command_queue queue_, int numElements_, + float use_fence); extern int test_image_common(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_); @@ -61,7 +62,7 @@ int test_buffer_single_queue(cl_device_id device_, cl_context context_, { params_reset(); log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -69,7 +70,7 @@ int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, params_reset(); numCQ = 2; log_info("RUNNING TEST WITH TWO QUEUE...... 
\n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -78,7 +79,7 @@ int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, multiImport = true; log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " "IN SAME CONTEXT...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -88,7 +89,45 @@ int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, multiCtx = true; log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " "IN DIFFERENT CONTEXT...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); +} +int test_buffer_single_queue_fence(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiple_queue_fence(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + numCQ = 2; + log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiImport_sameCtx_fence(cl_device_id device_, + cl_context context_, + cl_command_queue queue_, + int numElements_) +{ + params_reset(); + multiImport = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN SAME CONTEXT...... 
\n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiImport_diffCtx_fence(cl_device_id device_, + cl_context context_, + cl_command_queue queue_, + int numElements_) +{ + params_reset(); + multiImport = true; + multiCtx = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN DIFFERENT CONTEXT...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); } int test_image_single_queue(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -110,6 +149,10 @@ test_definition test_list[] = { ADD_TEST(buffer_single_queue), ADD_TEST(buffer_multiple_queue), ADD_TEST(buffer_multiImport_sameCtx), ADD_TEST(buffer_multiImport_diffCtx), + ADD_TEST(buffer_single_queue_fence), + ADD_TEST(buffer_multiple_queue_fence), + ADD_TEST(buffer_multiImport_sameCtx_fence), + ADD_TEST(buffer_multiImport_diffCtx_fence), ADD_TEST(image_single_queue), ADD_TEST(image_multiple_queue), ADD_TEST(consistency_external_buffer), diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 9b0bc9de..5390ef69 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "harness/errorHelpers.h" @@ -82,7 +83,8 @@ __kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __g int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, cl_command_queue &cmd_queue2, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize) + uint32_t numBuffers, uint32_t bufferSize, + bool use_fence) { int err = CL_SUCCESS; size_t global_work_size[1]; @@ -117,6 +119,7 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, 
getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -136,10 +139,17 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); @@ -227,16 +237,27 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); + err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); @@ -286,7 +307,14 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, goto CLEANUP; 
} - if (iter != (maxIter - 1)) + if (use_fence) + { + clFlush(cmd_queue1); + clFlush(cmd_queue2); + clFinish(cmd_queue1); + clFinish(cmd_queue2); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue2); } @@ -387,8 +415,11 @@ CLEANUP: } if (program) clReleaseProgram(program); if (kernel_cq) clReleaseKernel(kernel_cq); - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); @@ -398,7 +429,7 @@ CLEANUP: int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, - uint32_t bufferSize) + uint32_t bufferSize, bool use_fence) { log_info("RUNNING TEST WITH ONE QUEUE...... 
\n\n"); size_t global_work_size[1]; @@ -416,6 +447,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -434,10 +466,18 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } + const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); @@ -526,16 +566,26 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); 
err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); @@ -562,7 +612,12 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, " error\n"); goto CLEANUP; } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFlush(cmd_queue1); + clFinish(cmd_queue1); + } + else if (!use_fence && (iter != (maxIter - 1))) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -656,8 +711,13 @@ CLEANUP: delete externalMemory[i]; } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } + if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); return err; @@ -666,7 +726,7 @@ CLEANUP: int run_test_with_multi_import_same_ctx( cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, - uint32_t bufferSize, uint32_t bufferSizeForOffset) + uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence) { size_t global_work_size[1]; uint8_t *error_2; @@ -687,6 +747,7 @@ int run_test_with_multi_import_same_ctx( getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -706,10 +767,18 @@ int run_test_with_multi_import_same_ctx( VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + 
fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } + const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); @@ -832,16 +901,34 @@ int run_test_with_multi_import_same_ctx( for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } } - clVk2CLExternalSemaphore->wait(cmd_queue1); + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore->wait(cmd_queue1); + } + for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) { @@ -874,7 +961,11 @@ int run_test_with_multi_import_same_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue1); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -987,8 +1078,13 @@ CLEANUP: } } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } + if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); return err; @@ -998,7 +1094,8 @@ int run_test_with_multi_import_diff_ctx( cl_context &context, cl_context &context2, cl_command_queue 
&cmd_queue1, cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2, cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset) + uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset, + float use_fence) { size_t global_work_size[1]; uint8_t *error_3; @@ -1023,6 +1120,7 @@ int run_test_with_multi_import_diff_ctx( getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -1042,15 +1140,24 @@ int run_test_with_multi_import_diff_ctx( VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - - clVk2CLExternalSemaphore2 = new clExternalSemaphore( - vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore2 = new clExternalSemaphore( - vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + clVk2CLExternalSemaphore2 = + new clExternalSemaphore(vkVk2CLSemaphore, context2, + vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore2 = + new clExternalSemaphore(vkCl2VkSemaphore, context2, + vkExternalSemaphoreHandleType, deviceId); + } const uint32_t maxIter = 
innerIterations; VulkanCommandPool vkCommandPool(vkDevice); @@ -1192,16 +1299,33 @@ int run_test_with_multi_import_diff_ctx( for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + } + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) @@ -1235,7 +1359,11 @@ int run_test_with_multi_import_diff_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue1); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -1243,16 +1371,33 @@ int run_test_with_multi_import_diff_ctx( clFinish(cmd_queue1); for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + } + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore2->wait(cmd_queue2); } - clVk2CLExternalSemaphore2->wait(cmd_queue2); for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) @@ -1286,7 +1431,11 @@ int run_test_with_multi_import_diff_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue2); + 
} + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore2->signal(cmd_queue2); } @@ -1474,10 +1623,15 @@ CLEANUP: } } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; - if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2; - if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2; + if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + } + if (error_3) free(error_3); if (error_1) clReleaseMemObject(error_1); if (error_2) clReleaseMemObject(error_2); @@ -1485,7 +1639,8 @@ CLEANUP: } int test_buffer_common(cl_device_id device_, cl_context context_, - cl_command_queue queue_, int numElements_) + cl_command_queue queue_, int numElements_, + float use_fence) { int current_device = 0; @@ -1738,26 +1893,26 @@ int test_buffer_common(cl_device_id device_, cl_context context_, { errNum = run_test_with_multi_import_same_ctx( context, cmd_queue1, kernel, verify_kernel, vkDevice, - numBuffers, bufferSize, bufferSizeForOffset); + numBuffers, bufferSize, bufferSizeForOffset, use_fence); } else if (multiImport && multiCtx) { errNum = run_test_with_multi_import_diff_ctx( context, context2, cmd_queue1, cmd_queue3, kernel, kernel2, verify_kernel, verify_kernel2, vkDevice, numBuffers, - bufferSize, bufferSizeForOffset); + bufferSize, bufferSizeForOffset, use_fence); } else if (numCQ == 2) { errNum = run_test_with_two_queue( context, cmd_queue1, cmd_queue2, kernel, verify_kernel, - vkDevice, numBuffers + 1, bufferSize); + vkDevice, numBuffers + 1, bufferSize, use_fence); } else { - errNum = run_test_with_one_queue(context, cmd_queue1, kernel, - verify_kernel, vkDevice, - numBuffers, bufferSize); + errNum = run_test_with_one_queue( + 
context, cmd_queue1, kernel, verify_kernel, vkDevice, + numBuffers, bufferSize, use_fence); } if (errNum != CL_SUCCESS) { -- cgit v1.2.3 From 3b7fda2071266a2833ed6c2fe5f62c15fc8eec99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 11 Jul 2023 17:56:49 +0200 Subject: Add local size tests for cl_khr_command_buffer_mutable_dispatch. (#1745) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add local size tests for cl_khr_command_buffer_mutable_dispatch. Signed-off-by: Paweł Jastrzębski * Add kernel with observable output. We should check that there's some observable output from the kernel as a result of the change to local work size, not just that clGetMutableCommandInfoKHR has been updated. For example, getting every work-item to call get_local_size() inside of the kernel and writing it to a buffer, then reading the buffer after the command-buffer enqueue has finished and checking it matches what we expect. Signed-off-by: Paweł Jastrzębski * Fix review comments. Applied review comments for mutable dispatch local size test: - clFinish to ensure command-buffer has finished executing for calling clUpdateMutableCommandsKHR - Change variable and constant names for local size Applied review comments for mutable dispatch global arguments test: - clFinish to ensure command-buffer has finished executing for calling clUpdateMutableCommandsKHR Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Fix skip conditions - Remove obsolete variable - Replace a variable with a constant Signed-off-by: Paweł Jastrzębski * Remove explicit base class call. Signed-off-by: Paweł Jastrzębski * Fix constant magic number. Signed-off-by: Paweł Jastrzębski * Update global size and local size to meet the spec requirements. Make sure work-groups number is not increased after update of command-buffer. Signed-off-by: Paweł Jastrzębski * Remove unneeded includes. 
Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../CMakeLists.txt | 1 + .../main.cpp | 1 + .../mutable_command_local_size.cpp | 174 +++++++++++++++++++++ .../cl_khr_command_buffer_mutable_dispatch/procs.h | 4 + 4 files changed, 180 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index 80214609..8b84790d 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_local_size.cpp mutable_command_global_offset.cpp ../basic_command_buffer.cpp ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index b53914dc..7fc25712 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,7 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_local_size), ADD_TEST(mutable_dispatch_global_offset), }; diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp 
b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp new file mode 100644 index 00000000..22a9da6d --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp @@ -0,0 +1,174 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include "typeWrappers.h" +#include "procs.h" +#include "testHarness.h" +#include "mutable_command_basic.h" +#include + +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR + +struct MutableDispatchLocalSize : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchLocalSize(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() 
override + { + const char *local_size_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_local_size(0); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &local_size_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, &local_work_size, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + &update_global_size /* global_work_size */, + &update_local_size /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, 
nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR, + sizeof(info_local_size), &info_local_size, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_local_size != update_local_size) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i < update_global_size && update_local_size != resultData[i]) + { + log_error("Data failed to verify: update_local_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i >= update_global_size + && local_work_size != resultData[i]) + { + log_error("Data failed to verify: update_local_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + size_t info_local_size = 0; + const size_t global_work_size = 16; + const size_t local_work_size = 8; + const size_t update_global_size = 8; + const size_t update_local_size = 4; + const size_t sizeToAllocate = 64; + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_local_size(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 588bdc81..e0d9b736 100644 --- 
a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,6 +59,10 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mutable_dispatch_local_size(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); extern int test_mutable_dispatch_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, -- cgit v1.2.3 From 64130824a8fe66d832b1e84905e82c5b17439759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 11 Jul 2023 18:41:43 +0200 Subject: Add global size tests for cl_khr_command_buffer_mutable_dispatch. (#1744) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add global size tests for cl_khr_command_buffer_mutable_dispatch. Signed-off-by: Paweł Jastrzębski * Add kernel with observable output. We should check that there's some observable output from the kernel as a result of the change to global work size, not just that clGetMutableCommandInfoKHR has been updated. For example, getting every work-item to call get_global_size() inside of the kernel and writing it to a buffer, then reading the buffer after the command-buffer enqueue has finished and checking that it matches what we expect. Signed-off-by: Paweł Jastrzębski * Fix review comments. Applied review comments for mutable dispatch global arguments test: - clFinish to ensure command-buffer has finished executing before calling clUpdateMutableCommandsKHR - Change variable and constant names for global size Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Fix skip conditions - Remove obsolete variable - Replace a variable with a constant Signed-off-by: Paweł Jastrzębski * Fix review comments. 
Changes made: - Remove explicit base class call - Fix condition check - Fix constant magic number Signed-off-by: Paweł Jastrzębski * Fix constant magic number. Signed-off-by: Paweł Jastrzębski * Remove unneeded comments. Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../CMakeLists.txt | 1 + .../main.cpp | 1 + .../mutable_command_global_size.cpp | 167 +++++++++++++++++++++ .../cl_khr_command_buffer_mutable_dispatch/procs.h | 4 + 4 files changed, 173 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index 8b84790d..1df528ee 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_global_size.cpp mutable_command_local_size.cpp mutable_command_global_offset.cpp ../basic_command_buffer.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 7fc25712..7e3ef52b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,7 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), 
ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_global_size), ADD_TEST(mutable_dispatch_local_size), ADD_TEST(mutable_dispatch_global_offset), }; diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp new file mode 100644 index 00000000..091f0c8d --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp @@ -0,0 +1,167 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR + +struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchGlobalSize(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *global_size_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_global_size(0); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &global_size_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error 
= clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + &update_global_size /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR, + sizeof(info_global_size), &info_global_size, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_global_size != update_global_size) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i >= update_global_size && global_work_size != resultData[i]) + { + log_error("Data failed to verify: update_global_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i < update_global_size + && update_global_size != 
resultData[i]) + { + log_error("Data failed to verify: update_global_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + size_t info_global_size = 0; + const size_t update_global_size = 3; + const size_t sizeToAllocate = global_work_size; + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_global_size(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index e0d9b736..5e1aa8e5 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,6 +59,10 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mutable_dispatch_global_size(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); extern int test_mutable_dispatch_local_size(cl_device_id device, cl_context context, cl_command_queue queue, -- cgit v1.2.3 From ae1a712e3ed14f87857575987389dff26bb74c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 11 Jul 2023 19:43:29 +0200 Subject: Add out of order tests for cl_khr_command_buffer_mutable_dispatch. (#1746) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add out of order tests for cl_khr_command_buffer_mutable_dispatch. 
Signed-off-by: Paweł Jastrzębski * Replace CL_KERNEL_EXEC_INFO_SVM_PTRS with cl_mutable_dispatch_arg_khr command-buffer mutable buffer update. CL_KERNEL_EXEC_INFO_SVM_PTRS limits the test to devices which support SVM. Updating arg_list with a cl_mutable_dispatch_arg_khr struct is one of the best supported, and also easiest to verify, configurations to change. Signed-off-by: Paweł Jastrzębski * Fix skip condition. Signed-off-by: Paweł Jastrzębski * Fix review changes. Changes made: - Fix skip condition - Add event - Add memory verification Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../CMakeLists.txt | 1 + .../main.cpp | 2 + .../mutable_command_out_of_order.cpp | 454 +++++++++++++++++++++ .../cl_khr_command_buffer_mutable_dispatch/procs.h | 7 + 4 files changed, 464 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index 1df528ee..edf12c8e 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_out_of_order.cpp mutable_command_global_size.cpp mutable_command_local_size.cpp mutable_command_global_offset.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 7e3ef52b..07c9550d 100644 --- 
a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,8 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_out_of_order), + ADD_TEST(mutable_dispatch_simultaneous_out_of_order), ADD_TEST(mutable_dispatch_global_size), ADD_TEST(mutable_dispatch_local_size), ADD_TEST(mutable_dispatch_global_offset), diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp new file mode 100644 index 00000000..d507dadf --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp @@ -0,0 +1,454 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include +#include "mutable_command_basic.h" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// - simultaneous use +// - cross-queue simultaneous-use + +namespace { + +template +struct OutOfOrderTest : public BasicMutableCommandBufferTest +{ + OutOfOrderTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue), + out_of_order_queue(nullptr), out_of_order_command_buffer(this), + user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr), + program_fill(nullptr) + { + simultaneous_use_requested = simultaneous_request; + if (simultaneous_request) buffer_size_multiplier = 2; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernel() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernel(); + test_error(error, "BasicMutableCommandBufferTest::SetUpKernel failed"); + + // create additional kernel to properly prepare output buffer for test + const char* kernel_str = + R"( + __kernel void fill(int pattern, __global int* out, __global int* + offset) + { + size_t id = get_global_id(0); + size_t ind = offset[0] + id ; + out[ind] = pattern; + })"; + + error = create_single_kernel_helper_create_program( + context, &program_fill, 1, &kernel_str); + test_error(error, "Failed to create program with source"); + + error = + clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr); + test_error(error, "Failed to build program"); + + kernel_fill = clCreateKernel(program_fill, "fill", &error); + test_error(error, "Failed to create copy kernel"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernelArgs() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernelArgs(); + test_error(error, + 
"BasicMutableCommandBufferTest::SetUpKernelArgs failed"); + + error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int), + &overwritten_pattern); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem); + test_error(error, "clSetKernelArg failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUp(int elements) override + { + cl_int error = BasicMutableCommandBufferTest::SetUp(elements); + test_error(error, "BasicMutableCommandBufferTest::SetUp failed"); + + error = SetUpKernel(); + test_error(error, "SetUpKernel failed"); + + out_of_order_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + test_error(error, "Unable to create command queue to test with"); + + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0 + }; + + out_of_order_command_buffer = clCreateCommandBufferKHR( + 1, &out_of_order_queue, properties, &error); + test_error(error, "clCreateCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + + return !out_of_order_support + || (simultaneous_use_requested && !simultaneous_use_support) + || !mutable_support || BasicMutableCommandBufferTest::Skip(); + } + + //-------------------------------------------------------------------------- + cl_int Run() override + { + cl_int error = CL_SUCCESS; + + if 
(simultaneous_use_support) + { + // enqueue simultaneous command-buffers with out-of-order calls + error = RunSimultaneous(); + test_error(error, "RunSimultaneous failed"); + } + else + { + // enqueue single command-buffer with out-of-order calls + error = RunSingle(); + test_error(error, "RunSingle failed"); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + const cl_int pattern = pattern_pri; + cl_int error = + clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, + &pattern, sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, + out_mem, &overwritten_pattern, + sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[1], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSingle() + { + cl_int error; + + error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 
0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordSimultaneousCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + // for both simultaneous passes this call will fill entire in_mem buffer + cl_int error = clCommandFillBufferKHR( + out_of_order_command_buffer, nullptr, in_mem, 
&pattern_pri, + sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr, + &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + // to avoid overwriting the entire result buffer instead of filling + // only relevant part this additional kernel was introduced + + error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr, + nullptr, kernel_fill, 1, nullptr, + &num_elements, nullptr, 0, nullptr, + &sync_points[1], &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + struct SimulPassData + { + cl_int offset; + std::vector output_buffer; + // 0:user event, 1:offset-buffer fill event, 2:kernel done event + clEventWrapper wait_events[3]; + }; + + //-------------------------------------------------------------------------- + cl_int EnqueueSimultaneousPass(SimulPassData& pd) + { + cl_int error = CL_SUCCESS; + if (!user_event) + { + user_event = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + } + + pd.wait_events[0] = user_event; + + // filling offset buffer must wait for previous pass completeness + error = clEnqueueFillBuffer( + out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0, + sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0), + (wait_pass_event != nullptr ? 
&wait_pass_event : nullptr), + &pd.wait_events[1]); + test_error(error, "clEnqueueFillBuffer failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), 
data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSimultaneous() + { + cl_int error = RecordSimultaneousCommandBuffer(); + test_error(error, "RecordSimultaneousCommandBuffer failed"); + + cl_int offset = static_cast(num_elements); + + std::vector simul_passes = { + { 0, std::vector(num_elements) }, + { offset, std::vector(num_elements) } + }; + + for (auto&& pass : simul_passes) + { + error = EnqueueSimultaneousPass(pass); + test_error(error, "EnqueueSimultaneousPass failed"); + + wait_pass_event = pass.wait_events[2]; + } + + error = clSetUserEventStatus(user_event, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + error = clFinish(out_of_order_queue); + test_error(error, "clFinish failed"); + + // verify the result buffers + for (auto&& pass : simul_passes) + { + auto& res_data = pass.output_buffer; + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i); + } + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + clCommandQueueWrapper out_of_order_queue; + clCommandBufferWrapper out_of_order_command_buffer; + + clEventWrapper user_event; + clEventWrapper single_event; + clEventWrapper wait_pass_event; + + clKernelWrapper kernel_fill; + clProgramWrapper program_fill; + + const size_t test_global_work_size = 3 * sizeof(cl_int); + cl_mutable_command_khr command = nullptr; + + const cl_int overwritten_pattern = 0xACDC; + const cl_int pattern_pri = 42; +}; + +} // anonymous namespace + +int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_simultaneous_out_of_order(cl_device_id 
device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 5e1aa8e5..3558401b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,6 +59,13 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mutable_dispatch_out_of_order(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_simultaneous_out_of_order( + cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements); extern int test_mutable_dispatch_global_size(cl_device_id device, cl_context context, cl_command_queue queue, -- cgit v1.2.3 From c69bc00b56fb7f659df334eeb42c4824430831b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 11 Jul 2023 22:04:30 +0200 Subject: Add arguments tests for cl_khr_command_buffer_mutable_dispatch. (#1749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add arguments tests for cl_khr_command_buffer_mutable_dispatch. Signed-off-by: Paweł Jastrzębski * Add clFinish and clEnqueueCommandBufferKHR to mutable dispatch global arguments test. 
Applied review comments for mutable dispatch global arguments test: - clFinish to ensure the command-buffer has finished executing before calling clUpdateMutableCommandsKHR - Add a second clEnqueueCommandBufferKHR call to enqueue the command-buffer again before calling clEnqueueReadBuffer so that the kernel runs with the new argument Signed-off-by: Paweł Jastrzębski * Add clFinish and clEnqueueCommandBufferKHR to remaining mutable dispatch arguments tests. Applied review comments for remaining mutable dispatch arguments tests: - clFinish to ensure the command-buffer has finished executing before calling clUpdateMutableCommandsKHR Signed-off-by: Paweł Jastrzębski * Fix clang format. Signed-off-by: Paweł Jastrzębski * Fix clang format. Signed-off-by: Paweł Jastrzębski * Recover proper MutableDispatchGlobalArguments test. Signed-off-by: Paweł Jastrzębski * Recover proper MutableDispatchNullArguments test. Signed-off-by: Paweł Jastrzębski * Fix constant magic number. Signed-off-by: Paweł Jastrzębski * Fix review changes. Changes made: - Remove unneeded headers - Remove unneeded variable - Remove unneeded skip condition - Fix type int -> cl_int Signed-off-by: Paweł Jastrzębski * Fix test definitions. Signed-off-by: Paweł Jastrzębski * Fix compilation error. Signed-off-by: Paweł Jastrzębski * Fix clang format. Signed-off-by: Paweł Jastrzębski * Fix clang format. Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski * Fix clang format. Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski * Fix clang format.
Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../CMakeLists.txt | 1 + .../main.cpp | 5 + .../mutable_command_arguments.cpp | 847 +++++++++++++++++++++ .../cl_khr_command_buffer_mutable_dispatch/procs.h | 20 + 4 files changed, 873 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index edf12c8e..ecdb9a3b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_arguments.cpp mutable_command_out_of_order.cpp mutable_command_global_size.cpp mutable_command_local_size.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 07c9550d..cccd58ce 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -31,6 +31,11 @@ test_definition test_list[] = { ADD_TEST(mutable_dispatch_global_size), ADD_TEST(mutable_dispatch_local_size), ADD_TEST(mutable_dispatch_global_offset), + ADD_TEST(mutable_dispatch_svm_arguments), + ADD_TEST(mutable_dispatch_local_arguments), + ADD_TEST(mutable_dispatch_global_arguments), + ADD_TEST(mutable_dispatch_pod_arguments), + ADD_TEST(mutable_dispatch_null_arguments), }; int 
main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp new file mode 100644 index 00000000..5c8291f0 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp @@ -0,0 +1,847 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "testHarness.h" +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases for +// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR: +// - __global arguments +// - __local arguments +// - plain-old-data arguments +// - NULL arguments +// - SVM arguments + +struct MutableDispatchGlobalArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchGlobalArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return 0; + } + + cl_int Run() override + { + cl_int error; + + // Create kernel + + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + })"; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + // Create and initialize buffers + + MTdataHolder d(gRandomSeed); + + std::vector srcData(num_elements); + for (size_t i = 0; i < num_elements; i++) + srcData[i] = (cl_int)genrand_int32(d); + + clMemWrapper srcBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + num_elements * sizeof(cl_int), + srcData.data(), &error); + test_error(error, "Creating src buffer"); + + clMemWrapper dstBuf0 = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating initial dst buffer failed"); + + clMemWrapper dstBuf1 = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating updated dst buffer failed"); 
+ + // Build and execute the command buffer for the initial execution + + error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf); + test_error(error, "Unable to set src kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(dstBuf0), &dstBuf0); + test_error(error, "Unable to set initial dst kernel argument"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the initial execution + + std::vector dstData0(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf0, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData0.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for initial dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData0[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData0[i]); + return TEST_FAIL; + } + } + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(dstBuf1), &dstBuf1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + 
nullptr /* local_work_size */ + }; + + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + std::vector dstData1(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf1, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for modified dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData1[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData1[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +struct MutableDispatchLocalArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchLocalArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return 0; + } + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src1, __local int + *src, __global int *dst) + { + size_t tid = get_global_id(0); + src[tid] = src1[tid]; + dst[tid] = src[tid]; + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threads[1], localThreads[1]; + std::vector constantData; + std::vector resultData; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + 
"sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + size_t sizeToAllocate = + ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData.resize(sizeToAllocate / sizeof(cl_int)); + resultData.resize(sizeToAllocate / sizeof(cl_int)); + + for (size_t i = 0; i < numberOfInts; i++) + constantData[i] = (cl_int)genrand_int32(d); + + clMemWrapper streams[2]; + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, + constantData.data(), &error); + test_error(error, "Creating test array failed"); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = + clSetKernelArg(kernel, 1, numberOfInts * sizeof(cl_int), nullptr); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + + threads[0] = numberOfInts; + localThreads[0] = 1; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, threads, + localThreads, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_mem), nullptr }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + 
cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = + clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < numberOfInts; i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + return TEST_FAIL; + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchPODArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchPODArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, int dst) + { + size_t tid = get_global_id(0); + dst = src[tid]; + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threads[1], localThreads[1]; + std::vector constantData; + std::vector resultData; + + error = 
create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + size_t sizeToAllocate = + ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData.resize(sizeToAllocate / sizeof(cl_int)); + resultData.resize(sizeToAllocate / sizeof(cl_int)); + + for (size_t i = 0; i < numberOfInts; i++) + constantData[i] = (cl_int)genrand_int32(d); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, + constantData.data(), &error); + test_error(error, "Creating test array failed"); + + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + cl_int intarg = 10; + error = clSetKernelArg(kernel, 1, sizeof(cl_int), &intarg); + test_error(error, "Unable to set indexed kernel arguments"); + + threads[0] = numberOfInts; + localThreads[0] = 1; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, threads, + localThreads, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + intarg = 20; + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_int), &intarg }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos 
*/, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < numberOfInts; i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + return TEST_FAIL; + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchNullArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchNullArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + cl_int Run() override + { + cl_int error; + + // Create kernel + + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src ? 
src[tid] : 12345; + })"; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + std::vector srcData(num_elements); + for (size_t i = 0; i < num_elements; i++) + srcData[i] = (cl_int)genrand_int32(d); + + clMemWrapper srcBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + num_elements * sizeof(cl_int), + srcData.data(), &error); + test_error(error, "Creating src buffer"); + + clMemWrapper dstBuf = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating dst buffer failed"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf); + test_error(error, "Unable to set src kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(dstBuf), &dstBuf); + test_error(error, "Unable to set initial dst kernel argument"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the initial execution + + std::vector dstData0(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData0.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for initial dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData0[i]) 
+ { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData0[i]); + return TEST_FAIL; + } + } + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg{ 0, sizeof(cl_mem), nullptr }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + std::vector dstData1(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for modified dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (12345 != dstData1[i]) + { + log_error("Modified data failed to verify: %d != dst[%zu]=%d\n", + 12345, i, dstData1[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchSVMArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchSVMArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : 
BasicMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_device_svm_capabilities svm_caps; + bool svm_capabilities = + !clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, + sizeof(svm_caps), &svm_caps, NULL) + && svm_caps != 0; + + return !svm_capabilities || BasicMutableCommandBufferTest::Skip(); + } + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + const char *svm_arguments_kernel = + R"( + typedef struct { + global int* ptr; + } wrapper; + __kernel void test_svm_arguments(__global wrapper* pWrapper) + { + size_t i = get_global_id(0); + pWrapper->ptr[i]++; + })"; + + create_single_kernel_helper(context, &program, &kernel, 1, + &svm_arguments_kernel, + "test_svm_arguments"); + + return 0; + } + + cl_int Run() override + { + const cl_int zero = 0; + cl_int error; + + // Allocate and initialize SVM for initial execution + + cl_int *initWrapper = (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, + sizeof(cl_int *), 0); + cl_int *initBuffer = (cl_int *)clSVMAlloc( + context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0); + test_assert_error(initWrapper != nullptr && initBuffer != nullptr, + "clSVMAlloc failed for initial execution"); + + error = clEnqueueSVMMemcpy(queue, CL_TRUE, initWrapper, &initBuffer, + sizeof(cl_int *), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed for initWrapper"); + + error = clEnqueueSVMMemFill(queue, initBuffer, &zero, sizeof(zero), + num_elements * sizeof(cl_int), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed for initBuffer"); + + // Allocate and initialize SVM for modified execution + + cl_int *newWrapper = + (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); + cl_int *newBuffer = (cl_int *)clSVMAlloc( + context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0); + test_assert_error(newWrapper != nullptr && newBuffer != nullptr, + "clSVMAlloc failed for modified 
execution"); + + error = clEnqueueSVMMemcpy(queue, CL_TRUE, newWrapper, &newBuffer, + sizeof(cl_int *), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed for newWrapper"); + + error = clEnqueueSVMMemFill(queue, newBuffer, &zero, sizeof(zero), + num_elements * sizeof(cl_int), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed for newB"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArgSVMPointer(kernel, 0, initWrapper); + test_error(error, "clSetKernelArg failed for initWrapper"); + + error = clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(initBuffer), &initBuffer); + test_error(error, "clSetKernelExecInfo failed for initBuffer"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR + | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR, + 0 + }; + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Check the results of the initial execution + + error = + clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, initBuffer, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMap failed for initBuffer"); + + for (size_t i = 0; i < num_elements; i++) + { + if (initBuffer[i] != 1) + { + log_error("Initial verification failed at index %zu: Got %d, " + "wanted 1\n", + i, initBuffer[i]); + return TEST_FAIL; + } + } + + error = clEnqueueSVMUnmap(queue, initBuffer, 0, nullptr, nullptr); + 
test_error(error, "clEnqueueSVMUnmap failed for initBuffer"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg_svm{}; + arg_svm.arg_index = 0; + arg_svm.arg_value = newWrapper; + + cl_mutable_dispatch_exec_info_khr exec_info{}; + exec_info.param_name = CL_KERNEL_EXEC_INFO_SVM_PTRS; + exec_info.param_value_size = sizeof(newBuffer); + exec_info.param_value = &newBuffer; + + cl_mutable_dispatch_config_khr dispatch_config{}; + dispatch_config.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR; + dispatch_config.command = command; + dispatch_config.num_svm_args = 1; + dispatch_config.arg_svm_list = &arg_svm; + dispatch_config.num_exec_infos = 1; + dispatch_config.exec_info_list = &exec_info; + + cl_mutable_base_config_khr mutable_config{}; + mutable_config.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR; + mutable_config.num_mutable_dispatch = 1; + mutable_config.mutable_dispatch_list = &dispatch_config; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + error = + clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, newBuffer, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMap failed for newBuffer"); + + for (size_t i = 0; i < num_elements; i++) + { + if (newBuffer[i] != 1) + { + log_error("Modified verification failed at index %zu: Got %d, " + "wanted 1\n", + i, newBuffer[i]); + return TEST_FAIL; + } + } + + error = clEnqueueSVMUnmap(queue, newBuffer, 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMUnmap failed for newBuffer"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Clean up + + clSVMFree(context, initWrapper); + 
clSVMFree(context, initBuffer); + clSVMFree(context, newWrapper); + clSVMFree(context, newBuffer); + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + + +int test_mutable_dispatch_local_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_global_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, + queue, num_elements); +} + +int test_mutable_dispatch_pod_arguments(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_null_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_svm_arguments(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 3558401b..578dda50 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,6 +59,26 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mutable_dispatch_global_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int 
test_mutable_dispatch_local_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_pod_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_null_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_svm_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); extern int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context, cl_command_queue queue, -- cgit v1.2.3 From 6c841fbad817439fa37037571c42596cbb2e406f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 11 Jul 2023 22:52:16 +0200 Subject: Add image arguments tests for cl_khr_command_buffer_mutable_dispatch. (#1750) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add image arguments tests for cl_khr_command_buffer_mutable_dispatch. Signed-off-by: Paweł Jastrzębski * Add fixes for MutableDispatchImage1DArguments and MutableDispatchImage2DArguments. Signed-off-by: Paweł Jastrzębski * Fix constant magic number. Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski * Fix condition for result check. Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Fix kernel parameters - Fix indentation - Add skip condition for CL_DEVICE_IMAGE_SUPPORT Signed-off-by: Paweł Jastrzębski * Change array data type. Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Fix skip conditions - Fix kernel arguments update - Change random generated values for dst_image Signed-off-by: Paweł Jastrzębski * Fix review comments. Signed-off-by: Paweł Jastrzębski * Remove unneeded headers. 
Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../CMakeLists.txt | 1 + .../main.cpp | 2 + .../mutable_command_global_offset.cpp | 9 - .../mutable_command_image_arguments.cpp | 427 +++++++++++++++++++++ .../cl_khr_command_buffer_mutable_dispatch/procs.h | 8 + 5 files changed, 438 insertions(+), 9 deletions(-) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index ecdb9a3b..0d4dd039 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_image_arguments.cpp mutable_command_arguments.cpp mutable_command_out_of_order.cpp mutable_command_global_size.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index cccd58ce..a2fae497 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,8 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_image_1d_arguments), + ADD_TEST(mutable_dispatch_image_2d_arguments), 
ADD_TEST(mutable_dispatch_out_of_order), ADD_TEST(mutable_dispatch_simultaneous_out_of_order), ADD_TEST(mutable_dispatch_global_size), diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp index 70e1d9b1..80bc015a 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp @@ -15,16 +15,7 @@ // #include -#include "typeWrappers.h" -#include "procs.h" -#include "testHarness.h" #include "imageHelpers.h" -#include -#include -#include -#include -#include -#include #include "mutable_command_basic.h" #include diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp new file mode 100644 index 00000000..b1ce25ec --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp @@ -0,0 +1,427 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases for +// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR: +// - image arguments + +struct MutableDispatchImage1DArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchImage1DArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return CL_SUCCESS; + } + + bool Skip() override + { + cl_bool image_support; + + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(image_support), &image_support, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + return (!mutable_support || !image_support) + || BasicMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"(__kernel void sample_test( read_only image1d_t source, sampler_t + sampler, write_only image1d_t dest) + { + int offset = get_global_id(0); + + int4 color = read_imagei( source, sampler, offset ); + + write_imagei( dest, offset, color ); + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = 
CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = 4; + image_desc.image_row_pitch = 0; + image_desc.num_mip_levels = 0; + + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; + + image_descriptor imageInfo = { 0 }; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + imageInfo.format = &formats; + imageInfo.width = 4; + + BufferOwningPtr imageValues_input, imageValues_output, outputData; + MTdataHolder d(gRandomSeed); + generate_random_image_data(&imageInfo, imageValues_input, d); + generate_random_image_data(&imageInfo, imageValues_output, d); + generate_random_image_data(&imageInfo, outputData, d); + + char *host_ptr_input = (char *)imageValues_input; + char *host_ptr_output = (char *)imageValues_output; + + clMemWrapper src_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_input, nullptr, &error); + test_error(error, "create_image_1d failed"); + + clMemWrapper dst_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_output, nullptr, &error); + test_error(error, "create_image_2d failed"); + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image); + test_error(error, "Unable to set indexed kernel arguments"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + 
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + size_t globalDim[3] = { 4, 1, 1 }, localDim[3] = { 1, 1, 1 }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, globalDim, + localDim, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + clMemWrapper new_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_output, nullptr, &error); + test_error(error, "create_image_1d failed"); + + cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image }; + cl_mutable_dispatch_arg_khr args[] = { arg_2 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { image_desc.image_width, 1, 1 }; + + error = clEnqueueReadImage(queue, new_image, CL_TRUE, 
origin, region, 0, + 0, outputData, 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < imageInfo.width; ++i) + { + if (imageValues_input[i] != outputData[i]) + { + log_error("Data failed to verify: imageValues[%d]=%d != " + "outputData[%d]=%d\n", + i, imageValues_input[i], i, outputData[i]); + + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +struct MutableDispatchImage2DArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchImage2DArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return CL_SUCCESS; + } + + bool Skip() override + { + cl_bool image_support; + + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(image_support), &image_support, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + return (!mutable_support || !image_support) + || BasicMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + + const char *sample_const_arg_kernel = + R"(__kernel void sample_test( read_only image2d_t source, sampler_t + sampler, write_only image2d_t dest) + { + int x = get_global_id(0); + int y = get_global_id(1); + + int4 color = read_imagei( source, sampler, (int2) (x, y) ); + + write_imagei( dest, (int2) (x, y), color ); + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + + cl_image_desc image_desc; + 
memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_width = 4; + image_desc.image_height = 4; + image_desc.image_row_pitch = 0; + image_desc.num_mip_levels = 0; + + size_t data_size = + image_desc.image_width * image_desc.image_height * sizeof(cl_int); + + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; + + image_descriptor imageInfo = { 0 }; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + imageInfo.width = 4; + imageInfo.height = 4; + imageInfo.format = &formats; + + BufferOwningPtr imageValues_input, imageValues_output; + + MTdataHolder d(gRandomSeed); + generate_random_image_data(&imageInfo, imageValues_input, d); + generate_random_image_data(&imageInfo, imageValues_output, d); + + char *host_ptr_input = (char *)imageValues_input; + char *host_ptr_output = (char *)imageValues_output; + std::vector outputData(data_size); + + clMemWrapper src_image = + create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &formats, image_desc.image_width, + image_desc.image_height, 0, host_ptr_input, &error); + test_error(error, "create_image_2d failed"); + + clMemWrapper dst_image = create_image_2d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, image_desc.image_height, 0, host_ptr_output, + &error); + test_error(error, "create_image_2d failed"); + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler); + test_error(error, "Unable to set indexed kernel arguments"); + + error = 
clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image); + test_error(error, "Unable to set indexed kernel arguments"); + + size_t globalDim[3] = { 4, 4, 1 }, localDim[3] = { 1, 1, 1 }; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, globalDim, + localDim, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + clMemWrapper new_image = create_image_2d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, image_desc.image_height, 0, + imageValues_output, &error); + test_error(error, "create_image_2d failed"); + + cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image }; + cl_mutable_dispatch_arg_khr args[] = { arg_2 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, 
command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { image_desc.image_width, image_desc.image_height, + 1 }; + + error = clEnqueueReadImage(queue, new_image, CL_TRUE, origin, region, 0, + 0, outputData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < imageInfo.width * imageInfo.height; ++i) + { + if (imageValues_input[i] != outputData[i]) + { + log_error("Data failed to verify: imageValues[%d]=%d != " + "outputData[%d]=%d\n", + i, imageValues_input[i], i, outputData[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_image_1d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchImage1DArguments>(device, context, + queue, num_elements); +} + +int test_mutable_dispatch_image_2d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchImage2DArguments>(device, context, + queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 578dda50..1db48917 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,6 +59,14 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mutable_dispatch_image_1d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_image_2d_arguments(cl_device_id device, + cl_context context, 
+ cl_command_queue queue, + int num_elements); extern int test_mutable_dispatch_global_arguments(cl_device_id device, cl_context context, cl_command_queue queue, -- cgit v1.2.3 From b32d566bca446548c07b53a069ce28e6f95282e3 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 13 Jul 2023 17:17:16 +0100 Subject: conversions: fix undefined behaviour in DataInfoSpec For conversion from integers to float, the DataInfoSpec constructor tries to convert `CL_FLT_MAX` to an integer. The float value cannot be represented as an integer, which is undefined behaviour. Fix by only doing this conversion when `InType` is a floating point value. While at it, use `static_cast` for the conversions. Signed-off-by: Sven van Haastregt --- test_conformance/conversions/conversions_data_info.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index b02773b1..8440c2c7 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -150,13 +150,13 @@ DataInfoSpec::DataInfoSpec(const DataInitInfo &agg) else if (std::is_same::value) ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX); - InType outMin = ((InType)ranges.first); - InType outMax = ((InType)ranges.second); - // clang-format off // for readability sake keep this section unformatted if (std::is_floating_point::value) { // from float/double + InType outMin = static_cast(ranges.first); + InType outMax = static_cast(ranges.second); + InType eps = std::is_same::value ? 
(InType) FLT_EPSILON : (InType) DBL_EPSILON; if (std::is_integral::value) { // to char/uchar/short/ushort/int/uint/long/ulong -- cgit v1.2.3 From 19bddc90e4c56663686c1416cb148386026cd167 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 14 Jul 2023 13:41:49 +0100 Subject: conversions: restore optimization workaround The refactoring of the conversions test dropped the workaround added by 59a12047a ("Fix for test_conversions failure with Clang build on Linux #1057 (#1062)", 2021-05-11). Signed-off-by: Sven van Haastregt --- test_conformance/conversions/conversions_data_info.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index 8440c2c7..4f46a24e 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -408,7 +408,9 @@ void DataInfoSpec::conv(OutType *out, InType *in) // always convert to +0.0 } #else - *out = (*in == 0 ? 0.0 : (OutType)*in); + // Use volatile to prevent optimization by Clang compiler + volatile InType vi = *in; + *out = (vi == 0 ? 0.0 : static_cast(vi)); #endif } else if (std::is_same::value) @@ -467,12 +469,21 @@ void DataInfoSpec::conv(OutType *out, InType *in) else { if (std::is_same::value) - *out = (*in == 0 ? 0.f : *in); // Per IEEE-754-2008 5.4.1, 0's - // always convert to +0.0 + { + // Use volatile to prevent optimization by Clang compiler + volatile InType vi = *in; + // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0 + *out = (vi == 0 ? 0.0f : vi); + } else if (std::is_same::value) + { + // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0 *out = (*in == 0 ? 
0.0 : *in); + } else + { *out = (OutType)*in; + } } } -- cgit v1.2.3 From 3c1f2814b87a7d54c17954c6937df3097f54d274 Mon Sep 17 00:00:00 2001 From: Nora <92854373+norablackcat@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:07:17 -0600 Subject: add cl_khr_expect_assume to test_compiler_defines_for_extensions (#1735) --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index d53af8dc..89626b79 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -76,6 +76,7 @@ const char *known_extensions[] = { "cl_khr_device_uuid", "cl_khr_pci_bus_info", "cl_khr_suggested_local_work_size", + "cl_khr_expect_assume", "cl_khr_spirv_linkonce_odr", "cl_khr_semaphore", "cl_khr_external_semaphore", -- cgit v1.2.3 From c3babfeebbc7eb09399331ca17bb89da2b77c777 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 19 Jul 2023 09:48:59 +0100 Subject: conversions: fix undefined behaviour from shift by 64 (#1788) Avoid a shift by 64 on a `uint64_t`. The value resulting from the spurious shift was overwritten later, so just avoid the shift in that case. 
Signed-off-by: Sven van Haastregt --- test_conformance/conversions/basic_test_conversions.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 43fb449b..1020638a 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -538,7 +538,6 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, cl_ulong wall_start = mach_absolute_time(); #endif - uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); cl_uint threads = GetThreadCount(); DataInitInfo info = { 0, 0, outType, inType, sat, round, threads }; @@ -601,7 +600,9 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, // Figure out how many elements are in a work block // we handle 64-bit types a bit differently. - if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; + uint64_t lastCase = (8 * gTypeSizes[inType] > 32) + ? 0x100000000ULL + : 1ULL << (8 * gTypeSizes[inType]); if (!gWimpyMode && gIsEmbedded) step = blockCount * EMBEDDED_REDUCTION_FACTOR; -- cgit v1.2.3 From 3a1daafd5a0dc7104db6df39020edf7c2688292e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 19 Jul 2023 13:47:18 +0100 Subject: harness: add missing stdexcept header (#1783) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Required for std::runtime_error. 
Signed-off-by: Kévin Petit --- test_common/harness/stringHelpers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/test_common/harness/stringHelpers.h b/test_common/harness/stringHelpers.h index a02624d6..e1275f10 100644 --- a/test_common/harness/stringHelpers.h +++ b/test_common/harness/stringHelpers.h @@ -18,6 +18,7 @@ #define STRING_HELPERS_H #include +#include #include inline std::string concat_kernel(const char *sstr[], int num) -- cgit v1.2.3 From cd5c1659469862b9a927f88e6e8d0112e774620f Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Wed, 19 Jul 2023 13:51:30 +0100 Subject: Deduplicate test_barrier (#1542) Merge test_barrier and test_wg_barrier. Reformat using clang-format kernel source code. Signed-off-by: John Kesapides --- test_conformance/basic/CMakeLists.txt | 2 - test_conformance/basic/test_barrier.cpp | 189 ++++++++++++++--------------- test_conformance/basic/test_wg_barrier.cpp | 159 ------------------------ 3 files changed, 91 insertions(+), 259 deletions(-) delete mode 100644 test_conformance/basic/test_wg_barrier.cpp diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index 47c1c980..9dcf1d5a 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -52,14 +52,12 @@ set(${MODULE_NAME}_SOURCES test_kernel_call_kernel_function.cpp test_local_kernel_scope.cpp test_progvar.cpp - test_wg_barrier.cpp test_global_linear_id.cpp test_local_linear_id.cpp test_enqueued_local_size.cpp test_simple_image_pitch.cpp test_get_linear_ids.cpp test_rw_image_access_qualifier.cpp - test_wg_barrier.cpp test_enqueued_local_size.cpp test_global_linear_id.cpp test_local_linear_id.cpp diff --git a/test_conformance/basic/test_barrier.cpp b/test_conformance/basic/test_barrier.cpp index d20af14a..6352b42f 100644 --- a/test_conformance/basic/test_barrier.cpp +++ b/test_conformance/basic/test_barrier.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 
2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,143 +21,136 @@ #include #include +#include +#include +#include #include "procs.h" -const char *barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i1; i = hadd(i,1))\n" -" {\n" -" barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *outptr, int n) +namespace { +const char *barrier_kernel_code = R"( +__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, + __global int *sum) { - int r = 0; - int i; + int tid = get_local_id(0); + int lsize = get_local_size(0); + int i; - for (i=0; i 1; i = hadd(i, 1)) { - log_error("BARRIER test failed\n"); - return -1; + BARRIER(CLK_GLOBAL_MEM_FENCE); + if (tid + i < lsize) tmp_sum[tid] += tmp_sum[tid + i]; + lsize = i; } - log_info("BARRIER test passed\n"); - return 0; + // no barrier is required here because last person to write to tmp_sum[0] + // was tid 0 + if (tid == 0) *sum = tmp_sum[0]; } +)"; -int -test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +void generate_random_inputs(std::vector &v) { - cl_mem streams[3]; - cl_int *input_ptr = NULL, *output_ptr = NULL; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - size_t local_threads[3]; - int err; - int i; - size_t max_local_workgroup_size[3]; - size_t max_threadgroup_size = 0; - MTdata d; + RandomSeed 
seed(gRandomSeed); - err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" ); - test_error(err, "Failed to build kernel/program."); + auto random_generator = [&seed]() { + return static_cast( + get_random_float(-0x01000000, 0x01000000, seed)); + }; + + std::generate(v.begin(), v.end(), random_generator); +} + +int test_barrier_common(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + std::string barrier_str) +{ + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_threadgroup_size), &max_threadgroup_size, NULL); - test_error(err, "clGetKernelWorkgroupInfo failed."); + cl_int output; + int err; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); - test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + size_t max_threadgroup_size = 0; + std::string build_options = std::string("-DBARRIER=") + barrier_str; + err = create_single_kernel_helper(context, &program, &kernel, 1, + &barrier_kernel_code, "compute_sum", + build_options.c_str()); + test_error(err, "Failed to build kernel/program."); - // Pick the minimum of the device and the kernel - if (max_threadgroup_size > max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, + &max_threadgroup_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed."); // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; + while (num_elements % max_threadgroup_size) max_threadgroup_size--; - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); + std::vector input(num_elements); streams[0] = 
clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); + sizeof(cl_int) * num_elements, nullptr, &err); test_error(err, "clCreateBuffer failed."); - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), + nullptr, &err); test_error(err, "clCreateBuffer failed."); streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); + sizeof(cl_int) * max_threadgroup_size, nullptr, &err); test_error(err, "clCreateBuffer failed."); - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include -#include - - -#include "procs.h" - -const char *wg_barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i1; i = hadd(i,1))\n" -" {\n" -" work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *tmpptr, int *outptr, int n) -{ - int i; - int reference = 0; - - for (i=0; i max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; - - // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; - - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, 
&err); - test_error(err, "clCreateBuffer failed."); - streams[2] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); - test_error(err, "clCreateBuffer failed."); - - d = init_genrand( gRandomSeed ); - for (i=0; i Date: Wed, 19 Jul 2023 14:18:49 +0100 Subject: Use the CTS typewrappers in image_r8 (#1539) Signed-off-by: John Kesapides --- test_conformance/basic/test_image_r8.cpp | 196 ++++++++++++------------------- 1 file changed, 72 insertions(+), 124 deletions(-) diff --git a/test_conformance/basic/test_image_r8.cpp b/test_conformance/basic/test_image_r8.cpp index b633d6ab..2dca1611 100644 --- a/test_conformance/basic/test_image_r8.cpp +++ b/test_conformance/basic/test_image_r8.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,163 +21,111 @@ #include #include +#include +#include #include "procs.h" -static const char *r_uint8_kernel_code = -"__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n" -"{\n" -" int tid_x = get_global_id(0);\n" -" int tid_y = get_global_id(1);\n" -" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" -" uint4 color;\n" -"\n" -" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" -" dst[indx] = (unsigned char)(color.x);\n" -"\n" -"}\n"; - - -static unsigned char * -generate_8bit_image(int w, int h, MTdata d) +namespace { +const char *r_uint8_kernel_code = R"( +__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler) { - unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char)); - int i; + int tid_x = get_global_id(0); + int tid_y = get_global_id(1); + int indx = tid_y * get_image_width(srcimg) + tid_x; + uint4 color; - for (i=0; i &v) { - int i; + RandomSeed 
seed(gRandomSeed); - for (i=0; i(genrand_int32(seed)); + }; - log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n"); - return 0; + std::generate(v.begin(), v.end(), random_generator); } -int -test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +} +int test_image_r8(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { - cl_mem streams[2]; - cl_image_format img_format; - cl_uchar *input_ptr, *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[3]; - int img_width = 512; - int img_height = 512; - int err; - MTdata d; - - PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) - - img_format.image_channel_order = CL_R; - img_format.image_channel_data_type = CL_UNSIGNED_INT8; + clMemWrapper streams[2]; + clProgramWrapper program; + clKernelWrapper kernel; + const size_t img_width = 512; + const size_t img_height = 512; + const size_t length = img_width * img_height; + int err; + + PASSIVE_REQUIRE_IMAGE_SUPPORT(device) + + const cl_image_format img_format = { CL_R, CL_UNSIGNED_INT8 }; // early out if this image type is not supported if (!is_image_format_supported(context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &img_format)) { log_info("WARNING: Image type not supported; skipping test.\n"); - return 0; + return TEST_SKIPPED_ITSELF; } - d = init_genrand( gRandomSeed ); - input_ptr = generate_8bit_image(img_width, img_height, d); - free_mtdata(d); d = NULL; + std::vector input(length); + std::vector output(length); + + generate_random_inputs(input); - output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height); streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &img_format, - img_width, img_height, 0, NULL, NULL); - if (!streams[0]) - { - log_error("create_image_2d failed\n"); - return -1; - } + img_width, img_height, 0, nullptr, &err); + test_error(err, "create_image_2d failed."); streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uchar) * img_width * 
img_height, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err); + test_error(err, "clCreateBuffer failed."); - size_t origin[3] = {0,0,0}, region[3]={img_width, img_height, 1}; - err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, - origin, region, 0, 0, - input_ptr, - 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clWriteImage failed: %d\n", err); - return -1; - } + const size_t origin[3] = { 0, 0, 0 }, + region[3] = { img_width, img_height, 1 }; + err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, + input.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteImage failed."); - err = create_single_kernel_helper(context, &program, &kernel, 1, &r_uint8_kernel_code, "test_r_uint8" ); - if (err) { - log_error("Failed to create kernel and program: %d\n", err); - return -1; - } + err = create_single_kernel_helper(context, &program, &kernel, 1, + &r_uint8_kernel_code, "test_r_uint8"); + test_error(err, "create_single_kernel_helper failed."); - cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); - test_error(err, "clCreateSampler failed"); + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + test_error(err, "clSetKernelArgs failed\n"); + + size_t threads[] = { img_width, img_height }; + err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, threads, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed\n"); - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], 
&streams[1]); - err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed: %d\n", err); - return -1; - } - threads[0] = (size_t)img_width; - threads[1] = (size_t)img_height; - err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, + output.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed\n"); + + if (0 != memcmp(input.data(), output.data(), length)) { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; + log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n"); + err = -1; } - - err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_uchar)*img_width*img_height, (void *)output_ptr, 0, NULL, NULL ); - if (err != CL_SUCCESS) + else { - log_error("clEnqueueReadBuffer failed\n"); - return -1; + log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n"); } - err = verify_8bit_image(input_ptr, output_ptr, img_width, img_height); - - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - clReleaseSampler(sampler); - free(input_ptr); - free(output_ptr); - return err; } - - - - - -- cgit v1.2.3 From 25ce398037f2be72069b8118502fbfc35bc0b4e3 Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Thu, 20 Jul 2023 16:13:42 +0100 Subject: Use CTS type wrappers for test_constant. 
(#1543) Signed-off-by: John Kesapides --- test_conformance/basic/test_constant.cpp | 351 ++++++++++++++----------------- 1 file changed, 156 insertions(+), 195 deletions(-) diff --git a/test_conformance/basic/test_constant.cpp b/test_conformance/basic/test_constant.cpp index ed25c6ef..fc2667ee 100644 --- a/test_conformance/basic/test_constant.cpp +++ b/test_conformance/basic/test_constant.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,41 +21,44 @@ #include #include +#include +#include #include "procs.h" -const char *constant_kernel_code = -"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" float ftmp = tmpF[tid]; \n" -" float Itmp = tmpI[tid]; \n" -" out[tid] = ftmp * Itmp; \n" -"}\n"; - -const char *loop_constant_kernel_code = -"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n" -"{\n" -" int tid = get_global_id(0);\n" -" float sum = 0;\n" -" for (int i = 0; i < num; i++) {\n" -" float pos = i_pos[i*3];\n" -" sum += pos;\n" -" }\n" -" out[tid] = sum;\n" -"}\n"; - - -static int -verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) +namespace { +const char* constant_kernel_code = R"( +__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI) +{ + int tid = get_global_id(0); + + float ftmp = tmpF[tid]; + float Itmp = tmpI[tid]; + out[tid] = ftmp * Itmp; +} +)"; + +const char* loop_constant_kernel_code = R"( +kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num) { - int i; + int tid = get_global_id(0); + float sum = 0; + for (int i = 0; i < num; i++) { + float pos = i_pos[i*3]; + sum += pos; + } + out[tid] = sum; +} +)"; + - for (i=0; i < n; i++) +int 
verify(std::vector& tmpF, std::vector& tmpI, + std::vector& out) +{ + for (int i = 0; i < out.size(); i++) { float f = tmpF[i] * tmpI[i]; - if( out[i] != f ) + if (out[i] != f) { log_error("CONSTANT test failed\n"); return -1; @@ -66,214 +69,172 @@ verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) return 0; } - -static int -verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n) +int verify_loop_constant(const std::vector& tmp, + std::vector& out, cl_int l) { - int i; - cl_int j; - for (i=0; i < n; i++) - { - float sum = 0; - for (j=0; j < l; ++j) - sum += tmp[j*3]; + float sum = 0; + for (int j = 0; j < l; ++j) sum += tmp[j * 3]; - if( out[i] != sum ) - { - log_error("loop CONSTANT test failed\n"); - return -1; - } + auto predicate = [&sum](cl_float elem) { return sum != elem; }; + + if (std::any_of(out.cbegin(), out.cend(), predicate)) + { + log_error("loop CONSTANT test failed\n"); + return -1; } log_info("loop CONSTANT test passed\n"); return 0; } -int -test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +template void generate_random_inputs(std::vector& v) +{ + RandomSeed seed(gRandomSeed); + + auto random_generator = [&seed]() { + return static_cast(get_random_float(-0x02000000, 0x02000000, seed)); + }; + + std::generate(v.begin(), v.end(), random_generator); +} +} + +int test_constant(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { - cl_mem streams[3]; - cl_int *tmpI; - cl_float *tmpF, *out; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - int err; - unsigned int i; + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t global_threads[3]; + int err; cl_ulong maxSize, maxGlobalSize, maxAllocSize; size_t num_floats, num_ints, constant_values; - MTdata d; - RoundingMode oldRoundMode; + RoundingMode oldRoundMode; int isRTZ = 0; - /* Verify our test buffer won't be bigger than allowed */ - err = 
clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( err, "Unable to get max constant buffer size" ); - - log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize); - - // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE - err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); - test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - - if (maxSize > maxGlobalSize / 4) - maxSize = maxGlobalSize / 4; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); - test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); - - if (maxSize > maxAllocSize) - maxSize = maxAllocSize; - - maxSize/=4; - num_ints = (size_t)maxSize/sizeof(cl_int); - num_floats = (size_t)maxSize/sizeof(cl_float); - if (num_ints >= num_floats) { - constant_values = num_floats; - } else { - constant_values = num_ints; - } - - log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n", - constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float)); - - tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values); - tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values); - out = (cl_float*)malloc(sizeof(cl_float) * constant_values); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * constant_values, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); 
- return -1; - } + /* Verify our test buffer won't be bigger than allowed */ + err = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(err, "Unable to get max constant buffer size"); + log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", + maxSize); - d = init_genrand( gRandomSeed ); - for (i=0; i(maxSize / sizeof(cl_int)); + num_floats = static_cast(maxSize / sizeof(cl_float)); + constant_values = std::min(num_floats, num_ints); + + + log_info( + "Test will attempt to use %lu bytes with one %lu byte constant int " + "buffer and one %lu byte constant float buffer.\n", + constant_values * sizeof(cl_int) + constant_values * sizeof(cl_float), + constant_values * sizeof(cl_int), constant_values * sizeof(cl_float)); + + std::vector tmpI(constant_values); + std::vector tmpF(constant_values); + std::vector out(constant_values); + + + streams[0] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); - err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" ); - if (err) { - log_error("Failed to create kernel and program: %d\n", err); - return -1; - } + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); + + streams[2] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_int) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); + + generate_random_inputs(tmpI); + generate_random_inputs(tmpF); + + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(cl_float) * constant_values, tmpF.data(), + 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed"); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, + sizeof(cl_int) * constant_values, tmpI.data(), 0, + nullptr, nullptr); + test_error(err, 
"clEnqueueWriteBuffer faile."); + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &constant_kernel_code, "constant_kernel"); + test_error(err, "Failed to create kernel and program"); err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } + test_error(err, "clSetKernelArgs failed"); global_threads[0] = constant_values; - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed: %d\n", err); - return -1; - } - err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed"); + + err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(cl_float) * constant_values, out.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); - //If we only support rtz mode - if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded) + // If we only support rtz mode + if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded) { oldRoundMode = set_round(kRoundTowardZero, kfloat); isRTZ = 1; } - err = verify(tmpF, tmpI, out, (int)constant_values); + err = verify(tmpF, tmpI, out); - if (isRTZ) - (void)set_round(oldRoundMode, kfloat); + if (isRTZ) (void)set_round(oldRoundMode, kfloat); // Loop constant buffer test - cl_program loop_program; - cl_kernel loop_kernel; + clProgramWrapper loop_program; + clKernelWrapper loop_kernel; cl_int limit = 2; - memset(out, 0, 
sizeof(cl_float) * constant_values); + memset(out.data(), 0, sizeof(cl_float) * constant_values); err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1, - &loop_constant_kernel_code, "loop_constant_kernel" ); - if (err) { - log_error("Failed to create loop kernel and program: %d\n", err); - return -1; - } + &loop_constant_kernel_code, + "loop_constant_kernel"); + test_error(err, "Failed to create kernel and program"); err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]); err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit); - if (err != CL_SUCCESS) { - log_error("clSetKernelArgs for loop kernel failed\n"); - return -1; - } + test_error(err, "clSetKernelArgs failed"); - err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed: %d\n", err); - return -1; - } - err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL ); - if (err != CL_SUCCESS) { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueNDRangeKernel(queue, loop_kernel, 1, nullptr, global_threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed"); - err = verify_loop_constant(tmpF, out, limit, (int)constant_values); + err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(cl_float) * constant_values, out.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + err = verify_loop_constant(tmpF, out, limit); - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseKernel(kernel); - clReleaseProgram(program); - clReleaseKernel(loop_kernel); - clReleaseProgram(loop_program); - free(tmpI); - free(tmpF); - free(out); return err; } - - - - - -- cgit v1.2.3 From 
0460756f6eab686572b284ec7a96977b507126eb Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Sun, 23 Jul 2023 09:36:23 +0100 Subject: basic/int2fp: fix missing include (#1789) With GCC 13 some headers are no longer included transitively through C++ Standard Library headers. Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_int2fp.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/basic/test_int2fp.cpp b/test_conformance/basic/test_int2fp.cpp index 8b1203a7..dd5cc9a1 100644 --- a/test_conformance/basic/test_int2fp.cpp +++ b/test_conformance/basic/test_int2fp.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From a01349c44e71453dc45e14507de9724521841dc9 Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Thu, 27 Jul 2023 14:00:03 +0100 Subject: Use CTS type wrappers for test_basic test_loop (#1541) * Use CTS type wrappers for test_basic test_loop * Move variable declaration to first use in verify_loop Signed-off-by: John Kesapides --- test_conformance/basic/test_loop.cpp | 210 +++++++++++++---------------------- 1 file changed, 80 insertions(+), 130 deletions(-) diff --git a/test_conformance/basic/test_loop.cpp b/test_conformance/basic/test_loop.cpp index 1a91d9e4..1c9acd1a 100644 --- a/test_conformance/basic/test_loop.cpp +++ b/test_conformance/basic/test_loop.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -21,45 +21,45 @@ #include #include +#include #include "procs.h" -const char *loop_kernel_code = -"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -" int n = get_global_size(0);\n" -" int i, j;\n" -"\n" -" dst[tid] = 0;\n" -" for (i=0,j=loopindx[tid]; i= n)\n" -" j = 0;\n" -" dst[tid] += src[j];\n" -" }\n" -"\n" -"}\n"; - - -int -verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) +namespace { +const char *loop_kernel_code = R"( +__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst) { - int r, i, j, k; + int tid = get_global_id(0); + int n = get_global_size(0); + int i, j; - for (i=0; i= n) + j = 0; + dst[tid] += src[j]; + } +} +)"; + + +int verify_loop(std::vector inptr, std::vector loopindx, + std::vector loopcnt, std::vector outptr, int n) +{ + for (int i = 0; i < n; i++) + { + int r = 0; + for (int j = 0, k = loopindx[i]; j < loopcnt[i]; j++, k++) { - if (k >= n) - k = 0; + if (k >= n) k = 0; r += inptr[k]; } if (r != outptr[i]) { - log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r); + log_error("LOOP test failed: %d found, expected %d\n", outptr[i], + r); return -1; } } @@ -67,119 +67,69 @@ verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) log_info("LOOP test passed\n"); return 0; } - -int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +} +int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) { - cl_mem streams[4]; - cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[1]; - int err, i; + clMemWrapper streams[4]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; size_t length = sizeof(cl_int) * num_elements; - input_ptr = 
(cl_int*)malloc(length); - loop_indx = (cl_int*)malloc(length); - loop_cnt = (cl_int*)malloc(length); - output_ptr = (cl_int*)malloc(length); + std::vector input(length); + std::vector loop_indx(length); + std::vector loop_cnt(length); + std::vector output(length); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[1]) + for (auto &stream : streams) { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; + stream = + clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err); + test_error(err, "clCreateBuffer failed."); } - MTdata d = init_genrand( gRandomSeed ); - for (i=0; i(genrand_int32(seed)); + loop_indx[i] = + static_cast(get_random_float(0, num_elements - 1, seed)); + loop_cnt[i] = + static_cast(get_random_float(0, num_elements / 32, seed)); + }; + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, + input.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, + loop_indx.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, + loop_cnt.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &loop_kernel_code, "test_loop"); + test_error(err, "create_single_kernel_helper failed."); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) { - 
log_error("clSetKernelArgs failed\n"); - return -1; + err = clSetKernelArg(kernel, i, sizeof streams[i], &streams[i]); + test_error(err, "clSetKernelArgs failed\n"); } - threads[0] = (unsigned int)num_elements; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } - - err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clReadArray failed\n"); - return -1; - } - - err = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements); - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(loop_indx); - free(loop_cnt); - free(output_ptr); + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed\n"); - return err; -} + err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, + output.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed\n"); + + err = verify_loop(input, loop_indx, loop_cnt, output, num_elements); + return err; +} -- cgit v1.2.3 From e29d0fd3a1bceefe9c34bd7361a9a2a91971b0d6 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 1 Aug 2023 09:49:43 +0100 Subject: conversions: fix memory leak from excess gMTdata initialization (#1787) `gMTdata` was initialized twice, but freed only once. Drop the first initialization with a local seed, and initialize with `gRandomSeed` instead. 
Signed-off-by: Sven van Haastregt --- test_conformance/conversions/test_conversions.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index a8be2098..dab61dc5 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -118,7 +118,6 @@ const int test_num = ARRAY_SIZE(test_list); int main(int argc, const char **argv) { int error; - cl_uint seed = (cl_uint)time(NULL); argc = parseCustomParam(argc, argv); if (argc == -1) @@ -145,8 +144,8 @@ int main(int argc, const char **argv) #endif vlog("===========================================================\n"); - vlog("Random seed: %u\n", seed); - gMTdata = init_genrand(seed); + vlog("Random seed: %u\n", gRandomSeed); + gMTdata = init_genrand(gRandomSeed); const char *arg[] = { argv[0] }; int ret = @@ -475,8 +474,6 @@ test_status InitCL(cl_device_id device) } } - gMTdata = init_genrand(gRandomSeed); - char c[1024]; static const char *no_yes[] = { "NO", "YES" }; vlog("\nCompute Device info:\n"); -- cgit v1.2.3 From aa23f345c3540f48d9c0380a0cd90e9d41bb131a Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 1 Aug 2023 12:03:00 -0600 Subject: Add testing for sync_fd (#1747) Modify the external semaphore extension test to use SYNC_FD, if available on the device. Deleted tests that are not compatible with blocking semaphores. 
--- .../vulkan_wrapper/opencl_vulkan_wrapper.cpp | 57 ++- .../common/vulkan_wrapper/vulkan_utility.cpp | 30 ++ .../common/vulkan_wrapper/vulkan_utility.hpp | 2 + .../common/vulkan_wrapper/vulkan_wrapper.cpp | 2 + .../common/vulkan_wrapper/vulkan_wrapper_types.hpp | 4 +- .../test_external_semaphore.cpp | 427 +-------------------- 6 files changed, 87 insertions(+), 435 deletions(-) diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp index 0a459e97..4e92e709 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp @@ -740,21 +740,42 @@ clExternalSemaphore::clExternalSemaphore( cl_int err = 0; cl_device_id devList[] = { deviceId, NULL }; -#ifdef _WIN32 - if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32")) - { - throw std::runtime_error("Device does not support " - "cl_khr_external_semaphore_win32 extension\n"); - } -#elif !defined(__APPLE__) - if (!is_extension_available(devList[0], - "cl_khr_external_semaphore_opaque_fd")) + switch (externalSemaphoreHandleType) { - throw std::runtime_error( - "Device does not support cl_khr_external_semaphore_opaque_fd " - "extension \n"); + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_opaque_fd")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_semaphore_opaque_fd " + "extension \n"); + } + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_win32")) + { + throw std::runtime_error( + "Device does not support " + "cl_khr_external_semaphore_win32 extension\n"); + } + break; + case 
VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_sync_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_semaphore_sync_fd " + "extension \n"); + } + break; + default: + throw std::runtime_error( + "Unsupported external semaphore handle type\n"); + break; } -#endif std::vector<cl_semaphore_properties_khr> sema_props{ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, @@ -803,6 +824,16 @@ clExternalSemaphore::clExternalSemaphore( sema_props.push_back((cl_semaphore_properties_khr)handle); #endif break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_SYNC_FD_KHR); + sema_props.push_back(static_cast<cl_semaphore_properties_khr>( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR)); + sema_props.push_back(static_cast<cl_semaphore_properties_khr>( + CL_SEMAPHORE_HANDLE_SYNC_FD_KHR)); + sema_props.push_back(static_cast<cl_semaphore_properties_khr>( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR)); + break; default: ASSERT(0); log_error("Unsupported external memory handle type\n"); diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp index 1a313cce..96c5adbc 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp @@ -248,6 +248,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList() } externalSemaphoreHandleTypeList.push_back( VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT); +#elif defined(__ANDROID__) + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD); #else externalSemaphoreHandleTypeList.push_back( VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD); @@ -480,6 +483,33 @@ const std::vector getSupportedVulkanFormatList() return formatList; } +cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType( + VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType) +{ +
cl_external_semaphore_handle_type_khr clExternalSemaphoreHandleTypeKhr = 0; + switch (vulkanExternalSemaphoreHandleType) + { + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + clExternalSemaphoreHandleTypeKhr = CL_SEMAPHORE_HANDLE_SYNC_FD_KHR; + break; + default: break; + } + return clExternalSemaphoreHandleTypeKhr; +} + uint32_t getVulkanFormatElementSize(VulkanFormat format) { switch (format) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp index 04f5a594..98913257 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp @@ -51,6 +51,8 @@ const std::vector getSupportedVulkanFormatList(); uint32_t getVulkanFormatElementSize(VulkanFormat format); const char* getVulkanFormatGLSLFormat(VulkanFormat format); const char* getVulkanFormatGLSLTypePrefix(VulkanFormat format); +cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType( + VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType); std::string prepareVulkanShader( std::string shaderCode, diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp index 21d8f226..4d803be4 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp @@ -72,6 +72,8 @@ VulkanInstance::VulkanInstance(): 
m_vkInstance(VK_NULL_HANDLE) #if defined(_WIN32) || defined(_WIN64) const char *vulkanLoaderLibraryName = "vulkan-1.dll"; +#elif defined(__ANDROID__) + const char *vulkanLoaderLibraryName = "libvulkan.so"; #elif defined(__linux__) const char *vulkanLoaderLibraryName = "libvulkan.so.1"; #endif diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp index 2473a1d7..fcd19373 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp @@ -169,7 +169,9 @@ enum VulkanExternalSemaphoreHandleType VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR - | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR + | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR }; enum VulkanBufferUsage diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp index a7ed307e..89ab17b3 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp @@ -120,9 +120,11 @@ int test_external_semaphores_queries(cl_device_id deviceID, cl_context context, SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr, CL_SEMAPHORE_TYPE_BINARY_KHR); - SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_uint, 1); + SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_device_id, deviceID); - SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, 1); + SEMAPHORE_PARAM_TEST( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, + 
getCLSemaphoreTypeFromVulkanType(vkExternalSemaphoreHandleType)); // Confirm that querying CL_SEMAPHORE_CONTEXT_KHR returns the right context SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_CONTEXT_KHR, cl_context, context); @@ -290,7 +292,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID, nullptr, 0, nullptr, &wait_event); test_error(err, "Could not wait semaphore"); - // Finish queue_1 and queue_2 + // Finish queue_1 and queue_2 err = clFinish(queue_1); test_error(err, "Could not finish queue"); @@ -304,7 +306,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID, return TEST_PASS; } -// Confirm that a signal followed by a wait will complete successfully +// Confirm that a signal followed by a wait will complete successfully int test_external_semaphores_simple_1(cl_device_id deviceID, cl_context context, cl_command_queue defaultQueue, int num_elements) @@ -931,420 +933,3 @@ int test_external_semaphores_multi_wait(cl_device_id deviceID, return TEST_PASS; } - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first one deferred wait, then one non deferred signal. -int test_external_semaphores_order_1(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. 
" - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Wait semaphore (dependency on user_event) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure signal event is completed while wait event is not - test_assert_event_complete(signal_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish 
queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then one deferred wait. Unblock -// signal, then unblock wait. When wait completes, unblock the other signal. -int test_external_semaphores_order_2(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create 
user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_3, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are completed except for second signal - test_assert_event_complete(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_complete(wait_event); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_event); - - return TEST_PASS; -} - -// Confirm that it 
is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then two deferred waits. Unblock -// one signal and one wait (both blocked by the same user event). When wait -// completes, unblock the other signal. Then unblock the other wait. -int test_external_semaphores_order_3(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not 
create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_1_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_3, &wait_1_event); - test_error(err, "Could not wait semaphore"); - - // Wait semaphore (dependency on user_event_2) - clEventWrapper wait_2_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, &wait_2_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure only second signal and second wait completed - cl_event event_list[] = { signal_2_event, wait_2_event }; - err = clWaitForEvents(2, event_list); - test_error(err, "Could not wait for events"); - - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(wait_1_event); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, 
"Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_1_event); - test_assert_event_complete(wait_2_event); - - return TEST_PASS; -} - -// Test that an invalid semaphore command results in the invalidation of the -// command's event and the dependencies' events -int test_external_semaphores_invalid_command(cl_device_id deviceID, - cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType); - VulkanSemaphore vkVk2CLSemaphore2(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext_1(vkVk2CLSemaphore1, context, - vkExternalSemaphoreHandleType, deviceID); - clExternalSemaphore sema_ext_2(vkVk2CLSemaphore2, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not 
create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore_1 (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore_1 and semaphore_2 (dependency on user_event_1) - clEventWrapper wait_event; - cl_semaphore_khr sema_list[] = { sema_ext_1.getCLSemaphore(), - sema_ext_2.getCLSemaphore() }; - err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1, - &user_event_1, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore_1 (dependency on wait_event and user_event_2) - clEventWrapper signal_2_event; - cl_event wait_list[] = { user_event_2, wait_event }; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), - nullptr, 2, wait_list, &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are not completed - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event_1 (expect failure as waiting on semaphore_2 is not - // allowed (unsignaled) - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_assert_error(err != CL_SUCCESS, - "signal_2_event completed unexpectedly"); - - // Ensure signal_1 is completed while others failed (the second signal - // should fail as it depends on wait) - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - test_assert_event_complete(signal_1_event); - test_assert_event_terminated(wait_event); - test_assert_event_terminated(signal_2_event); - - return 
TEST_PASS; -} -- cgit v1.2.3 From ddf46ca9e8c4c0bf3d1ab0aaa693eab34df8f35b Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 7 Aug 2023 13:51:29 +0100 Subject: math_brute_force: always initialize oldMode (#1796) Avoid a maybe-uninitialized warning by ensuring that `oldMode` is always initialized to 0. There is no need to use `memset` for this, as `FPU_mode_type` is either an `int` or an `int64_t`. Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/unary_two_results_float.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 74c5a160..8d423408 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -189,12 +189,11 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) // Get that moving if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); - FPU_mode_type oldMode; + FPU_mode_type oldMode = 0; RoundingMode oldRoundMode = kRoundToNearestEven; if (isFract) { // Calculate the correctly rounded reference result - memset(&oldMode, 0, sizeof(oldMode)); if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device -- cgit v1.2.3 From df53e02a12453340f418c8544291e7b3c08e3aa5 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 17 Aug 2023 11:15:59 +0100 Subject: [command-buffer] Remove deleted enum (#1798) The enum value `CL_COMMAND_BUFFER_STATE_INVALID_KHR` no longer exists: * https://github.com/KhronosGroup/OpenCL-Headers/pull/235 * https://github.com/KhronosGroup/OpenCL-Docs/pull/885 Replace variable initialization using this enum with a value where all the bits are set, requiring the runtime to write to it for the test to pass. 
--- .../cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index 1ada904d..2ad77dbe 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -211,8 +211,7 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest // lambda to verify given state auto verify_state = [&](const cl_command_buffer_state_khr &expected) { - cl_command_buffer_state_khr state = - CL_COMMAND_BUFFER_STATE_INVALID_KHR; + cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0); cl_int error = clGetCommandBufferInfoKHR( command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), -- cgit v1.2.3 From 0702f2ecee4b250818a7fe289508ef9c54c1b48e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Sat, 19 Aug 2023 11:15:17 +0100 Subject: Make genrand_int32 thread safe (#1797) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initialisation code is clearly meant to be run once but the volatile flag did not guarantee that at all: - Volatile does not mean atomic and loading the flag vs. other writes was not safe. - Multiple threads could have loaded 0 and performed the initialisation resulting in write collisions. Rely on std::call_once to provide the guarantee. This issue was flagged by TSAN. 
Signed-off-by: Kévin Petit --- test_common/harness/mt19937.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp index f5665deb..2d503eb5 100644 --- a/test_common/harness/mt19937.cpp +++ b/test_common/harness/mt19937.cpp @@ -51,6 +51,7 @@ #include "harness/alloc.h" #ifdef __SSE2__ +#include <mutex> #include <emmintrin.h> #endif @@ -107,7 +108,7 @@ cl_uint genrand_int32(MTdata d) /* mag01[x] = x * MATRIX_A for x=0,1 */ static const cl_uint mag01[2] = { 0x0UL, MATRIX_A }; #ifdef __SSE2__ - static volatile int init = 0; + static std::once_flag init_flag; static union { __m128i v; cl_uint s[4]; @@ -123,8 +124,7 @@ cl_uint genrand_int32(MTdata d) int kk; #ifdef __SSE2__ - if (0 == init) - { + auto init_fn = []() { upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK; lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = @@ -134,8 +134,8 @@ cl_uint genrand_int32(MTdata d) MATRIX_A; c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL; c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL; - init = 1; - } + }; + std::call_once(init_flag, init_fn); #endif kk = 0; -- cgit v1.2.3 From ddbb0de4b9864b82e1012653b6faa06e1a80ec62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 22 Aug 2023 18:13:22 +0100 Subject: ci: use ubuntu-22.04 instead of 20.04 (#1795) * ci: use ubuntu-22.04 instead of 20.04 Signed-off-by: Kevin Petit * bump clang-format version Oldest supported by 22.04 * update check-format script --------- Signed-off-by: Kevin Petit --- .github/workflows/presubmit.yml | 12 ++++++------ check-format.sh | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 1ba63abd..635e4a7e 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -13,16 +13,16 @@ jobs: fail-fast: false matrix: mainmatrix: [true] - os: [ubuntu-20.04,
macos-latest, windows-latest] + os: [ubuntu-22.04, macos-latest, windows-latest] include: - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: true gl: 1 extra: " gl" - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: false arch: arm - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: false arch: aarch64 debug: 1 @@ -55,10 +55,10 @@ jobs: run: ./presubmit.sh formatcheck: name: Check code format - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Install packages - run: sudo apt install -y clang-format clang-format-9 + run: sudo apt install -y clang-format clang-format-11 - uses: actions/checkout@v3 with: fetch-depth: 0 diff --git a/check-format.sh b/check-format.sh index be8f9d78..b5dc0a72 100755 --- a/check-format.sh +++ b/check-format.sh @@ -2,7 +2,7 @@ # Arg used to specify non-'origin/main' comparison branch ORIGIN_BRANCH=${1:-"origin/main"} -CLANG_BINARY=${2:-"`which clang-format-9`"} +CLANG_BINARY=${2:-"`which clang-format-11`"} # Run git-clang-format to check for violations CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY) -- cgit v1.2.3 From 46fde8d051759b6a04c3852cbf40f2e158479f85 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 29 Aug 2023 17:13:03 +0100 Subject: atomics: fix memory leaks on error paths (#1732) Before this change, `add_index_bin_test` would not release `cl_mem` resources or `malloc`ed memory when encountering an error. Fix by using `clMemWrapper` and `std::unique_ptr` to automatically release resources. 
Signed-off-by: Sven van Haastregt --- test_conformance/atomics/test_indexed_cases.cpp | 48 +++++++++++-------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index 7da2dfa7..ce0410bc 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. // + +#include <memory> + #include "testBase.h" #include "harness/conversions.h" @@ -226,13 +229,13 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, (int)global_threads[0], (int)local_threads[0]); // Allocate our storage - cl_mem bin_counters = + clMemWrapper bin_counters = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * number_of_bins, NULL, NULL); - cl_mem bins = clCreateBuffer( + clMemWrapper bins = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL); - cl_mem bin_assignments = + clMemWrapper bin_assignments = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * number_of_items, NULL, NULL); @@ -253,7 +256,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, } // Initialize our storage - cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]); if (!l_bin_counts) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -263,8 +266,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, int i; for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0; err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, - sizeof(cl_int) * number_of_bins, l_bin_counts, 0, - NULL, NULL); + sizeof(cl_int) * number_of_bins, + l_bin_counts.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to set initial
values for " @@ -273,8 +276,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } - cl_int *values = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + std::unique_ptr<cl_int[]> values( + new cl_int[number_of_bins * max_counts_per_bin]); if (!values) { log_error( @@ -285,7 +288,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int) * number_of_bins * max_counts_per_bin, - values, 0, NULL, NULL); + values.get(), 0, NULL, NULL); if (err) { log_error( @@ -293,10 +296,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err); return -1; } - free(values); - cl_int *l_bin_assignments = - (cl_int *)malloc(sizeof(cl_int) * number_of_items); + std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]); if (!l_bin_assignments) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -326,7 +327,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, } err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int) * number_of_items, - l_bin_assignments, 0, NULL, NULL); + l_bin_assignments.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to set initial values for " @@ -355,8 +356,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } - cl_int *final_bin_assignments = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + std::unique_ptr<cl_int[]> final_bin_assignments( + new cl_int[number_of_bins * max_counts_per_bin]); if (!final_bin_assignments) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -366,15 +367,14 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err = clEnqueueReadBuffer(queue, bins, true, 0, sizeof(cl_int) * number_of_bins * max_counts_per_bin, - final_bin_assignments, 0, NULL, NULL); + final_bin_assignments.get(), 0, NULL, NULL); if
(err) { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); return -1; } - cl_int *final_bin_counts = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]); if (!final_bin_counts) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -382,8 +382,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } err = clEnqueueReadBuffer(queue, bin_counters, true, 0, - sizeof(cl_int) * number_of_bins, final_bin_counts, - 0, NULL, NULL); + sizeof(cl_int) * number_of_bins, + final_bin_counts.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", @@ -460,13 +460,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, errors++; } } - free(l_bin_counts); - free(l_bin_assignments); - free(final_bin_assignments); - free(final_bin_counts); - clReleaseMemObject(bin_counters); - clReleaseMemObject(bins); - clReleaseMemObject(bin_assignments); + if (errors == 0) { log_info("add_index_bin_test passed. Each item was put in the correct " -- cgit v1.2.3 From c23631c6904f4c789408e3263d062a225df0737a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 29 Aug 2023 17:14:23 +0100 Subject: subgroups: fix Wsign-compare warnings (#1778) The subgroup and workgroup sizes reported by clGetKernelSubGroupInfo and clGetKernelWorkGroupInfo are of type `size_t`. Avoid changing the values to an `int` type as they are propagated through the tests and then compared against `size_t` again.
Signed-off-by: Sven van Haastregt --- test_conformance/subgroups/subgroup_common_templates.h | 15 ++++++++------- test_conformance/subgroups/subhelpers.cpp | 2 +- test_conformance/subgroups/subhelpers.h | 2 +- test_conformance/subgroups/test_workitem.cpp | 8 ++++---- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index f779ef37..d9dfc3b8 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -483,29 +483,30 @@ template struct SHF static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int ii, i, j, k, n; + int ii, k; + size_t n; cl_uint l; - int nw = test_params.local_workgroup_size; - int ns = test_params.subgroup_size; + size_t nw = test_params.local_workgroup_size; + size_t ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - int nj = (nw + ns - 1) / ns; + size_t nj = (nw + ns - 1) / ns; Ty tr, rr; ng = ng / nw; for (k = 0; k < ng; ++k) { // for each work_group - for (j = 0; j < nw; ++j) + for (size_t j = 0; j < nw; ++j) { // inside the work_group mx[j] = x[j]; // read host inputs for work_group my[j] = y[j]; // read device outputs for work_group } - for (j = 0; j < nj; ++j) + for (size_t j = 0; j < nj; ++j) { // for each subgroup ii = j * ns; n = ii + ns > nw ? 
nw - ii : ns; - for (i = 0; i < n; ++i) + for (size_t i = 0; i < n; ++i) { // inside the subgroup // shuffle index storage int midx = 4 * ii + 4 * i + 2; diff --git a/test_conformance/subgroups/subhelpers.cpp b/test_conformance/subgroups/subhelpers.cpp index 11268f64..440cde20 100644 --- a/test_conformance/subgroups/subhelpers.cpp +++ b/test_conformance/subgroups/subhelpers.cpp @@ -206,7 +206,7 @@ void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups, } void fill_and_shuffle_safe_values(std::vector &safe_values, - int sb_size) + size_t sb_size) { // max product is 720, cl_half has enough precision for it const std::vector non_one_values{ 2, 3, 4, 5, 6 }; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index bcb523cf..ed92e5d3 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -44,7 +44,7 @@ cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, // for each subgroup values defined different values // for rest of workitems set 1 shuffle values void fill_and_shuffle_safe_values(std::vector &safe_values, - int sb_size); + size_t sb_size); struct WorkGroupParams { diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index b69f3138..5b2a5eb8 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -36,7 +36,7 @@ struct get_test_data }; static int check_group(const get_test_data *result, int nw, cl_uint ensg, - int maxwgs) + size_t maxwgs) { int first = -1; int last = -1; @@ -168,7 +168,7 @@ static int check_group(const get_test_data *result, int nw, cl_uint ensg, j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5); - if (j < sizeof(hit) / 4) + if (j < static_cast(sizeof(hit) / 4)) { cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU); if ((hit[j] & b) != 0) @@ -191,7 +191,7 @@ int 
test_work_item_functions(cl_device_id device, cl_context context, static const size_t lsize = 200; int error; int i, j, k, q, r, nw; - int maxwgs; + size_t maxwgs; cl_uint ensg; size_t global; size_t local; @@ -235,7 +235,7 @@ int test_work_item_functions(cl_device_id device, cl_context context, error = get_max_allowed_work_group_size(context, kernel, &local, NULL); if (error != 0) return error; - maxwgs = (int)local; + maxwgs = local; // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple subgroups -- cgit v1.2.3 From a3262eb4b6b878bec2b0aeeedc7741f2c2e05641 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 29 Aug 2023 17:15:39 +0100 Subject: [Command-buffer] Test finalizing empty command-buffer (#1782) This patch adds two test cases related to command-buffer finalization: 1) That it is an error to finalize and already finalized command-buffer. See https://github.com/KhronosGroup/OpenCL-Docs/pull/817 2) That it is not an error to create, finalize, and execute an empty command-buffer. 
Closes issue #1781 --- .../cl_khr_command_buffer/CMakeLists.txt | 1 + .../command_buffer_finalize.cpp | 85 ++++++++++++++++++++++ .../extensions/cl_khr_command_buffer/main.cpp | 4 +- .../extensions/cl_khr_command_buffer/procs.h | 4 + 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt index 4b9968c3..098fb5be 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -14,6 +14,7 @@ set(${MODULE_NAME}_SOURCES command_buffer_test_copy.cpp command_buffer_test_barrier.cpp command_buffer_test_event_info.cpp + command_buffer_finalize.cpp ) include(../../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp new file mode 100644 index 00000000..bd669165 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp @@ -0,0 +1,85 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "basic_command_buffer.h" +#include "procs.h" + +namespace { + +// Test that finalizing a command-buffer that has already been finalized returns +// the correct error code. +struct FinalizeInvalid : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Finalizing an already finalized command-buffer must return + // CL_INVALID_OPERATION + error = clFinalizeCommandBufferKHR(command_buffer); + test_failure_error_ret( + error, CL_INVALID_OPERATION, + "clFinalizeCommandBufferKHR should return CL_INVALID_OPERATION", + TEST_FAIL); + + return CL_SUCCESS; + } +}; + +// Check that an empty command-buffer can be finalized and then executed. 
+struct FinalizeEmpty : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + // Finalize an empty command-buffer + cl_int error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Execute empty command-buffer and then wait to complete + clEventWrapper event; + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clWaitForEvents(1, &event); + test_error(error, "clWaitForEvents failed"); + + return CL_SUCCESS; + } +}; +} // anonymous namespace + +int test_finalize_invalid(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<FinalizeInvalid>(device, context, queue, + num_elements); +} + +int test_finalize_empty(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<FinalizeEmpty>(device, context, queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp index 35622827..3e923f6c 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -59,7 +59,9 @@ test_definition test_list[] = { ADD_TEST(event_info_command_queue), ADD_TEST(event_info_execution_status), ADD_TEST(event_info_context), - ADD_TEST(event_info_reference_count) + ADD_TEST(event_info_reference_count), + ADD_TEST(finalize_invalid), + ADD_TEST(finalize_empty) }; int main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index 5c4e67fe..cd839cbb 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -132,5 +132,9 @@ extern int
test_event_info_reference_count(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_finalize_invalid(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_finalize_empty(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); #endif // CL_KHR_COMMAND_BUFFER_PROCS_H -- cgit v1.2.3 From bd548e63e694aae7dda7e9a680fab1ed24dff322 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 29 Aug 2023 09:16:53 -0700 Subject: fix a sporadic failure in the mutable_command_dimensions test (#1785) --- .../mutable_command_info.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp index a8ed325a..61600dc9 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -140,7 +140,7 @@ struct PropertiesArray : public InfoMutableCommandBufferTest if (size != sizeof(props) || test_props[0] != props[0] || test_props[1] != props[1]) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect properties returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -181,7 +181,7 @@ struct Kernel : public InfoMutableCommandBufferTest // opaque object. 
if (test_kernel != kernel) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect kernel returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -210,8 +210,7 @@ struct Dimensions : public InfoMutableCommandBufferTest &global_work_size, nullptr, 0, nullptr, nullptr, &command); test_error(error, "clCommandNDRangeKernelKHR failed"); - size_t test_dimensions; - + cl_uint test_dimensions = 0; error = clGetMutableCommandInfoKHR( command, CL_MUTABLE_DISPATCH_DIMENSIONS_KHR, sizeof(test_dimensions), &test_dimensions, nullptr); @@ -219,7 +218,7 @@ struct Dimensions : public InfoMutableCommandBufferTest if (test_dimensions != dimensions) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect dimensions returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -330,7 +329,7 @@ struct InfoGlobalWorkOffset : public InfoMutableCommandBufferTest if (test_global_work_offset != global_work_offset) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong global work offset returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -368,7 +367,7 @@ struct InfoGlobalWorkSize : public InfoMutableCommandBufferTest if (test_global_work_size != global_work_size) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong global work size returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -405,7 +404,7 @@ struct InfoLocalWorkSize : public InfoMutableCommandBufferTest if (test_local_work_size != local_work_size) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong local work size returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } -- cgit v1.2.3 From 8fd55dc889a7e5dacd4c15a41e7703890310dacd Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Tue, 29 Aug 2023 17:18:11 +0100 Subject: Fix minimum image size for 
cl_ext_image_requirements_info (#1790) Some new extension formats require the width to be a multiple of 4 or 2 (see cl_ext_image_raw10_raw12). This change has no functional impact as the affected image is only used for generic queries. Signed-off-by: John Kesapides --- .../images/kernel_read_write/test_cl_ext_image_buffer.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp index 56d15808..887c9dca 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp @@ -48,8 +48,10 @@ static inline size_t get_format_size(cl_context context, cl_image_desc image_desc = { 0 }; image_desc.image_type = imageType; - /* Size 1 only to query element size */ - image_desc.image_width = 1; + /* We use a width of 4 to query element size, as this is + the smallest possible value that satisfies the requirements + of all image formats (including extensions).
*/ + image_desc.image_width = 4; if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType && CL_MEM_OBJECT_IMAGE1D != imageType) { -- cgit v1.2.3 From c511ac62b095a6d54a34f188aa7e87c62c2fa98a Mon Sep 17 00:00:00 2001 From: Vishal Patil <70944016+incognito1729@users.noreply.github.com> Date: Tue, 29 Aug 2023 21:49:45 +0530 Subject: fix memory leak from argList and from wrong ordering of free and return statement (#1792) --- test_conformance/allocations/main.cpp | 1 + test_conformance/api/test_queries.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp index 43e81277..827072fc 100644 --- a/test_conformance/allocations/main.cpp +++ b/test_conformance/allocations/main.cpp @@ -326,6 +326,7 @@ int main(int argc, const char *argv[]) else if ( strcmp( argv[i], "--help" ) == 0 || strcmp( argv[i], "-h" ) == 0 ) { printUsage( argv[0] ); + free(argList); return -1; } diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index fa5c227f..92ae1d7b 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -799,8 +799,8 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c test_error(error, "clFinish failed"); if (max_dimensions == 2) { - return 0; free(source); + return 0; } local[1]--; local[2]++; -- cgit v1.2.3 From 15b54aa0bdf8a0321e8a4c950843ff5ccb14a748 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 5 Sep 2023 10:04:38 -0600 Subject: External memory updates (#1676) * Vulkan: Fix descriptor sets Use descriptor set arrays when programming arrays for compute shader. Change-Id: Idabab775a256a223660eb7a850e26f290453659e * Vulkan: Fix queue propertyies Transfer bit for queue family is not required to be reported by the implementation, it is implicit for compute. 
Change-Id: I7424b00e25e35145433dd74b0b4dfe7eeeaf98c8 * Vulkan: Allow implementation to choose dedicated memory Dedicated vs non-dedicated memory must be queried by the app. Implementations are not required to support exportable non-dedicated memory. Change-Id: Idbc46ace1be20f61d1b58b34756f6d79a7745911 * Fix formatting Auto-generated formatting fix * Fix bug in dedicated memory. * Add check for if OpenCL assumes linear tiling Change-Id: Idd2e24d9d69e1fbc3ccb4a279067533104185332 * Changed macro name to reflect spec CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_HANDLE_TYPES_KHR to CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR Also changed some functions to not use the KHR variants. --------- Co-authored-by: Joshua Kelly --- .../vulkan_wrapper/opencl_vulkan_wrapper.cpp | 66 +++- .../vulkan_wrapper/opencl_vulkan_wrapper.hpp | 4 + .../common/vulkan_wrapper/vulkan_api_list.hpp | 8 +- .../common/vulkan_wrapper/vulkan_list_map.cpp | 23 +- .../common/vulkan_wrapper/vulkan_list_map.hpp | 8 +- .../common/vulkan_wrapper/vulkan_utility.cpp | 1 + .../common/vulkan_wrapper/vulkan_utility.hpp | 3 +- .../common/vulkan_wrapper/vulkan_wrapper.cpp | 241 +++++++++++++-- .../common/vulkan_wrapper/vulkan_wrapper.hpp | 20 +- test_conformance/vulkan/main.cpp | 5 - .../vulkan/test_vulkan_api_consistency.cpp | 30 +- .../vulkan/test_vulkan_interop_buffer.cpp | 43 +-- .../vulkan/test_vulkan_interop_image.cpp | 332 +++++++++------------ test_conformance/vulkan/vulkan_interop_common.hpp | 1 - 14 files changed, 518 insertions(+), 267 deletions(-) diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp index 4e92e709..5d0e99e0 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp @@ -19,7 +19,7 @@ #include "harness/errorHelpers.h" #include "harness/deviceInfo.h" #include -#include +#include 
#include #define ASSERT(x) assert((x)) @@ -887,3 +887,67 @@ cl_semaphore_khr &clExternalSemaphore::getCLSemaphore() { return m_externalSemaphore; } + +cl_external_memory_handle_type_khr vkToOpenCLExternalMemoryHandleType( + VulkanExternalMemoryHandleType vkExternalMemoryHandleType) +{ + switch (vkExternalMemoryHandleType) + { + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE: return 0; + } + return 0; +} + +VulkanImageTiling vkClExternalMemoryHandleTilingAssumption( + cl_device_id deviceId, + VulkanExternalMemoryHandleType vkExternalMemoryHandleType, int *error_ret) +{ + size_t size = 0; + VulkanImageTiling mode = VULKAN_IMAGE_TILING_OPTIMAL; + + assert(error_ret + != nullptr); // errcode_ret is not optional, it must be checked + + *error_ret = clGetDeviceInfo( + deviceId, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR, + 0, nullptr, &size); + if (*error_ret != CL_SUCCESS) + { + return mode; + } + + if (size == 0) + { + return mode; + } + + std::vector assume_linear_types( + size / sizeof(cl_external_memory_handle_type_khr)); + + *error_ret = clGetDeviceInfo( + deviceId, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR, + size, assume_linear_types.data(), nullptr); + if (*error_ret != CL_SUCCESS) + { + return mode; + } + + if (std::find( + assume_linear_types.begin(), assume_linear_types.end(), + vkToOpenCLExternalMemoryHandleType(vkExternalMemoryHandleType)) + != assume_linear_types.end()) + { + mode = VULKAN_IMAGE_TILING_LINEAR; + } + + return mode; +} diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp 
b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp index 5143332d..4a1d453e 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp @@ -129,4 +129,8 @@ public: extern void init_cl_vk_ext(cl_platform_id); +VulkanImageTiling vkClExternalMemoryHandleTilingAssumption( + cl_device_id deviceId, + VulkanExternalMemoryHandleType vkExternalMemoryHandleType, int *error_ret); + #endif // _opencl_vulkan_wrapper_hpp_ diff --git a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp index c62a71e1..e9c06f98 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp @@ -75,6 +75,7 @@ VK_FUNC_DECL(vkDestroyImageView) \ VK_FUNC_DECL(vkCreateImage) \ VK_FUNC_DECL(vkGetImageMemoryRequirements) \ + VK_FUNC_DECL(vkGetImageMemoryRequirements2) \ VK_FUNC_DECL(vkDestroyImage) \ VK_FUNC_DECL(vkDestroyBuffer) \ VK_FUNC_DECL(vkDestroyPipeline) \ @@ -87,8 +88,9 @@ VK_FUNC_DECL(vkDestroyDescriptorSetLayout) \ VK_FUNC_DECL(vkGetPhysicalDeviceQueueFamilyProperties) \ VK_FUNC_DECL(vkGetPhysicalDeviceFeatures) \ - VK_FUNC_DECL(vkGetPhysicalDeviceProperties2KHR) \ + VK_FUNC_DECL(vkGetPhysicalDeviceProperties2) \ VK_FUNC_DECL(vkGetBufferMemoryRequirements) \ + VK_FUNC_DECL(vkGetBufferMemoryRequirements2) \ VK_FUNC_DECL(vkGetMemoryFdKHR) \ VK_FUNC_DECL(vkGetSemaphoreFdKHR) \ VK_FUNC_DECL(vkEnumeratePhysicalDeviceGroups) \ @@ -160,6 +162,7 @@ #define vkDestroyImageView _vkDestroyImageView #define vkCreateImage _vkCreateImage #define vkGetImageMemoryRequirements _vkGetImageMemoryRequirements +#define vkGetImageMemoryRequirements2 _vkGetImageMemoryRequirements2 #define vkDestroyImage _vkDestroyImage #define vkDestroyBuffer _vkDestroyBuffer #define vkDestroyPipeline _vkDestroyPipeline @@ -173,8 +176,9 @@ #define vkGetPhysicalDeviceQueueFamilyProperties \ 
_vkGetPhysicalDeviceQueueFamilyProperties #define vkGetPhysicalDeviceFeatures _vkGetPhysicalDeviceFeatures -#define vkGetPhysicalDeviceProperties2KHR _vkGetPhysicalDeviceProperties2KHR +#define vkGetPhysicalDeviceProperties2 _vkGetPhysicalDeviceProperties2 #define vkGetBufferMemoryRequirements _vkGetBufferMemoryRequirements +#define vkGetBufferMemoryRequirements2 _vkGetBufferMemoryRequirements2 #define vkGetMemoryFdKHR _vkGetMemoryFdKHR #define vkGetSemaphoreFdKHR _vkGetSemaphoreFdKHR #define vkEnumeratePhysicalDeviceGroups _vkEnumeratePhysicalDeviceGroups diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp index 4e276519..a5ca0901 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp @@ -141,6 +141,16 @@ VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList() {} +void VulkanDescriptorSetLayoutBindingList::addBinding( + size_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, VulkanShaderStage shaderStage) +{ + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding = + new VulkanDescriptorSetLayoutBinding(binding, descriptorType, + descriptorCount, shaderStage); + add(*descriptorSetLayoutBinding); +} + VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( size_t numDescriptorSetLayoutBindings, VulkanDescriptorType descriptorType, uint32_t descriptorCount, VulkanShaderStage shaderStage) @@ -268,6 +278,7 @@ VulkanImage2DList::VulkanImage2DList( size_t numImages, std::vector &deviceMemory, uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device, VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanImageTiling vulkanImageTiling, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanImageCreateFlag 
imageCreateFlag, VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) @@ -275,8 +286,8 @@ VulkanImage2DList::VulkanImage2DList( for (size_t i2DIdx = 0; i2DIdx < numImages; i2DIdx++) { VulkanImage2D *image2D = new VulkanImage2D( - device, format, width, height, mipLevels, externalMemoryHandleType, - imageCreateFlag, imageUsage, sharingMode); + device, format, width, height, vulkanImageTiling, mipLevels, + externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode); add(*image2D); deviceMemory[i2DIdx]->bindImage( *image2D, baseOffset + (i2DIdx * interImageOffset)); @@ -285,16 +296,16 @@ VulkanImage2DList::VulkanImage2DList( VulkanImage2DList::VulkanImage2DList( size_t numImages, const VulkanDevice &device, VulkanFormat format, - uint32_t width, uint32_t height, uint32_t mipLevels, - VulkanExternalMemoryHandleType externalMemoryHandleType, + uint32_t width, uint32_t height, VulkanImageTiling vulkanImageTiling, + uint32_t mipLevels, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) { for (size_t bIdx = 0; bIdx < numImages; bIdx++) { VulkanImage2D *image2D = new VulkanImage2D( - device, format, width, height, mipLevels, externalMemoryHandleType, - imageCreateFlag, imageUsage, sharingMode); + device, format, width, height, vulkanImageTiling, mipLevels, + externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode); add(*image2D); } } diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp index 52206779..ef00b70a 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp @@ -154,6 +154,10 @@ public: VulkanDescriptorType descriptorType0, uint32_t descriptorCount0, VulkanDescriptorType descriptorType1, uint32_t descriptorCount1, VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + 
void + addBinding(size_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); virtual ~VulkanDescriptorSetLayoutBindingList(); }; @@ -208,6 +212,7 @@ public: uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device, VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanImageTiling vulkanImageTiling, VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, @@ -216,7 +221,8 @@ public: VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); VulkanImage2DList( size_t numImages, const VulkanDevice &device, VulkanFormat format, - uint32_t width, uint32_t height, uint32_t mipLevels = 1, + uint32_t width, uint32_t height, VulkanImageTiling vulkanImageTiling, + uint32_t mipLevels = 1, VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp index 96c5adbc..2124a275 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #if defined(_WIN32) || defined(_WIN64) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp index 98913257..51284125 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp @@ -33,7 +33,8 @@ const VulkanInstance& getVulkanInstance(); const VulkanPhysicalDevice& getVulkanPhysicalDevice(); const VulkanQueueFamily& -getVulkanQueueFamily(uint32_t queueFlags = 
VULKAN_QUEUE_FLAG_MASK_ALL); +getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_GRAPHICS + | VULKAN_QUEUE_FLAG_COMPUTE); const VulkanMemoryType& getVulkanMemoryType(const VulkanDevice& device, VulkanMemoryTypeProperty memoryTypeProperty); diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp index 4d803be4..73c5e9a1 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp @@ -74,7 +74,7 @@ VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE) const char *vulkanLoaderLibraryName = "vulkan-1.dll"; #elif defined(__ANDROID__) const char *vulkanLoaderLibraryName = "libvulkan.so"; -#elif defined(__linux__) +#else const char *vulkanLoaderLibraryName = "libvulkan.so.1"; #endif #ifdef _WIN32 @@ -276,13 +276,13 @@ VulkanPhysicalDevice::VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice) VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR; vkPhysicalDeviceIDPropertiesKHR.pNext = NULL; - VkPhysicalDeviceProperties2KHR vkPhysicalDeviceProperties2KHR = {}; - vkPhysicalDeviceProperties2KHR.sType = + VkPhysicalDeviceProperties2 vkPhysicalDeviceProperties2 = {}; + vkPhysicalDeviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - vkPhysicalDeviceProperties2KHR.pNext = &vkPhysicalDeviceIDPropertiesKHR; + vkPhysicalDeviceProperties2.pNext = &vkPhysicalDeviceIDPropertiesKHR; - vkGetPhysicalDeviceProperties2KHR(m_vkPhysicalDevice, - &vkPhysicalDeviceProperties2KHR); + vkGetPhysicalDeviceProperties2(m_vkPhysicalDevice, + &vkPhysicalDeviceProperties2); memcpy(m_vkDeviceUUID, vkPhysicalDeviceIDPropertiesKHR.deviceUUID, sizeof(m_vkDeviceUUID)); @@ -1013,12 +1013,14 @@ void VulkanDescriptorPool::VulkanDescriptorPoolCommon( == vkDescriptorTypeToDescriptorCountMap.end()) { vkDescriptorTypeToDescriptorCountMap - [vkDescriptorSetLayoutBinding.descriptorType] = 1; + 
[vkDescriptorSetLayoutBinding.descriptorType] = + vkDescriptorSetLayoutBinding.descriptorCount; } else { vkDescriptorTypeToDescriptorCountMap - [vkDescriptorSetLayoutBinding.descriptorType]++; + [vkDescriptorSetLayoutBinding.descriptorType] += + vkDescriptorSetLayoutBinding.descriptorCount; } } @@ -1159,6 +1161,35 @@ void VulkanDescriptorSet::update(uint32_t binding, const VulkanBuffer &buffer) vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); } +void VulkanDescriptorSet::updateArray(uint32_t binding, unsigned numBuffers, + const VulkanBufferList &buffers) +{ + VkDescriptorBufferInfo *vkDescriptorBufferInfo = + (VkDescriptorBufferInfo *)calloc(numBuffers, + sizeof(VkDescriptorBufferInfo)); + for (unsigned i = 0; i < numBuffers; i++) + { + vkDescriptorBufferInfo[i].buffer = buffers[i]; + vkDescriptorBufferInfo[i].offset = 0; + vkDescriptorBufferInfo[i].range = VK_WHOLE_SIZE; + } + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = numBuffers; + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + vkWriteDescriptorSet.pImageInfo = NULL; + vkWriteDescriptorSet.pBufferInfo = vkDescriptorBufferInfo; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); + free(vkDescriptorBufferInfo); +} + void VulkanDescriptorSet::update(uint32_t binding, const VulkanImageView &imageView) { @@ -1182,6 +1213,34 @@ void VulkanDescriptorSet::update(uint32_t binding, vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); } +void VulkanDescriptorSet::updateArray(uint32_t binding, + const VulkanImageViewList &imageViewList) +{ + VkDescriptorImageInfo *vkDescriptorImageInfo = + new 
VkDescriptorImageInfo[imageViewList.size()]; + for (size_t i = 0; i < imageViewList.size(); i++) + { + vkDescriptorImageInfo[i].sampler = VK_NULL_HANDLE; + vkDescriptorImageInfo[i].imageView = imageViewList[i]; + vkDescriptorImageInfo[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = imageViewList.size(); + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + vkWriteDescriptorSet.pImageInfo = vkDescriptorImageInfo; + vkWriteDescriptorSet.pBufferInfo = NULL; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); + delete[] vkDescriptorImageInfo; +} + VulkanDescriptorSet::operator VkDescriptorSet() const { return m_vkDescriptorSet; @@ -1505,12 +1564,14 @@ VulkanBuffer::VulkanBuffer(const VulkanBuffer &buffer) m_memoryTypeList(buffer.m_memoryTypeList) {} +bool VulkanBuffer::isDedicated() const { return m_dedicated; } + VulkanBuffer::VulkanBuffer( const VulkanDevice &device, uint64_t size, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode, const VulkanQueueFamilyList &queueFamilyList) - : m_device(device), m_vkBuffer(VK_NULL_HANDLE) + : m_device(device), m_vkBuffer(VK_NULL_HANDLE), m_dedicated(false) { std::vector queueFamilyIndexList; if (queueFamilyList.size() == 0) @@ -1556,16 +1617,36 @@ VulkanBuffer::VulkanBuffer( vkCreateBuffer(m_device, &vkBufferCreateInfo, NULL, &m_vkBuffer); - VkMemoryRequirements vkMemoryRequirements = {}; - vkGetBufferMemoryRequirements(m_device, m_vkBuffer, &vkMemoryRequirements); - m_size = vkMemoryRequirements.size; - m_alignment = 
vkMemoryRequirements.alignment; + VkMemoryDedicatedRequirements vkMemoryDedicatedRequirements = {}; + vkMemoryDedicatedRequirements.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; + vkMemoryDedicatedRequirements.pNext = NULL; + + VkMemoryRequirements2 vkMemoryRequirements = {}; + vkMemoryRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + vkMemoryRequirements.pNext = &vkMemoryDedicatedRequirements; + + VkBufferMemoryRequirementsInfo2 vkMemoryRequirementsInfo = {}; + + vkMemoryRequirementsInfo.sType = + VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2; + vkMemoryRequirementsInfo.buffer = m_vkBuffer; + vkMemoryRequirementsInfo.pNext = NULL; + + vkGetBufferMemoryRequirements2(m_device, &vkMemoryRequirementsInfo, + &vkMemoryRequirements); + + m_dedicated = vkMemoryDedicatedRequirements.requiresDedicatedAllocation; + + m_size = vkMemoryRequirements.memoryRequirements.size; + m_alignment = vkMemoryRequirements.memoryRequirements.alignment; const VulkanMemoryTypeList &memoryTypeList = m_device.getPhysicalDevice().getMemoryTypeList(); for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; - if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + if ((1 << memoryTypeIndex) + & vkMemoryRequirements.memoryRequirements.memoryTypeBits) { m_memoryTypeList.add(memoryTypeList[mtIdx]); } @@ -1640,16 +1721,36 @@ VulkanImage::VulkanImage( vkCreateImage(m_device, &vkImageCreateInfo, NULL, &m_vkImage); VulkanImageCreateInfo = vkImageCreateInfo; - VkMemoryRequirements vkMemoryRequirements = {}; - vkGetImageMemoryRequirements(m_device, m_vkImage, &vkMemoryRequirements); - m_size = vkMemoryRequirements.size; - m_alignment = vkMemoryRequirements.alignment; + + VkMemoryDedicatedRequirements vkMemoryDedicatedRequirements = {}; + vkMemoryDedicatedRequirements.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; + vkMemoryDedicatedRequirements.pNext = NULL; + + VkMemoryRequirements2 
vkMemoryRequirements = {}; + vkMemoryRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + vkMemoryRequirements.pNext = &vkMemoryDedicatedRequirements; + + VkImageMemoryRequirementsInfo2 vkMemoryRequirementsInfo = {}; + + vkMemoryRequirementsInfo.sType = + VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2; + vkMemoryRequirementsInfo.image = m_vkImage; + vkMemoryRequirementsInfo.pNext = NULL; + + vkGetImageMemoryRequirements2(m_device, &vkMemoryRequirementsInfo, + &vkMemoryRequirements); + m_size = vkMemoryRequirements.memoryRequirements.size; + m_alignment = vkMemoryRequirements.memoryRequirements.alignment; + m_dedicated = vkMemoryDedicatedRequirements.requiresDedicatedAllocation; + const VulkanMemoryTypeList &memoryTypeList = m_device.getPhysicalDevice().getMemoryTypeList(); for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; - if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + if ((1 << memoryTypeIndex) + & vkMemoryRequirements.memoryRequirements.memoryTypeBits) { m_memoryTypeList.add(memoryTypeList[mtIdx]); } @@ -1678,6 +1779,8 @@ uint64_t VulkanImage::getSize() const { return m_size; } uint64_t VulkanImage::getAlignment() const { return m_alignment; } +bool VulkanImage::isDedicated() const { return m_dedicated; } + const VulkanMemoryTypeList &VulkanImage::getMemoryTypeList() const { return m_memoryTypeList; @@ -1694,14 +1797,14 @@ VulkanImage2D::VulkanImage2D(const VulkanImage2D &image2D): VulkanImage(image2D) VulkanImage2D::VulkanImage2D( const VulkanDevice &device, VulkanFormat format, uint32_t width, - uint32_t height, uint32_t numMipLevels, + uint32_t height, VulkanImageTiling imageTiling, uint32_t numMipLevels, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) : VulkanImage(device, VULKAN_IMAGE_TYPE_2D, format, VulkanExtent3D(width, height, 1), numMipLevels, 1, 
- externalMemoryHandleType, imageCreateFlag, - VULKAN_IMAGE_TILING_OPTIMAL, imageUsage, sharingMode) + externalMemoryHandleType, imageCreateFlag, imageTiling, + imageUsage, sharingMode) {} VulkanImage2D::~VulkanImage2D() {} @@ -1888,7 +1991,8 @@ VulkanDeviceMemory::VulkanDeviceMemory( const VulkanDevice &device, const VulkanImage &image, const VulkanMemoryType &memoryType, VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) - : m_device(device), m_size(image.getSize()), m_isDedicated(true) + : m_device(device), m_size(image.getSize()), + m_isDedicated(image.isDedicated()) { #if defined(_WIN32) || defined(_WIN64) WindowsSecurityAttributes winSecurityAttributes; @@ -1921,20 +2025,95 @@ VulkanDeviceMemory::VulkanDeviceMemory( VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {}; vkMemoryDedicatedAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; - vkMemoryDedicatedAllocateInfo.pNext = - externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + vkMemoryDedicatedAllocateInfo.pNext = NULL; vkMemoryDedicatedAllocateInfo.image = image; vkMemoryDedicatedAllocateInfo.buffer = VK_NULL_HANDLE; VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; vkMemoryAllocateInfo.allocationSize = m_size; vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + if (m_isDedicated) + { + vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; + vkMemoryDedicatedAllocateInfo.pNext = + externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + } + else + { + vkMemoryAllocateInfo.pNext = + externalMemoryHandleType ? 
&vkExportMemoryAllocateInfoKHR : NULL; + } + vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory); } +VulkanDeviceMemory::VulkanDeviceMemory( + const VulkanDevice &device, const VulkanBuffer &buffer, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) + : m_device(device), m_size(buffer.getSize()), + m_isDedicated(buffer.isDedicated()) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {}; + vkExportMemoryWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; + vkExportMemoryWin32HandleInfoKHR.pNext = NULL; + vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportMemoryWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name; + +#endif + + VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {}; + vkExportMemoryAllocateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + ? 
&vkExportMemoryWin32HandleInfoKHR + : NULL; +#else + vkExportMemoryAllocateInfoKHR.pNext = NULL; +#endif + vkExportMemoryAllocateInfoKHR.handleTypes = + (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType; + + VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {}; + vkMemoryDedicatedAllocateInfo.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + vkMemoryDedicatedAllocateInfo.pNext = NULL; + vkMemoryDedicatedAllocateInfo.image = VK_NULL_HANDLE; + vkMemoryDedicatedAllocateInfo.buffer = buffer; + + VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; + vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + vkMemoryAllocateInfo.allocationSize = m_size; + vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + + if (m_isDedicated) + { + vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; + vkMemoryDedicatedAllocateInfo.pNext = + externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + } + else + { + vkMemoryAllocateInfo.pNext = + externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + } + + + VkResult res = vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, + &m_vkDeviceMemory); + ASSERT_SUCCESS(res, "Failed to allocate device memory"); +} + VulkanDeviceMemory::~VulkanDeviceMemory() { vkFreeMemory(m_device, m_vkDeviceMemory, NULL); @@ -2001,11 +2180,21 @@ void VulkanDeviceMemory::unmap() { vkUnmapMemory(m_device, m_vkDeviceMemory); } void VulkanDeviceMemory::bindBuffer(const VulkanBuffer &buffer, uint64_t offset) { + if (buffer.isDedicated() && !m_isDedicated) + { + throw std::runtime_error( + "Buffer requires dedicated memory. Failed to bind"); + } vkBindBufferMemory(m_device, buffer, m_vkDeviceMemory, offset); } void VulkanDeviceMemory::bindImage(const VulkanImage &image, uint64_t offset) { + if (image.isDedicated() && !m_isDedicated) + { + throw std::runtime_error( + "Image requires dedicated memory. 
Failed to bind"); + } vkBindImageMemory(m_device, image, m_vkDeviceMemory, offset); } diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp index af478219..7fcc70f3 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp @@ -328,7 +328,11 @@ public: const VulkanDescriptorSetLayout &descriptorSetLayout); virtual ~VulkanDescriptorSet(); void update(uint32_t binding, const VulkanBuffer &buffer); + void updateArray(uint32_t binding, unsigned numBuffers, + const VulkanBufferList &buffers); void update(uint32_t binding, const VulkanImageView &imageView); + void updateArray(uint32_t binding, + const VulkanImageViewList &imageViewList); operator VkDescriptorSet() const; }; @@ -424,6 +428,7 @@ protected: VkBuffer m_vkBuffer; uint64_t m_size; uint64_t m_alignment; + bool m_dedicated; VulkanMemoryTypeList m_memoryTypeList; VulkanBuffer(const VulkanBuffer &buffer); @@ -441,6 +446,7 @@ public: uint64_t getSize() const; uint64_t getAlignment() const; const VulkanMemoryTypeList &getMemoryTypeList() const; + bool isDedicated() const; operator VkBuffer() const; }; @@ -452,6 +458,7 @@ protected: const VulkanFormat m_format; const uint32_t m_numMipLevels; const uint32_t m_numLayers; + bool m_dedicated; VkImage m_vkImage; uint64_t m_size; uint64_t m_alignment; @@ -478,6 +485,7 @@ public: uint32_t getNumLayers() const; uint64_t getSize() const; uint64_t getAlignment() const; + bool isDedicated() const; const VulkanMemoryTypeList &getMemoryTypeList() const; VkImageCreateInfo getVkImageCreateInfo() const; operator VkImage() const; @@ -487,12 +495,11 @@ class VulkanImage2D : public VulkanImage { protected: VkImageView m_vkImageView; - VulkanImage2D(const VulkanImage2D &image2D); - public: VulkanImage2D( const VulkanDevice &device, VulkanFormat format, uint32_t width, - uint32_t height, uint32_t numMipLevels = 1, + uint32_t height, 
VulkanImageTiling imageTiling, + uint32_t numMipLevels = 1, VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, @@ -501,6 +508,8 @@ public: VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); virtual ~VulkanImage2D(); virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const; + + VulkanImage2D(const VulkanImage2D &image2D); }; class VulkanImageView { @@ -541,6 +550,11 @@ public: VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, const void *name = NULL); + VulkanDeviceMemory(const VulkanDevice &device, const VulkanBuffer &buffer, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + const void *name = NULL); virtual ~VulkanDeviceMemory(); uint64_t getSize() const; #ifdef _WIN32 diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp index 3d7b30e7..eb1afeb0 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -185,7 +185,6 @@ bool useSingleImageKernel = false; bool useDeviceLocal = false; bool disableNTHandleType = false; bool enableOffset = false; -bool non_dedicated = false; static void printUsage(const char *execName) { @@ -232,10 +231,6 @@ size_t parseParams(int argc, const char *argv[], const char **argList) { enableOffset = true; } - if (!strcmp(argv[i], "--non_dedicated")) - { - non_dedicated = true; - } if (strcmp(argv[i], "-h") == 0) { printUsage(argv[0]); diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index f22ac319..d12b3bfe 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -81,10 +81,11 @@ int test_consistency_external_buffer(cl_device_id deviceID, cl_context 
_context, const VulkanMemoryTypeList& memoryTypeList = vkDummyBuffer.getMemoryTypeList(); - VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( - vkDevice, bufferSize, memoryTypeList[0], vkExternalMemoryHandleType); VulkanBufferList vkBufferList(1, vkDevice, bufferSize, vkExternalMemoryHandleType); + VulkanDeviceMemory* vkDeviceMem = + new VulkanDeviceMemory(vkDevice, vkBufferList[0], memoryTypeList[0], + vkExternalMemoryHandleType); vkDeviceMem->bindBuffer(vkBufferList[0], 0); @@ -231,22 +232,27 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context, VulkanExternalMemoryHandleType vkExternalMemoryHandleType = getSupportedVulkanExternalMemoryHandleTypeList()[0]; - VulkanImage2D* vkImage2D = - new VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height, - 1, vkExternalMemoryHandleType); - const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList(); - uint64_t totalImageMemSize = vkImage2D->getSize(); + VulkanImageTiling vulkanImageTiling = + vkClExternalMemoryHandleTilingAssumption( + deviceID, vkExternalMemoryHandleType, &errNum); + ASSERT_SUCCESS(errNum, "Failed to query OpenCL tiling mode"); + + VulkanImage2D vkImage2D = + VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height, + vulkanImageTiling, 1, vkExternalMemoryHandleType); + + const VulkanMemoryTypeList& memoryTypeList = vkImage2D.getMemoryTypeList(); + uint64_t totalImageMemSize = vkImage2D.getSize(); log_info("Memory type index: %lu\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); log_info("Image size : %d\n", totalImageMemSize); - VulkanDeviceMemory* vkDeviceMem = - new VulkanDeviceMemory(vkDevice, totalImageMemSize, memoryTypeList[0], - vkExternalMemoryHandleType); - vkDeviceMem->bindImage(*vkImage2D, 0); + VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( + vkDevice, vkImage2D, memoryTypeList[0], vkExternalMemoryHandleType); + 
vkDeviceMem->bindImage(vkImage2D, 0); void* handle = NULL; int fd; @@ -299,7 +305,7 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context, extMemProperties.push_back(0); const VkImageCreateInfo VulkanImageCreateInfo = - vkImage2D->getVkImageCreateInfo(); + vkImage2D.getVkImageCreateInfo(); errNum = getCLImageInfoFromVkImageInfo( &VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc); diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 5390ef69..559625d7 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -126,8 +126,11 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, std::vector vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -189,9 +192,9 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) { - vkBufferListDeviceMemory.push_back( - new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, - vkExternalMemoryHandleType)); + vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory( + vkDevice, vkBufferList[bIdx], memoryType, + vkExternalMemoryHandleType)); externalMemory.push_back(new clExternalMemory( vkBufferListDeviceMemory[bIdx], 
vkExternalMemoryHandleType, 0, bufferSize, context, deviceId)); @@ -210,8 +213,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); - vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); } + vkDescriptorSet.updateArray(1, numBuffers, vkBufferList); vkCommandBuffer.begin(); vkCommandBuffer.bindPipeline(vkComputePipeline); vkCommandBuffer.bindDescriptorSets( @@ -453,8 +456,11 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, std::vector vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -517,9 +523,9 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) { - vkBufferListDeviceMemory.push_back( - new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, - vkExternalMemoryHandleType)); + vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory( + vkDevice, vkBufferList[bIdx], memoryType, + vkExternalMemoryHandleType)); externalMemory.push_back(new clExternalMemory( vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType, 0, bufferSize, context, deviceId)); @@ -538,8 +544,9 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, 
vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); - vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); } + vkDescriptorSet.updateArray(1, vkBufferList.size(), vkBufferList); + vkCommandBuffer.begin(); vkCommandBuffer.bindPipeline(vkComputePipeline); vkCommandBuffer.bindDescriptorSets( @@ -754,8 +761,11 @@ int run_test_with_multi_import_same_ctx( std::vector vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -836,7 +846,7 @@ int run_test_with_multi_import_same_ctx( if (withOffset == 0) { vkBufferListDeviceMemory.push_back( - new VulkanDeviceMemory(vkDevice, pBufferSize, + new VulkanDeviceMemory(vkDevice, vkBufferList[bIdx], memoryType, vkExternalMemoryHandleType)); } @@ -880,9 +890,8 @@ int run_test_with_multi_import_same_ctx( externalMemory[bIdx][cl_bIdx] ->getExternalMemoryBuffer(); } - vkDescriptorSet.update((uint32_t)bIdx + 1, - vkBufferList[bIdx]); } + vkDescriptorSet.updateArray(1, numBuffers, vkBufferList); vkCommandBuffer.begin(); vkCommandBuffer.bindPipeline(vkComputePipeline); vkCommandBuffer.bindDescriptorSets( diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index 47a31665..5f1f6e4b 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ 
b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -226,9 +226,11 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, srcBufferPtr = (char *)malloc(maxImage2DSize); dstBufferPtr = (char *)malloc(maxImage2DSize); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, - VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -255,10 +257,10 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - std::vector vkNonDedicatedImage2DListDeviceMemory1; - std::vector vkNonDedicatedImage2DListDeviceMemory2; - std::vector nonDedicatedExternalMemory1; - std::vector nonDedicatedExternalMemory2; + std::vector vkImage2DListDeviceMemory1; + std::vector vkImage2DListDeviceMemory2; + std::vector externalMemory1; + std::vector externalMemory2; std::vector vkImage2DShader; for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) @@ -352,8 +354,6 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanExternalMemoryHandleType vkExternalMemoryHandleType = vkExternalMemoryHandleTypeList[emhtIdx]; - log_info("External memory handle type: %d \n", - vkExternalMemoryHandleType); if ((true == disableNTHandleType) && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT == vkExternalMemoryHandleType)) @@ -361,9 +361,19 @@ int run_test_with_two_queue(cl_context 
&context, cl_command_queue &cmd_queue1, // Skip running for WIN32 NT handle. continue; } + log_info("External memory handle type: %d \n", + vkExternalMemoryHandleType); + VulkanImageTiling vulkanImageTiling = + vkClExternalMemoryHandleTilingAssumption( + deviceId, + vkExternalMemoryHandleTypeList[emhtIdx], &err); + ASSERT_SUCCESS(err, + "Failed to query OpenCL tiling mode"); + VulkanImage2D vkDummyImage2D( vkDevice, vkFormatList[0], widthList[0], - heightList[0], 1, vkExternalMemoryHandleType); + heightList[0], vulkanImageTiling, 1, + vkExternalMemoryHandleType); const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); @@ -390,118 +400,73 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); totalImageMemSize = ROUND_UP(vkImage2D.getSize(), vkImage2D.getAlignment()); } - VulkanImage2DList vkNonDedicatedImage2DList( + VulkanImage2DList vkImage2DList( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) { - if (non_dedicated) - { - vkNonDedicatedImage2DListDeviceMemory1 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, - memoryType, - vkExternalMemoryHandleType)); - } - else - { - vkNonDedicatedImage2DListDeviceMemory1 - .push_back(new VulkanDeviceMemory( - vkDevice, - vkNonDedicatedImage2DList[bIdx], - memoryType, - vkExternalMemoryHandleType)); - } - vkNonDedicatedImage2DListDeviceMemory1[bIdx] - ->bindImage(vkNonDedicatedImage2DList[bIdx], - 0); - nonDedicatedExternalMemory1.push_back( + vkImage2DListDeviceMemory1.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList[bIdx], + memoryType, + 
vkExternalMemoryHandleType)); + vkImage2DListDeviceMemory1[bIdx]->bindImage( + vkImage2DList[bIdx], 0); + externalMemory1.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory1 - [bIdx], + *vkImage2DListDeviceMemory1[bIdx], vkExternalMemoryHandleType, context, totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList[bIdx], - deviceId)); + vkImage2DList[bIdx], deviceId)); } - VulkanImageViewList vkNonDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList); - VulkanImage2DList vkNonDedicatedImage2DList2( + VulkanImageViewList vkImage2DViewList( + vkDevice, vkImage2DList); + VulkanImage2DList vkImage2DList2( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) { - if (non_dedicated) - { - vkNonDedicatedImage2DListDeviceMemory2 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, - memoryType, - vkExternalMemoryHandleType)); - } - else - { - vkNonDedicatedImage2DListDeviceMemory2 - .push_back(new VulkanDeviceMemory( - vkDevice, - vkNonDedicatedImage2DList2[bIdx], - memoryType, - vkExternalMemoryHandleType)); - } - vkNonDedicatedImage2DListDeviceMemory2[bIdx] - ->bindImage( - vkNonDedicatedImage2DList2[bIdx], 0); - nonDedicatedExternalMemory2.push_back( + vkImage2DListDeviceMemory2.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList2[bIdx], + memoryType, + vkExternalMemoryHandleType)); + vkImage2DListDeviceMemory2[bIdx]->bindImage( + vkImage2DList2[bIdx], 0); + externalMemory2.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory2 - [bIdx], + *vkImage2DListDeviceMemory2[bIdx], vkExternalMemoryHandleType, context, totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList2[bIdx], - deviceId)); + vkImage2DList2[bIdx], deviceId)); } - VulkanImageViewList vkDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList2); 
cl_mem external_mem_image1[5]; cl_mem external_mem_image2[5]; for (int i = 0; i < num2DImages; i++) { external_mem_image1[i] = - nonDedicatedExternalMemory1[i] + externalMemory1[i] ->getExternalMemoryImage(); external_mem_image2[i] = - nonDedicatedExternalMemory2[i] + externalMemory2[i] ->getExternalMemoryImage(); } - VulkanImage2DList &vkImage2DList = - vkNonDedicatedImage2DList; - VulkanImageViewList &vkImage2DViewList = - vkNonDedicatedImage2DViewList; clCl2VkExternalSemaphore->signal(cmd_queue1); if (!useSingleImageKernel) { - for (size_t i2DIdx = 0; - i2DIdx < vkImage2DList.size(); i2DIdx++) - { - for (uint32_t mipLevel = 0; - mipLevel < numMipLevels; mipLevel++) - { - uint32_t i2DvIdx = - (uint32_t)(i2DIdx * numMipLevels) - + mipLevel; - vkDescriptorSet.update( - 1 + i2DvIdx, - vkImage2DViewList[i2DvIdx]); - } - } + vkDescriptorSet.updateArray(1, + vkImage2DViewList); vkCopyCommandBuffer.begin(); vkCopyCommandBuffer.pipelineBarrier( vkImage2DList, @@ -743,29 +708,25 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, } for (int i = 0; i < num2DImages; i++) { - delete vkNonDedicatedImage2DListDeviceMemory1 - [i]; - delete vkNonDedicatedImage2DListDeviceMemory2 - [i]; - delete nonDedicatedExternalMemory1[i]; - delete nonDedicatedExternalMemory2[i]; + delete vkImage2DListDeviceMemory1[i]; + delete vkImage2DListDeviceMemory2[i]; + delete externalMemory1[i]; + delete externalMemory2[i]; } - vkNonDedicatedImage2DListDeviceMemory1.erase( - vkNonDedicatedImage2DListDeviceMemory1.begin(), - vkNonDedicatedImage2DListDeviceMemory1.begin() - + num2DImages); - vkNonDedicatedImage2DListDeviceMemory2.erase( - vkNonDedicatedImage2DListDeviceMemory2.begin(), - vkNonDedicatedImage2DListDeviceMemory2.begin() + vkImage2DListDeviceMemory1.erase( + vkImage2DListDeviceMemory1.begin(), + vkImage2DListDeviceMemory1.begin() + num2DImages); - nonDedicatedExternalMemory1.erase( - nonDedicatedExternalMemory1.begin(), - nonDedicatedExternalMemory1.begin() - + 
num2DImages); - nonDedicatedExternalMemory2.erase( - nonDedicatedExternalMemory2.begin(), - nonDedicatedExternalMemory2.begin() + vkImage2DListDeviceMemory2.erase( + vkImage2DListDeviceMemory2.begin(), + vkImage2DListDeviceMemory2.begin() + num2DImages); + externalMemory1.erase(externalMemory1.begin(), + externalMemory1.begin() + + num2DImages); + externalMemory2.erase(externalMemory2.begin(), + externalMemory2.begin() + + num2DImages); if (CL_SUCCESS != err) { goto CLEANUP; @@ -822,9 +783,11 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, srcBufferPtr = (char *)malloc(maxImage2DSize); dstBufferPtr = (char *)malloc(maxImage2DSize); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, - VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -851,10 +814,10 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - std::vector vkNonDedicatedImage2DListDeviceMemory1; - std::vector vkNonDedicatedImage2DListDeviceMemory2; - std::vector nonDedicatedExternalMemory1; - std::vector nonDedicatedExternalMemory2; + std::vector vkImage2DListDeviceMemory1; + std::vector vkImage2DListDeviceMemory2; + std::vector externalMemory1; + std::vector externalMemory2; std::vector vkImage2DShader; for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) @@ -957,9 +920,18 @@ int 
run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, // Skip running for WIN32 NT handle. continue; } + + VulkanImageTiling vulkanImageTiling = + vkClExternalMemoryHandleTilingAssumption( + deviceId, + vkExternalMemoryHandleTypeList[emhtIdx], &err); + ASSERT_SUCCESS(err, + "Failed to query OpenCL tiling mode"); + VulkanImage2D vkDummyImage2D( vkDevice, vkFormatList[0], widthList[0], - heightList[0], 1, vkExternalMemoryHandleType); + heightList[0], vulkanImageTiling, 1, + vkExternalMemoryHandleType); const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); @@ -985,98 +957,78 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); totalImageMemSize = ROUND_UP(vkImage2D.getSize(), vkImage2D.getAlignment()); } - VulkanImage2DList vkNonDedicatedImage2DList( + VulkanImage2DList vkImage2DList( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); - for (size_t bIdx = 0; - bIdx < vkNonDedicatedImage2DList.size(); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < vkImage2DList.size(); bIdx++) { // Create list of Vulkan device memories and // bind the list of Vulkan images. 
- vkNonDedicatedImage2DListDeviceMemory1 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, memoryType, + vkImage2DListDeviceMemory1.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList[bIdx], + memoryType, vkExternalMemoryHandleType)); - vkNonDedicatedImage2DListDeviceMemory1[bIdx] - ->bindImage(vkNonDedicatedImage2DList[bIdx], - 0); - nonDedicatedExternalMemory1.push_back( + vkImage2DListDeviceMemory1[bIdx]->bindImage( + vkImage2DList[bIdx], 0); + externalMemory1.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory1 - [bIdx], + *vkImage2DListDeviceMemory1[bIdx], vkExternalMemoryHandleType, context, totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList[bIdx], - deviceId)); + vkImage2DList[bIdx], deviceId)); } - VulkanImageViewList vkNonDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList); + VulkanImageViewList vkImage2DViewList( + vkDevice, vkImage2DList); - VulkanImage2DList vkNonDedicatedImage2DList2( + VulkanImage2DList vkImage2DList2( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); - for (size_t bIdx = 0; - bIdx < vkNonDedicatedImage2DList2.size(); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < vkImage2DList2.size(); bIdx++) { - vkNonDedicatedImage2DListDeviceMemory2 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, memoryType, + vkImage2DListDeviceMemory2.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList2[bIdx], + memoryType, vkExternalMemoryHandleType)); - vkNonDedicatedImage2DListDeviceMemory2[bIdx] - ->bindImage( - vkNonDedicatedImage2DList2[bIdx], 0); - nonDedicatedExternalMemory2.push_back( + vkImage2DListDeviceMemory2[bIdx]->bindImage( + vkImage2DList2[bIdx], 0); + externalMemory2.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory2 - [bIdx], + *vkImage2DListDeviceMemory2[bIdx], vkExternalMemoryHandleType, context, 
totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList2[bIdx], - deviceId)); + vkImage2DList2[bIdx], deviceId)); } - VulkanImageViewList vkDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList2); + cl_mem external_mem_image1[4]; cl_mem external_mem_image2[4]; for (int i = 0; i < num2DImages; i++) { external_mem_image1[i] = - nonDedicatedExternalMemory1[i] + externalMemory1[i] ->getExternalMemoryImage(); external_mem_image2[i] = - nonDedicatedExternalMemory2[i] + externalMemory2[i] ->getExternalMemoryImage(); } - VulkanImage2DList &vkImage2DList = - vkNonDedicatedImage2DList; - VulkanImageViewList &vkImage2DViewList = - vkNonDedicatedImage2DViewList; clCl2VkExternalSemaphore->signal(cmd_queue1); if (!useSingleImageKernel) { - for (size_t i2DIdx = 0; - i2DIdx < vkImage2DList.size(); i2DIdx++) - { - for (uint32_t mipLevel = 0; - mipLevel < numMipLevels; mipLevel++) - { - uint32_t i2DvIdx = - (uint32_t)(i2DIdx * numMipLevels) - + mipLevel; - vkDescriptorSet.update( - 1 + i2DvIdx, - vkImage2DViewList[i2DvIdx]); - } - } + vkDescriptorSet.updateArray(1, + vkImage2DViewList); vkCopyCommandBuffer.begin(); vkCopyCommandBuffer.pipelineBarrier( vkImage2DList, @@ -1275,29 +1227,25 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, } for (int i = 0; i < num2DImages; i++) { - delete vkNonDedicatedImage2DListDeviceMemory1 - [i]; - delete vkNonDedicatedImage2DListDeviceMemory2 - [i]; - delete nonDedicatedExternalMemory1[i]; - delete nonDedicatedExternalMemory2[i]; + delete vkImage2DListDeviceMemory1[i]; + delete vkImage2DListDeviceMemory2[i]; + delete externalMemory1[i]; + delete externalMemory2[i]; } - vkNonDedicatedImage2DListDeviceMemory1.erase( - vkNonDedicatedImage2DListDeviceMemory1.begin(), - vkNonDedicatedImage2DListDeviceMemory1.begin() - + num2DImages); - vkNonDedicatedImage2DListDeviceMemory2.erase( - vkNonDedicatedImage2DListDeviceMemory2.begin(), - vkNonDedicatedImage2DListDeviceMemory2.begin() - + num2DImages); - 
nonDedicatedExternalMemory1.erase( - nonDedicatedExternalMemory1.begin(), - nonDedicatedExternalMemory1.begin() + vkImage2DListDeviceMemory1.erase( + vkImage2DListDeviceMemory1.begin(), + vkImage2DListDeviceMemory1.begin() + num2DImages); - nonDedicatedExternalMemory2.erase( - nonDedicatedExternalMemory2.begin(), - nonDedicatedExternalMemory2.begin() + vkImage2DListDeviceMemory2.erase( + vkImage2DListDeviceMemory2.begin(), + vkImage2DListDeviceMemory2.begin() + num2DImages); + externalMemory1.erase(externalMemory1.begin(), + externalMemory1.begin() + + num2DImages); + externalMemory2.erase(externalMemory2.begin(), + externalMemory2.begin() + + num2DImages); if (CL_SUCCESS != err) { goto CLEANUP; diff --git a/test_conformance/vulkan/vulkan_interop_common.hpp b/test_conformance/vulkan/vulkan_interop_common.hpp index 18d84f09..a1162407 100644 --- a/test_conformance/vulkan/vulkan_interop_common.hpp +++ b/test_conformance/vulkan/vulkan_interop_common.hpp @@ -45,6 +45,5 @@ extern bool useDeviceLocal; extern bool disableNTHandleType; // Enable offset for multiImport of vulkan device memory extern bool enableOffset; -extern bool non_dedicated; #endif // _vulkan_interop_common_hpp_ -- cgit v1.2.3 From d20de8ddea697e2d1372eaabce2718b5280ca748 Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 5 Sep 2023 17:09:04 +0100 Subject: Command buffer wait_for_sec_queue_event subtest, call clFinish in the correct order. (#1758) * Fix for Command buffer wait_for_sec_queue_event subtest, call clFinish in the correct order. queue has a command that depends on a command that resides in queue_sec, calling clFinish(queue) before clFinish(queue_sec) causes the test to hang as the queue_sec command never got a chance to finish. * Update PR change as per the suggestion. 
--- .../extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp index be8530b2..6ef26bb9 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp @@ -603,12 +603,15 @@ struct CommandBufferEventSync : public BasicCommandBufferTest event_ptrs[1], nullptr); test_error(error, "clEnqueueReadBuffer failed"); - error = clFinish(queue); - test_error(error, "clFinish failed"); + error = clFlush(queue); + test_error(error, "clFlush failed"); error = clFinish(queue_sec); test_error(error, "clFinish failed"); + error = clFinish(queue); + test_error(error, "clFinish failed"); + // verify the result - result buffer must contain initial pattern for (size_t i = 0; i < num_elements; i++) { -- cgit v1.2.3 From 39cca992b8b2ce92896c33ba01a01ea3f68ffe36 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 6 Sep 2023 13:32:19 +0100 Subject: math_brute_force: remove gotos in macro_unary_float (#1725) Simplify code by returning directly instead of using goto statements. Although intended as an NFC commit, this changes the behaviour when verification fails: the output buffer is no longer unmapped. Although not ideal, this aligns the behaviour of `macro_unary_float` to the other math_brute_force tests. 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/macro_unary_float.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 53679788..34f49a5a 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -81,7 +81,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int ftz = job->ftz; bool relaxedMode = job->relaxedMode; cl_int error = CL_SUCCESS; - cl_int ret = CL_SUCCESS; const char *name = job->f->name; int signbit_test = 0; @@ -245,8 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (q[j] > t[j]) err = q[j] - t[j]; vlog_error("\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", name, err, ((float *)s)[j], t[j], q[j]); - error = -1; - goto exit; + return -1; } @@ -272,15 +270,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) vlog_error( "\nERROR: %s%s: %d ulp error at %a: *%d vs. 
%d\n", name, sizeNames[k], err, ((float *)s)[j], -t[j], q[j]); - error = -1; - goto exit; + return -1; } } } } -exit: - ret = error; for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], @@ -315,7 +310,7 @@ exit: fflush(stdout); } - return ret; + return CL_SUCCESS; } } // anonymous namespace -- cgit v1.2.3 From d7f24a7986f7cb92b75093dc119171ff39d402ae Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 14 Sep 2023 11:00:30 +0100 Subject: Fix more -Wsign-compare warnings (#1779) Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_enqueue_map.cpp | 6 ++++-- test_conformance/basic/test_fpmath.cpp | 10 +++++----- test_conformance/basic/test_intmath.cpp | 4 ++-- test_conformance/basic/test_vector_creation.cpp | 4 ++-- test_conformance/c11_atomics/test_atomics.cpp | 2 +- .../command_buffer_get_command_buffer_info.cpp | 2 +- .../command_buffer_profiling.cpp | 2 +- test_conformance/pipes/test_pipe_limits.cpp | 13 +++++++------ test_conformance/select/test_select.cpp | 19 +++++++++---------- 9 files changed, 32 insertions(+), 30 deletions(-) diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp index 6b650c0d..c2ea24ef 100644 --- a/test_conformance/basic/test_enqueue_map.cpp +++ b/test_conformance/basic/test_enqueue_map.cpp @@ -54,7 +54,8 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, BufferOwningPtr referenceData{ malloc(bufferSize) }; BufferOwningPtr finalData{ malloc(bufferSize) }; - for (int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) + for (size_t src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); + src_flag_id++) { clMemWrapper memObject; log_info("Testing with cl_mem_flags src: %s\n", @@ -155,7 +156,8 @@ int test_enqueue_map_image(cl_device_id deviceID, cl_context context, BufferOwningPtr finalData{ malloc(imageDataSize) }; MTdataHolder d{ gRandomSeed }; - for 
(int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) + for (size_t src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); + src_flag_id++) { clMemWrapper memObject; log_info("Testing with cl_mem_flags src: %s\n", diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp index 6719e728..9bdb192e 100644 --- a/test_conformance/basic/test_fpmath.cpp +++ b/test_conformance/basic/test_fpmath.cpp @@ -94,7 +94,7 @@ int verify_fp(std::vector (&input)[2], std::vector &output, { auto &inA = input[0]; auto &inB = input[1]; - for (int i = 0; i < output.size(); i++) + for (size_t i = 0; i < output.size(); i++) { bool nan_test = false; @@ -106,7 +106,7 @@ int verify_fp(std::vector (&input)[2], std::vector &output, if (r != output[i] && nan_test) { log_error("FP math test for type: %s, vec size: %zu, failed at " - "index %d, %a '%c' %a, expected %a, get %a\n", + "index %zu, %a '%c' %a, expected %a, get %a\n", test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]), test.op, toDouble(inB[i]), toDouble(r), toDouble(output[i])); @@ -238,13 +238,13 @@ struct TypesIterator generate_random_inputs(inputs); - for (int i = 0; i < ARRAY_SIZE(streams); i++) + for (size_t i = 0; i < ARRAY_SIZE(streams); i++) { streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err); test_error(err, "clCreateBuffer failed."); } - for (int i = 0; i < ARRAY_SIZE(inputs); i++) + for (size_t i = 0; i < ARRAY_SIZE(inputs); i++) { err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, @@ -264,7 +264,7 @@ struct TypesIterator test_error(err, "create_single_kernel_helper failed"); - for (int i = 0; i < ARRAY_SIZE(streams); i++) + for (size_t i = 0; i < ARRAY_SIZE(streams); i++) { err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); diff --git a/test_conformance/basic/test_intmath.cpp b/test_conformance/basic/test_intmath.cpp index 6fd41abb..5a4e9c2a 100644 --- a/test_conformance/basic/test_intmath.cpp +++ 
b/test_conformance/basic/test_intmath.cpp @@ -123,7 +123,7 @@ int test_intmath(cl_device_id device, cl_context context, size_t datasize = sizeof(T) * num_elements * N; // Create device buffers. - for (int i = 0; i < ARRAY_SIZE(streams); i++) + for (size_t i = 0; i < ARRAY_SIZE(streams); i++) { streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &err); @@ -175,7 +175,7 @@ int test_intmath(cl_device_id device, cl_context context, test_error(err, "clEnqueueReadBuffer failed\n"); // Verify results - for (int i = 0; i < num_elements * N; i++) + for (unsigned i = 0; i < num_elements * N; i++) { T r = test.ref(inputA[i], inputB[i], inputC[i]); if (r != output[i]) diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index 801c72b1..6bae156a 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -260,7 +260,7 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, std::vector output_data; // Iterate over all the types - for (int type_index = 0; type_index < vecType.size(); type_index++) + for (size_t type_index = 0; type_index < vecType.size(); type_index++) { if (!gHasLong @@ -336,7 +336,7 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, } // Iterate over all the vector sizes. 
- for (int size_index = 1; size_index < vecSizes.size(); size_index++) + for (size_t size_index = 1; size_index < vecSizes.size(); size_index++) { size_t global[] = { 1, 1, 1 }; int number_generated = -1; diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index d905b2ca..ca2c2242 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -3145,7 +3145,7 @@ public: } private: - int _subCaseId; + size_t _subCaseId; struct TestDefinition _subCase; }; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index 2ad77dbe..63441970 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -136,7 +136,7 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest // We can not check if this is the right queue because this is an opaque // object, test against NULL. 
- for (int i = 0; i < queue_list.size(); i++) + for (size_t i = 0; i < queue_list.size(); i++) { test_assert_error( queue_list[i] == queue, diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp index 28d80450..c06bbf76 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp @@ -160,7 +160,7 @@ struct CommandBufferProfiling : public BasicCommandBufferTest // verify the results by comparing timestamps bool all_vals_0 = prof_params.front().value != 0; - for (int i = 1; i < prof_params.size(); i++) + for (size_t i = 1; i < prof_params.size(); i++) { all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0; if (prof_params[i - 1].value > prof_params[i].value) diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp index e1048f5f..76b80b15 100644 --- a/test_conformance/pipes/test_pipe_limits.cpp +++ b/test_conformance/pipes/test_pipe_limits.cpp @@ -274,8 +274,7 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm size_t global_work_size[3]; cl_int err; size_t size; - int num_pipe_elements = 1024; - int i; + cl_uint num_pipe_elements = 1024; cl_uint max_pipe_packet_size; clEventWrapper producer_sync_event = NULL; clEventWrapper consumer_sync_event = NULL; @@ -287,7 +286,7 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm size_t min_alignment = get_min_alignment(context); - global_work_size[0] = (cl_uint)num_pipe_elements; + global_work_size[0] = num_pipe_elements; std::stringstream source; @@ -312,7 +311,8 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm inptr = (cl_char *)align_malloc(size, min_alignment); - for(i = 0; i < size; i++){ + for (size_t i = 0; i < size; i++) + { inptr[i] = 
(char)genrand_int32(d); } BufferInPtr.reset(inptr, nullptr, 0, size, true); @@ -412,7 +412,7 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, clMemWrapper buf_reserve_id_t_size_aligned; cl_int *inptr; void *outptr; - int size, i; + int size; clProgramWrapper program; clKernelWrapper kernel[3]; size_t global_work_size[3]; @@ -565,7 +565,8 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, size = sizeof(cl_int) * max_active_reservations; inptr = (cl_int *)align_malloc(size, min_alignment); - for(i = 0; i < max_active_reservations; i++){ + for (cl_uint i = 0; i < max_active_reservations; i++) + { inptr[i] = (int)genrand_int32(d); } BufferInPtr.reset(inptr, nullptr, 0, size, true); diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 8a0567c3..127f4538 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -119,34 +119,32 @@ static void initSrcBuffer(void* src1, Type stype, MTdata d) s1[i] = genrand_int32(d); } -static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count) { - int i; +static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, size_t count) +{ assert(cmptype != kfloat); switch (type_size[cmptype]) { case 1: { uint8_t* ub = (uint8_t *)cmp; - for (i=0; i < count; ++i) - ub[i] = (uint8_t)start++; + for (size_t i = 0; i < count; ++i) ub[i] = (uint8_t)start++; break; } case 2: { uint16_t* us = (uint16_t *)cmp; - for (i=0; i < count; ++i) - us[i] = (uint16_t)start++; + for (size_t i = 0; i < count; ++i) us[i] = (uint16_t)start++; break; } case 4: { if (!s_wimpy_mode) { uint32_t* ui = (uint32_t *)cmp; - for (i=0; i < count; ++i) - ui[i] = (uint32_t)start++; + for (size_t i = 0; i < count; ++i) ui[i] = (uint32_t)start++; } else { // The short test doesn't iterate over the entire 32 bit space so // we alternate between positive and negative values int32_t* ui = (int32_t 
*)cmp; int32_t sign = 1; - for (i=0; i < count; ++i, ++start) { + for (size_t i = 0; i < count; ++i, ++start) + { ui[i] = (int32_t)start*sign; sign = sign * -1; } @@ -158,7 +156,8 @@ static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count) // selects, we want to test positive and negative values int64_t* ll = (int64_t *)cmp; int64_t sign = 1; - for (i=0; i < count; ++i, ++start) { + for (size_t i = 0; i < count; ++i, ++start) + { ll[i] = start*sign; sign = sign * -1; } -- cgit v1.2.3 From d20913b4e3683de75cbfe89a0812a78f1eb0fee0 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 19 Sep 2023 12:26:48 +0100 Subject: test_common: fix -Wformat warnings (#1771) In preparation of re-enabling -Wformat globally, fix format string warnings in test_common. Printing a `size_t` requires the `%zu` specifier. Signed-off-by: Sven van Haastregt --- test_common/gl/helpers.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index 1fb85035..0b47f1d9 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -1047,8 +1047,10 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples GLint max_samples = get_gl_max_samples(target, internalFormat); check_gl_error() - if (max_samples < (GLint)samples) - log_error("GL error: requested samples (%d) exceeds renderer max samples (%d)\n", samples, max_samples); + if (max_samples < (GLint)samples) + log_error("GL error: requested samples (%zu) exceeds renderer max " + "samples (%d)\n", + samples, max_samples); // Setup the GLSL program const GLchar *vertex_source = @@ -1145,7 +1147,9 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples check_gl_error(); if (fbo_samples < (GLint)samples) - log_error("GL Error: requested samples (%d) exceeds FBO capability (%d)\n", samples, fbo_samples); + log_error( + "GL Error: requested samples (%zu) exceeds FBO 
capability (%d)\n", + samples, fbo_samples); glUseProgram(prog); check_gl_error() @@ -1306,7 +1310,9 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, GLint max_samples = get_gl_max_samples(target, internalFormat); if (max_samples < (GLint)samples) - log_error("GL error: requested samples (%d) exceeds renderer max samples (%d)\n", samples, max_samples); + log_error("GL error: requested samples (%zu) exceeds renderer max " + "samples (%d)\n", + samples, max_samples); // Setup the GLSL program const GLchar *vertex_source = @@ -1438,7 +1444,9 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, check_gl_error(); if (fbo_samples < (GLint)samples) - log_error("GL Error: requested samples (%d) exceeds FBO capability (%d)\n", samples, fbo_samples); + log_error( + "GL Error: requested samples (%zu) exceeds FBO capability (%d)\n", + samples, fbo_samples); glUseProgram(prog); check_gl_error() -- cgit v1.2.3 From 1c616238bc9e7c587cbe8c60a372031bc7cf95c5 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 20 Sep 2023 10:19:13 +0100 Subject: test_common: Capitalize macro and clang-format (#1813) The `CHECK_GL_ERROR` macro was lowercase before, which confused clang-format and resulted in some odd indentations. Capitalize the macro and run clang-format to fix the indentation. 
Signed-off-by: Sven van Haastregt --- test_common/gl/helpers.cpp | 87 +++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index 0b47f1d9..62f63253 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -966,12 +966,13 @@ void reorder_verification_buffer(GLenum glFormat, GLenum glType, char* buffer, s #ifdef GL_VERSION_3_2 -#define check_gl_error() \ -{ \ - GLenum errnom = GL_NO_ERROR;\ - if ((errnom = glGetError()) != GL_NO_ERROR)\ - log_error("GL Error: 0x%04X at %s:%d\n", errnom, __FILE__, __LINE__);\ -} +#define CHECK_GL_ERROR() \ + { \ + GLenum errnom = GL_NO_ERROR; \ + if ((errnom = glGetError()) != GL_NO_ERROR) \ + log_error("GL Error: 0x%04X at %s:%d\n", errnom, __FILE__, \ + __LINE__); \ + } const char *get_gl_vector_type( GLenum internalformat ) { @@ -1045,12 +1046,12 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples // Check if the renderer supports enough samples GLint max_samples = get_gl_max_samples(target, internalFormat); - check_gl_error() + CHECK_GL_ERROR() - if (max_samples < (GLint)samples) - log_error("GL error: requested samples (%zu) exceeds renderer max " - "samples (%d)\n", - samples, max_samples); + if (max_samples < (GLint)samples) + log_error("GL error: requested samples (%zu) exceeds renderer max " + "samples (%d)\n", + samples, max_samples); // Setup the GLSL program const GLchar *vertex_source = @@ -1077,36 +1078,36 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples glShaderWrapper vertex_shader = glCreateShader(GL_VERTEX_SHADER); glShaderSource(vertex_shader, 1, &vertex_source, NULL); glCompileShader(vertex_shader); - check_gl_error() + CHECK_GL_ERROR() glShaderWrapper fragment_shader = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource(fragment_shader, 1, &fragment_source, NULL); glCompileShader(fragment_shader); - check_gl_error() + 
CHECK_GL_ERROR() GLuint prog = glCreateProgram(); glAttachShader(prog, vertex_shader); glAttachShader(prog, fragment_shader); - check_gl_error() + CHECK_GL_ERROR() glBindAttribLocation(prog, 0, "att0"); glLinkProgram(prog); - check_gl_error() + CHECK_GL_ERROR() // Setup the FBO and texture glFramebufferWrapper fbo; glGenFramebuffers(1, &fbo); glBindFramebuffer(GL_FRAMEBUFFER, fbo); - check_gl_error() + CHECK_GL_ERROR() glViewport(0, 0, width, height); - check_gl_error() + CHECK_GL_ERROR() GLuint tex = 0; glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, tex); glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, internalFormat, width, height, fixedSampleLocations); - check_gl_error() + CHECK_GL_ERROR() GLint attachment; switch (internalFormat) { @@ -1124,7 +1125,7 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples } glFramebufferTexture(GL_FRAMEBUFFER, attachment, tex, 0); - check_gl_error() + CHECK_GL_ERROR() GLint status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status == GL_FRAMEBUFFER_UNSUPPORTED) { @@ -1144,7 +1145,7 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples // Check if the framebuffer supports enough samples GLint fbo_samples = 0; glGetIntegerv(GL_SAMPLES, &fbo_samples); - check_gl_error(); + CHECK_GL_ERROR(); if (fbo_samples < (GLint)samples) log_error( @@ -1152,16 +1153,16 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples samples, fbo_samples); glUseProgram(prog); - check_gl_error() + CHECK_GL_ERROR() if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) { glDisable(GL_DEPTH_TEST); - check_gl_error() + CHECK_GL_ERROR() } else { glEnable(GL_DEPTH_TEST); glDepthFunc(GL_ALWAYS); - check_gl_error() + CHECK_GL_ERROR() } // Setup the VBO for rendering a quad @@ -1176,14 +1177,14 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples glGenBuffers(1, &vbo); 
glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STREAM_DRAW); - check_gl_error() + CHECK_GL_ERROR() glVertexArraysWrapper vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat)*2, 0); - check_gl_error() + CHECK_GL_ERROR() //clearing color and depth buffer glClearColor(0, 0, 0, 0); @@ -1227,13 +1228,13 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples color += color_delta; glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - check_gl_error(); + CHECK_GL_ERROR(); glFlush(); } glDisable(GL_SAMPLE_MASK); - check_gl_error(); + CHECK_GL_ERROR(); *outTextureID = tex; @@ -1339,36 +1340,36 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, glShaderWrapper vertex_shader = glCreateShader(GL_VERTEX_SHADER); glShaderSource(vertex_shader, 1, &vertex_source, NULL); glCompileShader(vertex_shader); - check_gl_error() + CHECK_GL_ERROR() glShaderWrapper fragment_shader = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource(fragment_shader, 1, &fragment_source, NULL); glCompileShader(fragment_shader); - check_gl_error() + CHECK_GL_ERROR() glProgramWrapper prog = glCreateProgram(); glAttachShader(prog, vertex_shader); glAttachShader(prog, fragment_shader); - check_gl_error() + CHECK_GL_ERROR() glBindAttribLocation(prog, 0, "att0"); glLinkProgram(prog); - check_gl_error() + CHECK_GL_ERROR() // Setup the FBO and texture glFramebufferWrapper fbo; glGenFramebuffers(1, &fbo); glBindFramebuffer(GL_FRAMEBUFFER, fbo); - check_gl_error() + CHECK_GL_ERROR() glViewport(0, 0, width, height); - check_gl_error() + CHECK_GL_ERROR() GLuint tex = 0; glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, tex); glTexImage3DMultisample(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, samples, internalFormat, width, height, total_layers, fixedSampleLocations); - check_gl_error() + CHECK_GL_ERROR() GLint attachment; switch 
(internalFormat) { @@ -1390,12 +1391,12 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) { glDisable(GL_DEPTH_TEST); - check_gl_error() + CHECK_GL_ERROR() } else { glEnable(GL_DEPTH_TEST); glDepthFunc(GL_ALWAYS); - check_gl_error() + CHECK_GL_ERROR() } // Setup the VBO for rendering a quad @@ -1410,18 +1411,18 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STREAM_DRAW); - check_gl_error() + CHECK_GL_ERROR() glVertexArraysWrapper vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat)*2, 0); - check_gl_error() + CHECK_GL_ERROR() for (size_t l=0; l!=total_layers; ++l) { glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, tex, 0, l); - check_gl_error() + CHECK_GL_ERROR() GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status == GL_FRAMEBUFFER_UNSUPPORTED) { @@ -1441,7 +1442,7 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, // Check if the framebuffer supports enough samples GLint fbo_samples = 0; glGetIntegerv(GL_SAMPLES, &fbo_samples); - check_gl_error(); + CHECK_GL_ERROR(); if (fbo_samples < (GLint)samples) log_error( @@ -1449,7 +1450,7 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, samples, fbo_samples); glUseProgram(prog); - check_gl_error() + CHECK_GL_ERROR() //clearing color and depth buffer glClearColor(0, 0, 0, 0); @@ -1490,13 +1491,13 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, glUniform1f(glGetUniformLocation(prog, "depthVal"), val); glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - check_gl_error(); + CHECK_GL_ERROR(); glFlush(); } glDisable(GL_SAMPLE_MASK); - check_gl_error(); + CHECK_GL_ERROR(); } *outTextureID = tex; -- cgit v1.2.3 From 
aa953aaa51af331ca3e6bbd4fa787e5115d81077 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 20 Sep 2023 15:48:25 +0100 Subject: [NFC] cmake: do not suppress -Wsign-compare globally (#1810) Only disable `-Wsign-compare` for tests that do not compile cleanly with this warning enabled. Re-enable the warning for the other tests, so that it can catch any new occurrences. Signed-off-by: Sven van Haastregt --- CMakeLists.txt | 3 --- test_conformance/SVM/CMakeLists.txt | 2 +- test_conformance/allocations/CMakeLists.txt | 2 ++ test_conformance/basic/CMakeLists.txt | 2 ++ test_conformance/c11_atomics/CMakeLists.txt | 2 ++ test_conformance/conversions/CMakeLists.txt | 2 +- test_conformance/device_execution/CMakeLists.txt | 4 ++-- test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt | 2 ++ .../cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt | 2 ++ test_conformance/geometrics/CMakeLists.txt | 2 ++ test_conformance/images/kernel_read_write/CMakeLists.txt | 2 +- 11 files changed, 17 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d56b64d..6c9bbf6f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,9 +103,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" if(NOT CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo|MinSizeRel") # Enable more warnings if not doing a release build. add_cxx_flag_if_supported(-Wall) - # Suppress warnings that currently trigger on the code base. - # This list should shrink over time when warnings are fixed. 
- add_cxx_flag_if_supported(-Wno-sign-compare) endif() add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt index efa597d1..2ad2f821 100644 --- a/test_conformance/SVM/CMakeLists.txt +++ b/test_conformance/SVM/CMakeLists.txt @@ -17,6 +17,6 @@ set(${MODULE_NAME}_SOURCES test_migrate.cpp ) -set_gnulike_module_compile_flags("-Wno-sometimes-uninitialized") +set_gnulike_module_compile_flags("-Wno-sometimes-uninitialized -Wno-sign-compare") include(../CMakeCommon.txt) diff --git a/test_conformance/allocations/CMakeLists.txt b/test_conformance/allocations/CMakeLists.txt index a4043806..b6031225 100644 --- a/test_conformance/allocations/CMakeLists.txt +++ b/test_conformance/allocations/CMakeLists.txt @@ -8,4 +8,6 @@ set(${MODULE_NAME}_SOURCES allocation_utils.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index 9dcf1d5a..684a7d1d 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -68,4 +68,6 @@ if(APPLE) list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp) endif(APPLE) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/c11_atomics/CMakeLists.txt b/test_conformance/c11_atomics/CMakeLists.txt index 621adda7..0d389bce 100644 --- a/test_conformance/c11_atomics/CMakeLists.txt +++ b/test_conformance/c11_atomics/CMakeLists.txt @@ -7,4 +7,6 @@ set(${MODULE_NAME}_SOURCES test_atomics.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt index cc019b26..8ed3ba18 100644 --- a/test_conformance/conversions/CMakeLists.txt +++ b/test_conformance/conversions/CMakeLists.txt @@ -16,6 +16,6 @@ 
set_source_files_properties( COMPILE_FLAGS -march=i686) endif(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") +set_gnulike_module_compile_flags("-Wno-unused-but-set-variable -Wno-sign-compare") include(../CMakeCommon.txt) diff --git a/test_conformance/device_execution/CMakeLists.txt b/test_conformance/device_execution/CMakeLists.txt index 5e9e30e3..275b96c2 100644 --- a/test_conformance/device_execution/CMakeLists.txt +++ b/test_conformance/device_execution/CMakeLists.txt @@ -17,6 +17,6 @@ set(DEVICE_EXECUTION_SOURCES utils.cpp ) -include(../CMakeCommon.txt) +set_gnulike_module_compile_flags("-Wno-sign-compare") -# end of file # +include(../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt index 098fb5be..be5fd1c9 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -17,6 +17,8 @@ set(${MODULE_NAME}_SOURCES command_buffer_finalize.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../../CMakeCommon.txt) add_subdirectory( cl_khr_command_buffer_mutable_dispatch ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index 0d4dd039..9b598d8b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -12,4 +12,6 @@ set(${MODULE_NAME}_SOURCES ../basic_command_buffer.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../../../CMakeCommon.txt) diff --git a/test_conformance/geometrics/CMakeLists.txt 
b/test_conformance/geometrics/CMakeLists.txt index 3fee05fb..8a6f25c6 100644 --- a/test_conformance/geometrics/CMakeLists.txt +++ b/test_conformance/geometrics/CMakeLists.txt @@ -6,5 +6,7 @@ set(${MODULE_NAME}_SOURCES test_geometrics.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index b5527c74..d7e7eded 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -21,7 +21,7 @@ set(${MODULE_NAME}_SOURCES # Make unused variables not fatal in this module; see # https://github.com/KhronosGroup/OpenCL-CTS/issues/1484 -set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable") +set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable -Wno-sign-compare") include(../../CMakeCommon.txt) -- cgit v1.2.3 From 7759c2669aaef7a28b339b6bf4542e4ac983da00 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Thu, 21 Sep 2023 09:30:32 -0600 Subject: basic: fix more unused-but-set variables (#1811) --- test_conformance/basic/test_progvar.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index a46713e9..41cc0199 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -581,13 +581,19 @@ static void l_load_abilities(cl_device_id device) cl_uint max_dim = 0; status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dim), &max_dim, 0); - assert(status == CL_SUCCESS); + if (check_error(status, + "clGetDeviceInfo for " + "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed.")) + return; assert(max_dim > 0); size_t max_id[3]; max_id[0] = 0; status = clGetDeviceInfo(device, 
CL_DEVICE_MAX_WORK_ITEM_SIZES, max_dim * sizeof(size_t), &max_id[0], 0); - assert(status == CL_SUCCESS); + if (check_error(status, + "clGetDeviceInfo for " + "CL_DEVICE_MAX_WORK_ITEM_SIZES failed.")) + return; l_max_global_id0 = max_id[0]; } @@ -597,7 +603,10 @@ static void l_load_abilities(cl_device_id device) status = clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE, sizeof(l_linker_available), &l_linker_available, 0); - assert(status == CL_SUCCESS); + if (check_error(status, + "clGetDeviceInfo for " + "CL_DEVICE_LINKER_AVAILABLE failed.")) + return; } } @@ -903,6 +912,7 @@ static std::string global_decls(const TypeInfo& ti, bool with_init) vol, tn, vol, tn, vol, tn, vol, tn); } assert(num_printed < sizeof(decls)); + (void)num_printed; return std::string(decls); } @@ -983,6 +993,7 @@ static std::string writer_function(const TypeInfo& ti) writer_template_atomic, ti.get_buf_elem_type()); } assert(num_printed < sizeof(writer_src)); + (void)num_printed; std::string result = writer_src; return result; } @@ -1024,6 +1035,7 @@ static std::string reader_function(const TypeInfo& ti) ti.get_buf_elem_type(), ti.get_buf_elem_type()); } assert(num_printed < sizeof(reader_src)); + (void)num_printed; std::string result = reader_src; return result; } -- cgit v1.2.3 From b143a990d964a15c72d94998664a81b171192e74 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 22 Sep 2023 15:10:44 +0100 Subject: select: fix -Wformat warnings (#1817) * Printing of a `size_t` requires the `%z` specifier. * Printing of `cl_long`/`cl_ulong` is now done using the `PRI*64` macros to ensure portability across 32 and 64-bit builds. 
Signed-off-by: Sven van Haastregt --- test_conformance/select/test_select.cpp | 4 ++- test_conformance/select/util_select.cpp | 51 +++++++++++++++++---------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 127f4538..e9009e49 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -20,6 +20,8 @@ #include #include #include + +#include #include #if ! defined( _WIN32) @@ -441,7 +443,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c block_elements, element_count[vecsize]) != 0) { - log_error("vec_size:%d indx: 0x%16.16llx\n", + log_error("vec_size:%d indx: 0x%16.16" PRIx64 "\n", (int)element_count[vecsize], i); return TEST_FAIL; } diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index b85f54a7..078ff64a 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -16,6 +16,7 @@ #include "harness/errorHelpers.h" #include +#include #include "test_select.h" @@ -648,8 +649,8 @@ size_t check_uchar(const void *const test, const void *const correct, for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_uchar) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_uchar) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -670,8 +671,8 @@ size_t check_char(const void *const test, const void *const correct, for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_char) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_char) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -693,8 +694,8 @@ size_t check_ushort(const void 
*const test, const void *const correct, for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_ushort) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_ushort) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -716,8 +717,8 @@ size_t check_short(const void *const test, const void *const correct, for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_short) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_short) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -739,8 +740,8 @@ size_t check_uint(const void *const test, const void *const correct, for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_uint) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_uint) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -763,8 +764,8 @@ size_t check_int(const void *const test, const void *const correct, if (t[i] != c[i]) { - log_error("\n(check_int) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_int) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -786,9 +787,9 @@ size_t check_ulong(const void *const test, const void *const correct, for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_ulong) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", + log_error("\n(check_ulong) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " + "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n", vector_size, i, count, c[i], t[i]); return i + 1; } @@ 
-809,9 +810,9 @@ size_t check_long(const void *const test, const void *const correct, for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_long) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", + log_error("\n(check_long) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " + "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n", vector_size, i, count, c[i], t[i]); return i + 1; } @@ -834,8 +835,8 @@ size_t check_half(const void *const test, const void *const correct, && !(isnan(((cl_half *)correct)[i]) && isnan(((cl_half *)test)[i]))) { - log_error("\n(check_half) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_half) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -858,8 +859,8 @@ size_t check_float(const void *const test, const void *const correct, if ((t[i] != c[i]) && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i]))) { - log_error("\n(check_float) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_float) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -883,9 +884,9 @@ size_t check_double(const void *const test, const void *const correct, && !(isnan(((double *)correct)[i]) && isnan(((double *)test)[i]))) { - log_error("\n(check_double) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", + log_error("\n(check_double) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " + "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n", vector_size, i, count, c[i], t[i]); return i + 1; } -- cgit v1.2.3 From f39685030f5f72883c0a71400a2423e436870639 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 2 Oct 2023 17:11:55 +0200 Subject: gl: fix sometimes-uninitialized 
warning (#1815) Bail out when hitting the default case, so that we don't attempt to access the uninitialized `error` variable. Signed-off-by: Sven van Haastregt --- test_conformance/gl/test_images_write_common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 4d721296..69d00a1a 100644 --- a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -571,6 +571,7 @@ static int test_image_format_write(cl_context context, cl_command_queue queue, "%s (%s):%d", GetGLTargetName(target), __FUNCTION__, __FILE__, __LINE__); + return -1; } // If there was a problem during creation, make sure it isn't a known -- cgit v1.2.3 From 43d6886810919475d95302fb6173de573ffcdef6 Mon Sep 17 00:00:00 2001 From: Steven Winston Date: Tue, 3 Oct 2023 09:26:48 -0700 Subject: rewrite test_select to run in a few seconds. (#1665) * rewrite test_select to run in a few seconds. * removing the threading; reverting to the original method. * Merge from Master, remove all suggested changes and start with a simple change report on each change's cost savings: BEFORE: real 47m8.497s user 48m8.860s sys 0m14.952s AFTER: real 17m53.383s user 18m53.342s sys 0m13.297s initSrcBuffer generates the same random noise every iteration through the loop. There is no change to the arguments, and the host data itself doesn't need to get rewritten. Profiling realizes a 2 times speed accel from simply relying upon the buffer to remain randomized at the next loop iteration. * BEFORE: real 17m53.383s user 18m53.342s sys 0m13.297s AFTER: real 12m26.035s user 13m15.505s sys 0m15.414s rearrange a few things in the loops to allow for vectorized / interleaved loop traversal. NB: not all loops are vectorizable obviously; but this addresses the worst offenders. Also note, to enable compiler to generate vectorized and interleaved loop traversal build with -o3. 
* address the CI format requirements. * address the CI format requirements. * address the CI format requirements. --- test_conformance/select/test_select.cpp | 56 ++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index e9009e49..72be08c7 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -47,11 +47,14 @@ static void initSrcBuffer(void* src1, Type stype, MTdata); // initialize the valued used to compare with in the select with // vlaues [start, count) -static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count); +static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, + const size_t count); // make a program that uses select for the given stype (src/dest type), // ctype (comparison type), veclen (vector length) -static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type stype, Type ctype, size_t veclen ); +static cl_program makeSelectProgram(cl_kernel *kernel_ptr, cl_context context, + Type stype, Type ctype, + const size_t veclen); // Creates and execute the select test for the given device, context, // stype (source/dest type), cmptype (comparison type), using max_tg_size @@ -121,7 +124,9 @@ static void initSrcBuffer(void* src1, Type stype, MTdata d) s1[i] = genrand_int32(d); } -static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, size_t count) +static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, + const size_t count) + { assert(cmptype != kfloat); switch (type_size[cmptype]) { @@ -144,11 +149,12 @@ static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, size_t count) // The short test doesn't iterate over the entire 32 bit space so // we alternate between positive and negative values int32_t* ui = (int32_t *)cmp; - int32_t sign = 1; - for (size_t i = 0; i < count; ++i, ++start) + 
int32_t neg_start = (int32_t)start * -1; + for (size_t i = 0; i < count; i++) { - ui[i] = (int32_t)start*sign; - sign = sign * -1; + ++start; + --neg_start; + ui[i] = (int32_t)((i % 2) ? start : neg_start); } } break; @@ -157,11 +163,12 @@ static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, size_t count) // We don't iterate over the entire space of 64 bit so for the // selects, we want to test positive and negative values int64_t* ll = (int64_t *)cmp; - int64_t sign = 1; - for (size_t i = 0; i < count; ++i, ++start) + int64_t neg_start = (int64_t)start * -1; + for (size_t i = 0; i < count; i++) { - ll[i] = start*sign; - sign = sign * -1; + ++start; + --neg_start; + ll[i] = (int64_t)((i % 2) ? start : neg_start); } break; } @@ -173,7 +180,9 @@ static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, size_t count) // Make the various incarnations of the program we want to run // stype: source and destination type for the select // ctype: compare type -static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type srctype, Type cmptype, size_t vec_len) +static cl_program makeSelectProgram(cl_kernel *kernel_ptr, + const cl_context context, Type srctype, + Type cmptype, const size_t vec_len) { char testname[256]; char stypename[32]; @@ -309,7 +318,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c clMemWrapper src1, src2, cmp, dest; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; - size_t block_elements = BUFFER_SIZE / type_size[stype]; + const size_t block_elements = BUFFER_SIZE / type_size[stype]; size_t step = s_wimpy_mode ? 
s_wimpy_reduction_factor : 1; cl_ulong cmp_stride = block_elements * step; @@ -355,10 +364,21 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); test_error_count(err, "Error: could not allocate dest buffer\n"); - for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + programs[0] = makeSelectProgram(&kernels[0], context, stype, cmptype, + element_count[0]); + programs[1] = makeSelectProgram(&kernels[1], context, stype, cmptype, + element_count[1]); + programs[2] = makeSelectProgram(&kernels[2], context, stype, cmptype, + element_count[2]); + programs[3] = makeSelectProgram(&kernels[3], context, stype, cmptype, + element_count[3]); + programs[4] = makeSelectProgram(&kernels[4], context, stype, cmptype, + element_count[4]); + programs[5] = makeSelectProgram(&kernels[5], context, stype, cmptype, + element_count[5]); + + for (size_t vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { - programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, - cmptype, element_count[vecsize]); if (!programs[vecsize] || !kernels[vecsize]) { return -1; @@ -391,10 +411,10 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c log_info("Testing..."); uint64_t i; + initSrcBuffer(src1_host.data(), stype, d); + initSrcBuffer(src2_host.data(), stype, d); for (i=0; i < blocks; i+=step) { - initSrcBuffer(src1_host.data(), stype, d); - initSrcBuffer(src2_host.data(), stype, d); initCmpBuffer(cmp_host.data(), cmptype, i * cmp_stride, block_elements); err = clEnqueueWriteBuffer(queue, src1, CL_FALSE, 0, BUFFER_SIZE, -- cgit v1.2.3 From 7f0c0aee05c83ea3049a3749ed8f52e0bace16bc Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:29:20 -0500 Subject: Replace cl_queue_properties_khr with cl_command_queue_properties (#1768) 
test_conformance/api/test_queue_properties.cpp uses a cl_queue_properties_khr value to query CL_DEVICE_QUEUE_PROPERTIES, however this should be a cl_command_queue_properties typed variable. Fixes issue #1640 --- test_conformance/api/test_queue_properties.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test_conformance/api/test_queue_properties.cpp b/test_conformance/api/test_queue_properties.cpp index 62d0a734..768bd5de 100644 --- a/test_conformance/api/test_queue_properties.cpp +++ b/test_conformance/api/test_queue_properties.cpp @@ -107,8 +107,9 @@ int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_ clProgramWrapper program; clKernelWrapper kernel; - cl_queue_properties_khr device_props = 0; - cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + cl_command_queue_properties device_props = 0; + cl_command_queue_properties queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, + 0 }; // Query extension if (!is_extension_available(deviceID, "cl_khr_create_command_queue")) -- cgit v1.2.3 From 4984196bcb79b0857994b79de8edd4644ab277e3 Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Tue, 3 Oct 2023 17:30:46 +0100 Subject: Fix CopyImage verification for 1D/2D images (#1791) The verification uses a common function with nested loops to verify the result of the copy operation. The upper loop limits thirdDim and SecondDim should be set according to the image type under test. Previously for 1D/2D they were set from dstImageInfo->depth and dstImageInfo->height. The issue is that the depth and height are set to 0 when unused. This caused the verification loop to be skipped. 
Signed-off-by: John Kesapides --- .../images/clCopyImage/test_copy_generic.cpp | 59 +++++++++++++++------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp index 3e0b60d9..888ca6ec 100644 --- a/test_conformance/images/clCopyImage/test_copy_generic.cpp +++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp @@ -519,32 +519,53 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d if( gDebugTrace ) log_info( " - Scanline verification...\n" ); - size_t thirdDim; - size_t secondDim; - if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) - { - secondDim = dstImageInfo->arraySize; - thirdDim = 1; - } - else if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) + size_t thirdDim = 1; + size_t secondDim = 1; + + switch (dstImageInfo->type) { - secondDim = dstImageInfo->height; - if( gTestMipmaps ) - secondDim = (dstImageInfo->height >> dst_lod) ? (dstImageInfo->height >> dst_lod):1; - thirdDim = dstImageInfo->arraySize; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: { + secondDim = dstImageInfo->arraySize; + break; + } + case CL_MEM_OBJECT_IMAGE2D_ARRAY: { + secondDim = dstImageInfo->height; + thirdDim = dstImageInfo->arraySize; + break; + } + case CL_MEM_OBJECT_IMAGE3D: { + secondDim = dstImageInfo->height; + thirdDim = dstImageInfo->depth; + break; + } + case CL_MEM_OBJECT_IMAGE2D: { + secondDim = dstImageInfo->height; + break; + } + case CL_MEM_OBJECT_IMAGE1D: { + break; + } + default: { + log_error("ERROR: Unsupported Image type. \n"); + return error; + break; + } } - else + if (gTestMipmaps) { - secondDim = dstImageInfo->height; - thirdDim = dstImageInfo->depth; - if( gTestMipmaps ) + switch (dstImageInfo->type) { - secondDim = (dstImageInfo->height >> dst_lod) ? 
(dstImageInfo->height >> dst_lod):1; - if(dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D) + case CL_MEM_OBJECT_IMAGE3D: thirdDim = (dstImageInfo->depth >> dst_lod) ? (dstImageInfo->depth >> dst_lod):1; + /* Fallthrough */ + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + secondDim = (dstImageInfo->height >> dst_lod) + ? (dstImageInfo->height >> dst_lod) + : 1; + break; } } - for( size_t z = 0; z < thirdDim; z++ ) { for( size_t y = 0; y < secondDim; y++ ) -- cgit v1.2.3 From 1fe72877d72d0ae2447fcdde5db713ee0a6b79f8 Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:33:47 -0500 Subject: Check that CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE for FULL_PROFILE (#1804) OpenCL FULL profile requires that online compiler be available. OpenCL-CTS currently queries profile as well as online compiler availability via device queries, but doesn't check for the consistency. Check for consistency that if CL_DEVICE_PROFILE is "FULL_PROFILE" (or technically is not "EMBEDDED_PROFILE") then CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE for that device. 
Fixes #1763 --- test_conformance/api/test_queries.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index 92ae1d7b..f0740107 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -644,6 +644,13 @@ int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_q } log_info( "\tReported device profile: %s \n", profile ); + if (strcmp(profile, "FULL_PROFILE") == 0 && compilerAvail != CL_TRUE) + { + log_error("ERROR: Returned profile of device is FULL , but " + "CL_DEVICE_COMPILER_AVAILABLE is not CL_TRUE as required by " + "OpenCL 1.2!"); + return -1; + } return 0; } -- cgit v1.2.3 From e3c8de8eb621db4ad2b6616267cba2eb3b444bb0 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 3 Oct 2023 18:36:01 +0200 Subject: math_brute_force: don't set/restore FTZ mode twice (#1808) The suite's `main()` function already disables the FTZ mode prior to invoking `runTestHarnessWithCheck` and restores the FP state afterwards, so tests don't have to do so themselves. 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/i_unary_double.cpp | 6 ------ test_conformance/math_brute_force/i_unary_float.cpp | 6 ------ 2 files changed, 12 deletions(-) diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index 3d6ce152..953c33bb 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -50,11 +50,6 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - // This test is not using ThreadPool so we need to disable FTZ here - // for reference computations - FPU_mode_type oldMode; - DisableFTZ(&oldMode); - Force64BitFPUPrecision(); // Init the kernels @@ -227,6 +222,5 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - RestoreFPState(&oldMode); return error; } diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 94ebc66a..0ce37cc8 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -49,11 +49,6 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - // This test is not using ThreadPool so we need to disable FTZ here - // for reference computations - FPU_mode_type oldMode; - DisableFTZ(&oldMode); - Force64BitFPUPrecision(); // Init the kernels @@ -225,6 +220,5 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - RestoreFPState(&oldMode); return error; } -- cgit v1.2.3 From 60fc7fc844c81304334485ef9e58f44931c1417d Mon Sep 17 00:00:00 2001 From: Aharon Abramson Date: Tue, 10 Oct 2023 19:18:22 +0300 Subject: Update test_compile.cpp (#1812) * Update test_compile.cpp Prevent the error: "variable 'temp' is uninitialized when used here" 
Since one of the generated kernels starts with: __kernel void sample_test(__global float *src, __global int *dst) { float temp; int tid = get_global_id(0); dst[tid] = dst[tid] + temp; * Update test_compile.cpp clang format * Update test_compile.cpp Change the "0.0" literal to "0.0f" for devices that don't support FP64. --- test_conformance/compiler/test_compile.cpp | 3754 +++++++++++++++++----------- 1 file changed, 2271 insertions(+), 1483 deletions(-) diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index d250bdd4..3af8125a 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,148 +16,147 @@ #include "testBase.h" #if defined(_WIN32) #include -#elif defined(__linux__) || defined(__APPLE__) +#elif defined(__linux__) || defined(__APPLE__) #include #include #endif #include "harness/conversions.h" #define MAX_LINE_SIZE_IN_PROGRAM 1024 -#define MAX_LOG_SIZE_IN_PROGRAM 2048 +#define MAX_LOG_SIZE_IN_PROGRAM 2048 const char *sample_kernel_start = -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" float temp;\n" -" int tid = get_global_id(0);\n"; + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " float temp = 0.0f;\n" + " int tid = get_global_id(0);\n"; const char *sample_kernel_end = "}\n"; -const char *sample_kernel_lines[] = { -"dst[tid] = src[tid];\n", -"dst[tid] = src[tid] * 3.f;\n", -"temp = src[tid] / 4.f;\n", -"dst[tid] = dot(temp,src[tid]);\n", -"dst[tid] = dst[tid] + temp;\n" }; +const char *sample_kernel_lines[] = { "dst[tid] = src[tid];\n", + "dst[tid] = src[tid] * 3.f;\n", + "temp = src[tid] / 4.f;\n", + "dst[tid] = dot(temp,src[tid]);\n", + 
"dst[tid] = dst[tid] + temp;\n" }; /* I compile and link therefore I am. Robert Ioffe */ -/* The following kernels are used in testing Improved Compilation and Linking feature */ - -const char *simple_kernel = -"__kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" int id = (int)get_global_id(0);\n" -" dst[id] = src[id];\n" -"}\n"; +/* The following kernels are used in testing Improved Compilation and Linking + * feature */ + +const char *simple_kernel = "__kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " int id = (int)get_global_id(0);\n" + " dst[id] = src[id];\n" + "}\n"; const char *simple_kernel_with_defines = -"__kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" int id = (int)get_global_id(0);\n" -" float temp = src[id] - 42;\n" -" dst[id] = FIRST + temp + SECOND;\n" -"}\n"; - -const char *simple_kernel_template = -"__kernel void\n" -"CopyBuffer%d(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" int id = (int)get_global_id(0);\n" -" dst[id] = src[id];\n" -"}\n"; - -const char *composite_kernel_start = -"__kernel void\n" -"CompositeKernel(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n"; + "__kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " int id = (int)get_global_id(0);\n" + " float temp = src[id] - 42;\n" + " dst[id] = FIRST + temp + SECOND;\n" + "}\n"; + +const char *simple_kernel_template = "__kernel void\n" + "CopyBuffer%d(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " int id = (int)get_global_id(0);\n" + " dst[id] = src[id];\n" + "}\n"; + +const char *composite_kernel_start = "__kernel void\n" + "CompositeKernel(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n"; const char *composite_kernel_end = "}\n"; -const char *composite_kernel_template = -" CopyBuffer%d(src, dst);\n"; - 
-const char *composite_kernel_extern_template = -"extern __kernel void\n" -"CopyBuffer%d(\n" -" __global float* src,\n" -" __global float* dst );\n"; - -const char *another_simple_kernel = -"extern __kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst );\n" -"__kernel void\n" -"AnotherCopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" CopyBuffer(src, dst);\n" -"}\n"; - -const char* simple_header = -"extern __kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst );\n"; - -const char* simple_header_name = "simple_header.h"; - -const char* another_simple_kernel_with_header = -"#include \"simple_header.h\"\n" -"__kernel void\n" -"AnotherCopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" CopyBuffer(src, dst);\n" -"}\n"; - -const char* header_name_templates[4] = { "simple_header%d.h", - "foo/simple_header%d.h", - "foo/bar/simple_header%d.h", - "foo/bar/baz/simple_header%d.h"}; - -const char* include_header_name_templates[4] = { "#include \"simple_header%d.h\"\n", - "#include \"foo/simple_header%d.h\"\n", - "#include \"foo/bar/simple_header%d.h\"\n", - "#include \"foo/bar/baz/simple_header%d.h\"\n"}; - -const char* compile_extern_var = "extern constant float foo;\n"; -const char* compile_extern_struct = "extern constant struct bar bart;\n"; -const char* compile_extern_function = "extern int baz(int, int);\n"; - -const char* compile_static_var = "static constant float foo = 2.78;\n"; -const char* compile_static_struct = "static constant struct bar {float x, y, z, r; int color; } foo = {3.14159};\n"; -const char* compile_static_function = "static int foo(int x, int y) { return x*x + y*y; }\n"; - -const char* compile_regular_var = "constant float foo = 4.0f;\n"; -const char* compile_regular_struct = "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, 0.f, 0};\n"; -const char* compile_regular_function = "int foo(int x, int y) { return x*x 
+ y*y; }\n"; - -const char* link_static_var_access = // use with compile_static_var -"extern constant float foo;\n" -"float access_foo() { return foo; }\n"; - -const char* link_static_struct_access = // use with compile_static_struct -"extern constant struct bar{float x, y, z, r; int color; } foo;\n" -"struct bar access_foo() {return foo; }\n"; - -const char* link_static_function_access = // use with compile_static_function -"extern int foo(int, int);\n" -"int access_foo() { int blah = foo(3, 4); return blah + 5; }\n"; - -int test_large_single_compile(cl_context context, cl_device_id deviceID, unsigned int numLines) +const char *composite_kernel_template = " CopyBuffer%d(src, dst);\n"; + +const char *composite_kernel_extern_template = "extern __kernel void\n" + "CopyBuffer%d(\n" + " __global float* src,\n" + " __global float* dst );\n"; + +const char *another_simple_kernel = "extern __kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst );\n" + "__kernel void\n" + "AnotherCopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " CopyBuffer(src, dst);\n" + "}\n"; + +const char *simple_header = "extern __kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst );\n"; + +const char *simple_header_name = "simple_header.h"; + +const char *another_simple_kernel_with_header = "#include \"simple_header.h\"\n" + "__kernel void\n" + "AnotherCopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " CopyBuffer(src, dst);\n" + "}\n"; + +const char *header_name_templates[4] = { "simple_header%d.h", + "foo/simple_header%d.h", + "foo/bar/simple_header%d.h", + "foo/bar/baz/simple_header%d.h" }; + +const char *include_header_name_templates[4] = { + "#include \"simple_header%d.h\"\n", "#include \"foo/simple_header%d.h\"\n", + "#include \"foo/bar/simple_header%d.h\"\n", + "#include \"foo/bar/baz/simple_header%d.h\"\n" +}; + +const char *compile_extern_var = "extern constant 
float foo;\n"; +const char *compile_extern_struct = "extern constant struct bar bart;\n"; +const char *compile_extern_function = "extern int baz(int, int);\n"; + +const char *compile_static_var = "static constant float foo = 2.78;\n"; +const char *compile_static_struct = "static constant struct bar {float x, y, " + "z, r; int color; } foo = {3.14159};\n"; +const char *compile_static_function = + "static int foo(int x, int y) { return x*x + y*y; }\n"; + +const char *compile_regular_var = "constant float foo = 4.0f;\n"; +const char *compile_regular_struct = + "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, " + "0.f, 0};\n"; +const char *compile_regular_function = + "int foo(int x, int y) { return x*x + y*y; }\n"; + +const char *link_static_var_access = // use with compile_static_var + "extern constant float foo;\n" + "float access_foo() { return foo; }\n"; + +const char *link_static_struct_access = // use with compile_static_struct + "extern constant struct bar{float x, y, z, r; int color; } foo;\n" + "struct bar access_foo() {return foo; }\n"; + +const char *link_static_function_access = // use with compile_static_function + "extern int foo(int, int);\n" + "int access_foo() { int blah = foo(3, 4); return blah + 5; }\n"; + +int test_large_single_compile(cl_context context, cl_device_id deviceID, + unsigned int numLines) { int error; cl_program program; @@ -166,96 +165,113 @@ int test_large_single_compile(cl_context context, cl_device_id deviceID, unsigne MTdata d; /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( numLines * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__); + lines = (const char **)malloc(numLines * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First and last lines are easy */ - lines[ 0 ] = sample_kernel_start; - lines[ numLines - 1 ] = sample_kernel_end; + lines[0] = sample_kernel_start; + lines[numLines - 1] = sample_kernel_end; - numChoices = sizeof( sample_kernel_lines ) / sizeof( sample_kernel_lines[ 0 ] ); + numChoices = sizeof(sample_kernel_lines) / sizeof(sample_kernel_lines[0]); /* Fill the rest with random lines to hopefully prevent much optimization */ - d = init_genrand( gRandomSeed ); - for( i = 1; i < numLines - 1; i++ ) + d = init_genrand(gRandomSeed); + for (i = 1; i < numLines - 1; i++) { - lines[ i ] = sample_kernel_lines[ genrand_int32(d) % numChoices ]; + lines[i] = sample_kernel_lines[genrand_int32(d) % numChoices]; } - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, numLines, lines); - if( program == NULL || error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); - free( lines ); + error = create_single_kernel_helper_create_program(context, &program, + numLines, lines); + if (program == NULL || error != CL_SUCCESS) + { + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s in %s:%d)", + numLines, IGetErrorString(error), __FILE__, __LINE__); + free(lines); if (program != NULL) { - error = clReleaseProgram( program ); - test_error( error, "Unable to release a program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release a program object"); } return -1; } /* Build it */ - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build a long program" ); + error = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); + test_error(error, "Unable to build a long program"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release a program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release a program object"); - free( lines ); + free(lines); return 0; } -int test_large_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_large_compile(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 64, 128, 256, 512, 1024, 2048, 4096, 0 }; //8192, 16384, 32768, 0 }; + unsigned int toTest[] = { + 64, 128, 256, 512, 1024, 2048, 4096, 0 + }; // 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing large compiles...this might take awhile...\n" ); + log_info("Testing large compiles...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_single_compile( context, deviceID, toTest[ i ] ) != 0 ) + if (test_large_single_compile(context, deviceID, toTest[i]) != 0) { - log_error( "ERROR: long program test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__); + log_error( + "ERROR: long program test failed for %d lines! 
(in %s:%d)\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel); +static int verifyCopyBuffer(cl_context context, cl_command_queue queue, + cl_kernel kernel); #if defined(__APPLE__) || defined(__linux) #define _strdup strdup #endif -int test_large_multi_file_library(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multi_file_library(cl_context context, cl_device_id deviceID, + cl_command_queue queue, unsigned int numLines) { int error; cl_program program; @@ -264,164 +280,194 @@ int test_large_multi_file_library(cl_context context, cl_device_id deviceID, cl_ unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! 
" + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { free(simple_kernels); - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2* numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for( i = numLines + 1; i < 2* numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, + 2 * numLines + 2, lines); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); - free( simple_kernels ); - for( i = 0; i < numLines; i++) + log_error("ERROR: Unable to create long test program with %d lines! 
" + "(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); + free(simple_kernels); + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)lines[i+numLines+1] ); + free((void *)lines[i]); + free((void *)lines[i + numLines + 1]); } - free( lines ); + free(lines); if (program != NULL) { - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); } return -1; } /* Compile it */ - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - simple_kernels[i] = clCreateProgramWithSource( context, 1, &kernel_source, NULL, &error ); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + simple_kernels[i] = + clCreateProgramWithSource(context, 1, &kernel_source, NULL, &error); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! 
(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", numLines, + simple_kernels, NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); - /* Link the program that calls the kernels and the library that contains them */ + /* Link the program that calls the kernels and the library that contains + * them */ cl_program programs[2] = { program, my_newly_minted_library }; - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != 
CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)lines[i+numLines+1] ); + free((void *)lines[i]); + free((void *)lines[i + numLines + 1]); } - free( lines ); + free(lines); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multi_file_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multi_file_libraries(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; + unsigned int toTest[] = { + 2, 4, 8, 16, 32, 64, 128, 256, 0 + }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing 
multi-file libraries ...this might take awhile...\n" ); + log_info("Testing multi-file libraries ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multi_file_library( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multi_file_library(context, deviceID, queue, toTest[i]) + != 0) { - log_error( "ERROR: multi-file library program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multi-file library program test failed for %d " + "lines! (in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_large_multiple_embedded_headers(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multiple_embedded_headers(cl_context context, + cl_device_id deviceID, + cl_command_queue queue, + unsigned int numLines) 
{ int error; cl_program program; @@ -432,29 +478,41 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate simple_kernels array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate simple_kernels array with %d " + "lines! (in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } - headers = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (headers == NULL) { - log_error( "ERROR: Unable to allocate headers array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + headers = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (headers == NULL) + { + log_error("ERROR: Unable to allocate headers array with %d lines! (in " + "%s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - header_names = (const char**)malloc( numLines*sizeof( const char * ) ); - if (header_names == NULL) { - log_error( "ERROR: Unable to allocate header_names array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + header_names = (const char **)malloc(numLines * sizeof(const char *)); + if (header_names == NULL) + { + log_error("ERROR: Unable to allocate header_names array with %d lines! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } - lines = (const char **)malloc( (2*numLines + 2)*sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, include_header_name_templates[i % 4], i); lines[i] = _strdup(buffer); @@ -463,153 +521,177 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device sprintf(buffer, composite_kernel_extern_template, i); const char *line = buffer; - error = create_single_kernel_helper_create_program(context, &headers[i], 1, &line); - if( headers[i] == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &headers[i], + 1, &line); + if (headers[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__); + log_error("ERROR: Unable to create a simple header program! 
(%s in " + "%s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2* numLines + 1 ] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for( i = numLines + 1; i < 2* numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, + 2 * numLines + 2, lines); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! 
" + "(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(program, 1, &deviceID, NULL, numLines, headers, header_names, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, numLines, headers, + header_names, NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! 
(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", numLines, + simple_kernels, NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); - /* Link the program that calls the kernels and the library that contains them */ + /* Link the program that calls the kernels and the library that contains + * them */ cl_program programs[2] = { program, my_newly_minted_library }; - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != 
CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)header_names[i] ); + free((void *)lines[i]); + free((void *)header_names[i]); } - for( i = numLines + 1; i < 2* numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); - free( header_names ); + free(lines); + free(header_names); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); - error = clReleaseProgram( headers[i] ); - test_error( error, "Unable to release header program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); + error = clReleaseProgram(headers[i]); + test_error(error, "Unable to release header program object"); } - free( simple_kernels ); - free( headers ); + free(simple_kernels); + free(headers); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_embedded_headers(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; + unsigned int toTest[] = { + 2, 4, 8, 16, 32, 64, 128, 256, 0 + }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing multiple embedded headers ...this might take awhile...\n" ); + log_info( + "Testing multiple embedded headers ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_embedded_headers( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_embedded_headers(context, deviceID, queue, + toTest[i]) + != 0) { - log_error( "ERROR: multiple embedded headers program test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple embedded headers program test failed " + "for %d lines! 
(in %s:%d)\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -double logbase(double a, double base) -{ - return log(a) / log(base); -} +double logbase(double a, double base) { return log(a) / log(base); } -int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, + cl_command_queue queue, unsigned int numLines) { int error; cl_program *simple_kernels; @@ -617,164 +699,202 @@ int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_ unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; /* I want to create (log2(N)+1)/2 libraries */ - unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2; + unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001) / 2; unsigned int numLibraries = (unsigned int)pow(2.0, level - 1.0); - unsigned int numFilesInLib = numLines/numLibraries; - cl_program *my_program_and_libraries = (cl_program*)malloc((1+numLibraries)*sizeof(cl_program)); - if (my_program_and_libraries == NULL) { - log_error( "ERROR: Unable to allocate program array with %d programs! 
(in %s:%d)\n", (1+numLibraries), __FILE__, __LINE__); + unsigned int numFilesInLib = numLines / numLibraries; + cl_program *my_program_and_libraries = + (cl_program *)malloc((1 + numLibraries) * sizeof(cl_program)); + if (my_program_and_libraries == NULL) + { + log_error("ERROR: Unable to allocate program array with %d programs! " + "(in %s:%d)\n", + (1 + numLibraries), __FILE__, __LINE__); return -1; } - log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib); + log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, + numLibraries, numFilesInLib); - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2*numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &my_program_and_libraries[0], 2 * numLines + 2, lines); - if( my_program_and_libraries[0] == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &my_program_and_libraries[0], 2 * numLines + 2, lines); + if (my_program_and_libraries[0] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! 
" + "(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_program_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_program_and_libraries[0], 1, &deviceID, NULL, 0, + NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! 
(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - for(i = 0; i < numLibraries; i++) { - my_program_and_libraries[i+1] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+i*numFilesInLib, NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + for (i = 0; i < numLibraries; i++) + { + my_program_and_libraries[i + 1] = clLinkProgram( + context, 1, &deviceID, "-create-library", numFilesInLib, + simple_kernels + i * numFilesInLib, NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); } - /* Link the program that calls the kernels and the library that contains them */ - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1, my_program_and_libraries, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + /* Link the program that calls the kernels and the library that contains + * them */ + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, numLibraries + 1, + my_program_and_libraries, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // 
Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - for(i = 0; i <= numLibraries; i++) { - error = clReleaseProgram( my_program_and_libraries[i] ); - test_error( error, "Unable to release program object" ); + for (i = 0; i <= numLibraries; i++) + { + error = clReleaseProgram(my_program_and_libraries[i]); + test_error(error, "Unable to release program object"); } - free( my_program_and_libraries ); - for(i = 0; i < numLines; i++) + free(my_program_and_libraries); + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); + free(lines); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_libraries(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 2, 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int toTest[] = { + 2, 
8, 32, 128, 256, 0 + }; // 512, 2048, 8192, 32768, 0 }; unsigned int i; - log_info( "Testing multiple libraries ...this might take awhile...\n" ); + log_info("Testing multiple libraries ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_libraries(context, deviceID, queue, toTest[i]) + != 0) { - log_error( "ERROR: multiple library program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple library program test failed for %d " + "lines! (in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int 
test_large_multiple_files_multiple_libraries(cl_context context, + cl_device_id deviceID, + cl_command_queue queue, + unsigned int numLines) { int error; cl_program *simple_kernels; @@ -782,915 +902,1173 @@ int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_i unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; /* I want to create (log2(N)+1)/4 libraries */ - unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2; + unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001) / 2; unsigned int numLibraries = (unsigned int)pow(2.0, level - 2.0); - unsigned int numFilesInLib = numLines/(2*numLibraries); - cl_program *my_programs_and_libraries = (cl_program*)malloc((1+numLibraries+numLibraries*numFilesInLib)*sizeof(cl_program)); - if (my_programs_and_libraries == NULL) { - log_error( "ERROR: Unable to allocate program array with %d programs! (in %s:%d)\n", (1+numLibraries+numLibraries*numFilesInLib), __FILE__, __LINE__ ); + unsigned int numFilesInLib = numLines / (2 * numLibraries); + cl_program *my_programs_and_libraries = (cl_program *)malloc( + (1 + numLibraries + numLibraries * numFilesInLib) * sizeof(cl_program)); + if (my_programs_and_libraries == NULL) + { + log_error("ERROR: Unable to allocate program array with %d programs! " + "(in %s:%d)\n", + (1 + numLibraries + numLibraries * numFilesInLib), __FILE__, + __LINE__); return -1; } - log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib); + log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, + numLibraries, numFilesInLib); - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! 
(in %s:%d)\n", numLines, __FILE__, __LINE__ ); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2*numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &my_programs_and_libraries[0], 2 * numLines + 2, lines); - if( my_programs_and_libraries[0] == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &my_programs_and_libraries[0], 2 * numLines + 2, lines); + if (my_programs_and_libraries[0] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! 
(%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_programs_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_programs_and_libraries[0], 1, &deviceID, NULL, + 0, NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! 
(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Copy already compiled kernels */ - for( i = 0; i < numLibraries*numFilesInLib; i++) { - my_programs_and_libraries[i+1] = simple_kernels[i]; + for (i = 0; i < numLibraries * numFilesInLib; i++) + { + my_programs_and_libraries[i + 1] = simple_kernels[i]; } /* Create library out of compiled templated kernels */ - for( i = 0; i < numLibraries; i++) { - my_programs_and_libraries[i+1+numLibraries*numFilesInLib] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+(i*numFilesInLib+numLibraries*numFilesInLib), NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + for (i = 0; i < numLibraries; i++) + { + my_programs_and_libraries[i + 1 + numLibraries * numFilesInLib] = + clLinkProgram( + context, 1, &deviceID, "-create-library", numFilesInLib, + simple_kernels + + (i * numFilesInLib + numLibraries * numFilesInLib), + NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); } - /* Link the program that calls the kernels and the library that contains them */ - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1+numLibraries*numFilesInLib, my_programs_and_libraries, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + /* Link the program that calls the kernels and the library that contains + * them */ + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, + numLibraries + 1 + numLibraries * numFilesInLib, + 
my_programs_and_libraries, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - for(i = 0; i < numLibraries+1+numLibraries*numFilesInLib; i++) { - error = clReleaseProgram( my_programs_and_libraries[i] ); - test_error( error, "Unable to release program object" ); + for (i = 0; i < numLibraries + 1 + numLibraries * numFilesInLib; i++) + { + error = clReleaseProgram(my_programs_and_libraries[i]); + test_error(error, "Unable to release program object"); } - free( my_programs_and_libraries ); + free(my_programs_and_libraries); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); + free(lines); - for(i = numLibraries*numFilesInLib; i < numLines; i++) + for (i = numLibraries * numFilesInLib; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = 
clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_files_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_files_multiple_libraries(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int toTest[] = { 8, 32, 128, 256, + 0 }; // 512, 2048, 8192, 32768, 0 }; unsigned int i; - log_info( "Testing multiple files and multiple libraries ...this might take awhile...\n" ); + log_info("Testing multiple files and multiple libraries ...this might take " + "awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_files_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_files_multiple_libraries(context, deviceID, + queue, toTest[i]) + != 0) { - log_error( "ERROR: multiple files, multiple libraries program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple files, multiple libraries program test " + "failed for %d lines! 
(in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_large_multiple_files(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multiple_files(cl_context context, cl_device_id deviceID, + cl_command_queue queue, unsigned int numLines) { int error; const char **lines; unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; - cl_program *my_programs = (cl_program*)malloc((1+numLines)*sizeof(cl_program)); + cl_program *my_programs = + (cl_program *)malloc((1 + numLines) * sizeof(cl_program)); - if (my_programs == NULL) { - log_error( "ERROR: Unable to allocate my_programs array with %d programs! (in %s:%d)\n", (1+numLines), __FILE__, __LINE__); + if (my_programs == NULL) + { + log_error("ERROR: Unable to allocate my_programs array with %d " + "programs! (in %s:%d)\n", + (1 + numLines), __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2* numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &my_programs[0], 2 * numLines + 2, lines); - if( my_programs[0] == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &my_programs[0], + 2 * numLines + 2, lines); + if (my_programs[0] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! 
" + "(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_programs[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_programs[0], 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &my_programs[i + 1], 1, &kernel_source); - if( my_programs[i+1] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &my_programs[i + 1], 1, &kernel_source); + if (my_programs[i + 1] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! 
(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_programs[i+1], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_programs[i + 1], 1, &deviceID, NULL, 0, + NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } - /* Link the program that calls the kernels and the library that contains them */ - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1+numLines, my_programs, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + /* Link the program that calls the kernels and the library that contains + * them */ + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, 1 + numLines, my_programs, + NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! 
*/ - for(i = 0; i < 1+numLines; i++) { - error = clReleaseProgram( my_programs[i] ); - test_error( error, "Unable to release program object" ); + for (i = 0; i < 1 + numLines; i++) + { + error = clReleaseProgram(my_programs[i]); + test_error(error, "Unable to release program object"); } - free( my_programs ); - for(i = 0; i < numLines; i++) + free(my_programs); + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); + free(lines); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_files(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_files(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int toTest[] = { 8, 32, 128, 256, + 0 }; // 512, 2048, 8192, 32768, 0 }; unsigned int i; - log_info( "Testing multiple files compilation and linking into a single executable ...this might take awhile...\n" ); + log_info("Testing multiple files compilation and linking into a single " + "executable ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - 
gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_files( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_files(context, deviceID, queue, toTest[i]) != 0) { - log_error( "ERROR: multiple files program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple files program test failed for %d lines! " + "(in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_simple_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_compile_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple compilation only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_static_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_static_compile_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple static compilations only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_var); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_static_var); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple static variable test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple static variable test " + "program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling a static variable...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple static variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple static variable program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_struct); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_static_struct); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple static struct test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple static struct test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling a static struct...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple static variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple static variable program"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_function); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &program, 1, &compile_static_function); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple static function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple static function test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling a static function...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple static function program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple static function program"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple extern compilations only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_header); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_header); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern kernel test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern kernel test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern kernel...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern kernel program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern kernel program"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_var); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_extern_var); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern variable test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern variable test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern variable...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern variable program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_struct); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_extern_struct); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern struct test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern struct test " + "program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern struct...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern variable program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_function); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &program, 1, &compile_extern_function); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern function test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern function...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern function program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern function program"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -struct simple_user_data { - const char* m_message; - cl_event m_event; +struct simple_user_data +{ + const char *m_message; + cl_event m_event; }; -const char* once_upon_a_midnight_dreary = "Once upon a midnight dreary!"; +const char *once_upon_a_midnight_dreary = "Once upon a midnight dreary!"; -static void CL_CALLBACK simple_compile_callback(cl_program program, void* user_data) +static void CL_CALLBACK simple_compile_callback(cl_program program, + void *user_data) { - simple_user_data* simple_compile_user_data = (simple_user_data*)user_data; - log_info("in the simple_compile_callback: program %p just completed compiling with '%s'\n", program, simple_compile_user_data->m_message); - if (strcmp(once_upon_a_midnight_dreary, simple_compile_user_data->m_message) != 0) + simple_user_data *simple_compile_user_data = (simple_user_data *)user_data; + log_info("in the simple_compile_callback: program %p just completed " + "compiling with '%s'\n", + program, simple_compile_user_data->m_message); + if (strcmp(once_upon_a_midnight_dreary, simple_compile_user_data->m_message) + != 0) { - log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s (in %s:%d)!\n", once_upon_a_midnight_dreary, simple_compile_user_data->m_message, __FILE__, __LINE__); + log_error("ERROR: in the simple_compile_callback: Expected '%s' and " + "got %s (in %s:%d)!\n", + once_upon_a_midnight_dreary, + simple_compile_user_data->m_message, __FILE__, __LINE__); } int error; - log_info("in the simple_compile_callback: program %p just completed compiling with '%p'\n", program, simple_compile_user_data->m_event); + log_info("in the simple_compile_callback: program %p just completed " + "compiling with '%p'\n", + program, simple_compile_user_data->m_event); - error = 
clSetUserEventStatus(simple_compile_user_data->m_event, CL_COMPLETE); + error = + clSetUserEventStatus(simple_compile_user_data->m_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: in the simple_compile_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: in the simple_compile_callback: Unable to set user " + "event status to CL_COMPLETE! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the simple_compile_callback: Successfully signaled compile_program_completion_event!\n"); + log_info("in the simple_compile_callback: Successfully signaled " + "compile_program_completion_event!\n"); } -int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; cl_event compile_program_completion_event; log_info("Testing a simple compilation with callback...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } compile_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - simple_user_data simple_compile_user_data = {once_upon_a_midnight_dreary, compile_program_completion_event}; + simple_user_data simple_compile_user_data = { + once_upon_a_midnight_dreary, compile_program_completion_event + }; - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, simple_compile_callback, (void*)&simple_compile_user_data); - test_error( error, "Unable to compile a simple program with a callback" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, + simple_compile_callback, + (void *)&simple_compile_user_data); + test_error(error, "Unable to compile a simple program with a callback"); error = clWaitForEvents(1, &compile_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event"); + test_error(error, + "clWaitForEvents failed when waiting on " + "compile_program_completion_event"); /* All done! 
*/ error = clReleaseEvent(compile_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_embedded_header_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_embedded_header_compile(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, header; log_info("Testing a simple embedded header compile only...\n"); - program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); - if( header == NULL || error != CL_SUCCESS ) + header = + clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if (header == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple header program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); - test_error( error, "Unable to compile a simple program with embedded header" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, + &simple_header_name, NULL, NULL); + test_error(error, + "Unable to compile a simple program with embedded header"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( header ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(header); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_link_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_link_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple linking only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_regular_variable_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_regular_variable_access(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, second_program, my_newly_linked_program; - const char* sources[2] = {simple_kernel, compile_regular_var}; // here we want to avoid linking error due to lack of kernels - log_info("Compiling and linking two program objects, where one tries to access regular variable from another...\n"); - error = create_single_kernel_helper_create_program(context, &program, 2, sources); - if( program == NULL || error != CL_SUCCESS ) + const char *sources[2] = { + simple_kernel, compile_regular_var + }; // here we want to avoid linking error due to lack of 
kernels + log_info("Compiling and linking two program objects, where one tries to " + "access regular variable from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, + sources); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program with regular variable! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program with regular " + "variable! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program with regular function" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, + "Unable to compile a simple program with regular function"); - error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_var_access); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &second_program, 1, &link_static_var_access); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program that tries to access a regular variable! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program that tries to access " + "a regular variable! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a program that tries to access a regular variable" ); + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error( + error, + "Unable to compile a program that tries to access a regular variable"); cl_program two_programs[2] = { program, second_program }; - my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); - test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular variable" ); + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, + two_programs, NULL, NULL, &error); + test_error(error, + "clLinkProgram: Expected a different error code while linking a " + "program that tries to access a regular variable"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( second_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(second_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_regular_struct_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_regular_struct_access(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, second_program, my_newly_linked_program; - const char* sources[2] = {simple_kernel, compile_regular_struct}; // here we want to avoid linking error due to lack of kernels - log_info("Compiling and linking two program objects, where one tries to access regular struct from another...\n"); - error = create_single_kernel_helper_create_program(context, &program, 2, sources); - if( program == NULL || error != CL_SUCCESS ) + const char *sources[2] = { + simple_kernel, compile_regular_struct + }; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to " + "access regular struct from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, + sources); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program with regular struct! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program with regular struct! 
" + "(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program with regular struct" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program with regular struct"); - error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_struct_access); - if( second_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &second_program, 1, &link_static_struct_access); + if (second_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program that tries to access a regular struct! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program that tries to access " + "a regular struct! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a program that tries to access a regular struct" ); + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error( + error, + "Unable to compile a program that tries to access a regular struct"); cl_program two_programs[2] = { program, second_program }; - my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); - test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular struct" ); + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, + two_programs, NULL, NULL, &error); + test_error(error, + "clLinkProgram: Expected a different error code while linking a " + "program that tries to access a regular 
struct"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( second_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(second_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_regular_function_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_regular_function_access(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, second_program, my_newly_linked_program; - const char* sources[2] = {simple_kernel, compile_regular_function}; // here we want to avoid linking error due to lack of kernels - log_info("Compiling and linking two program objects, where one tries to access regular function from another...\n"); - error = create_single_kernel_helper_create_program(context, &program, 2, sources); - if( program == NULL || error != CL_SUCCESS ) + const char *sources[2] = { + simple_kernel, compile_regular_function + }; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to " + "access regular function from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, + sources); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program with regular function! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program with regular " + "function! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program with regular function" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, + "Unable to compile a simple program with regular function"); - error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_function_access); - if( second_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &second_program, 1, &link_static_function_access); + if (second_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program that tries to access a regular function! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program that tries to access " + "a regular function! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a program that tries to access a regular function" ); + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error( + error, + "Unable to compile a program that tries to access a regular function"); cl_program two_programs[2] = { program, second_program }; - my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); - test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular function" ); + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, + two_programs, NULL, NULL, &error); + test_error(error, + "clLinkProgram: Expected a different error code while linking a " + "program that tries to access a regular function"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( second_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(second_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, header, simple_program; log_info("Testing a simple embedded header link...\n"); - program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); - if( header == NULL || error != CL_SUCCESS ) + header = + clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if (header == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); - test_error( error, "Unable to compile a simple program with embedded header" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, + &simple_header_name, NULL, NULL); + test_error(error, + "Unable to compile a simple program with embedded header"); - error = create_single_kernel_helper_create_program(context, &simple_program, 1, &simple_kernel); - if( simple_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &simple_program, + 1, &simple_kernel); + if (simple_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, simple_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from two binaries, one compiled " + "with embedded header"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( header ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(header); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( simple_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -const char* when_i_pondered_weak_and_weary = "When I pondered weak and weary!"; +const char *when_i_pondered_weak_and_weary = "When I pondered weak and weary!"; -static void CL_CALLBACK simple_link_callback(cl_program program, void* user_data) +static void CL_CALLBACK simple_link_callback(cl_program program, + void *user_data) { - simple_user_data* simple_link_user_data = (simple_user_data*)user_data; - log_info("in the simple_link_callback: program %p just completed linking with '%s'\n", program, (const char*)simple_link_user_data->m_message); - if (strcmp(when_i_pondered_weak_and_weary, simple_link_user_data->m_message) != 0) + simple_user_data *simple_link_user_data = (simple_user_data *)user_data; + log_info("in the simple_link_callback: program %p just completed linking " + "with '%s'\n", + program, (const char *)simple_link_user_data->m_message); + if (strcmp(when_i_pondered_weak_and_weary, simple_link_user_data->m_message) + != 0) { - log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s! 
(in %s:%d)\n", when_i_pondered_weak_and_weary, simple_link_user_data->m_message, __FILE__, __LINE__); + log_error("ERROR: in the simple_compile_callback: Expected '%s' and " + "got %s! (in %s:%d)\n", + when_i_pondered_weak_and_weary, + simple_link_user_data->m_message, __FILE__, __LINE__); } int error; - log_info("in the simple_link_callback: program %p just completed linking with '%p'\n", program, simple_link_user_data->m_event); + log_info("in the simple_link_callback: program %p just completed linking " + "with '%p'\n", + program, simple_link_user_data->m_event); error = clSetUserEventStatus(simple_link_user_data->m_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: simple_link_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: simple_link_callback: Unable to set user event " + "status to CL_COMPLETE! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the simple_link_callback: Successfully signaled link_program_completion_event event!\n"); + log_info("in the simple_link_callback: Successfully signaled " + "link_program_completion_event event!\n"); } -int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; cl_event link_program_completion_event; log_info("Testing a simple linking with callback...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); link_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event}; + simple_user_data simple_link_user_data = { when_i_pondered_weak_and_weary, + link_program_completion_event }; - cl_program my_linked_library = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, simple_link_callback, (void*)&simple_link_user_data, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_linked_library = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, simple_link_callback, + (void *)&simple_link_user_data, &error); + test_error(error, "Unable to link a simple program"); error = clWaitForEvents(1, &link_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + test_error( + error, + "clWaitForEvents failed when waiting on link_program_completion_event"); /* All done! 
*/ error = clReleaseEvent(link_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_linked_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_linked_library); + test_error(error, "Unable to release program object"); return 0; } -static void initBuffer(float* & srcBuffer, unsigned int cnDimension) +static void initBuffer(float *&srcBuffer, unsigned int cnDimension) { float num = 0.0f; - for( unsigned int i = 0; i < cnDimension; i++ ) + for (unsigned int i = 0; i < cnDimension; i++) { - if( ( i % 10 ) == 0 ) + if ((i % 10) == 0) { num = 0.0f; } - srcBuffer[ i ] = num; + srcBuffer[i] = num; num = num + 1.0f; } } -static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel) +static int verifyCopyBuffer(cl_context context, cl_command_queue queue, + cl_kernel kernel) { int error, result = CL_SUCCESS; const size_t cnDimension = 32; // Allocate source buffer - float * srcBuffer = (float*)malloc(cnDimension * sizeof(float)); - float * dstBuffer = (float*)malloc(cnDimension * sizeof(float)); + float *srcBuffer = (float *)malloc(cnDimension * sizeof(float)); + float *dstBuffer = (float *)malloc(cnDimension * sizeof(float)); - if (srcBuffer == NULL) { - log_error( "ERROR: Unable to allocate srcBuffer float array with %lu floats! (in %s:%d)\n", cnDimension, __FILE__, __LINE__); + if (srcBuffer == NULL) + { + log_error("ERROR: Unable to allocate srcBuffer float array with %lu " + "floats! (in %s:%d)\n", + cnDimension, __FILE__, __LINE__); return -1; } - if (dstBuffer == NULL) { - log_error( "ERROR: Unable to allocate dstBuffer float array with %lu floats! 
(in %s:%d)\n", cnDimension, __FILE__, __LINE__); + if (dstBuffer == NULL) + { + log_error("ERROR: Unable to allocate dstBuffer float array with %lu " + "floats! (in %s:%d)\n", + cnDimension, __FILE__, __LINE__); return -1; } - if( srcBuffer && dstBuffer ) + if (srcBuffer && dstBuffer) { // initialize host memory - initBuffer(srcBuffer, cnDimension ); + initBuffer(srcBuffer, cnDimension); // Allocate device memory - cl_mem deviceMemSrc = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - cnDimension * sizeof( cl_float ), srcBuffer, &error); - test_error( error, "Unable to create a source memory buffer" ); + cl_mem deviceMemSrc = + clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + cnDimension * sizeof(cl_float), srcBuffer, &error); + test_error(error, "Unable to create a source memory buffer"); - cl_mem deviceMemDst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - cnDimension * sizeof( cl_float ), 0, &error); - test_error( error, "Unable to create a destination memory buffer" ); + cl_mem deviceMemDst = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, + cnDimension * sizeof(cl_float), 0, &error); + test_error(error, "Unable to create a destination memory buffer"); // Set kernel args // Set parameter 0 to be the source buffer - error = clSetKernelArg(kernel, 0, sizeof( cl_mem ), ( void * )&deviceMemSrc ); - test_error( error, "Unable to set the first kernel argument" ); + error = + clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&deviceMemSrc); + test_error(error, "Unable to set the first kernel argument"); // Set parameter 1 to be the destination buffer - error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), ( void * )&deviceMemDst ); - test_error( error, "Unable to set the second kernel argument" ); + error = + clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&deviceMemDst); + test_error(error, "Unable to set the second kernel argument"); // Execute kernel - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, - &cnDimension, 0, 0, NULL, 
NULL ); - test_error( error, "Unable to enqueue kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &cnDimension, 0, + 0, NULL, NULL); + test_error(error, "Unable to enqueue kernel"); - error = clFlush( queue ); - test_error( error, "Unable to flush the queue" ); + error = clFlush(queue); + test_error(error, "Unable to flush the queue"); // copy results from device back to host - error = clEnqueueReadBuffer(queue, deviceMemDst, CL_TRUE, 0, cnDimension * sizeof( cl_float ), - dstBuffer, 0, NULL, NULL ); - test_error( error, "Unable to read the destination buffer" ); + error = clEnqueueReadBuffer(queue, deviceMemDst, CL_TRUE, 0, + cnDimension * sizeof(cl_float), dstBuffer, + 0, NULL, NULL); + test_error(error, "Unable to read the destination buffer"); - error = clFlush( queue ); - test_error( error, "Unable to flush the queue" ); + error = clFlush(queue); + test_error(error, "Unable to flush the queue"); // Compare the source and destination buffers - const int* pSrc = (int*)srcBuffer; - const int* pDst = (int*)dstBuffer; + const int *pSrc = (int *)srcBuffer; + const int *pDst = (int *)dstBuffer; int mismatch = 0; - for( size_t i = 0; i < cnDimension; i++ ) + for (size_t i = 0; i < cnDimension; i++) { - if( pSrc[i] != pDst[i] ) + if (pSrc[i] != pDst[i]) { - if( mismatch < 4 ) + if (mismatch < 4) { - log_info("Offset %08lX: Expected %08X, Got %08X\n", i * 4, pSrc[i], pDst[i] ); + log_info("Offset %08lX: Expected %08X, Got %08X\n", i * 4, + pSrc[i], pDst[i]); } else { @@ -1700,9 +2078,9 @@ static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kerne } } - if( mismatch ) + if (mismatch) { - log_info("*** %d mismatches found, TEST FAILS! ***\n", mismatch ); + log_info("*** %d mismatches found, TEST FAILS! 
***\n", mismatch); result = -1; } else @@ -1710,806 +2088,989 @@ static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kerne log_info("Buffers match, test passes.\n"); } - free( srcBuffer ); + free(srcBuffer); srcBuffer = NULL; - free( dstBuffer ); + free(dstBuffer); dstBuffer = NULL; - if( deviceMemSrc ) + if (deviceMemSrc) { - error = clReleaseMemObject( deviceMemSrc ); - test_error( error, "Unable to release memory object" ); + error = clReleaseMemObject(deviceMemSrc); + test_error(error, "Unable to release memory object"); } - if( deviceMemDst ) + if (deviceMemDst) { - error = clReleaseMemObject( deviceMemDst ); - test_error( error, "Unable to release memory object" ); + error = clReleaseMemObject(deviceMemDst); + test_error(error, "Unable to release memory object"); } } return result; } -int test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program; log_info("Testing execution after a simple compile and link...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! 
*/ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_simple_compile_and_link_no_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link_no_device_info( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { int error; cl_program program; - log_info("Testing execution after a simple compile and link with no device information provided...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing execution after a simple compile and link with no device " + "information provided...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } error = clCompileProgram(program, 0, NULL, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 0, NULL, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = + clLinkProgram(context, 0, NULL, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! 
*/ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link_with_defines( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { int error; cl_program program; - log_info("Testing execution after a simple compile and link with defines...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel_with_defines, "-DFIRST=5 -DSECOND=37"); - if( program == NULL || error != CL_SUCCESS ) + log_info( + "Testing execution after a simple compile and link with defines...\n"); + error = create_single_kernel_helper_create_program( + context, &program, 1, &simple_kernel_with_defines, + "-DFIRST=5 -DSECOND=37"); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, "-DFIRST=5 -DSECOND=37", 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, "-DFIRST=5 -DSECOND=37", 0, + NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! 
*/ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_serialize_reload_object(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program; - size_t binarySize; + size_t binarySize; unsigned char *binary; - log_info("Testing execution after serialization and reloading of the object...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing execution after serialization and reloading of the " + "object...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with %lu " + "characters! 
(in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + log_error("ERROR: Expected size of binaries array in clGetProgramInfo " + "is incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buffers), + &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - cl_program program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); + cl_program program_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program_with_binary, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = + 
clLinkProgram(context, 1, &deviceID, NULL, 1, &program_with_binary, + NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( program_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program_with_binary); + test_error(error, "Unable to release program object"); free(binary); return 0; } -int test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_serialize_reload_library(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, another_program; - size_t binarySize; + size_t binarySize; unsigned char *binary; - log_info("Testing execution after linking a binary with a simple library...\n"); + log_info( + "Testing execution after linking a binary with a simple library...\n"); // we will 
test creation of a simple library from one file - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); // Get the size of the resulting library (only one device) - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary - binary = (unsigned 
char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters (in %s:%d)!", binarySize, __FILE__, __LINE__); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with %lu " + "characters (in %s:%d)!", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) - { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, 0, + NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) + { + log_error("ERROR: Expected size of binaries array in clGetProgramInfo " + "is incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, + sizeof(buffers), &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" 
); + cl_program library_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, library_with_binary }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + library_with_binary }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = 
verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( library_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(library_with_binary); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = 
clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); free(binary); return 0; } -static void CL_CALLBACK program_compile_completion_callback(cl_program program, void* user_data) +static void CL_CALLBACK program_compile_completion_callback(cl_program program, + void *user_data) { int error; cl_event compile_program_completion_event = (cl_event)user_data; - log_info("in the program_compile_completion_callback: program %p just completed compiling with '%p'\n", program, compile_program_completion_event); + log_info("in the program_compile_completion_callback: program %p just " + "completed compiling with '%p'\n", + program, compile_program_completion_event); error = clSetUserEventStatus(compile_program_completion_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: in the program_compile_completion_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: in the program_compile_completion_callback: Unable " + "to set user event status to CL_COMPLETE! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the program_compile_completion_callback: Successfully signaled compile_program_completion_event event!\n"); + log_info("in the program_compile_completion_callback: Successfully " + "signaled compile_program_completion_event event!\n"); } -static void CL_CALLBACK program_link_completion_callback(cl_program program, void* user_data) +static void CL_CALLBACK program_link_completion_callback(cl_program program, + void *user_data) { int error; cl_event link_program_completion_event = (cl_event)user_data; - log_info("in the program_link_completion_callback: program %p just completed linking with '%p'\n", program, link_program_completion_event); + log_info("in the program_link_completion_callback: program %p just " + "completed linking with '%p'\n", + program, link_program_completion_event); error = clSetUserEventStatus(link_program_completion_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: in the program_link_completion_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: in the program_link_completion_callback: Unable to " + "set user event status to CL_COMPLETE! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the program_link_completion_callback: Successfully signaled link_program_completion_event event!\n"); + log_info("in the program_link_completion_callback: Successfully signaled " + "link_program_completion_event event!\n"); } -int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link_with_callbacks( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { int error; cl_program program; cl_event compile_program_completion_event, link_program_completion_event; - log_info("Testing execution after a simple compile and link with callbacks...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing execution after a simple compile and link with " + "callbacks...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } compile_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, - program_compile_completion_callback, (void*)compile_program_completion_event); - test_error( error, "Unable to compile a simple program" ); + program_compile_completion_callback, + (void *)compile_program_completion_event); + test_error(error, "Unable to compile a simple program"); error = clWaitForEvents(1, &compile_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event"); + test_error(error, + "clWaitForEvents failed when waiting on " + "compile_program_completion_event"); error = clReleaseEvent(compile_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); link_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, - program_link_completion_callback, (void*)link_program_completion_event, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, 1, &program, + program_link_completion_callback, + (void *)link_program_completion_event, &error); + test_error(error, "Unable to link a simple program"); error = clWaitForEvents(1, &link_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + test_error( + error, + "clWaitForEvents failed when waiting on link_program_completion_event"); error = 
clReleaseEvent(link_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_library_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_library_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing creation of a simple library...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_library_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_library_with_callback(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; cl_event link_program_completion_event; log_info("Testing creation of a simple library with a callback...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); link_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event}; + simple_user_data simple_link_user_data = { when_i_pondered_weak_and_weary, + link_program_completion_event }; - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, - simple_link_callback, (void*)&simple_link_user_data, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = clLinkProgram( + context, 1, &deviceID, "-create-library", 1, &program, + simple_link_callback, (void *)&simple_link_user_data, &error); + test_error(error, "Unable to create a simple library"); error = clWaitForEvents(1, &link_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + test_error( + error, + "clWaitForEvents failed when waiting on link_program_completion_event"); /* All done! 
*/ error = clReleaseEvent(link_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_library_with_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, another_program; log_info("Testing creation and linking with a simple library...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, my_newly_minted_library }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + my_newly_minted_library }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int 
test_execute_after_simple_library_with_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, another_program; - log_info("Testing execution after linking a binary with a simple library...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info( + "Testing execution after linking a binary with a simple library...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, my_newly_minted_library }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + my_newly_minted_library }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! 
*/ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, another_program; log_info("Testing two file compiling and linking...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, another_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, "Unable to create an executable from two binaries"); /* All done! 
*/ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, another_program; - log_info("Testing two file compiling and linking and execution of two kernels afterwards ...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing two file compiling and linking and execution of two " + "kernels afterwards ...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, another_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, "Unable to create an executable from two binaries"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = 
verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_embedded_header_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, header, simple_program; log_info("Testing execution after 
embedded header link...\n"); // we will test execution after compiling and linking with embedded headers - program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); - if( header == NULL || error != CL_SUCCESS ) + header = + clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if (header == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); - test_error( error, "Unable to compile a simple program with embedded header" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, + &simple_header_name, NULL, NULL); + test_error(error, + "Unable to compile a simple program with embedded header"); - simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); - if( simple_program == NULL || error != CL_SUCCESS ) + simple_program = + clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); + if (simple_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, simple_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from two binaries, one compiled " + "with embedded header"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! 
*/ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( header ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(header); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( simple_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } #if defined(__APPLE__) || defined(__linux) -#define _mkdir(x) mkdir(x,S_IRWXU) +#define _mkdir(x) mkdir(x, S_IRWXU) #define _chdir chdir #define _rmdir rmdir #define _unlink unlink @@ -2517,461 +3078,602 @@ int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context co #include #endif -int test_execute_after_included_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_included_header_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, simple_program; log_info("Testing execution after included header link...\n"); // we will test execution after compiling and linking with included headers - program = 
clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } /* setup */ -#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) +#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) /* Some tests systems doesn't allow one to write in the test directory */ - if (_chdir("/tmp") != 0) { - log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("/tmp") != 0) + { + log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } #endif - if (_mkdir("foo") != 0) { - log_error( "ERROR: Unable to create directory foo! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_mkdir("foo") != 0) + { + log_error("ERROR: Unable to create directory foo! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_mkdir("foo/bar") != 0) { - log_error( "ERROR: Unable to create directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_mkdir("foo/bar") != 0) + { + log_error("ERROR: Unable to create directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_chdir("foo/bar") != 0) { - log_error( "ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("foo/bar") != 0) + { + log_error("ERROR: Unable to change to directory foo/bar! 
(in %s:%d)\n", + __FILE__, __LINE__); return -1; } - FILE* simple_header_file = fopen(simple_header_name, "w"); - if (simple_header_file == NULL) { - log_error( "ERROR: Unable to create simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ ); + FILE *simple_header_file = fopen(simple_header_name, "w"); + if (simple_header_file == NULL) + { + log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (fprintf(simple_header_file, "%s", simple_header) < 0) { - log_error( "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__); + if (fprintf(simple_header_file, "%s", simple_header) < 0) + { + log_error( + "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (fclose(simple_header_file) != 0) { - log_error( "ERROR: Unable to close simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__); + if (fclose(simple_header_file) != 0) + { + log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (_chdir("../..") != 0) { - log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__); + if (_chdir("../..") != 0) + { + log_error("ERROR: Unable to change to original working directory! 
(in " + "%s:%d)\n", + __FILE__, __LINE__); return -1; } -#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) - error = clCompileProgram(program, 1, &deviceID, "-I/tmp/foo/bar", 0, NULL, NULL, NULL, NULL); +#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) + error = clCompileProgram(program, 1, &deviceID, "-I/tmp/foo/bar", 0, NULL, + NULL, NULL, NULL); #else - error = clCompileProgram(program, 1, &deviceID, "-Ifoo/bar", 0, NULL, NULL, NULL, NULL); + error = clCompileProgram(program, 1, &deviceID, "-Ifoo/bar", 0, NULL, NULL, + NULL, NULL); #endif - test_error( error, "Unable to compile a simple program with included header" ); + test_error(error, + "Unable to compile a simple program with included header"); /* cleanup */ - if (_chdir("foo/bar") != 0) { - log_error( "ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("foo/bar") != 0) + { + log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_unlink(simple_header_name) != 0) { - log_error( "ERROR: Unable to remove simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ ); + if (_unlink(simple_header_name) != 0) + { + log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (_chdir("../..") != 0) { - log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("../..") != 0) + { + log_error("ERROR: Unable to change to original working directory! (in " + "%s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_rmdir("foo/bar") != 0) { - log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_rmdir("foo/bar") != 0) + { + log_error("ERROR: Unable to remove directory foo/bar! 
(in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_rmdir("foo") != 0) { - log_error( "ERROR: Unable to remove directory foo! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_rmdir("foo") != 0) + { + log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); - if( simple_program == NULL || error != CL_SUCCESS ) + simple_program = + clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); + if (simple_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, simple_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from two binaries, one compiled " + "with embedded header"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, 
kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( simple_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_program_binary_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_program_binary_type(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; - cl_program program, another_program, program_with_binary, fully_linked_program_with_binary; + cl_program program, another_program, 
program_with_binary, + fully_linked_program_with_binary; cl_program_binary_type program_type = -1; size_t size; - size_t binarySize; + size_t binarySize; unsigned char *binary; log_info("Testing querying of program binary type...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, + NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { - log_error( "ERROR: Expected program type of a just compiled program to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a just compiled program to " + "be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), 
&binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary { - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with " + "%lu characters! (in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + log_error( + "ERROR: Expected size of binaries array in clGetProgramInfo is " + "incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( program, 
CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buffers), + &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); - - error = clGetProgramBuildInfo (program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + program_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); + + error = clGetProgramBuildInfo( + program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { - log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a program created from " + "compiled object to be " + "CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; free(binary); } - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program_with_binary, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); - error = clGetProgramBuildInfo (my_newly_minted_library, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + cl_program 
my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, + &program_with_binary, NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); + error = clGetProgramBuildInfo( + my_newly_minted_library, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY) { - log_error( "ERROR: Expected program type of a just linked library to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a just linked library to be " + "CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; // Get the size of the resulting library (only one device) - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with %lu " + "characters! 
(in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) - { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, 0, + NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) + { + log_error("ERROR: Expected size of binaries array in clGetProgramInfo " + "is incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, + sizeof(buffers), &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); - error = clGetProgramBuildInfo (library_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + cl_program library_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create 
program with binary"); + error = clGetProgramBuildInfo( + library_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY) { - log_error( "ERROR: Expected program type of a library loaded with binary to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a library loaded with " + "binary to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; - free(binary); + free(binary); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, library_with_binary }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + library_with_binary }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); - error = clGetProgramBuildInfo (fully_linked_program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + error = clGetProgramBuildInfo( + fully_linked_program, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { - log_error( "ERROR: Expected program type of a newly build executable to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a newly build executable to " + "be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = 
clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary { - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with " + "%lu characters! (in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) + error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARIES, 0, + NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + log_error( + "ERROR: Expected size of binaries array in clGetProgramInfo is " + "incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, sizeof( buffers ), 
&buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARIES, + sizeof(buffers), &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - fully_linked_program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); - - error = clGetProgramBuildInfo (fully_linked_program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + fully_linked_program_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); + + error = clGetProgramBuildInfo( + fully_linked_program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { - log_error( "ERROR: Expected program type of a program created from a fully linked executable binary to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a program created from " + "a fully linked executable binary to be " + "CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; free(binary); } - error = clBuildProgram(fully_linked_program_with_binary, 1, &deviceID, NULL, NULL, NULL); - test_error( error, "Unable to build a simple program" ); + error = clBuildProgram(fully_linked_program_with_binary, 1, &deviceID, NULL, + NULL, NULL); + test_error(error, "Unable to build a simple program"); - cl_kernel kernel = clCreateKernel(fully_linked_program_with_binary, "CopyBuffer", 
&error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program_with_binary, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program_with_binary, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = clCreateKernel(fully_linked_program_with_binary, + "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - /* Oh, one more thing. Steve Jobs and apparently Herb Sutter. The question is "Who is copying whom?" */ - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + /* Oh, one more thing. Steve Jobs and apparently Herb Sutter. The question + * is "Who is copying whom?" 
*/ + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } error = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); - test_error( error, "Unable to build a simple program" ); - error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + test_error(error, "Unable to build a simple program"); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, + NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { - log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + log_error( + "ERROR: Expected program type of a program created from compiled " + "object to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; /* All's well that ends well. 
William Shakespeare */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( library_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(library_with_binary); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program_with_binary); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( program_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program_with_binary); + test_error(error, "Unable to release program object"); return 0; } -volatile int compileNotificationSent; +volatile int compileNotificationSent; -void CL_CALLBACK test_notify_compile_complete( cl_program program, void *userData ) +void CL_CALLBACK test_notify_compile_complete(cl_program program, + void *userData) { - if( userData == NULL || strcmp( (char *)userData, "compilation" ) != 0 ) + if (userData == NULL || strcmp((char *)userData, "compilation") != 0) { - log_error( "ERROR: User data passed in to 
compile notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: User data passed in to compile notify function was " + "not correct! (in %s:%d)\n", + __FILE__, __LINE__); compileNotificationSent = -1; } else compileNotificationSent = 1; - log_info( "\n <-- program successfully compiled\n" ); + log_info("\n <-- program successfully compiled\n"); } -volatile int libraryCreationNotificationSent; +volatile int libraryCreationNotificationSent; -void CL_CALLBACK test_notify_create_library_complete( cl_program program, void *userData ) +void CL_CALLBACK test_notify_create_library_complete(cl_program program, + void *userData) { - if( userData == NULL || strcmp( (char *)userData, "create library" ) != 0 ) + if (userData == NULL || strcmp((char *)userData, "create library") != 0) { - log_error( "ERROR: User data passed in to library creation notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: User data passed in to library creation notify " + "function was not correct! (in %s:%d)\n", + __FILE__, __LINE__); libraryCreationNotificationSent = -1; } else libraryCreationNotificationSent = 1; - log_info( "\n <-- library successfully created\n" ); + log_info("\n <-- library successfully created\n"); } -volatile int linkNotificationSent; +volatile int linkNotificationSent; -void CL_CALLBACK test_notify_link_complete( cl_program program, void *userData ) +void CL_CALLBACK test_notify_link_complete(cl_program program, void *userData) { - if( userData == NULL || strcmp( (char *)userData, "linking" ) != 0 ) + if (userData == NULL || strcmp((char *)userData, "linking") != 0) { - log_error( "ERROR: User data passed in to link notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: User data passed in to link notify function was not " + "correct! 
(in %s:%d)\n", + __FILE__, __LINE__); linkNotificationSent = -1; } else linkNotificationSent = 1; - log_info( "\n <-- program successfully linked\n" ); + log_info("\n <-- program successfully linked\n"); } -int test_large_compile_and_link_status_options_log(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_compile_and_link_status_options_log(cl_context context, + cl_device_id deviceID, + cl_command_queue queue, + unsigned int numLines) { int error; cl_program program; - cl_program * simple_kernels; + cl_program *simple_kernels; const char **lines; unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; @@ -2984,263 +3686,349 @@ int test_large_compile_and_link_status_options_log(cl_context context, cl_device cl_build_status status; size_t size_ret; - compileNotificationSent = libraryCreationNotificationSent = linkNotificationSent = 0; + compileNotificationSent = libraryCreationNotificationSent = + linkNotificationSent = 0; - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2*numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, + 2 * numLines + 2, lines); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Lets check that the compilation status is CL_BUILD_NONE */ - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program compile status" ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Unable to get program compile status"); if (status != CL_BUILD_NONE) { - log_error( "ERROR: Expected compile status to be CL_BUILD_NONE prior to the beginning of the compilation! 
(status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: Expected compile status to be CL_BUILD_NONE prior to " + "the beginning of the compilation! (status: %d in %s:%d)\n", + (int)status, __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, test_notify_compile_complete, (void *)"compilation"); - test_error( error, "Unable to compile a simple program" ); + error = + clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, + test_notify_compile_complete, (void *)"compilation"); + test_error(error, "Unable to compile a simple program"); - /* Wait for compile to complete (just keep polling, since we're just a test */ - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program compile status" ); + /* Wait for compile to complete (just keep polling, since we're just a test + */ + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Unable to get program compile status"); - while( (int)status == CL_BUILD_IN_PROGRESS ) + while ((int)status == CL_BUILD_IN_PROGRESS) { - log_info( "\n -- still waiting for compile... (status is %d)", status ); - sleep( 1 ); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program compile status" ); + log_info("\n -- still waiting for compile... (status is %d)", status); + sleep(1); + error = + clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Unable to get program compile status"); } - if( status != CL_BUILD_SUCCESS ) + if (status != CL_BUILD_SUCCESS) { - log_error( "ERROR: compile failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: compile failed! 
(status: %d in %s:%d)\n", (int)status, + __FILE__, __LINE__); return -1; } - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); - test_error( error, "Device failed to return compile log size" ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, + NULL, &size_ret); + test_error(error, "Device failed to return compile log size"); compile_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, compile_log, NULL ); - if (error != CL_SUCCESS){ - log_error("Device failed to return a compile log (in %s:%d)\n", __FILE__, __LINE__); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, + size_ret, compile_log, NULL); + if (error != CL_SUCCESS) + { + log_error("Device failed to return a compile log (in %s:%d)\n", + __FILE__, __LINE__); test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); } log_info("BUILD LOG: %s\n", compile_log); free(compile_log); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_OPTIONS, + 0, NULL, &size_ret); test_error(error, "Device failed to return compile options size"); compile_options = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, compile_options, NULL ); - test_error(error, "Device failed to return compile options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_OPTIONS, + size_ret, compile_options, NULL); + test_error( + error, + "Device failed to return compile options.\nclGetProgramBuildInfo " + "CL_PROGRAM_BUILD_OPTIONS failed"); log_info("BUILD OPTIONS: %s\n", compile_options); free(compile_options); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { 
sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! (%s in %s:%d)", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, test_notify_create_library_complete, (void *)"create library", &error); - test_error( error, "Unable to create a multi-line library" ); + cl_program my_newly_minted_library = clLinkProgram( + context, 1, &deviceID, "-create-library", numLines, simple_kernels, + test_notify_create_library_complete, (void *)"create library", &error); + test_error(error, "Unable to create a multi-line library"); - /* Wait for library creation to complete (just keep polling, since we're just a test */ - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get library creation 
link status" ); + /* Wait for library creation to complete (just keep polling, since we're + * just a test */ + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get library creation link status"); - while( (int)status == CL_BUILD_IN_PROGRESS ) + while ((int)status == CL_BUILD_IN_PROGRESS) { - log_info( "\n -- still waiting for library creation... (status is %d)", status ); - sleep( 1 ); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get library creation link status" ); + log_info("\n -- still waiting for library creation... (status is %d)", + status); + sleep(1); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get library creation link status"); } - if( status != CL_BUILD_SUCCESS ) + if (status != CL_BUILD_SUCCESS) { - log_error( "ERROR: library creation failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: library creation failed! 
(status: %d in %s:%d)\n", + (int)status, __FILE__, __LINE__); return -1; } - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); - test_error( error, "Device failed to return a library creation log size" ); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error(error, "Device failed to return a library creation log size"); library_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, library_log, NULL ); - if (error != CL_SUCCESS) { - log_error("Device failed to return a library creation log (in %s:%d)\n", __FILE__, __LINE__); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_LOG, size_ret, library_log, + NULL); + if (error != CL_SUCCESS) + { + log_error("Device failed to return a library creation log (in %s:%d)\n", + __FILE__, __LINE__); test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); } log_info("CREATE LIBRARY LOG: %s\n", library_log); free(library_log); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret); test_error(error, "Device failed to return library creation options size"); library_options = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, library_options, NULL ); - test_error(error, "Device failed to return library creation options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_OPTIONS, size_ret, + library_options, NULL); + test_error( + error, + "Device failed to return library creation " + "options.\nclGetProgramBuildInfo 
CL_PROGRAM_BUILD_OPTIONS failed"); log_info("CREATE LIBRARY OPTIONS: %s\n", library_options); free(library_options); - /* Link the program that calls the kernels and the library that contains them */ + /* Link the program that calls the kernels and the library that contains + * them */ cl_program programs[2] = { program, my_newly_minted_library }; - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, test_notify_link_complete, (void *)"linking", &error); - test_error( error, "Unable to link a program with a library" ); + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, 2, programs, + test_notify_link_complete, (void *)"linking", &error); + test_error(error, "Unable to link a program with a library"); - /* Wait for linking to complete (just keep polling, since we're just a test */ - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program link status" ); + /* Wait for linking to complete (just keep polling, since we're just a test + */ + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get program link status"); - while( (int)status == CL_BUILD_IN_PROGRESS ) + while ((int)status == CL_BUILD_IN_PROGRESS) { - log_info( "\n -- still waiting for program linking... (status is %d)", status ); - sleep( 1 ); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program link status" ); + log_info("\n -- still waiting for program linking... 
(status is %d)", + status); + sleep(1); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get program link status"); } - if( status != CL_BUILD_SUCCESS ) + if (status != CL_BUILD_SUCCESS) { - log_error( "ERROR: program linking failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: program linking failed! (status: %d in %s:%d)\n", + (int)status, __FILE__, __LINE__); return -1; } - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); - test_error( error, "Device failed to return a linking log size" ); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error(error, "Device failed to return a linking log size"); linking_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, linking_log, NULL ); - if (error != CL_SUCCESS){ - log_error("Device failed to return a linking log (in %s:%d).\n", __FILE__, __LINE__); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_LOG, size_ret, linking_log, + NULL); + if (error != CL_SUCCESS) + { + log_error("Device failed to return a linking log (in %s:%d).\n", + __FILE__, __LINE__); test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); } log_info("BUILDING LOG: %s\n", linking_log); free(linking_log); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret); test_error(error, "Device failed to return linking options size"); linking_options = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, 
linking_options, NULL ); - test_error(error, "Device failed to return linking options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_OPTIONS, size_ret, + linking_options, NULL); + test_error( + error, + "Device failed to return linking options.\nclGetProgramBuildInfo " + "CL_PROGRAM_BUILD_OPTIONS failed"); log_info("BUILDING OPTIONS: %s\n", linking_options); free(linking_options); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! 
*/ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)lines[i+numLines+1] ); + free((void *)lines[i]); + free((void *)lines[i + numLines + 1]); } - free( lines ); + free(lines); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_compile_and_link_status_options_log(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_compile_and_link_status_options_log(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - unsigned int toTest[] = { 256, 0 }; //512, 1024, 8192, 16384, 32768, 0 }; + unsigned int toTest[] = { 256, 0 }; // 512, 1024, 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing Compile and Link Status, Options and Logging ...this might take awhile...\n" ); + log_info("Testing Compile and Link Status, Options 
and Logging ...this " + "might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_compile_and_link_status_options_log( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_compile_and_link_status_options_log(context, deviceID, + queue, toTest[i]) + != 0) { - log_error( "ERROR: large program compilation, linking, status, options and logging test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ ); + log_error( + "ERROR: large program compilation, linking, status, options " + "and logging test failed for %d lines! (in %s:%d)\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } -- cgit v1.2.3 From a7c33f8dc47ec3f1869a94284d3fbb15a3eacc13 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 10 Oct 2023 21:51:23 +0530 Subject: Add ffp-contract=off Compilation flag for CTS build (#1824) * Fix enqueue_flags test to use correct 
barrier type. Currently, enqueue_flags test uses CLK_LOCAL_MEM_FENCE. Use CLK_GLOBAL_MEM_FENCE instead as all threads across work-groups need to wait here. * Add check for support for Read-Write images Read-Write images have required OpenCL 2.x. Read-Write image tests are already being skipped for 1.x devices. With OpenCL 3.0, read-write images being optional, the tests should be run or skipped depending on the implementation support. Add a check to decide if Read-Write images are supported or required to be supported depending on OpenCL version and decide if the tests should be run or skipped. Fixes issue #894 * Fix formatting in case of Read-Write image checks. Fix formatting in case of Read-write image checks. Also, combine two ifs into one in case of kernel_read_write tests * Fix some more formatting for RW-image checks Remove unnecessary spaces at various places. Also, fix lengthy lines. * Fix malloc-size calculation in test imagedim unsigned char size is silently assumed to be 1 in imagedim test of test_basic. Pass sizeof(type) in malloc size calculation. Also, change loop variable from signed to unsigned. Add checks for null pointer for malloced memory. * Initial CTS for external sharing extensions Initial set of tests for below extensions with Vulkan as producer 1. cl_khr_external_memory 2. cl_khr_external_memory_win32 3. cl_khr_external_memory_opaque_fd 4. cl_khr_external_semaphore 5. cl_khr_external_semaphore_win32 6. cl_khr_external_semaphore_opaque_fd * Updates to external sharing CTS Updates to external sharing CTS 1. Fix some build issues to remove unnecessary, non-existent files 2. Add new tests for platform and device queries. 3. Some added checks for VK Support. * Update CTS build script for Vulkan Headers Update CTS build to clone Vulkan Headers repo and pass it to CTS build in preparation for external memory and semaphore tests * Fix Vulkan header path Fix Vulkan header include path.
* Add Vulkan loader dependency Vulkan loader is required to build test_vulkan of OpenCL-CTS. Clone and build Vulkan loader as prerequisite to OpenCL-CTS. * Fix Vulkan loader path in test_vulkan Remove arch/os suffix in Vulkan loader path to match vulkan loader repo build. * Fix warnings around getHandle API. Return type of getHandle is defined differently based on win or linux builds. Use appropriate guards when using API at other places. While at it remove duplicate definition of ARRAY_SIZE. * Use ARRAY_SIZE in harness. Use already defined ARRAY_SIZE macro from test_harness. * Fix build issues for test_vulkan Fix build issues for test_vulkan 1. Add cl_ext.h in common files 2. Replace cl_mem_properties_khr with cl_mem_properties 3. Replace cl_external_mem_handle_type_khr with cl_external_memory_handle_type_khr 4. Type-cast malloc as required. * Fix code formatting. Fix code formatting to get CTS CI builds clean. * Fix formatting fixes part-2 Another set of formatting fixes. * Fix code formatting part-3 Some more code formatting fixes. * Fix code formatting issues part-4 More code formatting fixes. * Formatting fixes part-5 Some more formatting fixes * Fix formatting part-6 More formatting fixes continued. * Code formatting fixes part-7 Code formatting fixes for image * Code formatting fixes part-8 Fixes for platform and device query tests. * Code formatting fixes part-9 More formatting fixes for vulkan_wrapper * Code formatting fixes part-10 More fixes to wrapper header * Code formatting fixes part-11 Formatting fixes for api_list * Code formatting fixes part-12 Formatting fixes for api_list_map. * Code formatting changes part-13 Code formatting changes for utility. * Code formatting fixes part-15 Formatting fixes for wrapper. * Misc Code formatting fixes Some more misc code formatting fixes. * Fix build breaks due to code formatting Fix build issues that arose with recent code formatting issues.
* Fix presubmit script after merge Fix presubmit script after merge conflicts. * Fix Vulkan loader build in presubmit script. Use cmake ninja and appropriate toolchain for Vulkan loader dependency to fix linking issue on arm/aarch64. * Use static array sizes Use static array sizes to fix windows builds. * Some left-out formatting fixes. Fix remaining formatting issues. * Fix harness header path Fix harness header path While at it, remove Misc and test pragma. * Add/Fix license information Add Khronos License info for test_vulkan. Replace Apple license with Khronos as applicable. * Fix headers for Mac OSX builds. Use appropriate headers for Mac OSX builds * Fix Mac OSX builds. Use appropriate headers for Mac OSX builds. Also, fix some build issues due to type-casting. * Fix new code formatting issues Fix new code formatting issues with recent MacOS fixes. * Add back missing case statement Add back missing case statement that was accidentally removed. * Disable USE_GAS for Vulkan Loader build. Disable USE_GAS for Vulkan Loader build to fix aarch64 build. * Fixes to OpenCL external sharing tests Fix clReleaseSemaphore() API. Fix copyright year. Some other minor fixes. * Improvements to OpenCL external sharing CTS Use SPIR-V shaders instead of NV extension path from GLSL to Vulkan shaders. Fixes for lower end GPUs to use limited memory. Update copyright year at some more places. * Fix new code formatting issues. Fix code formatting issues with recent changes for external sharing tests. * More formatting fixes. More formatting fixes for recent updates to external sharing tests. * Final code formatting fixes. Minor formatting fixes to get format checks clean. * Update extension list of test_compiler Update extension list of test_compiler with missing external memory and semaphore extensions * Add ffp-contract=off Compilation flag for CTS build.
GCC defaults to using ffp-contract=fast even when fast math is disabled in the case of GNU C This creates precision issues when comparing the results with that of x86_64. GNU options reference: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html# This fix is to disable floating-point expression contractions with flag ffp-contract=off for math_brute_force tests Fixes #1794 * Make fp-contract flag arch-independent, but compiler dependent Use existing CMake constructs to add fp-contract flag so that it automatically checks for compiler support. Also, make this change arch-independent. Fixes #1794 * Fix typo in earlier commit Fix typo in earlier commit Fixes #1794 * Remove duplicate addition of ffp-contract flag ffp-contract flag is currently added via two macros Retain add_cxx_flag_if_supported macro and remove set_gnulike_module_compile_flags. Fixes #1794 * Fix typo in earlier commit Add closing " that was unintentionally removed in previous commit. --- test_conformance/math_brute_force/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 32814026..a221f05a 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -45,4 +45,6 @@ set(${MODULE_NAME}_SOURCES # warnings), but other tests not (yet); so enable -Wall locally. 
set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas") +add_cxx_flag_if_supported(-ffp-contract=off) + include(../CMakeCommon.txt) -- cgit v1.2.3 From af2710355db1ce873f8ea79b16e576abcbab6666 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 10 Oct 2023 19:22:50 +0300 Subject: add tests for clCommandSVMMemcpyKHR & clCommandSVMMemfillKHR (#1821) * add tests for clCommandSVMMemcpyKHR & clCommandSVMMemfillKHR * Fix typo SVMMemfill -> SVMMemFill * fix clCommandSVMMemFillKHR calls to match extension * add Khronos license + minor fixes * review fixes --- test_common/harness/typeWrappers.h | 42 ++++++++++ .../cl_khr_command_buffer/CMakeLists.txt | 1 + .../cl_khr_command_buffer/basic_command_buffer.h | 13 +++ .../mutable_command_basic.h | 11 --- .../command_buffer_test_copy.cpp | 77 ++++++++++++++++++ .../command_buffer_test_fill.cpp | 67 +++++++++++++++ .../extensions/cl_khr_command_buffer/main.cpp | 2 + .../extensions/cl_khr_command_buffer/procs.h | 4 + .../cl_khr_command_buffer/svm_command_basic.cpp | 94 ++++++++++++++++++++++ .../cl_khr_command_buffer/svm_command_basic.h | 42 ++++++++++ 10 files changed, 342 insertions(+), 11 deletions(-) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp create mode 100644 test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h index 50c7c938..ad11b480 100644 --- a/test_common/harness/typeWrappers.h +++ b/test_common/harness/typeWrappers.h @@ -145,6 +145,48 @@ using clSamplerWrapper = using clEventWrapper = wrapper_details::Wrapper; +class clSVMWrapper { + void *Ptr = nullptr; + cl_context Ctx = nullptr; + +public: + clSVMWrapper() = default; + + clSVMWrapper(cl_context C, size_t Size, + cl_svm_mem_flags F = CL_MEM_READ_WRITE) + : Ctx(C) + { + Ptr = clSVMAlloc(C, F, Size, 0); + } + + clSVMWrapper &operator=(void *other) = delete; + clSVMWrapper(clSVMWrapper const 
&other) = delete; + clSVMWrapper &operator=(clSVMWrapper const &other) = delete; + clSVMWrapper(clSVMWrapper &&other) + { + Ptr = other.Ptr; + Ctx = other.Ctx; + other.Ptr = nullptr; + other.Ctx = nullptr; + } + clSVMWrapper &operator=(clSVMWrapper &&other) + { + Ptr = other.Ptr; + Ctx = other.Ctx; + other.Ptr = nullptr; + other.Ctx = nullptr; + return *this; + } + + ~clSVMWrapper() + { + if (Ptr) clSVMFree(Ctx, Ptr); + } + + void *operator()() const { return Ptr; } +}; + + class clProtectedImage { public: clProtectedImage() diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt index be5fd1c9..8a4a116a 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_COMMAND_BUFFER) set(${MODULE_NAME}_SOURCES main.cpp basic_command_buffer.cpp + svm_command_basic.cpp command_buffer_printf.cpp command_buffer_get_command_buffer_info.cpp command_buffer_set_kernel_arg.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h index 44f4cc63..d08a11af 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h @@ -34,6 +34,18 @@ } \ } +// If it is supported get the addresses of all the APIs here. +#define GET_EXTENSION_ADDRESS(FUNC) \ + FUNC = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } + + // Helper test fixture for constructing OpenCL objects used in testing // a variety of simple command-buffer enqueue scenarios. 
struct BasicCommandBufferTest : CommandBufferTestBase @@ -70,6 +82,7 @@ protected: clCommandBufferWrapper command_buffer; }; + template int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index c88c14d1..19147556 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -19,17 +19,6 @@ #include "../basic_command_buffer.h" #include "../command_buffer_test_base.h" -// If it is supported get the addresses of all the APIs here. -#define GET_EXTENSION_ADDRESS(FUNC) \ - FUNC = reinterpret_cast( \ - clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ - if (FUNC == nullptr) \ - { \ - log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ - " with " #FUNC "\n"); \ - return TEST_FAIL; \ - } - struct BasicMutableCommandBufferTest : BasicCommandBufferTest { BasicMutableCommandBufferTest(cl_device_id device, cl_context context, diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp index 7a1f0e6d..0a30e76b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "basic_command_buffer.h" +#include "svm_command_basic.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -197,6 +198,74 @@ struct CopyBufferKHR : public BasicCommandBufferTest const cl_char pattern_2 = 0x28; }; +struct CopySVMBufferKHR : public BasicSVMCommandBufferTest +{ + using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandSVMMemFillKHR( + command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char), + data_size(), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandSVMMemFillKHR failed"); + + error = clCommandSVMMemcpyKHR(command_buffer, nullptr, svm_out_mem(), + svm_in_mem(), data_size(), 0, nullptr, + nullptr, nullptr); + test_error(error, "clCommandSVMMemcpyKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_1(data_size()); + error = + clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_1.data(), + svm_out_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + error = clEnqueueSVMMemFill(queue, svm_in_mem(), &pattern_2, + sizeof(cl_char), data_size(), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemFill(queue, svm_out_mem(), &pattern_2, + sizeof(cl_char), data_size(), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); + + error = + clEnqueueSVMMemcpy(queue, 
CL_TRUE, output_data_2.data(), + svm_out_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); + } + + return CL_SUCCESS; + } + + const cl_char pattern_1 = 0x14; + const cl_char pattern_2 = 0x28; +}; + struct CopyBufferToImageKHR : public BasicCommandBufferTest { using BasicCommandBufferTest::BasicCommandBufferTest; @@ -510,6 +579,14 @@ int test_copy_buffer(cl_device_id device, cl_context context, return MakeAndRunTest(device, context, queue, num_elements); } +int test_copy_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + + int test_copy_buffer_to_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp index 0ba8055a..67809cfb 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "basic_command_buffer.h" +#include "svm_command_basic.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -171,6 +172,64 @@ struct FillBufferKHR : public BasicCommandBufferTest const char pattern_2 = 0x30; }; +struct FillSVMBufferKHR : public BasicSVMCommandBufferTest +{ + using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandSVMMemFillKHR( + command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char), + data_size(), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandSVMMemFillKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_1(data_size()); + + error = + clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_1.data(), + svm_in_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + error = clEnqueueSVMMemFill(queue, svm_in_mem(), &pattern_2, + sizeof(cl_char), data_size(), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); + + error = + clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_2.data(), + svm_in_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); + } + + return CL_SUCCESS; + } + + const char pattern_1 = 0x15; + const char pattern_2 = 0x30; +}; }; int 
test_fill_buffer(cl_device_id device, cl_context context, @@ -179,6 +238,14 @@ int test_fill_buffer(cl_device_id device, cl_context context, return MakeAndRunTest(device, context, queue, num_elements); } +int test_fill_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + + int test_fill_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp index 3e923f6c..4ecb0806 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -45,8 +45,10 @@ test_definition test_list[] = { ADD_TEST(simultaneous_queue_substitution), ADD_TEST(fill_image), ADD_TEST(fill_buffer), + ADD_TEST(fill_svm_buffer), ADD_TEST(copy_image), ADD_TEST(copy_buffer), + ADD_TEST(copy_svm_buffer), ADD_TEST(copy_buffer_to_image), ADD_TEST(copy_image_to_buffer), ADD_TEST(copy_buffer_rect), diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index cd839cbb..ce121cea 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -103,10 +103,14 @@ extern int test_fill_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_fill_buffer(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fill_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_copy_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_copy_buffer(cl_device_id device, cl_context context, cl_command_queue queue, int 
num_elements); +extern int test_copy_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_copy_buffer_to_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_copy_image_to_buffer(cl_device_id device, cl_context context, diff --git a/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp new file mode 100644 index 00000000..1fc48ce5 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp @@ -0,0 +1,94 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "svm_command_basic.h" + +//-------------------------------------------------------------------------- + +bool BasicSVMCommandBufferTest::Skip() +{ + if (BasicCommandBufferTest::Skip()) return true; + + Version version = get_device_cl_version(device); + if (version < Version(2, 0)) + { + log_info("test requires OpenCL 2.x/3.0 device"); + return true; + } + + cl_device_svm_capabilities svm_capabilities; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, + sizeof(svm_capabilities), &svm_capabilities, NULL); + if (error != CL_SUCCESS) + { + print_error(error, "Unable to query CL_DEVICE_SVM_CAPABILITIES"); + return true; + } + + if (svm_capabilities == 0) + { + log_info("Device property CL_DEVICE_SVM_COARSE_GRAIN_BUFFER not " + "supported \n"); + return true; + } + + if (init_extension_functions() != CL_SUCCESS) + { + log_error("Unable to initialise extension functions"); + return true; + } + + return false; +} + +//-------------------------------------------------------------------------- + +cl_int BasicSVMCommandBufferTest::SetUpKernelArgs(void) +{ + size_t size = sizeof(cl_int) * num_elements * buffer_size_multiplier; + svm_in_mem = clSVMWrapper(context, size); + if (svm_in_mem() == nullptr) + { + log_error("Unable to allocate SVM memory"); + return CL_OUT_OF_RESOURCES; + } + svm_out_mem = clSVMWrapper(context, size); + if (svm_out_mem() == nullptr) + { + log_error("Unable to allocate SVM memory"); + return CL_OUT_OF_RESOURCES; + } + return CL_SUCCESS; +} + +//-------------------------------------------------------------------------- + +cl_int BasicSVMCommandBufferTest::init_extension_functions() +{ + cl_int error = BasicCommandBufferTest::init_extension_functions(); + test_error(error, "Unable to initialise extension functions"); + + cl_platform_id platform; + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + + 
GET_EXTENSION_ADDRESS(clCommandSVMMemFillKHR); + GET_EXTENSION_ADDRESS(clCommandSVMMemcpyKHR); + + return CL_SUCCESS; +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h new file mode 100644 index 00000000..f6b6b427 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef CL_KHR_SVM_COMMAND_BASIC_H +#define CL_KHR_SVM_COMMAND_BASIC_H + +#include "basic_command_buffer.h" + + +struct BasicSVMCommandBufferTest : BasicCommandBufferTest +{ + BasicSVMCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicCommandBufferTest(device, context, queue) + {} + + virtual bool Skip() override; + virtual cl_int SetUpKernelArgs(void) override; + +protected: + cl_int init_extension_functions(); + + clCommandSVMMemFillKHR_fn clCommandSVMMemFillKHR = nullptr; + clCommandSVMMemcpyKHR_fn clCommandSVMMemcpyKHR = nullptr; + + clSVMWrapper svm_in_mem, svm_out_mem; +}; + +#endif -- cgit v1.2.3 From c73d6a341bdcc1c05fa995390672e56a4cf66c6c Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Tue, 10 Oct 2023 11:25:10 -0500 Subject: Add NULL CL_MEM_HOST_PTR check for clGetMemObjectInfo (#1801) The spec requires implementations return NULL for CL_MEM_HOST_PTR when the flags passed at memory object creation time do not contain CL_MEM_USE_HOST_PTR CTS was not checking this. Add the same check. Fixes #1752 --- test_conformance/api/test_mem_object_info.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp index 8dc8f6cf..7eedec85 100644 --- a/test_conformance/api/test_mem_object_info.cpp +++ b/test_conformance/api/test_mem_object_info.cpp @@ -217,6 +217,9 @@ int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_ // Create a buffer object to test against. bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error ); test_error( error, "Unable to create buffer to test with" ); + void *ptr; + TEST_MEM_OBJECT_PARAM(bufferObject, CL_MEM_HOST_PTR, ptr, NULL, + "host pointer", "%p", void *) } // Perform buffer object queries. -- cgit v1.2.3