本文测试OpenCL中读取image数据时关于坐标的两个问题:

  1. 使用float2坐标读取
  2. 使用int2坐标读取

首先完整的测试代码如下,测试平台为SDM855:

SRE实战 互联网时代守护先锋,助力企业售后服务体系运筹帷幄!一键直达领取阿里云限量特价优惠。
#include <CL/cl.h>

#include <math.h>

#include <algorithm>
#include <cstring>
#include <iostream>
#include <vector>

#include "OCL/OPPOOpenCLWrapper.h"
#include "OCL/OCLUtils.h"

// 8-bit unsigned texel type used for the host-side image data.
// A type alias is used instead of a macro so the name obeys normal C++
// scoping; the guard still skips the declaration if some earlier header
// already provides `uchar` as a macro.
#ifndef uchar
using uchar = unsigned char;
#endif



// OpenCL C source of the `readtest` kernel, compiled at run time.
//
// The kernel takes a read-only 2D image and an (unused) output buffer. Only
// the work-item with global id (0, 0) does anything: it reads the image at a
// fixed list of float2 and int2 coordinates and prints the red channel scaled
// by 255.
//
// Two samplers are declared, both with unnormalized coordinates and
// clamp-to-edge addressing:
//   * samp1        - CLK_FILTER_LINEAR, used for the float2 reads.
//   * samp_nearest - CLK_FILTER_NEAREST, used for the int2 reads. The OpenCL C
//     specification states that read_imagef with integer coordinates must use
//     a CLK_FILTER_NEAREST sampler, otherwise the returned values are
//     undefined, so the integer reads must not share the linear sampler.
const char code[] = R"(


const sampler_t samp1 = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// Integer-coordinate reads are only defined for a nearest-filter sampler.
const sampler_t samp_nearest = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;


__kernel void readtest(read_only image2d_t src, global uchar *dst)
{
    int2 coord = (int2)(get_global_id(0), get_global_id(1));

    if(coord.x == 0 && coord.y == 0){

        printf("(float2)(0.0, 0.0)   read:%f  \n", read_imagef(src, samp1, (float2)(0.0, 0.0) ).x * 255.0);
        printf("(float2)(0.0, 1.0)   read:%f  \n", read_imagef(src, samp1, (float2)(0.0, 1.0) ).x * 255.0);
        printf("(float2)(0.0, 1.5)   read:%f  \n", read_imagef(src, samp1, (float2)(0.0, 1.5) ).x * 255.0);
        printf("(float2)(0.0, 2.0)   read:%f  \n", read_imagef(src, samp1, (float2)(0.0, 2.0) ).x * 255.0);
        printf("(float2)(1.5, 1.5)   read:%f  \n", read_imagef(src, samp1, (float2)(1.5, 1.5) ).x * 255.0);
        printf("(float2)(0.5, 2.0)   read:%f  \n", read_imagef(src, samp1, (float2)(0.5, 2.0) ).x * 255.0);
        printf("(float2)(0.5, 2.5)   read:%f  \n", read_imagef(src, samp1, (float2)(0.5, 2.5) ).x * 255.0);
        printf("(float2)(1.0, 1.0)   read:%f  \n", read_imagef(src, samp1, (float2)(1.0, 1.0) ).x * 255.0);
        printf("(float2)(254.0, 254.0)   read:%f  \n", read_imagef(src, samp1, (float2)(254.0, 254.0) ).x * 255.0);
        printf("(float2)(255.0, 255.0)   read:%f  \n", read_imagef(src, samp1, (float2)(255.0, 255.0) ).x * 255.0);
        printf("(float2)(255.5, 255.5)   read:%f  \n", read_imagef(src, samp1, (float2)(255.5, 255.5) ).x * 255.0);
        printf("(float2)(256.0, 256.0)   read:%f  \n", read_imagef(src, samp1, (float2)(256.0, 256.0) ).x * 255.0);
        printf("(float2)(300, 300.0)   read:%f  \n", read_imagef(src, samp1, (float2)(300.0, 300.0) ).x * 255.0);

        printf("(int2)(1, 1)   read:%f  \n", read_imagef(src, samp_nearest, (int2)(1, 1) ).x * 255.0);
        printf("(int2)(0, 0)   read:%f  \n", read_imagef(src, samp_nearest, (int2)(0, 0) ).x * 255.0);
        printf("(int2)(1, 2)   read:%f  \n", read_imagef(src, samp_nearest, (int2)(1, 2) ).x * 255.0);
        printf("(int2)(254, 254)   read:%f  \n", read_imagef(src, samp_nearest, (int2)(254, 254) ).x * 255.0);
        printf("(int2)(255, 255)   read:%f  \n", read_imagef(src, samp_nearest, (int2)(255, 255) ).x * 255.0);
        printf("(int2)(256, 256)   read:%f  \n", read_imagef(src, samp_nearest, (int2)(256, 256) ).x * 255.0);
        printf("(int2)(257, 257)   read:%f  \n", read_imagef(src, samp_nearest, (int2)(257, 257) ).x * 255.0);

    }

}


)";

void testsamp05()
{
    OPPOOpenCLWrapper ocl;

    cl_image_format imageformat;
    imageformat.image_channel_data_type = CL_UNORM_INT8;
    imageformat.image_channel_order = CL_R;
    cl_image_desc imagedesc;
    memset(&imagedesc, 0, sizeof(imagedesc));
    imagedesc.image_width = 256;
    imagedesc.image_height = 256;
    imagedesc.image_type = CL_MEM_OBJECT_IMAGE2D;

    std::vector<uchar> data(256*256, 0);
    for(int i = 0; i < 256; ++i){
        for(int w = 0; w < 256; ++w){
            data[i*256+w] = std::max(i, w);
        }
    }
    cl_int err;
    cl_mem src = clCreateImage(ocl.getContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imageformat, &imagedesc, data.data(), &err);
    checkErr(err, "src");
    cl_mem dst = clCreateBuffer(ocl.getContext(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 256*256, NULL, &err);
    checkErr(err, "dst");

    const char *pcode = code;
    cl_program prog = ocl.makeProgram(&pcode, sizeof(code) / sizeof(code[0]));
    cl_kernel kernel = ocl.makeKernel(prog, "readtest");

    clSetKernelArg(kernel , 0, sizeof(src), &src);
    clSetKernelArg(kernel , 1, sizeof(dst), &dst);

    size_t globalsize[] = {256, 256}    ;
    clEnqueueNDRangeKernel(ocl.getCommandQueue(), kernel, 2, NULL, globalsize, NULL, 0, NULL, NULL);
    clFinish(ocl.getCommandQueue());

}

我们创建一个宽高都为256的image对象,并把每个像素的值设置为其行、列坐标中的较大者,即max(x, y)。数据格式为CL_UNORM_INT8,因此read_imagef返回的是归一化到[0, 1]的浮点值,打印时再乘以255还原。然后使用不同的坐标去读取image对象的值,其结果显示如下:

(float2)(0.0, 0.0)   read:0.000000
(float2)(0.0, 1.0)   read:0.500000
(float2)(0.0, 1.5)   read:1.000000
(float2)(0.0, 2.0)   read:1.500000
(float2)(1.5, 1.5)   read:1.000000
(float2)(0.5, 2.0)   read:1.500000
(float2)(0.5, 2.5)   read:2.000000
(float2)(1.0, 1.0)   read:0.750000
(float2)(254.0, 254.0)   read:253.750000
(float2)(255.0, 255.0)   read:254.750000
(float2)(255.5, 255.5)   read:255.000000
(float2)(256.0, 256.0)   read:255.000000
(float2)(300, 300.0)   read:255.000000
(int2)(1, 1)   read:1.000000
(int2)(0, 0)   read:0.000000
(int2)(1, 2)   read:2.000000
(int2)(254, 254)   read:254.000000
(int2)(255, 255)   read:255.000000
(int2)(256, 256)   read:255.000000
(int2)(257, 257)   read:255.000000

从上面的结果我们可以得出如下结论:

  1. 如果读取的时候使用的是float2坐标,假设坐标为(w, h),那么其返回的值为(w - 0.5, h - 0.5)处的插值结果,这里的插值就是常规意义上的双线性插值,与在CPU代码中对该图像做双线性插值的结果一致。当然这也和采样器sampler_t对象设置为CLK_FILTER_LINEAR有关;如果其设置为CLK_FILTER_NEAREST,那么就是最近邻取值了。举例来说,对于(float2)(1.0, 1.0)坐标,其插值目标为(1.0 - 0.5, 1.0 - 0.5) = (0.5, 0.5),位于(0, 0), (0, 1), (1, 0), (1, 1)四个像素点的正中间,这四个像素的值分别为0, 1, 1, 1,按双线性插值计算,结果即为(0 + 1 + 1 + 1) / 4 = 0.75。
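  2. 如果读取的时候使用的是int2坐标,那么其坐标与值的关系就和在CPU中按下标访问该image一样,越界的坐标会按CLK_ADDRESS_CLAMP_TO_EDGE被钳位到边缘像素。需要注意,OpenCL规范要求使用整数坐标调用read_imagef时,采样器的滤波模式应为CLK_FILTER_NEAREST,否则返回值是未定义的。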
扫码关注我们
微信号:SRE实战
拒绝背锅 运筹帷幄