日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

OpenCL 第6课:矩阵转置

發布時間:2023/12/18 编程问答 32 豆豆
生活随笔 收集整理的這篇文章主要介紹了 OpenCL 第6课:矩阵转置,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。

上一節我們寫了個一維向量相加的程序。這節我們來看一個4×4矩陣轉置程序。

4×4矩陣我們采用二維數組進行存儲,在程序設計上,我們讓轉置過程分4次轉置完成,就是一次轉一行。注意這里的OpenCL的工作維數是二維。(當然用一維的方式也可以,只是在CL代碼中要用到循環,效率不高)

程序分兩部份:

(1)transposition.cl代碼

?
1 2 3 4 5 6 7 8 9 10 11 __kernel void transposition(__global int* A, ????????????????????__global int* B) { ????//獲取索引號(hào),這里是二維的,所以可以取兩個(gè) ????//否則另一個(gè)永遠(yuǎn)是0 ????int col = get_global_id(0); ????int row = get_global_id(1); ????B[col*4+row] = A[row*4+col]; }

(2)main.cpp代碼

?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 #include <iostream> #include <stdio.h> #include <string.h> #include <string> #include <CL/cl.h>//包含CL的頭文件 using namespace std; //4x4數(shù)組 #define dim_x 4 #define dim_y 4 //從外部文件獲取cl內(nèi)核代碼 bool GetFileData(const char* fname,string& str) { ????FILE* fp = fopen(fname,"r"); ????if(fp==NULL) ????{ ????????printf("no found file\n"); ????????return false; ????} ????int n=0; ????while(feof(fp)==0) ????{ ????????str += fgetc(fp); ????} ????return true; } int main() { ????//先讀外部CL核心代碼,如果失敗則退出。 ????//代碼存buf_code里面 ????string code_file; ????if(false == GetFileData("transposition.cl",code_file)) ????????return 0; ????char* buf_code = new char[code_file.size()]; ????strcpy(buf_code,code_file.c_str()); ????buf_code[code_file.size()-1] = NULL; ????//聲明CL所需變量。 ????cl_device_id device; ????cl_platform_id platform_id = NULL; ????cl_context context; ????cl_command_queue cmdQueue; ????cl_mem bufferA,bufferB,bufferC; ????cl_program program; ????cl_kernel kernel = NULL; ????//我們使用的是二維向量 ????//設(shè)定向量大小(維數(shù)) ????size_t globalWorkSize[2]; ????globalWorkSize[0] = dim_x ; ????globalWorkSize[1] = dim_y; ????cl_int err; ????/* ????????定義輸入變量和輸出變量,并設(shè)定初值 ????*/ ????int buf_A[dim_x][dim_y]; ????int buf_B[dim_x][dim_y]; ????size_t datasize = sizeof(int) * dim_x * dim_y; ????int n=0; ????int m=0; ????for(n=0;n<dim_x;n++) ????{ ????????for(m=0;m<dim_y;m++) ????????{ ????????????buf_A[m][n] = m + n*dim_x; ????????} 
????} ????//step 1:初始化OpenCL ????err = clGetPlatformIDs(1,&platform_id,NULL); ????if(err!=CL_SUCCESS) ????{ ????????cout<<"clGetPlatformIDs error"<<endl; ????????return 0; ????} ????//這次我們只用CPU來(lái)進(jìn)行并行運(yùn)算,當(dāng)然你也可以該成GPU ????clGetDeviceIDs(platform_id,CL_DEVICE_TYPE_GPU,1,&device,NULL); ????//step 2:創(chuàng)建上下文 ????context = clCreateContext(NULL,1,&device,NULL,NULL,NULL); ????//step 3:創(chuàng)建命令隊(duì)列 ????cmdQueue = clCreateCommandQueue(context,device,0,NULL); ????//step 4:創(chuàng)建數(shù)據(jù)緩沖區(qū) ????bufferA = clCreateBuffer(context, ?????????????????????????????CL_MEM_READ_ONLY, ?????????????????????????????datasize,NULL,NULL); ????bufferB = clCreateBuffer(context, ?????????????????????????????CL_MEM_WRITE_ONLY, ?????????????????????????????datasize,NULL,NULL); ????//step 5:將數(shù)據(jù)上傳到緩沖區(qū) ????clEnqueueWriteBuffer(cmdQueue, ?????????????????????????bufferA,CL_FALSE, ?????????????????????????0,datasize, ?????????????????????????buf_A,0, ?????????????????????????NULL,NULL); ????//step 6:加載編譯代碼,創(chuàng)建內(nèi)核調(diào)用函數(shù) ????program = clCreateProgramWithSource(context,1, ????????????????????????????????????????(const char**)&buf_code, ????????????????????????????????????????NULL,NULL); ????clBuildProgram(program,1,&device,NULL,NULL,NULL); ????kernel = clCreateKernel(program,"transposition",NULL); ????//step 7:設(shè)置參數(shù),執(zhí)行內(nèi)核 ????clSetKernelArg(kernel,0,sizeof(cl_mem),&bufferA); ????clSetKernelArg(kernel,1,sizeof(cl_mem),&bufferB); ????//<span style="color: #ff0000;"><strong>注意這里第三個(gè)參數(shù)已經(jīng)改成2,表示二維數(shù)據(jù)。</strong></span> ????clEnqueueNDRangeKernel(cmdQueue,kernel, ???????????????????????????2,NULL, ???????????????????????????globalWorkSize, ???????????????????????????NULL,0,NULL,NULL); ????//step 8:取回計(jì)算結(jié)果 ????clEnqueueReadBuffer(cmdQueue,bufferB,CL_TRUE,0, ????????????????????????datasize,buf_B,0,NULL,NULL); ????//輸出計(jì)算結(jié)果 ????for(n=0;n<dim_x;n++) ????{ ????????for(m=0;m<dim_y;m++) 
????????{ ????????????cout<< buf_A[m][n] <<","; ????????} ????????cout<<endl; ????} ????cout<<endl<<"====transposition===="<<endl<<endl; ????for(n=0;n<dim_x;n++) ????{ ????????for(m=0;m<dim_y;m++) ????????{ ????????????cout<< buf_B[m][n] <<","; ????????} ????????cout<<endl; ????} ????//釋放所有調(diào)用和內(nèi)存 ????clReleaseKernel(kernel); ????clReleaseProgram(program); ????clReleaseCommandQueue(cmdQueue); ????clReleaseMemObject(bufferA); ????clReleaseMemObject(bufferB); ????clReleaseContext(context); ????delete buf_code; ????return 0; }

運算結果:

?

?

?

總結

以上是生活随笔為你收集整理的OpenCL 第6课:矩阵转置的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。