CUDA: dynamic array + fixed-size array in a struct — malloc and copy -


So I have been stuck on this problem for a while. My struct looks like this:

typedef struct {
    int _size;
    int _dim[dimensions];
    float *_data;
} matrix;

Now the problem for me is how to malloc and memcpy it. This is how I'm doing it:

matrix * d_in;
matrix * d_out;
const int threads_bytes = sizeof(int) + sizeof(int)*dimensions + sizeof(float)*h_a->_size;
cudaMalloc((void **) &d_in, threads_bytes);
cudaMemcpy(d_in, h_a, threads_bytes, cudaMemcpyHostToDevice);

Edit: this is how I allocated h_a:

matrix  a; // = (matrix*)malloc(sizeof(matrix));
a._dim[0] = 40;
a._dim[1] = 60;
a._size = a._dim[0]*a._dim[1];
a._data = (float*)malloc(a._size*sizeof(float));
matrix *h_a = &a;

Here h_a is the matrix I allocated. I call the kernel like this:

devicecomp<<<gridsize, blocksize>>>(d_out, d_in); 

However, in the kernel I cannot reach any data from the struct, neither the array nor the variable.

This is a common problem. When you did the malloc operation on the host (for h_a->_data), you allocated host data, which is not accessible from the device.

This answer describes in detail what is going on and how to fix it.

In your case, something like this should work:

matrix  a; // = (matrix*)malloc(sizeof(matrix));
a._dim[0] = 40;
a._dim[1] = 60;
a._size = a._dim[0]*a._dim[1];
a._data = (float*)malloc(a._size*sizeof(float));
matrix *h_a = &a;

float *d_data;
cudaMalloc((void **) &d_data, a._size*sizeof(float));

matrix * d_in;
matrix * d_out;
const int threads_bytes = sizeof(int) + sizeof(int)*dimensions + sizeof(float)*h_a->_size;
cudaMalloc((void **) &d_in, threads_bytes);
cudaMemcpy(d_in, h_a, threads_bytes, cudaMemcpyHostToDevice);

cudaMemcpy(&(d_in->_data), &d_data, sizeof(float *), cudaMemcpyHostToDevice);

Note that this doesn't copy the data area from the host copy of a to the device copy. It simply makes a device-accessible data area, equal in size to the host data area. If you also want to copy the data area, that will require another cudaMemcpy operation, using h_a->_data and d_data.


Comments

Popular posts from this blog

c# - DetailsView in ASP.Net - How to add another column on the side/add a control in each row? -

javascript - firefox memory leak -

Trying to import CSV file to a SQL Server database using asp.net and c# - can't find what I'm missing -