CUDA: struct with a dynamic array and a fixed-size array - malloc and copy
I have been stuck on this problem for a while. My struct looks like this:
typedef struct { int size; int dim[dimensions]; float *data; }matrix;
Now the problem for me is how to do the malloc and the memcpy. This is how I'm doing it:
matrix * d_in; matrix * d_out; const int threads_bytes = sizeof(int) + sizeof(int)*dimensions + sizeof(float)*h_a->_size; cudamalloc((void **) &d_in, threads_bytes); cudamemcpy(d_in, h_a, threads_bytes, cudamemcpyhosttodevice);
Edit: this is how I allocated h_a:
matrix a; // = (matrix*)malloc(sizeof(matrix)); a._dim[0] = 40; a._dim[1] = 60; a._size = a._dim[0]*a._dim[1]; a._data = (float*)malloc(a._size*sizeof(float)); matrix *h_a = &a;
where h_a is the matrix I allocated. I call the kernel like this:
devicecomp<<<gridsize, blocksize>>>(d_out, d_in);
However, in the kernel I cannot reach the data of the struct, the array, or the variable.
This is a common problem. When you did the malloc operation on the host (for h_a->data), you allocated host data, which is not accessible from the device.
This answer describes in detail what is going on and how to fix it.
In your case, this should work:
matrix a; // = (matrix*)malloc(sizeof(matrix)); a._dim[0] = 40; a._dim[1] = 60; a._size = a._dim[0]*a._dim[1]; a._data = (float*)malloc(a._size*sizeof(float)); matrix *h_a = &a; float *d_data; cudamalloc((void **) &d_data, a._size*sizeof(float)); matrix * d_in; matrix * d_out; const int threads_bytes = sizeof(int) + sizeof(int)*dimensions + sizeof(float)*h_a->_size; cudamalloc((void **) &d_in, threads_bytes); cudamemcpy(d_in, h_a, threads_bytes, cudamemcpyhosttodevice); cudamemcpy(&(d_in->data), &d_data, sizeof(float *), cudamemcpyhosttodevice);
Note that this doesn't copy the data area from the host copy of a to the device copy. It only makes a device-accessible data area, equal in size to the host data area. If you also want to copy the data area, that will require another cudaMemcpy operation, using h_a->data and d_data.
Comments
Post a Comment