CUDA, dynamic array + array. malloc and copy -
I have been stuck on this problem for a while. My struct looks like this:
typedef struct {
    int _size;
    int _dim[dimensions];
    float *_data;
} matrix;
Now the problem for me is how to malloc and memcpy it. This is how I'm doing it:
matrix * d_in;
matrix * d_out;
const int threads_bytes = sizeof(int) + sizeof(int)*dimensions + sizeof(float)*h_a->_size;
cudaMalloc((void **) &d_in, threads_bytes);
cudaMemcpy(d_in, h_a, threads_bytes, cudaMemcpyHostToDevice);
Edit: this is how I allocated h_a:
matrix a; // = (matrix*)malloc(sizeof(matrix));
a._dim[0] = 40;
a._dim[1] = 60;
a._size = a._dim[0]*a._dim[1];
a._data = (float*)malloc(a._size*sizeof(float));
matrix *h_a = &a;
where h_a is the matrix I allocated. I call the kernel like this:
devicecomp<<<gridsize, blocksize>>>(d_out, d_in);
However, in the kernel I cannot reach any of the data in the struct, neither the array nor the variable.
This is a common problem. When you did the malloc operation on the host (for h_a->_data), you allocated host memory, which is not accessible from the device.
This answer describes in detail what is going on and how to fix it.
In your case, something like this should work:
matrix a; // = (matrix*)malloc(sizeof(matrix));
a._dim[0] = 40;
a._dim[1] = 60;
a._size = a._dim[0]*a._dim[1];
a._data = (float*)malloc(a._size*sizeof(float));
matrix *h_a = &a;
float *d_data;
cudaMalloc((void **) &d_data, a._size*sizeof(float));
matrix * d_in;
matrix * d_out;
const int threads_bytes = sizeof(int) + sizeof(int)*dimensions + sizeof(float)*h_a->_size;
cudaMalloc((void **) &d_in, threads_bytes);
cudaMemcpy(d_in, h_a, threads_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(&(d_in->_data), &d_data, sizeof(float *), cudaMemcpyHostToDevice);
Note that this doesn't copy the data area from the host copy of a to the device copy. It simply makes a device-accessible data area, equal in size to the host data area. If you also want to copy the data area, that will require another cudaMemcpy operation, using h_a->_data and d_data. (Strictly speaking, the host object that h_a points to is only sizeof(matrix) bytes, so the struct copy should transfer sizeof(matrix) bytes rather than threads_bytes, which reads past the end of the host struct.)
Comments
Post a Comment