Spaces:
Build error
Build error
// React to failure on return code != cudaSuccess | |
do { \ | |
{_cuda_err = fn;} \ | |
if (_cuda_err != cudaSuccess) goto _cuda_fail; \ | |
} while(false) | |
// React to failure on return code == 0 | |
do { \ | |
if (!(fn)) { _cuda_err = cudaUnspecified; goto _cuda_fail; } \ | |
else _cuda_err = cudaSuccess; \ | |
} while(false) | |
// Clone CPU <-> CUDA | |
template <typename T> | |
T* cuda_clone(const void* ptr, int num) | |
{ | |
T* cuda_ptr; | |
cudaError_t r; | |
r = cudaMalloc(&cuda_ptr, num * sizeof(T)); | |
if (r != cudaSuccess) return NULL; | |
r = cudaMemcpy(cuda_ptr, ptr, num * sizeof(T), cudaMemcpyHostToDevice); | |
if (r != cudaSuccess) return NULL; | |
cudaDeviceSynchronize(); | |
return cuda_ptr; | |
} | |
template <typename T> | |
T* cpu_clone(const void* ptr, int num) | |
{ | |
T* cpu_ptr; | |
cudaError_t r; | |
cpu_ptr = (T*) malloc(num * sizeof(T)); | |
if (cpu_ptr == NULL) return NULL; | |
r = cudaMemcpy(cpu_ptr, ptr, num * sizeof(T), cudaMemcpyDeviceToHost); | |
if (r != cudaSuccess) return NULL; | |
cudaDeviceSynchronize(); | |
return cpu_ptr; | |
} | |
// Pack two half values into a half2, host version | |
__host__ inline __half2 pack_half2(__half h1, __half h2) | |
{ | |
unsigned short s1 = *reinterpret_cast<unsigned short*>(&h1); | |
unsigned short s2 = *reinterpret_cast<unsigned short*>(&h2); | |
ushort2 us2 = make_ushort2(s1, s2); | |
return *reinterpret_cast<__half2*>(&us2); | |
} | |