Cuda Toolkit File
// Initialize input vectors for (int i = 0; i < n; i++) h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX;
// Cleanup cudaFree(d_a); cudaFree(d_b); cudaFree(d_c); delete[] h_a; delete[] h_b; delete[] h_c;
$(TARGET): $(SOURCES) $(NVCC) $(NVCC_FLAGS) -o $@ $^ cuda toolkit
// Launch kernel int threadsPerBlock = 256; int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_a, d_b, d_c, n);
// Copy result back to host cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost); // Initialize input vectors for (int i =
clean: rm -f $(TARGET)
// Allocate host memory float *h_a = new float[n]; float *h_b = new float[n]; float *h_c = new float[n]; i++) h_a[i] = rand() / (float)RAND_MAX
all: $(TARGET)
