CUDA Toolkit File

// Initialize input vectors for (int i = 0; i < n; i++) h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX;

// Cleanup cudaFree(d_a); cudaFree(d_b); cudaFree(d_c); delete[] h_a; delete[] h_b; delete[] h_c;

# Build $(TARGET) from all of $(SOURCES) in a single compiler invocation.
# The recipe must live on its own TAB-indented line: on the header line the
# command words would be parsed as prerequisites and nothing would be run.
# $@ expands to the target name, $^ to the full prerequisite list.
$(TARGET): $(SOURCES)
	$(NVCC) $(NVCC_FLAGS) -o $@ $^

// Launch kernel int threadsPerBlock = 256; int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_a, d_b, d_c, n);

// Copy result back to host cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost); // Initialize input vectors for (int i =

# Remove the built binary.
# Declared phony so that a file named "clean" in the directory cannot make
# this target look up to date. The command sits on its own TAB-indented
# recipe line; on the header line it would be read as a prerequisite list.
.PHONY: clean
clean:
	rm -f $(TARGET)

// Allocate host memory float *h_a = new float[n]; float *h_b = new float[n]; float *h_c = new float[n]; i++) h_a[i] = rand() / (float)RAND_MAX

# Aggregate target: building "all" builds $(TARGET).
# Declared phony because it names a command, not a file on disk.
.PHONY: all
all: $(TARGET)