#include<stdio.h>
#include<malloc.h>
#include<math.h>

#include "cuda_utils.h"

// CUDA Kernel //
// Scales a vector by a scalar: c[i] = a[i] * b for every 0 <= i < size.
//
//   a    : device pointer to the input vector (at least `size` floats)
//   b    : scalar factor
//   c    : device pointer to the output vector (at least `size` floats)
//   size : number of elements to process; nothing is written if size <= 0
//
// Launch with a 1D grid of 1D blocks. The loop advances by the total number
// of launched threads, so every element is processed exactly once regardless
// of how many blocks/threads are used.
__global__ void eval(float* a,float b,float* c,int size) {
  // 64-bit index so that adding the stride can never overflow an int //
  const long long stride=(long long)gridDim.x*blockDim.x;
  for(long long i=(long long)blockIdx.x*blockDim.x+threadIdx.x;i<size;i+=stride) {
    c[i]=a[i]*b;
  }
}

// CPU code //
// Fills a random input vector, scales it by a random factor on the GPU using
// the eval kernel and checks the GPU result against the same product computed
// on the CPU. Every CUDA runtime call is routed through cudaVerify (declared
// in cuda_utils.h).
int main() {

  srand48(time(NULL));

  // define vector size //
  const int vector_size = 10000;
  const size_t vector_bytes = vector_size*sizeof(float);

  // allocate memory on cpu //
  // cudaMallocHost is used (instead of malloc) because these buffers are
  // released with cudaFreeHost at the end of main.
  float* vector_a_cpu=NULL;
  float* vector_c_cpu=NULL;
  cudaVerify(cudaMallocHost((void**)&vector_a_cpu,vector_bytes));
  cudaVerify(cudaMallocHost((void**)&vector_c_cpu,vector_bytes));

  float b=drand48();
  // initialize cpu input vector //
  for(int i=0;i<vector_size;i++) {
    vector_a_cpu[i]=drand48();
  }
  // initialize cpu output vector to 0 //
  for(int i=0;i<vector_size;i++) {
    vector_c_cpu[i]=0;
  }

  // allocate memory on gpu //
  float* vector_a_gpu=NULL;
  float* vector_c_gpu=NULL;
  cudaVerify(cudaMalloc((void**)&vector_a_gpu,vector_bytes));
  cudaVerify(cudaMalloc((void**)&vector_c_gpu,vector_bytes));

  // copy input vector to gpu //
  cudaVerify(cudaMemcpy(vector_a_gpu,vector_a_cpu,vector_bytes,
                        cudaMemcpyHostToDevice));

  // cuda kernel call //
  // round the block count up so that the grid covers all elements //
  const int threads_per_block=256;
  const int num_blocks=(vector_size+threads_per_block-1)/threads_per_block;
  eval<<<num_blocks,threads_per_block>>>(vector_a_gpu,b,vector_c_gpu,vector_size);
  // a kernel launch returns no status: query launch errors explicitly, then
  // wait for the kernel so that execution errors are reported here as well //
  cudaVerify(cudaGetLastError());
  cudaVerify(cudaDeviceSynchronize());

  // copy output vector from gpu to cpu //
  cudaVerify(cudaMemcpy(vector_c_cpu,vector_c_gpu,vector_bytes,
                        cudaMemcpyDeviceToHost));

  // verify results //
  bool success=true;
  for(int i=0;i<vector_size;i++) {
    // tolerance is scaled by b: inputs lie in [0,1), so products are below b //
    if (fabsf(vector_a_cpu[i]*b-vector_c_cpu[i])>1e-5*b) {
      printf("computation result is wrong: index=%i expected result=%g "
             "cuda result=%g\n", i, vector_a_cpu[i]*b, vector_c_cpu[i]);
      success=false;
      break;
    }
  }
  if (success) {
    printf("computation result is correct.\n");
  }

  // free gpu memory //
  cudaVerify(cudaFree(vector_a_gpu));
  cudaVerify(cudaFree(vector_c_gpu));

  // free cpu memory //
  cudaVerify(cudaFreeHost(vector_a_cpu));
  cudaVerify(cudaFreeHost(vector_c_cpu));

  // exit //
  return 0;
}
