#include <stdio.h>
#include <libspe.h>
#include "dma.h"

// Handle for the SPU program; handed to spe_create_thread() in main().
// NOTE(review): presumably supplied at link time by the embedded SPU object
// -- confirm against the build.
extern spe_program_handle_t dma_spu;

// Number of int elements in the data array shared with the SPU
#define NUM_ELEMENTS 1000

// Align data array on cache line (128 bytes) for best DMA performance
int data[NUM_ELEMENTS] __attribute__((aligned(128)));

// Control block describing the data array (address and element count). Its
// address is passed to the SPU thread as argp.
// NOTE(review): the 16-byte alignment is presumably required so the SPU can
// DMA the block in -- confirm against the SPU-side code.
CONTROL_BLOCK cb __attribute__((aligned(16)));

// Helpers defined further down in this file
extern void init_data();
extern void verify_data();

// PPU-side driver: initialises the data array, launches the SPU program with
// a control block describing that array, waits for the SPU to signal
// completion through its outbound mailbox, joins the SPU thread, and finally
// verifies the array contents.
//
// Returns 0 once the SPU thread has been run and joined (verify_data()
// reports its own result on stdout only), or 1 if the SPU thread could not
// be created or waited for.
int main(void)
{
  // Generate some data
  init_data();

  // Fill in the control block with the address/size of the data array in main
  // memory
  cb.data_addr = (uintptr32_t)data;
  cb.num_elements = NUM_ELEMENTS;

  // Create SPU thread, passing it the address of the control block as argp
  speid_t id = spe_create_thread(0,         // thread group
                                 &dma_spu,  // program
                                 &cb,       // argp
                                 NULL,      // envp
                                 -1,        // processor affinity mask (ignore)
                                 0);        // flags
  if (id == 0) {
    // Without a valid thread id every later libspe call would be operating
    // on garbage, so bail out instead of spinning on the mailbox forever.
    perror("spe_create_thread");
    return 1;
  }

  // Spin until the SPU tells us it's done by writing to the mailbox
  int mbox_status;
  while ((mbox_status = spe_stat_out_mbox(id)) == 0) {
    // busy-wait: nothing to do until the SPU posts its message
  }

  if (mbox_status > 0) {
    // Drain the mailbox
    spe_read_out_mbox(id);
  } else {
    // A negative status is an error, not a pending message; report it but
    // still join the thread below so it is not left dangling.
    perror("spe_stat_out_mbox");
  }

  if (spe_wait(id, NULL, 0) < 0) {
    perror("spe_wait");
    return 1;
  }

  // Check that the SPU did everything correctly
  verify_data();
  return 0;
}

// Fill the global data array so that each slot holds its own index
// (0 .. NUM_ELEMENTS - 1), then print a completion message.
void init_data()
{
  int idx = 0;

  while (idx < NUM_ELEMENTS) {
    data[idx] = idx;
    ++idx;
  }

  printf("Done init\n");
}

// Check that every element of the global data array equals
// index * MUL_FACTOR + ADD_FACTOR. Scanning stops at the first mismatch.
// Prints a single line on stdout stating whether verification passed.
void verify_data()
{
  int idx;

  // Advance until a mismatching element is found or the array is exhausted
  for (idx = 0; idx < NUM_ELEMENTS; ++idx) {
    if (data[idx] != idx * MUL_FACTOR + ADD_FACTOR) {
      break;
    }
  }

  // Stopping short of the end means some element failed the check
  if (idx < NUM_ELEMENTS) {
    printf("Verify failed!\n");
  } else {
    printf("Verify succeeded\n");
  }
}
