#include <spu_mfcio.h>
#include <libmisc.h>
#include "dma.h"

// Local-store copy of the control block. main() fills it with a DMA transfer
// from the main memory address it receives through the inbound mailbox. The
// object is 16-byte aligned because it is used as a DMA destination.
CONTROL_BLOCK cb __attribute__((aligned(16)));
// Local-store buffer holding the elements to process. main() allocates it
// with malloc_align() on a 128-byte boundary and fills it by DMA.
int *data;

// Staging area for the 32-bit value that main() DMAs into the next SPU's
// mailbox. The struct is 16-byte aligned and `value` sits at byte offset 12,
// which matches the "+ 12" offset main() adds to the destination address.
// NOTE(review): the MFC requires that, for transfers smaller than 16 bytes,
// the local-store and effective addresses share the same low 4 bits; that is
// presumably the alignment rule "the docs" refer to -- confirm against the
// Cell BE handbook.
struct {
  uint32_t padding[3];  // pushes `value` to offset 12 within the quadword
  uint32_t value;       // payload written to the destination mailbox
} next_mbox __attribute__((aligned(16)));

// SPU entry point.
//
// Steps:
//   1. Read the main-memory address of the control block from the inbound
//      mailbox and DMA the control block into `cb`.
//   2. Allocate a 128-byte-aligned local-store buffer and DMA the input
//      elements into it.
//   3. Multiply every element by MUL_FACTOR.
//   4. DMA the main-memory (memory-mapped) address of the result buffer into
//      the mailbox of SPU `id + 1`, then wait for that SPU to report that it
//      has copied the data out.
//
// Parameters:
//   speid - unused.
//   argp  - this SPU's index; used to index cb.spu_ls[] and cb.spu_control[].
//   envp  - unused.
//
// Returns 0 on success, 1 if the data buffer cannot be allocated. On the
// failure path no notification is sent to the next SPU.
int main(uint64_t speid, uint64_t argp, uint64_t envp)
{
  // Every transfer below is issued on, and waited for through, this MFC tag
  // group (valid tags are 0-31).
  const uint32_t tag = 0;
  uint32_t id;
  uintptr32_t cb_addr;
  uint32_t data_size;

  (void)speid;
  (void)envp;

  id = (uint32_t)argp;

  // Read the address of the control block from the mailbox. This blocks until
  // the PPU writes to the mailbox.
  cb_addr = spu_read_in_mbox();

  // DMA in the control block and wait for it to complete
  mfc_get(&cb,         // destination local store address
          cb_addr,     // source main memory address
          sizeof(cb),  // number of bytes to transfer
          tag,         // tag (0-31)
          0,           // (ignore)
          0);          // (ignore)
  // The tag mask is set once here; the later mfc_read_tag_status_all() calls
  // keep waiting on the same tag group.
  mfc_write_tag_mask(1 << tag);
  mfc_read_tag_status_all();

  // Allocate a buffer to hold the data, aligned on a 2^7 (128) byte boundary.
  // NOTE(review): the data is fetched with a single mfc_get; this assumes
  // data_size fits within the MFC's per-transfer size limit -- confirm
  // against the values the PPU puts in the control block.
  data_size = cb.num_elements * ELEMENT_SIZE;
  data = malloc_align(data_size, 7);
  if (data == NULL && data_size != 0) {
    // Without this check the DMA below would target local-store address 0
    // and overwrite whatever lives there.
    printf("SPU %u: failed to allocate %u bytes\n",
           (unsigned)id, (unsigned)data_size);
    return 1;
  }

  // DMA in the actual data and wait for it to complete
  mfc_get(data,          // dest LS addr
          cb.data_addr,  // source main memory addr
          data_size,     // number of bytes
          tag,           // tag
          0,
          0);
  mfc_read_tag_status_all();

  // Do processing (multiply each element by MUL_FACTOR)
  for (int i = 0; i < cb.num_elements; i++) {
    data[i] *= MUL_FACTOR;
  }

  printf("Done multiply\n");

  // Notify the next SPU (id + 1) that the data is ready via mailbox. Send the
  // main memory address of this SPU's data array so the next SPU can copy it
  // out. This address is calculated by adding the offset of the array in the
  // local store to the memory-mapped address of the local store (which the
  // PPU provided in the control block).
  //
  // The destination is 12 bytes into the next SPU's control area --
  // presumably its inbound mailbox register; confirm against the control-area
  // layout. next_mbox.value is placed at offset 12 of a 16-byte-aligned
  // struct so that source and destination share the same low address bits.
  next_mbox.value = cb.spu_ls[id] + (uint32_t)data;
  mfc_put(&next_mbox.value,
          cb.spu_control[id + 1] + 12,
          4,
          tag,
          0,
          0);
  mfc_read_tag_status_all();

  // Wait for notification from the next SPU indicating it has copied out the
  // data. Only after that is it safe to release the buffer.
  spu_read_in_mbox();

  if (data != NULL) {
    free_align(data);
    data = NULL;
  }
  return 0;
}
