#include "nws_single_gpu_runnable_init.h"
#include "thread_manager.h"
#include "thread_manager_runnable.h"
#include "nws_algorithm_option.h"

using align::NWSSingleGPURunnableInit;
using align::DevPointers;

// Definition of the static member that holds one DevPointers record per GPU.
// run() stores an entry here keyed by the GPU number reported by the thread
// manager, so the device buffers allocated during initialisation can be looked
// up later by that number.
// NOTE(review): nothing in this file guards the map with a lock; if run() is
// executed concurrently by several threads, confirm that access is serialised
// elsewhere.
map<int, DevPointers> NWSSingleGPURunnableInit::devPtr;

/**
 * Allocates the device buffers for the GPU assigned to the calling thread,
 * uploads the sequence data and the substitution matrix to them, and records
 * the resulting pointers in the static devPtr map under that GPU's number.
 *
 * All byte counts are computed in size_t. The operands are widened before
 * they are multiplied, so a product such as memoryOffset * longestSeq cannot
 * overflow a narrower integer type before it reaches cudaMalloc.
 *
 * The return codes of the individual CUDA calls are not inspected here; error
 * reporting is delegated to checkError(), which is called once after the
 * allocations and once after the copies.
 * NOTE(review): checkError() is defined elsewhere - confirm that it picks up
 * a failure from any of the preceding calls, not just the last one.
 */
void NWSSingleGPURunnableInit::run() {
    // Query the GPU number once; it selects the device and keys the map entry.
    const int gpuNo = this->tm->getThreadsInfo().gpuNo;
    cudaSetDevice(gpuNo);

    // Buffer sizes in bytes. Each one is used for the allocation and, where
    // applicable, for the matching copy, so the two can never disagree.
    const size_t packedSeqsBytes = this->nwa->seqs->packedSeqsSize;
    const size_t perSeqBytes =
        static_cast<size_t>(this->nwa->seqs->seqCount) * sizeof(int);
    const size_t perPairBytes =
        static_cast<size_t>(this->nwa->pairsPerGPU) * sizeof(int);
    const size_t scoresBytes =
        static_cast<size_t>(this->nwa->pairsPerGPU) * sizeof(int2);
    const size_t residues =
        static_cast<size_t>(this->nwa->seqs->sm->residuesCount);
    const size_t smBytes = residues * residues * sizeof(int);
    // Widen both factors first: their product is the one most likely to
    // exceed the range of int for long sequences.
    const size_t hBytes = static_cast<size_t>(this->memoryOffset)
                        * static_cast<size_t>(this->nwa->seqs->longestSeq)
                        * sizeof(short);

    // Allocate memory on the GPU.
    DevPointers devptr;

    cudaMalloc((void**)&devptr.packedSeqs, packedSeqsBytes);
    cudaMalloc((void**)&devptr.starts, perSeqBytes);
    cudaMalloc((void**)&devptr.lengths, perSeqBytes);
    cudaMalloc((void**)&devptr.pair1, perPairBytes);
    cudaMalloc((void**)&devptr.pair2, perPairBytes);
    cudaMalloc((void**)&devptr.scores_overlaps, scoresBytes);
    cudaMalloc((void**)&devptr.sm, smBytes);
    cudaMalloc((void**)&devptr.H, hBytes);

    checkError("FAILED TO MALLOC MEMORY ON GPU!\n");

    // Copy the input data to the GPU. Only the sequence buffers and the
    // substitution matrix are uploaded; pair1, pair2, scores_overlaps and H
    // are left uninitialised by this function.
    cudaMemcpy(devptr.packedSeqs, this->nwa->seqs->packedSeqs, packedSeqsBytes, cudaMemcpyDefault);
    cudaMemcpy(devptr.starts, this->nwa->seqs->starts, perSeqBytes, cudaMemcpyDefault);
    cudaMemcpy(devptr.lengths, this->nwa->seqs->lengths, perSeqBytes, cudaMemcpyDefault);
    cudaMemcpy(devptr.sm, this->nwa->seqs->sm->substitutionMatrix, smBytes, cudaMemcpyDefault);

    // Publish the pointers for this GPU before the final error check, in the
    // same order as before, so the entry exists however checkError() reacts.
    devPtr[gpuNo] = devptr;

    checkError("CANNOT SET MEMORY ON GPU!\n");
}


/**
 * Stores the memory offset that run() multiplies by the longest sequence
 * length when sizing the H buffer.
 *
 * @param memoryOffset value to store in this object's memoryOffset member
 * @return this object, so that calls can be chained
 */
NWSSingleGPURunnableInit *NWSSingleGPURunnableInit::setMemoryOffset(int memoryOffset) {
    NWSSingleGPURunnableInit *const self = this;
    // The parameter shadows the member, so the member is reached through self.
    self->memoryOffset = memoryOffset;
    return self;
}
