#include <stdio.h>
#include <unistd.h>
#include <cuda_runtime_api.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include "sequences.h"
#include "substitution_matrix.h"
#include "main_cu.h"
#include "thread_manager.h"
#include "arguments_manager.h"

using Exceptions::Exception;
using namespace Data;
using std::map;
using namespace std;

// GLOBAL DATA
// Upper limit on the number of GPUs to use. deviceQuery() clips the detected
// device count to this value; main() overwrites it with the "ngpus" parameter.
int maxGpu = 128;

// Largest multiProcessorCount recorded by deviceQuery(); starts at 1.
int maxDevMultiprocessor = 1;
// Number of devices in use, set by deviceQuery().
int gpuNumber;
// Array of gpuNumber device ids (0 .. gpuNumber-1), allocated by deviceQuery().
int* gpus;

// Discovers the CUDA devices to use and records them in the global state.
//
// Sets gpuNumber to the device count reported by the runtime (clipped to
// maxGpu), fills gpus with the ids 0 .. gpuNumber-1 and raises
// maxDevMultiprocessor to the largest multiProcessorCount among those devices.
// Failures of the CUDA calls are reported on stderr; a failed device count is
// treated as "no devices".
void deviceQuery()
{
    // cudaSuccess is 0; any other return value is an error code.
    const int countStatus = cudaGetDeviceCount(&gpuNumber);
    if (countStatus != 0)
    {
        fprintf(stderr, "cudaGetDeviceCount failed (CUDA error %d)\n", countStatus);
        gpuNumber = 0;
    }
    printf("Device count: %d\n", gpuNumber);
    if (gpuNumber > maxGpu)
    {
        // A non-positive maxGpu must not produce a negative array size below.
        gpuNumber = (maxGpu > 0) ? maxGpu : 0;
        printf("Using %d device(s)\n", gpuNumber);
    }

    gpus = new int[gpuNumber];
    cudaDeviceProp devProp;
    for (int i = 0; i < gpuNumber; i++)
    {
        gpus[i] = i;

        // Query the device being iterated over, not always device 0.
        const int propStatus = cudaGetDeviceProperties(&devProp, gpus[i]);
        if (propStatus != 0)
        {
            fprintf(stderr, "cudaGetDeviceProperties failed for device %d (CUDA error %d)\n",
                    gpus[i], propStatus);
            continue;
        }
        if (devProp.multiProcessorCount > maxDevMultiprocessor)
            maxDevMultiprocessor = devProp.multiProcessorCount;
    }
}

// Callback handed to ThreadManager::request() by main(): selects the GPU
// assigned to the calling thread and verifies it with a small round trip.
//
// The GPU number is copied to device memory and read back; the value read
// back is printed as "GPU: <n>". If any CUDA call fails, the error code is
// reported on stderr and "GPU: -1" is printed instead.
//
// This - thread manager; its getThreadsInfo().gpuNo selects the device.
// data - unused.
void initCUDA(ThreadManager* This, void* data)
{
    const int gpuNo = This->getThreadsInfo().gpuNo;
    int echoedGpuNo = -1;
    int* devPtr = NULL;

    // cudaSuccess is 0; each step runs only if all previous ones succeeded.
    int status = cudaSetDevice(gpuNo);
    if (status == 0)
        status = cudaMalloc((void**)&devPtr, sizeof(int));
    if (status == 0)
        status = cudaMemcpy(devPtr, &gpuNo, sizeof(int), cudaMemcpyHostToDevice);
    if (status == 0)
        status = cudaMemcpy(&echoedGpuNo, devPtr, sizeof(int), cudaMemcpyDeviceToHost);

    // Release the buffer only if the allocation actually happened.
    if (devPtr != NULL)
        cudaFree(devPtr);

    if (status != 0)
    {
        fprintf(stderr, "CUDA initialization failed for GPU %d (CUDA error %d)\n", gpuNo, status);
        echoedGpuNo = -1;
    }

    printf("GPU: %d\n", echoedGpuNo);
}

bool readArguments(int argc, char** argv)
{
    char* argBuf;
    ArgumentsManager arguments(argc, argv);

    argBuf = (char*)arguments.getParam("help", "h");
    if (argBuf != NULL)
    {
        ifstream fin("HELP");
        string temp;
        while(getline(fin, temp))
            cout << temp << endl;
        fin.close();
        return false;
    }

//    argBuf = (char*)arguments.getParam("ngpus", "ng");
//    if ((argBuf != NULL) && (atoi(argBuf) > 0))
//        maxGpu = atoi(argBuf);

    return true;

}

int main(int argc, char** argv)
{

    if(!readArguments(argc, argv))
        return 0;

    try
    {
        ArgumentsManager am(argc, argv);

        // These parameters are required regardless of algorithm type.
        if (!am.containParam("alg", "a") || !am.containParam("ngpus", "ng"))
        {
            am.openConfig("etc/default.cnf");
            am.reloadArgs();
        }
        maxGpu = am.getIntParam("ngpus", "ng");

        deviceQuery();

        stringstream maxDevMultiprocessorString;
        maxDevMultiprocessorString<<maxDevMultiprocessor;
        am.addParam("maxDevMultiprocessor", maxDevMultiprocessorString.str());
        am.reloadArgs();

        
        ThreadManager* tm;
        tm = new ThreadManager(gpus,gpuNumber);
        for(int i=0; i<gpuNumber; i++)
            tm->request(initCUDA, NULL, i);
        tm->wait();

        

        ThreadManagerRunnable* alg = ThreadManagerRunnable::getAlgorithm(&am);
        if (alg != NULL)
        {
            alg->loadArguments(&am);
            alg->run(tm);

            //printf("%s\n", ((AlignmentMatchAlgorithm*)alg)->matches1Manager->getSequence(234, 660));
            //printf("%s\n", ((AlignmentMatchAlgorithm*)alg)->matches2Manager->getSequence(234, 660));

            //printf("%s\n", ((WatermanEggertGpu*)alg)->matches1Manager->getSequence(0, 1));
            //printf("%s\n", ((WatermanEggertGpu*)alg)->matches2Manager->getSequence(0, 1));

            delete alg;
        }

      
    }
    catch (Exception* ex)
    {
        printf("%s\n",ex->getMessage());
        return 1;
    }
    return 0;
}
