#-*-coding:utf-8-*-
import os
import sys
# Absolute directory of this script; the extra import locations below are
# built relative to it.
cp = os.path.dirname(os.path.abspath(__file__))
# Prepend the SIMUWRAPPER directory (two levels up) and the local "scripts"
# directory to the module search path. This must happen before the project
# imports further down (presumably statCrunch / fileManip live there — confirm).
sys.path.insert(0, cp+'/../../SIMUWRAPPER')
sys.path.insert(0, cp+'/scripts')

import sys
import statCrunch as SC
import fileManip as FM


# Default path of the trait definition file T1.lua, resolved relative to this
# script's directory; used as the default argument of getSelectedLoci().
mp = os.path.dirname(os.path.abspath(__file__))+"/../METAPOP/user_input/traits/T1.lua"
 
def getSelectedLoci(pat=mp):
    """Return the loci listed for a trait file as a list of integers.

    pat -- path of the trait file (defaults to the module-level T1.lua path).

    The loci are obtained from SC.getLociList(pat, True), which is expected to
    return a comma-separated string of integers. The species file S1.lua,
    looked up in the "species" directory next to the trait directory, is also
    passed through FM.checkTfromS(); its result is not used here.
    """
    rawLoci = SC.getLociList(pat, True)

    # Directory part of the trait path (everything before the last "/").
    traitDir = "/".join(pat.split("/")[:-1])
    speciesName = "S1.lua"
    speciesPath = traitDir + "/../species/" + speciesName
    traits = FM.checkTfromS(speciesPath)  # NOTE(review): return value unused; call kept as in the original

    return list(map(int, rawLoci.split(",")))


# Number of digits per allele, written as the last field of the FSTAT header
# by convertMTPtoFSTAT().
ndigit = 3
# When True, formatLoci() appends "_a" to the names of the selected loci.
handleLoci = False

def formatLoci(nloci, selecloc):
    """Build the locus-name section used by the FSTAT and GENEPOP writers.

    nloci    -- number of allele columns; two columns form one locus, so
                nloci // 2 names are generated (a trailing odd column is
                ignored).
    selecloc -- container of 1-based locus numbers; when the module flag
                handleLoci is enabled, those loci get an "_a" suffix
                (related to allele "a" of trait T1).

    Returns the names "loc_1", "loc_2", ... each followed by a newline, or an
    empty string when there is no complete locus.
    """
    names = []
    # Floor division keeps the locus count an integer on Python 2 and 3.
    for i in range(1, nloci // 2 + 1):
        name = "loc_" + str(i)
        if i in selecloc and handleLoci:
            name += "_a"
        names.append(name + "\n")
    return "".join(names)

def convertMTPtoGENEPOP(fi, fo, header = "METAPOP simulation", T1location = ""):
    """Convert a genes_Sn_T.csv genotype file into GENEPOP format.

    fi         -- input file; each non-blank line is read as
                  "<pop> <individual> <allele> <allele> ..." separated by
                  whitespace, with integer allele values.
    fo         -- output file, overwritten with the GENEPOP text.
    header     -- title written on the first output line.
    T1location -- only used as a flag: any non-empty value makes the selected
                  loci be read through getSelectedLoci() with its default
                  path; the value itself is not used as a path.

    Returns the text written to fo. Populations are written in the order of
    their first appearance in the input. An IOError from opening either file
    is propagated to the caller.
    """
    # Example of the generated text:
    # METAPOP simulation
    # loc_1
    # loc_2
    # ...
    # Pop
    # POP56_I2894 ,   004004 004001...
    # POP56_I2895 ,   004004 004002...
    # ...
    # Pop
    # POP55_I2896 ,   004004 004004...
    # ...

    with open(fi, "r") as fl:
        # split() drops the line terminator itself, so the last record stays
        # intact even when the file does not end with a newline.
        rows = [li.split() for li in fl]

    if T1location != "":
        lselected = getSelectedLoci()
    else:
        lselected = []

    nloci = 0
    popOrder = []  # population labels in order of first appearance
    dPop = {}      # population label -> list of (individual, genotype text)
    for lis in rows:
        if len(lis) < 2:
            continue  # blank or truncated line: nothing to convert
        alleles = lis[2:]
        nloci = len(alleles)
        lociLine = ""
        for loc, alll in enumerate(alleles):
            # Allele values are shifted by one (presumably so that allele 0
            # is not written as the GENEPOP missing-data code — confirm).
            sval = str(int(alll) + 1)
            if loc % 2 == 0:
                lociLine += " "  # a locus is two consecutive allele columns
            lociLine += sval.zfill(3)
        if lis[0] not in dPop:
            dPop[lis[0]] = []
            popOrder.append(lis[0])
        dPop[lis[0]].append((lis[1], lociLine))

    parts = [header + "\n", formatLoci(nloci, lselected)]
    for p in popOrder:
        parts.append("Pop\n")
        for indiv, lociLine in dPop[p]:
            parts.append("POP" + p + "_I" + indiv + " ,  " + lociLine + "\n")
    txtLoci = "".join(parts)

    with open(fo, "w") as fop:
        fop.write(txtLoci)
    return txtLoci

def convertFSTATtoIND(l, fo):
    """Write an ind.txt file from the lines of an FSTAT file.

    l  -- list of lines of the FSTAT file. The first line must hold four
          integers; the second is taken as the number of loci and the fourth
          as the number of digits per allele. Following lines with a single
          token (locus names) are skipped; the others are read as
          "<pop> <genotype> <genotype> ...".
    fo -- output file, overwritten.

    Each individual is written as "<pop> 0 <allele> <allele> ...", with the
    alleles converted to plain integers, below the header "# individuals S1".

    Exits the program with status 1 when the header does not hold four values.
    A data line with fewer genotypes than announced raises IndexError.
    """
    print("[INFO] The ind.txt header always refers to species S1. If the given genome is about another species, change the last argument in the header of the ind.txt file")
    headerInd = "# individuals S1"

    headerFields = l[0].split() if l else []
    if len(headerFields) != 4:
        print("[ERROR] Fstat header should contain 4 values, but the given file header has %d . Program can't continue." % len(headerFields))
        sys.exit(1)
    # All four fields are parsed so a non-numeric header is still rejected;
    # only the locus count and the allele width are needed afterwards.
    _, nloc, _, largAllele = [int(v) for v in headerFields]

    genomeLines = []
    for li in l[1:]:
        # split() already discards the line terminator; slicing the last
        # character off would corrupt a final line without a newline.
        lis = li.split()
        if len(lis) > 1:
            nstrands = len(lis[1]) // largAllele
            alleles = []
            for j in range(nloc):
                genotype = lis[j + 1]
                for i in range(nstrands):
                    alleles.append(str(int(genotype[i * largAllele:(i + 1) * largAllele])))
            genomeLines.append(" ".join([lis[0], "0 "]) + " ".join(alleles) + "\n")

    with open(fo, "w") as fop:
        fop.write(headerInd + "\n" + "".join(genomeLines))


def convertGENEPOPtoIND(l, fo):
    """Write an ind.txt file from the lines of a GENEPOP file.

    l  -- list of lines of the GENEPOP file. Everything before the first
          "Pop" line (any case) is ignored. Individual lines are read as
          "<label> , <genotype> <genotype> ..."; the comma may also be
          attached to the label.
    fo -- output file, overwritten.

    The population written for an individual is the label text before the
    first "_" with its first three characters (the "POP" prefix) removed.
    The width of the first genotype decides the layout: 2 or 3 characters is
    one allele (haploid), 4 or 6 characters is two alleles (diploid).

    Exits the program with status 1 when the genotype width is not recognised.
    """
    print("[INFO] The ind.txt header always refers to species S1. If the given genome is about another species, change the last argument in the header of the ind.txt file")
    headerInd = "# individuals S1"
    print("[INFO] To have a consistent repartition of individuals by pop between simulations, be sure that the individual label follows the format POP??_I???, (_I is optional)")

    isPop = False
    genomeLines = []
    for li in l:
        # Lines still carry their newline, so compare the stripped text.
        if li.strip().upper() == "POP":
            isPop = True
            continue
        if not isPop:
            continue

        if "," in li:
            # GENEPOP separates the individual name from the genotypes with
            # a comma, whether or not it is surrounded by spaces.
            name, _, genoText = li.partition(",")
            nameTokens = name.split()
            label = nameTokens[0] if nameTokens else ""
            genotypes = genoText.split()
        else:
            # No comma: keep the historical layout where the second token is
            # a separator and the genotypes start at the third one.
            lis = li.split()
            label = lis[0] if lis else ""
            genotypes = lis[2:]
        if not genotypes:
            continue

        width = len(genotypes[0])
        if width in [2, 3]:
            largAllele = width
            nstrands = 1
        elif width in [4, 6]:
            largAllele = width // 2
            nstrands = 2
        else:
            print("[ERROR] Allele format not recognized, only haploid and diploid genomes are accepted. Lenght of each allele value should be 2 or 3")
            sys.exit(1)

        alleles = []
        for genotype in genotypes:
            for i in range(nstrands):
                alleles.append(str(int(genotype[i * largAllele:(i + 1) * largAllele])))
        popLabel = label.split("_")[0][3:]
        genomeLines.append(" ".join([popLabel, "0 "]) + " ".join(alleles) + "\n")

    with open(fo, "w") as fop:
        fop.write(headerInd + "\n" + "".join(genomeLines))


def convertToIND(fi, fo):
    """Convert an FSTAT or GENEPOP file into an ind.txt file.

    fi -- input file; the format is auto-detected: a first line starting
          with a digit is treated as FSTAT, anything else as GENEPOP.
    fo -- output file handed to the format-specific converter.

    Exits the program with status 1 when fi cannot be opened or is empty.
    """
    try:
        fl = open(fi, "r")
    except IOError:
        print("[ERROR] File %s does not exist. Program can't proceed." % (fi,))
        sys.exit(1)
    with fl:
        l = fl.readlines()

    if not l:
        print("[ERROR] File %s is empty. Program can't proceed." % (fi,))
        sys.exit(1)

    # Write to the requested output path `fo`; the previous code used a
    # global only defined when the module runs as a script.
    if l[0].lstrip()[:1].isdigit():  # AUTODETECT Fstat format
        convertFSTATtoIND(l, fo)
    else:
        convertGENEPOPtoIND(l, fo)


def convertMTPtoFSTAT(fi, fo, T1location = ""):
    """Convert a genes_Sn_T.csv genotype file into FSTAT format.

    fi         -- input file; each non-blank line is read as
                  "<pop> <individual> <allele> <allele> ..." separated by
                  whitespace, with integer allele values.
    fo         -- output file, overwritten with the FSTAT text.
    T1location -- only used as a flag: any non-empty value makes the selected
                  loci be read through getSelectedLoci() with its default
                  path; the value itself is not used as a path.

    The header line holds the number of populations, the number of loci, the
    highest allele code and the number of digits per allele (ndigit). Each
    individual line starts with the 1-based rank of its population in order
    of first appearance.

    Returns the text written to fo. Exits the program with status 1 when fi
    cannot be opened.
    """
    #Example:
    # 60 20 6 3
    #loc_1_a
    #loc_2
    #...
    #1 004004 004005...
    #1 006004 004000...
    #...
    #2 001002 002001...
    #...
    try:
        fl = open(fi, "r")
    except IOError:
        print("[ERROR] File %s does not exist. Program can't proceed." % (fi,))
        sys.exit(1)
    with fl:
        # split() drops the line terminator itself, so the last record stays
        # intact even when the file does not end with a newline.
        rows = [li.split() for li in fl]

    if T1location != "":
        lselected = getSelectedLoci()
    else:
        lselected = []

    nloci = 0
    maxAllele = None  # highest allele code seen over the whole file
    popIndex = {}     # population label -> 1-based rank of first appearance
    indLines = []
    for lis in rows:
        if len(lis) < 2:
            continue  # blank or truncated line: nothing to convert
        alleles = lis[2:]
        nloci = len(alleles)
        lociLine = ""
        for loc, alll in enumerate(alleles):
            # Allele values are shifted by one (presumably so that allele 0
            # is not written as the FSTAT missing-data code — confirm).
            vallele = int(alll) + 1
            if maxAllele is None or vallele > maxAllele:
                maxAllele = vallele
            if loc % 2 == 0:
                lociLine += " "  # a locus is two consecutive allele columns
            # Pad to the width announced in the header.
            lociLine += str(vallele).zfill(ndigit)

        if lis[0] not in popIndex:
            popIndex[lis[0]] = len(popIndex) + 1
        # Use the population's own rank so that individuals of an already
        # seen population keep its number even when populations interleave.
        indLines.append(str(popIndex[lis[0]]) + "  " + lociLine + "\n")

    if maxAllele is None:
        maxAllele = 0  # no allele read at all (empty input)

    headerLine = '  '.join([str(len(popIndex)), str(nloci // 2), str(maxAllele), str(ndigit)])
    txtLoci = headerLine + "\n" + formatLoci(nloci, lselected) + "\n" + "".join(indLines) + "\n"

    with open(fo, "w") as fop:
        fop.write(txtLoci)
    return txtLoci

# Command line entry point:
#   <script> [input file] [output file] F|G|I ...
# F converts the input to FSTAT, G to GENEPOP, I builds an ind.txt file from
# an FSTAT or GENEPOP input. Several format letters may be given at once.
if __name__ == "__main__":
    sa = list(sys.argv)  # work on a copy so sys.argv itself is left untouched
    possibleFormats = {"F":"fstat", "G":"genepop", "I":"ind"}
    currentFormats = []
    # Sorted so the formats are always processed in the same order.
    for f in sorted(possibleFormats):
        if f in sa:
            sa.remove(f)
            currentFormats += [f]
    if currentFormats == []:
        print("[INFO] Add F, G or I as last argument to define the format (F:fstat, G:genepop, I:ind.txt generation)")
        sys.exit(0)

    if len(sa) > 1:
        fi = sa[1]
    else:
        fi = "../METAPOP/outputs/genotypes/genes_S1_1.csv"
        print("[INFO] Input file required as first argument. Run launched with input file = " + fi)

    # Output path explicitly given by the user, if any.
    requestedOut = sa[2] if len(sa) > 2 else ""

    for f in currentFormats:
        # `fout` stays a module-level name and is reset for every format, so
        # each format gets its own default file instead of reusing the name
        # computed for the first one.
        fout = requestedOut
        if fout == "":
            # splitext only removes a real extension, so paths such as
            # "../dir/file" are not cut at the dots of "..".
            fout = os.path.splitext(fi)[0] + "_" + possibleFormats[f] + "_out.txt"
            print("[INFO] No output file precised as second argument, default value =  %s" % (fout,))

        if f == "F":
            convertMTPtoFSTAT(fi, fout, "_")
            print("[SUCCESS] %s converted to Fstat format ( %s )" % (fi, fout))
        elif f == "G":
            convertMTPtoGENEPOP(fi, fout, T1location = "_")
            print("[SUCCESS] Conversion done for %s  into Genepop format ( %s )" % (fi, fout))
        elif f == "I":
            # DETECT IF INPUT IS GENEPOP OR FSTAT
            convertToIND(fi, fout)
            print("[SUCCESS] %s converted to ind format ( %s )" % (fi, fout))
        else:
            print("[ERROR] format %s not supported." % (f,))



