-- Functions to load user data
-- The path to each file should be listed in an index called user_data_index.txt
-- All files listed in this index are then loaded as user data
-- The program decides what to do with each file section depending on its header content
-- A header starts with a # followed by the datatype;
-- everything after that is interpreted as arguments


-- Tells whether a file can be opened for reading.
-- file : path of the file to probe
-- Returns true when io.open succeeds on that path, false otherwise.
-- The handle opened for the probe is closed before returning.
local function file_exists(file)
  local handle = io.open(file, "rb")
  if handle == nil then
    return false
  end
  handle:close()
  return true
end


-- Reads all lines of a file into a list.
-- file : path of the file to read
-- Returns a sequence of strings, one per line (without the trailing newline).
-- Returns an empty table if the file cannot be opened.
local function lines_from(file)
  -- A single open doubles as the existence check
  local handle = io.open(file, "r")
  if handle == nil then return {} end
  -- Must be local: a bare `lines` would create/overwrite a global variable
  local lines = {}
  for line in handle:lines() do
    lines[#lines + 1] = line
  end
  handle:close()
  return lines
end


-- Checks whether a value is blank.
-- x : any value; non-nil values are converted with tostring before testing
-- Returns true for nil, for the empty string and for whitespace-only text,
-- false otherwise. The result is always a real boolean.
local function isBlank(x)
  if x ~= nil then
    -- match yields the (possibly empty) matched text or nil: compare to get a boolean
    return string.match(tostring(x), "^%s*$") ~= nil
  end
  return true
end


-- Splits a string each time it encounters one or more occurrences of sep.
-- inputstr : string to split
-- sep      : separator characters; inserted as-is inside the pattern set
--            "[^...]", so a class such as "%s" is accepted.
--            Defaults to "%s" (whitespace) when nil.
-- Returns a sequence of the non-empty pieces (empty table if there are none).
local function mysplit(inputstr, sep)
    if sep == nil then sep = "%s" end
    local t = {}
    -- Append instead of counting: the former counter was an accidental
    -- global named `i`, overwritten on every call
    for str in string.gmatch(inputstr, "([^"..sep.."]+)") do
        t[#t + 1] = str
    end
    return t
end


-- Converts a line of whitespace-separated words into a table of numbers.
-- line : string to convert (nil or blank yields an empty table)
-- Returns a continuous table of numbers (no nils possible).
-- Words that tonumber cannot convert are skipped, each with a printed warning.
local function convert_to_numerics(line)
    local numbers = {}
    if isBlank(line) then return numbers end
    for _, word in ipairs(mysplit(line, "%s")) do -- split string at spaces
        local value = tonumber(word)
        if value ~= nil then
            numbers[#numbers + 1] = value
        else
            print("Warning : trying to convert non-numeric sring to numeric")
            print(" => Value ignored.")
        end
    end
    return numbers
end



-- Specific to our input format:
-- Splits a continuous table of lines into frames. A new frame starts at each
-- line containing a "#" sign; such a line is called a header, the rest is data.
-- In a header line, everything up to and including the first # is removed,
-- as well as the whitespace that follows it.
-- Data lines before the first header are ignored.
-- Leading/trailing whitespace is trimmed and blank/empty data lines are dropped.
-- A header may end up empty.
-- One header + its data lines form a frame; each frame is guaranteed to hold
-- at least one entry (the header, possibly "").
-- No other checks are performed on data lines or header content.
-- fileData : sequence of strings (raw lines)
-- Returns a sequence of frames, each a sequence { header, data1, data2, ... }.
local function file_split(fileData)
    local frames_list = {}
    -- Until the first header is met, lines land in this throw-away table
    local current = {}
    for i = 1, #fileData do
        -- Remove leading/trailing whitespace
        local line = fileData[i]:match("^%s*(.-)%s*$")
        -- Plain-text search for the first "#"
        local hash_pos = line:find("#", 1, true)
        if hash_pos == nil then
            -- Data line: after trimming, a blank line is exactly the empty string
            if line ~= "" then current[#current + 1] = line end
        else
            -- Header line: open a new frame; following data lines go into it
            current = {}
            frames_list[#frames_list + 1] = current
            -- Keep what follows the "#". The cut must start at hash_pos + 1:
            -- summing both values returned by find (2 * hash_pos) is only
            -- right when the "#" is the very first character.
            -- "^%s*(.*)$" always matches, so the header is never nil.
            current[1] = line:sub(hash_pos + 1):match("^%s*(.*)$")
        end
    end
    return frames_list
end


--------------------------------------------------------------------------------
-- A bunch of similar looking functions that convert a frame according to its type.
-- The conversion consists of two steps:
    -- Parsing the header to put each value in the rightly named field
        -- No check, only position based. Needs at least X fields
    -- Parsing data lines to convert them from strings to numeric tables
-- Failure returns nil
-- rTab.fields is used to classify all those traits in a tree-like way (in the order indicated)
-- Id is always the first classifier

-- Builds a formatted frame for flux data.
-- header    : header split into words; expected layout (position based):
--             DataType Species Propagule Time. Extra words are ignored.
-- fileLines : lines of the frame; entry 1 is the unsplit header, the rest is data
-- Returns a table with fields id, species, propagule, time (taken from the
-- header words), fields (classification order) and data (one numeric table
-- per data line), or nil when the header has fewer than 4 words.
local function load_flux_matrix(header, fileLines)
    if #header < 4 then return nil end
    -- Must be local: a bare `rTab` would create/overwrite a global variable
    local rTab = {
        fields = { "id", "species", "propagule", "time" },
        id = header[1],
        species = header[2],
        propagule = header[3],
        time = header[4],
        data = {},
    }

    -- Load data: entry 1 is the unsplit header, skip it
    for i = 2, #fileLines do
        rTab.data[#rTab.data + 1] = convert_to_numerics(fileLines[i])
    end
    return rTab
end

-- Builds a formatted frame for map-like data (same format for Zopt, E and selInt).
-- header    : header split into words, read by position: DataType Trait Time
--             NOTE(review): this layout was once described as
--             "DataType Species Trait Time"; the code takes the trait from
--             position 2 and the time from position 3 - confirm the intended layout.
-- fileLines : lines of the frame; entry 1 is the unsplit header, the rest is data
-- Returns a table with id, trait, time, fields and data (one numeric table
-- per data line), or nil when the header has fewer than 3 words.
local function load_map(header, fileLines)
    if #header < 3 then return nil end

    -- Convert data lines first; entry 1 is the unsplit header, jump over it
    local rows = {}
    for i = 2, #fileLines do
        rows[i - 1] = convert_to_numerics(fileLines[i])
    end

    return {
        fields = { "id", "trait", "time" },
        id = header[1],
        trait = header[2],
        time = header[3],
        data = rows,
    }
end

-- Builds a formatted frame for per-species map data.
-- header    : header split into words, read by position: DataType Species
-- fileLines : lines of the frame; entry 1 is the unsplit header, the rest is data
-- Returns a table with id, species, fields and data (one numeric table per
-- data line), or nil when the header has fewer than 2 words.
local function load_map_species(header, fileLines)
    if #header < 2 then return nil end
    local rTab = {
        fields = { "id", "species" },
        id = header[1],
        species = header[2],
        data = {},
    }

    -- Entry 1 is the unsplit header, so data starts at entry 2
    for i = 2, #fileLines do
        table.insert(rTab.data, convert_to_numerics(fileLines[i]))
    end
    return rTab
end

-- Builds a formatted frame for individuals.
-- header    : header split into words, read by position: DataType Species
-- fileLines : lines of the frame; entry 1 is the unsplit header, the rest is data
-- Returns a table with id, species, fields and data (one numeric table per
-- data line), or nil when the header has fewer than 2 words.
local function load_individuals(header, fileLines)
    if #header < 2 then return nil end

    local rTab = {}
    rTab.data = {}
    rTab.species = header[2]
    rTab.id = header[1]
    rTab.fields = { "id", "species" }

    -- Copy converted data lines; entry 1 (unsplit header) is skipped
    local next_row = 1
    for i = 2, #fileLines do
        rTab.data[next_row] = convert_to_numerics(fileLines[i])
        next_row = next_row + 1
    end
    return rTab
end

-- Builds a formatted frame for allelic effects.
-- header    : header split into words, read by position: DataType Trait Ltype
--             NOTE(review): this layout was once described as
--             "DataType Species Trait"; the code stores word 2 as trait and
--             word 3 as ltype - confirm the intended layout.
-- fileLines : lines of the frame; entry 1 is the unsplit header, the rest is data
-- Returns a table with id, trait, ltype, fields and data (one numeric table
-- per data line), or nil when the header has fewer than 3 words.
local function load_allelic_effects(header, fileLines)
    if #header < 3 then return nil end
    local id, trait, ltype = header[1], header[2], header[3]

    local data = {}
    -- Entry 1 is the unsplit header, jump over it
    for i = 2, #fileLines do
        data[#data + 1] = convert_to_numerics(fileLines[i])
    end

    local rTab = { fields = { "id", "trait", "ltype"} }
    rTab.id, rTab.trait, rTab.ltype = id, trait, ltype
    rTab.data = data
    return rTab
end
--------------------------------------------------------------------------------



-- Specific to our input format:
-- A frame generated by function file_split is analysed to create a formatted frame.
-- Two main operations are performed:
    -- Identify the type of data from the first word of the header
    -- Format the frame using the loader associated with that type
-- frame : sequence { header, data1, ... } of strings
-- Returns the formatted frame built by the loader, or nil on failure:
-- nil frame, fewer than two lines (1 header + 1 data), blank header,
-- unknown data type, or a loader that itself returns nil.
local function analyse_data(frame)
    -- Check if nil / at least two lines (1 header + 1 data)
    if frame == nil or #frame <= 1 then return nil end
    -- Get header and check if blank
    local header = frame[1]
    if isBlank(header) then return nil end
    -- Split at whitespace
    local words = mysplit(header, "%s")
    if words == nil or #words < 1 then return nil end

    -- Data type (first header word) -> loader
    local loaders = {
        flux_matrix = load_flux_matrix,
        flux_pattern = load_flux_matrix,
        zopt = load_map,
        e = load_map,
        selection_intensity = load_map,
        demography = load_map_species,
        individuals = load_individuals,
        allelic_effects = load_allelic_effects,
        -- ADD OTHER DATA TYPES HERE
    }
    local loader = loaders[words[1]]
    if loader == nil then return nil end

    -- Must be local: a bare `formated_frame` would create/overwrite a global
    local formated_frame = loader(words, frame)
    return formated_frame
end







-- Opens a user file and converts its content to formatted frames.
-- fileName : path of the user file
-- Returns a sequence of formatted frames. Frames that cannot be formatted
-- are reported with a printed message and left out.
-- Returns an empty table (after printing a warning) when the file is missing,
-- has no line, or contains no frame.
local function load_user_data(fileName)
    if not file_exists(fileName) then
        print("Warning : could not find the following file "..fileName)
        return {}
    end

    local raw_lines = lines_from(fileName)
    if #raw_lines == 0 then
        print("Warning : empty file at "..fileName)
        return {}
    end

    local frames = file_split(raw_lines)
    if #frames == 0 then
        print("Warning : no valid frame at "..fileName)
        return {}
    end

    local formated_frames = {}
    for _, frame in ipairs(frames) do
        local formatted = analyse_data(frame)
        if formatted == nil then
            -- Report the unreadable frame by its header line and skip it
            print("Could not read a chunk of data")
            print("First line : ", frame[1])
        else
            formated_frames[#formated_frames + 1] = formatted
        end
    end
    return formated_frames
end




-- Inserts something in a table at the given path.
-- Ex : => target[path1][path2][path3] = data
-- target : table modified in place
-- path   : non-empty sequence of keys; missing intermediate tables are created.
--          Keys are concatenated into the warning text, so they are expected
--          to be strings or numbers.
-- data   : value stored at the end of the path
-- Returns nothing. When a value already exists at that path, a warning is
-- written to standard output and the value is overwritten anyway.
local function insert_in_path(target, path, data)
    -- Must be local: a bare `temp` would create/overwrite a global variable
    local node = target
    -- Walk (and create when needed) the first n-1 steps of the path
    for i = 1, #path - 1 do
        local name = path[i]
        if node[name] == nil then node[name] = {} end
        node = node[name]
    end
    local last = path[#path]
    -- Warning if already existing
    if node[last] ~= nil then
        io.write("Warning, overlapping frames \n\t".. last .." in: ")
        for k = 1, #path do io.write(path[k].." ") end
        io.write("\n")
    end
    -- Write data anyway
    node[last] = data
end


-- Arranges formatted frames in a tree-like table.
-- formated_frames : sequence of frames, each carrying a `fields` list,
--                   one entry per listed field name, and a `data` entry
-- For every frame, the values of the fields named in frame.fields (in that
-- order) form the path under which frame.data is stored.
-- Returns the resulting tree.
local function sort_formated_frames(formated_frames)
    local tree = {}
    for i = 1, #formated_frames do
        local frame = formated_frames[i]
        local field_names = frame.fields
        -- Path = value of each classifying field, in order
        local path = {}
        for j = 1, #field_names do
            path[#path + 1] = frame[field_names[j]]
        end
        insert_in_path(tree, path, frame.data)
    end
    return tree
end

-- Executes the Lua code of a file with `env` as its global environment
-- and returns that environment.
-- scriptfile : path of the Lua source file (loaded in text mode 't')
-- env        : table used as the chunk's environment; functions the script
--              may call must be listed in it (an empty table also works)
-- Returns env, holding whatever globals the script defined.
-- Raises an error if the file cannot be loaded; errors raised while running
-- the chunk are not caught.
local function load_file_globals(scriptfile, env)
    local chunk, load_err = loadfile(scriptfile, 't', env)
    assert(chunk, load_err)
    chunk() -- The chunk must run once for its globals to be defined
    return env
end


-- Loads all files of the fileNames table as user data.
-- One file may contain more than one set of data (no restriction).
-- fileNames : sequence of file paths
-- Returns the formatted frames of all files, sorted in a tree-like table.
-- NOTE(review): append_table is not defined in this file; it presumably
-- appends the entries of its second argument to its first - confirm.
local function load_data(fileNames)
    local all_frames = {}
    for i = 1, #fileNames do
        local file_frames = load_user_data(fileNames[i])
        append_table(all_frames, file_frames)
    end
    -- Sort in a tree-like table
    return sort_formated_frames(all_frames)
end


-- Loads traits from a list of file names (only one trait per file).
-- Each file is executed in its own empty environment; the globals it
-- defines make up the trait.
-- fileNames : sequence of file paths
-- Returns a sequence with one trait table per file, in file order.
-- Raises an error when a trait has no string `id`; the `species` field is
-- checked through assert_type.
-- NOTE(review): assert_type is not defined in this file; presumably it
-- asserts that its first argument has the given type - confirm.
-- (A check on temp.fileType == "trait" used to exist and is disabled.)
local function load_traits(fileNames)
    local usrTraits = {}
    for i = 1, #fileNames do
        local fileName = fileNames[i]
        -- Fresh empty environment for every file
        local trait = load_file_globals(fileName, {})
        assert(type(trait.id) == 'string',
            "File "..fileName.. " does not provides field id")
        assert_type(trait.species, 'string',
            "File "..fileName.. " does not provides a field species")
        table.insert(usrTraits, trait)
    end
    return usrTraits
end


-- Loads species from a list of file names (only one species per file).
-- Similar to load_traits; kept as a separate function to allow more
-- specific checks.
-- Each file is executed in its own empty environment; the globals it
-- defines make up the species.
-- fileNames : sequence of file paths
-- Returns a sequence with one species table per file, in file order.
-- Raises an error when a species has no string `id` (needed for matching).
-- (A check on temp.fileType == "species" used to exist and is disabled.)
local function load_species(fileNames)
    local usrSpecies = {}
    for i = 1, #fileNames do
        local fileName = fileNames[i]
        -- Fresh empty environment for every file
        local species = load_file_globals(fileName, {})
        assert(type(species.id) == 'string',
            "File"..fileName.. " does not provides a string id field")
        table.insert(usrSpecies, species)
    end
    return usrSpecies
end


-- Loads an index file (list of file names, one name per line).
-- Basic cleaning is applied to every line: anything from the first "--"
-- (comment marker) onwards is dropped, surrounding whitespace is trimmed
-- and blank lines are skipped.
-- fileName : name of the index file
-- myPath   : path prefix; prepended to the index name and to every entry
-- Returns the cleaned entries, in file order, each prefixed with myPath.
-- A missing index prints a warning and yields an empty table.
local function load_index_file(fileName, myPath)
    local fullName = myPath..fileName
    -- Checks index existence
    if not file_exists(fullName) then
        print("Warning: could not find the following file "..fullName)
    end
    -- Loads all raw lines from the index (empty table if the file is missing)
    local fileLines = lines_from(fullName)

    local lines = {}
    -- Numeric loop: keeps the entries in file order (pairs gives no such guarantee)
    for i = 1, #fileLines do
        local line = fileLines[i]
        -- Removes comments. Plain-text search for the two-character marker:
        -- the pattern "[--]" is a character set matching a single "-",
        -- which truncated every file name containing a hyphen.
        local comment_start = line:find("--", 1, true)
        if comment_start ~= nil then line = line:sub(1, comment_start - 1) end
        -- Removes white spaces at head/tail (also those left before the comment)
        line = line:match("^%s*(.-)%s*$")
        -- Keeps non blank entries (after trimming, blank means empty), adds path
        if line ~= "" then lines[#lines + 1] = myPath..line end
    end

    return lines
end


--------------------------------------------------------------------------------
--------------------------------- Module interface -----------------------------
--------------------------------------------------------------------------------
-- Public interface of the module: only the functions listed here are exported
local userData = {
    load_data = load_data,
    load_traits = load_traits,
    load_species = load_species,
    load_index_file = load_index_file,
    load_file_globals = load_file_globals,
}
return userData
