-----------------------------------------------------------------------------
-- |
-- Module      :  FileFormat.TSPLIB
-- Copyright   :  (c) Richard Senington 2011
-- License     :  GPL-style
-- 
-- Maintainer  :  Richard Senington <sc06r2s@leeds.ac.uk>
-- Stability   :  provisional
-- Portability :  portable
-- 
-- Partial loading routines for the TSPLIB file format.
-- The format itself has a large number of variations, 
-- and this has only been designed to load the @tsp@ and 
-- @atsp@ variants. It has been tried on all the files
-- from the repository in these classes and it parses
-- them at least. 
--
-- Relies upon the @CombinatorialOptimisation.TSP@ library.
--
-- Currently this does not use the Haskell parsing 
-- libraries, nor ByteString, just some custom built
-- routines.
----------------------------------------------------------------------------- 

module FileFormat.TSPLIB(
  loadTSPFile
  )where

import CombinatorialOptimisation.TSP
import Data.Maybe
import qualified Data.Map as M
import qualified Data.Array as A
import Data.List

-- going to use a fixed point internal representation, only doing addition afterall
-- not happy about how I have bolted this in.

import CombinatorialOptimisation.TSP.FixedPoint

{- |  Simple 2d Euclidian. -}

euclidianDistance :: (Double,Double)->(Double,Double)->Double
euclidianDistance (a,b) (c,d) = sqrt ((a-c)*(a-c)+(b-d)*(b-d))

{- |  Always rounded up Euclidian. -}

euclidianDistanceCeil :: (Double,Double)->(Double,Double)->Double
euclidianDistanceCeil a b =  fromIntegral . ceiling     $  euclidianDistance a b

{- |  For only two types of file. Basically a form of rounded Euclidian. -}

pseudoEuclideanDistance :: (Double,Double)->(Double,Double)->Double
pseudoEuclideanDistance (a,b) (c,d) = let e = sqrt (((a-c)*(a-c)+(b-d)*(b-d))  /10 )
                                          f = fromIntegral (floor e)
                                      in if f<e then f+1 else f

{- |  Distance between two points on the Earth's surface. This is based upon code that 
      another developer wrote, based on the code proposed by TSPLIB itself. I am 
      not sure if this is for an earth shape, or just a sphere. I suspect earth shape. -}

geoDistance :: (Double,Double)->(Double,Double)->Double
geoDistance (x1,y1) (x2,y2) = encodeFloat (floor dij) 0
  where
    q1 = cos (lon1  - lon2)
    q2 = cos (lat1 - lat2)
    q3 = cos (lat1 + lat2)
    lon1 = degConvert y1
    lon2 = degConvert y2
    lat1 = degConvert x1
    lat2 = degConvert x2
    
    dij =  6378.388 * (acos( 0.5*((1.0+q1)*q2 - ((1.0-q1)*q3) )) ) + 1.0

    degConvert m = let deg = encodeFloat (floor m) 0
                       miN = m - deg
                   in 3.141592 * (deg + (5.0 * miN/3.0))/180.0

{- |  A data type for representing bits of the specification section of a file.
      Assumed to be used later as a list of Specifications. -}

data Specification = IGNORE String | USEFUL String String | ENDSPEC String | FAIL String deriving Show

{- |  Test for filtering specification lines, so we only get data we might want to use. Could be
      got rid of as most of the time I do dictionary lookups on the output of the specification
      reading routines. -}

isUsefulSpec (USEFUL _ _) = True
isUsefulSpec _ = False

{- |  Helper routine for @readSpecification@. Reads a single line (assumes 
      that input is already line by line). -}

readSpecificationLine :: String->Specification
readSpecificationLine s 
  | likeString "NAME" s = IGNORE s
  | likeString "TYPE" s = USEFUL "Type" (trim s)
  | likeString "NODE_COORD_SECTION" s = ENDSPEC "NODE COORD"
  | likeString "EDGE_WEIGHT_SECTION" s = ENDSPEC "EDGE WEIGHT"
  | likeString "COMMENT" s = IGNORE s
  | likeString "DIMENSION" s = USEFUL "Dimension" (trim s)
  | likeString "DISPLAY_DATA_TYPE" s = IGNORE s
  | likeString "EDGE_WEIGHT_TYPE" s = USEFUL "EdgeWeightType" (trim s)
  | likeString "EDGE_WEIGHT_FORMAT" s = USEFUL "EdgeWeightFormat" (trim s)
  | otherwise = FAIL $ "unrecognised field in specification : "++s
  where
    likeString q s = take (length q) s == q
    trim s = let s' = (dropWhile (==' ')) . (drop 1) . (dropWhile (/=':')) $ s 
             in reverse . (dropWhile (==' ')) . reverse $ s'

{- |  Helper routine for @loadTSPFile@. Reads the specification section of a file. -}

readSpecification :: [String]->([Specification],[String])
readSpecification [] = ([FAIL "seem to have run out of data, without ending the specification phase"],[]) 
readSpecification (s:ss) = let p = readSpecificationLine s
                               (rs,es) = readSpecification ss
                           in case p of 
                             ENDSPEC k  -> ([USEFUL "DATA PART TYPE" k],ss)
                             IGNORE _   -> (p:rs,es)
                             FAIL _     -> (p:rs,es)
                             USEFUL _ _ -> (p:rs,es)
 
{- |  Loads a TSPLIB style file. The first parameter is the internal 
      storage type from @CombinatorialProblems.TSP@. It allows for 
      full matrix, triangular matrix and full recalculation. If the 
      requested internal storage cannot be used with the file, this 
      will throw an error (e.g. recomputation where you are given a 
      full matrix in the file).

      The second parameter is the file path. -}

loadTSPFile :: InternalStorage->FilePath->IO TSPProblem
loadTSPFile storageType fName 
  = do rawContents<-readFile fName
       let (spec,remainder) = readSpecification $ lines rawContents
                       
       -- mapM_ print spec
       -- print ""
  
       let specList = map (\(USEFUL a b)->(a,b)) . filter isUsefulSpec $ spec
       -- print specList
       let dataPart = fromJust $ lookup "DATA PART TYPE" specList
       let numNodes = read $ fromJust $ lookup "Dimension" specList
       case dataPart of 
         "NODE COORD"  -> return $ loadFromNodePositions storageType numNodes specList (takeWhile (/="EOF") remainder)
         "EDGE WEIGHT" -> return $ loadFromMatrix storageType numNodes specList (takeWhile (/="EOF") remainder)
         _             -> error "Unsupported data section"

{- |  Helper routine for @loadTSPFile@. This assumes the input is just points and the 
      distances must be calculated. -}

loadFromNodePositions :: InternalStorage->Int->[(String,String)]->[String]->TSPProblem
loadFromNodePositions storageType numCities spec d 
  = let d' = map ((\[a,b]->(a,b)) . (map readF) . (drop 1) . words) d
        posArr = A.listArray (0 , numCities-1) d'

        cities = [0 ..(numCities-1)]
        
        explicit = A.listArray (0,numCities*numCities-1)  [realToFrac $ distFunc (posArr A.! a) (posArr A.! b) | a<-cities,b<-cities]
        triangular = A.listArray (0,sum [0..numCities])  [realToFrac $ distFunc (posArr A.! a) (posArr A.! b) | a<-cities,b<-[0..a]]
       
        p = case storageType of
              ExplicitMatrix -> \x y->explicit A.! (x * numCities + y)
              TriangularMatrix -> (\x y->let x' = min x y; y' = max x y in triangular A.! (div (y'*y'+y') 2 + x'))
              Recomputation -> \a b->realToFrac $ distFunc (posArr A.! a) (posArr A.! b)
    in TSPProblem 0 M.empty p numCities M.empty M.empty
  where
    readF = read :: (String->Double)
    distanceFunctionSpec = fromJust $ lookup "EdgeWeightType" spec
    distFunc = case distanceFunctionSpec of
       "GEO" -> geoDistance
       "EUC_2D" -> euclidianDistance
       "ATT"    -> pseudoEuclideanDistance
       "CEIL_2D"-> euclidianDistanceCeil
       _        -> error $ "unsupported distance function : "++distanceFunctionSpec
 
{- |  Helper routine for @loadTSPFile@. This assumes the input data is a matrix of some form, 
      and loads it. -}

loadFromMatrix  :: InternalStorage->Int->[(String,String)]->[String]->TSPProblem
loadFromMatrix Recomputation _ _ _ = error "Cannot load matrix and store in recomputation form"
loadFromMatrix storageType numCities spec d
  | distanceFunctionSpec /= "EXPLICIT" = error "loading from non explicit matrix? (does not make sense)"
  | storageType == TriangularMatrix = TSPProblem 0 M.empty (\x y->let x' = min x y; y' = max x y in triangularArray A.! (div (y'*y'+y') 2 + x')) numCities M.empty M.empty
  | storageType == ExplicitMatrix = TSPProblem 0 M.empty (\x y->explicitArray A.! (y * numCities + x)) numCities M.empty M.empty
  where
    readF = realToFrac . (read :: (String->Double))
    distanceFunctionSpec = fromJust $ lookup "EdgeWeightType" spec
    inputNumberSequence = concatMap ((map readF) . words) d -- not entirely trusting line breaks in file format, so turning into list of numbers, and putting in coords later
    cities = [0 ..(numCities-1)]

    blankMatrix = foldl' (\m d->M.insert (d,d) 0 m) M.empty cities
    fillPair m ((a,b),c) = M.insert (b,a) c $ (M.insert (a,b) c m)
    makeFromTri = foldl' fillPair blankMatrix

    loadedMap = case fromJust $ lookup "EdgeWeightFormat" spec of
           "FULL_MATRIX" -> foldl' (\m (d,c)->M.insert d c m) M.empty (zip [(a,b) | a<-cities,b<-cities] inputNumberSequence)
           "UPPER_ROW"      -> makeFromTri $ zip [(a,b) | a<-cities,b<-[a..(numCities - 1)],a/=b] inputNumberSequence
           "LOWER_ROW"      -> makeFromTri $ zip [(a,b) | a<-cities,b<-[0..a],a/=b] inputNumberSequence
           "UPPER_DIAG_ROW" -> makeFromTri $ zip [(a,b) | a<-cities,b<-[a..(numCities - 1)]] inputNumberSequence
           "LOWER_DIAG_ROW" -> makeFromTri $ zip [(a,b) | a<-cities,b<-[0..a]] inputNumberSequence

           "UPPER_COL"      -> makeFromTri $ zip [(b,a) | a<-cities,b<-[a..(numCities - 1)],a/=b] inputNumberSequence
           "LOWER_COL"      -> makeFromTri $ zip [(b,a) | a<-cities,b<-[0..a],a/=b] inputNumberSequence
           "UPPER_DIAG_COL" -> makeFromTri $ zip [(b,a) | a<-cities,b<-[a..(numCities - 1)]] inputNumberSequence
           "LOWER_DIAG_COL" -> makeFromTri $ zip [(b,a) | a<-cities,b<-[0..a]] inputNumberSequence
 
    explicitArray = A.listArray (0,numCities*numCities-1) [loadedMap M.! (a,b) | a<-cities,b<-cities]
    triangularArray = A.listArray (0,sum [0..numCities])  [loadedMap M.! (a,b) | a<-cities,b<-[0..a]]