CombinatorialOptimisation/TSP.hs

-----------------------------------------------------------------------------
-- |
-- Module      :  CombinatorialOptimisation.TSP
-- Copyright   :  (c) Richard Senington 2011
-- License     :  GPL-style
-- 
-- Maintainer  :  Richard Senington <sc06r2s@leeds.ac.uk>
-- Stability   :  provisional
-- Portability :  portable
-- 
-- A library for the representation and manipulation of travelling salesperson
-- problems.
-- The approach taken is the creation of a complex data structure called 
-- TSPProblem which contains both the problem, the current solution and 
-- the current value of the route.
-- The route is stored as a dictionary (@Data.Map@) of vertex indexes
-- to a pair of values, the previous vertex and the next vertex in the
-- sequence. This is to facilitate changing the route quickly, and
-- avoid searching for data in lists.
--
-- The data structure also contains two additional fields, the 
-- @routeElementToIndex@ and @indexToRouteElement@ components.
-- These exist to allow manipulation either by the vertex number
-- or the position in the current solution. 
-- Solutions are hamiltonian cycles.
-- For ease of reasoning it is recommended that users do not 
-- attempt to move vertex 0, or index 0, so that solutions
-- are cycles from 0 to 0. I may change this in the future to 
-- lock this down a bit. In the meantime, there is no
-- actual problem with making these changes, however 
-- later manipulations may not match up clearly with 
-- the way the show routines work.
--
-- Currently only two functions are provided for manipulating routes,
-- either by position in the sequence (@exchangeCitiesOnIndex@) or 
-- by vertex name (@exchangeCities@).
--
-- I am not sure how this will clearly support meta-heuristics that
-- work by deleting edges and recombining subsequences. However 
-- since I am storing association lists I think it should be possible 
-- to make this work, I will worry about it later.
----------------------------------------------------------------------------- 

module CombinatorialOptimisation.TSP(
  TSPProblem(TSPProblem,currentPrice,routeMap,edgePrices,numCities,routeElementToIndex,indexToRouteElement),
  InternalStorage(ExplicitMatrix,TriangularMatrix,Recomputation),
  showEdgeWeights,
  exchangeCities,
  exchangeCitiesOnIndex,
  evaluateRouteNaive,
  randomiseRoute,
  setRoute,
  makeASymmetricTSPMap,
  makeSymmetricTSPMap,
  makeEuclideanTSPMap
)where

import qualified Data.Map as M
import qualified Data.Array as A
import System.Random
import Data.List

{- |  The data type for carrying the combination problem and solution to 
      the TSP. The route is stored as a dictionary of associations 
      from vertex name to a pair of values, the name of the preceding 
      vertex and the next vertex. This forms an infinite loop, so 
      use carefully.

      The @routeElementToIndex@\/@indexToRouteElement@ pair store 
      fixed indexes to the cities. This is intended to allow 
      a dumb heuristic to decide to switch elements 0 and 2, 
      knowing they must be separated by 1 element, rather than
      vertices 0 and 2, which may be next to each other, or 
      very different parts of the cycle.
-}

data TSPProblem = TSPProblem { currentPrice :: Float,
                               routeMap :: M.Map Int (Int,Int),
                               edgePrices :: (Int->Int->Float),
                               numCities :: Int,
                               routeElementToIndex :: M.Map Int Int,
                               indexToRouteElement :: M.Map Int Int
                             }

{- |  There are three possible internal storage forms. A full explicit matrix, an upper triangular matrix or recomputation 
      from data points. The advantage of full explicit is speed, but it takes more memory. It is also the only option for 
      asymmetric TSP problems. The triangular matrix is also fast, but can only be used in symmetric problems, and also 
      still requires quite a bit of memory. Recomputation is the last option, it is slow because it is no longer a lookup
      table, but will take much less room. Can only be used with problems where the distance between two points can be
      calculated. Currently I am only supporting symmetric TSPs for this.
-}

data InternalStorage = ExplicitMatrix | TriangularMatrix | Recomputation deriving (Show,Eq) -- just in case I need these

instance Show TSPProblem where
  show t = concat ["TSPProblem of ",show . numCities $ t,
                   " cities\n    Current Solution ",show r,
                   "\n    Costing ",show . currentPrice $ t,"\n"]
    where
      rm = snd . ((M.!) (routeMap t))
      r = 0:(takeWhile (\x->x/=0) $ iterate rm (rm 0))++[0]

{- |  Converts the lookup table of a problem into a comma and newline delimited
      string. This should facilitate copying into spreadsheets for checking the 
      problem being used and validating solutions by hand. -}

showEdgeWeights :: TSPProblem->String
showEdgeWeights t = headerRow ++ concatMap makeRow nc
  where
    ep = edgePrices t
    nc = [0 .. numCities t-1]
    headerRow = ',': concat (intersperse "," $ map show [0..numCities t-1]) ++ "\n"
    makeRow i = show i ++ "," ++ concat (intersperse "," [ show (ep i' i) |  i'<-nc]) ++"\n"

{- |  Will perform a switch of 2 cities in the path. This is by city name, not current index
      in the path. It looks up the current indexes by city name and passes the work off to 
      @exchangeCitiesOnIndex@.  -}

exchangeCities :: Int->Int->TSPProblem->TSPProblem
exchangeCities a b t = exchangeCitiesOnIndex (min i1 i2) (max i1 i2) t 
  where
    i1 = routeElementToIndex t M.! a
    i2 = routeElementToIndex t M.! b
 
{- |  Performs the bulk of the work for exchanging elements of the cycle.
      It assumes that the order of the indexes is increasing (e.g. 0 2 not 2 0).
      While changing the order it will also calculate the change in value of the 
      route and update this. This is performed fairly efficiently by finding the 
      edges being removed, and the edges being created and adding the difference 
      between the two to the current price. -}

exchangeCitiesOnIndex :: Int->Int->TSPProblem->TSPProblem
exchangeCitiesOnIndex i1 i2 t 
  | d == 0 = t
  | d == 1 = t{routeMap=rAdj,currentPrice=currentPrice t + priceChangeAdj,routeElementToIndex=t2',indexToRouteElement=t1'}
  | otherwise = t{routeMap=r',currentPrice=currentPrice t + priceChange,routeElementToIndex=t2',indexToRouteElement=t1'}
  where 
    d = abs (i1 - i2)

    -- basic setup
    r = (routeMap t)
    a = indexToRouteElement t M.! i1
    b = indexToRouteElement t M.! i2
    p = edgePrices t
    ((a1,a2),(b1,b2)) = (r M.! a,r M.! b)

    -- usual code
    priceChange = sum [p a1 b,p b a2,p b1 a,p a b2] - sum [p a a2,p b b2,p a1 a,p b1 b]
    r' = foldl' (\m (k,f) -> M.adjust f k m) r [(a,\_->(b1,b2)),(b,\_->(a1,a2)),(a1,\(x,y)->(x,b)),(a2,\(x,y)->(b,y)),(b1,\(x,y)->(x,a)),(b2,\(x,y)->(a,y))]

    -- index exchange
    t1 = indexToRouteElement t
    t2 = routeElementToIndex t
    t2' = M.insert b i1 (M.insert a i2 t2) 
    t1' = M.insert i1 b (M.insert i2 a t1) 
    
    -- adjacent exchange, special case
    priceChangeAdj = sum [p a1 b,p b a,p a b2] - sum [p a1 a,p a b,p b b2]
    rAdj = foldl' (\m (k,f) -> M.adjust f k m) r [(a1,\(x,y)->(x,b)),(b2,\(x,y)->(a,y)),(a,\_->(b,b2)),(b,\_->(a1,a))]

{- |  A brute force recalculation of the current length of the path. Use sparingly.-}

evaluateRouteNaive :: TSPProblem->TSPProblem
evaluateRouteNaive t = t{currentPrice=evalRoute 0}
  where
    ep = edgePrices t
    rm = snd . ((M.!) (routeMap t))
    evalRoute x = let n = rm x
                  in if n==0 then ep x n 
                             else ep x n + evalRoute n

{- |  Take a path through the system and a problem, insert the path into the system, 
      calculating distances and setting up appropriate look up tables. It does not
      validate the list in terms of going through all the cities, or going through 
      a city more than once (though this is likely to break other parts of the system 
      very very fast). It does organise the list so that the starting node is vertex 0. 

      Uses the @evaluateRouteNaive@ to calculate the length of the path via a brute
      force method. This is not expected to be used frequently. -}

setRoute :: [Int]->TSPProblem->TSPProblem
setRoute path t = evaluateRouteNaive t{routeMap=newRoute,indexToRouteElement=in1,routeElementToIndex=in2} 
  where
    l = dropWhile (/=0) $ cycle path 
    l' = tail l
    l'' = tail l'
    (k,k':_) = span (\(_,x,_)->x/=0) $ zip3 l l' l''
    newRoute = foldl' (\m (a,b,c) -> M.insert b (a,c) m) M.empty (k':k)
    in1 = M.fromList $ zip [0..] (take (numCities t) l)
    in2 = M.fromList . (map swap) . M.assocs $ in1
    swap (a,b)  = (b,a)

{- |  Shuffles a simple list of cities and then passes off the work to setRoute. -}

randomiseRoute :: RandomGen g=>g->TSPProblem->TSPProblem
randomiseRoute g t = setRoute (0:map snd (sort (zip (randoms g :: [Float]) [1 .. numCities t -1]))) t  

{- |  Construct a TSPProblem instance for an Asymmetric TSP. That is, the distance
      from A-B is the not necessarily the same as B-A. The actual route will 
      not be set up initially, the dictionaries will be empty. This could be 
      used directly for a global search system (branch and bound), or use in 
      conjunction with @setRoute@ or @randomiseRoute@ to initialise for local search. 
      Internal data structure is always fully explicit matrix.-}

makeASymmetricTSPMap :: RandomGen g=>(Float,Float)->Int->g->TSPProblem
makeASymmetricTSPMap distanceLimits numCities g 
  = let cities = [0 ..(numCities-1)]
        cityCoords = [(a,b) | a<-cities,b<-cities,a/=b]
        matrix = M.fromList $ zip cityCoords (randomRs distanceLimits g)
        -- p' = (\x y->M.findWithDefault 0 (x,y) matrix)
        explicit = A.listArray (0,numCities*numCities-1)  [M.findWithDefault 0 (a,b) matrix | a<-cities,b<-cities]
    in TSPProblem 0 M.empty (\x y->explicit A.! (x * numCities + y)) numCities M.empty M.empty
    -- TSPProblem 0 M.empty p numCities M.empty M.empty

{- |  Construct a TSPProblem instance for a Symmetric TSP. That is, the distance
      from A-B is the same as B-A. The actual route will not be set up initially,
      the dictionaries will be empty. This could be used directly for a global 
      search system (branch and bound), or use in conjunction with @setRoute@ or 
      @randomiseRoute@ to initialise for local search. Should be noted that this
      does not create locations and calculate distances, but rather randomly 
      assigns distances to each edge, making them symmetric. -}

makeSymmetricTSPMap :: RandomGen g=>InternalStorage->(Float,Float)->Int->g->TSPProblem
makeSymmetricTSPMap Recomputation _ _ _ = error "Cannot support recomputation, please use alternative storage, or makeEuclideanTSPMap"
makeSymmetricTSPMap storageType distanceLimits numCities g 
  = let cities = [0 ..(numCities-1)]
        cityCoords = [(a,b) | a<-cities,b<-take (a+1) cities,a/=b ]
        f e ((a,b),c) = M.insert (b,a) c (M.insert (a,b) c e)
        matrix = foldl f M.empty (zip cityCoords (randomRs distanceLimits g))
        explicit = A.listArray (0,numCities*numCities-1)  [M.findWithDefault 0 (a,b) matrix | a<-cities,b<-cities]
        triangular = A.listArray (0,sum [0..numCities])  [M.findWithDefault 0 (a,b) matrix | a<-cities,b<-[0..a]]
        p = if storageType == ExplicitMatrix then (\x y->explicit A.! (x * numCities + y))
                                             else (\x y->let x' = min x y; y' = max x y in triangular A.! (div (y'*y'+y') 2 + x'))
    in TSPProblem 0 M.empty p numCities M.empty M.empty

{- |  Construct a TSPProblem instance for a Symmetric TSP. The route will not be
      initially set up, the dictionaries will be empty. This does create the 
      vertices of the graph as points in a 2d space, and the lengths of edges 
      are calculated, so this supports all internal storage types. 
-}

makeEuclideanTSPMap :: RandomGen g=>InternalStorage->(Float,Float)->(Float,Float)->Int->g->TSPProblem
makeEuclideanTSPMap storageType xRange yRange numCities g 
  = let cities = [0 ..(numCities-1)]
        (genA,genB) = split g
        positions = take numCities $ zip (randomRs xRange genA) (randomRs yRange genB)
        posArr = A.listArray (0 , numCities-1) positions

        explicit = A.listArray (0,numCities*numCities-1)  [euclidianDistance (posArr A.! a) (posArr A.! b) | a<-cities,b<-cities]
        triangular = A.listArray (0,sum [0..numCities])  [euclidianDistance (posArr A.! a) (posArr A.! b) | a<-cities,b<-[0..a]]

        p = case storageType of
              ExplicitMatrix -> \x y->explicit A.! (x * numCities + y)
              TriangularMatrix -> (\x y->let x' = min x y; y' = max x y in triangular A.! (div (y'*y'+y') 2 + x'))
              Recomputation -> \a b->if a == b then 0 else euclidianDistance (posArr A.! a) (posArr A.! b)
    in TSPProblem 0 M.empty p numCities M.empty M.empty
  where
    euclidianDistance :: (Float,Float)->(Float,Float)->Float
    euclidianDistance (a,b) (c,d) = sqrt ((a-c)*(a-c)+(b-d)*(b-d))