-- | A structure to allow fast lookup of objects whose sequence
--   location lines up with a give position.
module Bio.Location.LocMap (LocMap, mkLocMap, defaultZonesize, fromList
                           , lookupWithin, lookupOverlaps
                           , delete, deleteBy, insert
                           , checkInvariants
                           )
    where 

import qualified Data.IntMap as IM
import qualified Data.IntSet as IS
import Data.List hiding (insert, delete, deleteBy)
import Data.Maybe
import Data.Monoid

import qualified Bio.Location.Location as Loc
import qualified Bio.Location.Position as Pos
import Bio.Sequence.SeqData (Offset)

defaultZonesize :: Offset
defaultZonesize = 65536

-- | Collection mapping a collection of 'Loc' locations, possibly
--   overlapping, binned for efficient lookup by position.
data LocMap a = LocMap !Offset !(IM.IntMap (Loc.Loc, a)) !(IM.IntMap IS.IntSet)

instance Monoid (LocMap a) where
    mempty = mkLocMap defaultZonesize
    mappend lm0 (LocMap _ keyToLoc1 _) = foldl' (\lm (l,x) -> insert l x lm) lm0 $ IM.elems keyToLoc1

-- | Create an empty 'LocMap' with a specified position bin size
mkLocMap :: Offset -> LocMap a
mkLocMap zonesize = LocMap zonesize IM.empty IM.empty

-- | Create a 'LocMap' from an associated list.
fromList :: Offset -> [(Loc.Loc, a)] -> LocMap a
fromList zonesize = foldl' (\lm0 (l,x) -> insert l x lm0) (mkLocMap zonesize)

-- | Add an object with an associated 'Loc' sequence region
insert :: Loc.Loc -> a -> LocMap a -> LocMap a
insert l x (LocMap zonesize keyToLoc zoneKeys) 
    = let !key = case IM.maxViewWithKey keyToLoc of
                   Just ((k, _), _) -> k + 1
                   Nothing -> 1
          !newKeyToLoc = IM.insertWithKey duplicateError key (l, x) keyToLoc
          !newZoneKeys = foldl' (insertIntoZone key) zoneKeys $ locZones zonesize l
      in LocMap zonesize newKeyToLoc newZoneKeys
    where insertIntoZone key currZoneKeys z = case IM.lookup z currZoneKeys of
                                                Nothing -> IM.insert z (IS.singleton key) currZoneKeys
                                                Just currZoneKeySet -> IM.insert z (IS.insert key currZoneKeySet) currZoneKeys
          duplicateError k (l1, _) (l2, _) = error $ "LocMap: key " ++ show k ++ " duplicated: " ++ show (l1, l2)

-- | Find the (possibly empty) list of sequence regions and associated
--   objects that contain a 'Pos' position, in the sense of 'withinLoc'
lookupWithin :: Pos.Pos -> LocMap a -> [(Loc.Loc, a)]
lookupWithin pos (LocMap zonesize keyToLoc zoneKeys) 
    = let !zoneKeySet = IM.findWithDefault IS.empty (posZone zonesize pos) zoneKeys
      in filter ((Loc.isWithin pos) . fst) $ keySetLocs keyToLoc zoneKeySet

-- | Find the (possibly empty) list of sequence regions and associated
--   objects that overlap a 'Loc' region, in the sense of 'overlapsLoc'
lookupOverlaps :: Loc.Loc -> LocMap a -> [(Loc.Loc, a)]
lookupOverlaps loc (LocMap zonesize keyToLoc zoneKeys)
    = let !zonesKeySet = IS.unions $ map (\z -> IM.findWithDefault IS.empty z zoneKeys) $ locZones zonesize loc
      in filter ((Loc.overlaps loc) . fst) $ keySetLocs keyToLoc zonesKeySet

-- | Remove a region / object association from the map, if it is
-- present.  If it is present multiple times, only the first
-- occurrence will be deleted.
delete :: (Eq a) => (Loc.Loc, a) -> LocMap a -> LocMap a
delete target = deleteBy (== target)

-- | Remove the first region / object association satisfying a
-- predicate function.
deleteBy :: ((Loc.Loc, a) -> Bool) -> LocMap a -> LocMap a
deleteBy isTarget lm0@(LocMap zonesize keyToLoc zoneKeys) 
    = case filter (isTarget . snd) $ IM.toList keyToLoc of
        [] -> lm0
        ((key,(l,_)):_) -> let !newKeyToLoc = IM.delete key keyToLoc
                               !newZoneKeys = foldl' (deleteFromZone key) zoneKeys $ locZones zonesize l
                           in LocMap zonesize newKeyToLoc newZoneKeys
    where deleteFromZone key currZoneKeys z = let !currZoneKeySet = IM.findWithDefault missingZone z currZoneKeys 
                                              in IM.insert z (IS.delete key currZoneKeySet) currZoneKeys
              where missingZone = error $ "LocMap deleteBy: empty keyset for zone " ++ show z

posZone :: Offset -> Pos.Pos -> Int
posZone zonesize pos = fromIntegral $ (Pos.offset pos) `div` zonesize

locZones :: Offset -> Loc.Loc -> [Int]
locZones zonesize loc = let !(pmin, pmax) = Loc.bounds loc
                            !zmin = fromIntegral $ pmin `div` zonesize
                            !zmax = fromIntegral $ pmax `div` zonesize
                        in [zmin..zmax]

keySetLocs :: (IM.IntMap (Loc.Loc, a)) -> IS.IntSet -> [(Loc.Loc, a)]
keySetLocs keyToLoc = map keyLoc . IS.toList
    where keyLoc key = IM.findWithDefault unknownKey key keyToLoc
              where unknownKey = error $ "LocMap: unknown key " ++ show key

checkInvariants :: LocMap a -> [String]
checkInvariants (LocMap zonesize keyToLoc zoneKeys)
    = concat $ checkAllKeys : map checkKeyLoc (IM.toList keyToLoc)
    where checkAllKeys = let !allZoneKeys = IS.unions $ IM.elems zoneKeys 
                             !allLocKeys = IM.keysSet keyToLoc
                             !missingKeys = IS.toAscList $ allZoneKeys `IS.difference` allLocKeys
                             !extraKeys = IS.toAscList $ allLocKeys `IS.difference` allZoneKeys
                         in concat [ map (\key -> "Missing key " ++ show key) missingKeys
                                   , map (\key -> "Extra key " ++ show key) extraKeys]
          checkKeyLoc (key, (loc, _)) = let !keyZoneSet = IM.keysSet $ IM.filter (\keyset -> key `IS.member` keyset) zoneKeys
                                            !locZoneSet = IS.fromList $ locZones zonesize loc
                                            !missingZones = IS.toAscList $ locZoneSet `IS.difference` keyZoneSet
                                            !extraZones = IS.toAscList $ keyZoneSet `IS.difference` locZoneSet
                                        in concat [ map (\zone -> "Missing zone " ++ show zone ++ " for " ++ show (key, loc)) missingZones
                                                  , map (\zone -> "Extra zone " ++ show zone ++ " for " ++ show (key, loc)) extraZones]