module Bio.Location.LocMap (
LocMap, fromList
, lookupWithin, lookupOverlaps
, delete, deleteBy, insert
, checkInvariants
) where
import qualified Data.IntMap as IM
import qualified Data.IntSet as IS
import Data.List hiding (insert, delete, deleteBy)
import Data.Maybe
import Data.Monoid
import qualified Bio.Location.Location as Loc
import qualified Bio.Location.Position as Pos
import Bio.Sequence.SeqData (Offset)
defaultZonesize :: Offset
defaultZonesize = 65536
data LocMap a = LocMap !Offset !(IM.IntMap (Loc.Loc, a)) !(IM.IntMap IS.IntSet)
instance Monoid (LocMap a) where
mempty = LocMap defaultZonesize IM.empty IM.empty
mappend lm0 (LocMap _ keyToLoc1 _) = foldl' (\lm (l,x) -> insert l x lm) lm0 $ IM.elems keyToLoc1
fromList :: Offset -> [(Loc.Loc, a)] -> LocMap a
fromList zonesize = foldl' (\lm0 (l,x) -> insert l x lm0) mempty
insert :: Loc.Loc -> a -> LocMap a -> LocMap a
insert l x (LocMap zonesize keyToLoc zoneKeys)
= let !key = case IM.maxViewWithKey keyToLoc of
Just ((k, _), _) -> k + 1
Nothing -> 1
!newKeyToLoc = IM.insertWithKey duplicateError key (l, x) keyToLoc
!newZoneKeys = foldl' (insertIntoZone key) zoneKeys $ locZones zonesize l
in LocMap zonesize newKeyToLoc newZoneKeys
where insertIntoZone key currZoneKeys z = case IM.lookup z currZoneKeys of
Nothing -> IM.insert z (IS.singleton key) currZoneKeys
Just currZoneKeySet -> IM.insert z (IS.insert key currZoneKeySet) currZoneKeys
duplicateError k (l1, _) (l2, _) = error $ "LocMap: key " ++ show k ++ " duplicated: " ++ show (l1, l2)
lookupWithin :: Pos.Pos -> LocMap a -> [(Loc.Loc, a)]
lookupWithin pos (LocMap zonesize keyToLoc zoneKeys)
= let !zoneKeySet = IM.findWithDefault IS.empty (posZone zonesize pos) zoneKeys
in filter ((Loc.isWithin pos) . fst) $ keySetLocs keyToLoc zoneKeySet
lookupOverlaps :: Loc.Loc -> LocMap a -> [(Loc.Loc, a)]
lookupOverlaps loc (LocMap zonesize keyToLoc zoneKeys)
= let !zonesKeySet = IS.unions $ map (\z -> IM.findWithDefault IS.empty z zoneKeys) $ locZones zonesize loc
in filter ((Loc.overlaps loc) . fst) $ keySetLocs keyToLoc zonesKeySet
delete :: (Eq a) => (Loc.Loc, a) -> LocMap a -> LocMap a
delete target = deleteBy (== target)
deleteBy :: ((Loc.Loc, a) -> Bool) -> LocMap a -> LocMap a
deleteBy isTarget lm0@(LocMap zonesize keyToLoc zoneKeys)
= case filter (isTarget . snd) $ IM.toList keyToLoc of
[] -> lm0
((key,(l,_)):_) -> let !newKeyToLoc = IM.delete key keyToLoc
!newZoneKeys = foldl' (deleteFromZone key) zoneKeys $ locZones zonesize l
in LocMap zonesize newKeyToLoc newZoneKeys
where deleteFromZone key currZoneKeys z = let !currZoneKeySet = IM.findWithDefault missingZone z currZoneKeys
in IM.insert z (IS.delete key currZoneKeySet) currZoneKeys
where missingZone = error $ "LocMap deleteBy: empty keyset for zone " ++ show z
posZone :: Offset -> Pos.Pos -> Int
posZone zonesize pos = fromIntegral $ (Pos.offset pos) `div` zonesize
locZones :: Offset -> Loc.Loc -> [Int]
locZones zonesize loc = let !(pmin, pmax) = Loc.bounds loc
!zmin = fromIntegral $ pmin `div` zonesize
!zmax = fromIntegral $ pmax `div` zonesize
in [zmin..zmax]
keySetLocs :: (IM.IntMap (Loc.Loc, a)) -> IS.IntSet -> [(Loc.Loc, a)]
keySetLocs keyToLoc = map keyLoc . IS.toList
where keyLoc key = IM.findWithDefault unknownKey key keyToLoc
where unknownKey = error $ "LocMap: unknown key " ++ show key
checkInvariants :: LocMap a -> [String]
checkInvariants (LocMap zonesize keyToLoc zoneKeys)
= concat $ checkAllKeys : map checkKeyLoc (IM.toList keyToLoc)
where checkAllKeys = let !allZoneKeys = IS.unions $ IM.elems zoneKeys
!allLocKeys = IM.keysSet keyToLoc
!missingKeys = IS.toAscList $ allZoneKeys `IS.difference` allLocKeys
!extraKeys = IS.toAscList $ allLocKeys `IS.difference` allZoneKeys
in concat [ map (\key -> "Missing key " ++ show key) missingKeys
, map (\key -> "Extra key " ++ show key) extraKeys]
checkKeyLoc (key, (loc, _)) = let !keyZoneSet = IM.keysSet $ IM.filter (\keyset -> key `IS.member` keyset) zoneKeys
!locZoneSet = IS.fromList $ locZones zonesize loc
!missingZones = IS.toAscList $ locZoneSet `IS.difference` keyZoneSet
!extraZones = IS.toAscList $ keyZoneSet `IS.difference` locZoneSet
in concat [ map (\zone -> "Missing zone " ++ show zone ++ " for " ++ show (key, loc)) missingZones
, map (\zone -> "Extra zone " ++ show zone ++ " for " ++ show (key, loc)) extraZones]