module Data.Align
(
align
, AlignConfig
, alignConfig
, Step
, Trace, traceScore, trace
, windowedAlign
, centerStar
, MultiStep, center, others, stepOfAll
, MultiTrace, centerIndex, otherIndices, allIndices, multiTrace
, debugAlign
, debugMultiAlign
) where
import Data.Function (fix, on)
import qualified Data.List as L
import Data.Maybe (fromMaybe)
import Data.MemoUgly
import Data.Ord
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
data AlignConfig a s = AlignConfig
{ acPairScore :: a -> a -> s
, ac_initial_gap_penalty :: s
, ac_gap_penalty :: s
}
alignConfig :: (a -> a -> s)
-> s
-> s
-> AlignConfig a s
alignConfig = AlignConfig
localAlignConfig
:: Num s
=> (a -> a -> s)
-> s
-> AlignConfig a s
localAlignConfig f = alignConfig f 0
type Step a = Either (Either a a) (a, a)
stepLeft = Left . Left
stepRight = Left . Right
stepBoth a b = Right (a,b)
isMatch :: Step a -> Bool
isMatch (Right _) = True
isMatch _ = False
isLeft :: Step a -> Bool
isLeft (Left (Left _)) = True
isLeft _ = False
isRight :: Step a -> Bool
isRight (Left (Right _)) = True
isRight _ = False
data Trace a s = Trace
{ traceScore :: s
, trace :: [Step a]
}
instance (Show a, Show s) => Show (Trace a s) where
show (Trace s t) = "Trace(score = " ++ show s ++ ", steps = " ++ show t ++ ")"
(Trace s ts) `tappend` (Trace z (t:_)) = Trace (s+z) (t:ts)
debugAlign :: [Step Char] -> String
debugAlign = go [] []
where
go as bs [] = reverse as ++ "\n" ++ reverse bs
go as bs (t:ts) = case t of
Left (Left c) -> go (c:as) ('-':bs) ts
Left (Right c) -> go ('-':as) (c:bs) ts
Right (c, d) -> go (c:as) (d:bs) ts
align :: (G.Vector v a, Num s, Ord s)
=> AlignConfig a s
-> v a
-> v a
-> Trace a s
align AlignConfig{..} as bs =
revTrace . fix (memo . go) $ (lastIndex as, lastIndex bs)
where
revTrace (Trace s t) = Trace s (reverse t)
lastIndex v = G.length v 1
go k (i,j)
| i == (1) || j == (1) =
if i == j then Trace 0 []
else if i == (1)
then skipInit j stepRight bs
else skipInit i stepLeft as
| otherwise =
let a = as G.! i
b = bs G.! j
diag = k (i1,j1) `tappend` Trace (acPairScore a b) [stepBoth a b]
a_gap = k (i1, j) `tappend` Trace ac_gap_penalty [stepLeft a]
b_gap = k ( i,j1) `tappend` Trace ac_gap_penalty [stepRight b]
in L.maximumBy (comparing traceScore) [diag, a_gap, b_gap]
skipInit idx stepFun xs =
let score = ac_initial_gap_penalty * fromIntegral (idx+1)
tr = reverse [stepFun (xs G.! xi) | xi <- [0..idx]]
in Trace score tr
windowedAlign :: (Num s, Eq s, Ord s)
=> AlignConfig a s
-> Int
-> [a]
-> [a]
-> [Step a]
windowedAlign cfg w as bs = go as bs []
where
go as [] rs = concat . reverse $ (map stepLeft as):rs
go [] bs rs = concat . reverse $ (map stepRight bs):rs
go as bs rs =
let ahs = take w as
bhs = take w bs
tr0 = trace $ align cfg (V.fromList ahs) (V.fromList bhs)
tr = let matched = dropTrailingMismatches tr0
len = length matched
in if len <= 1
then matched
else take (len `div` 2) matched
in if null tr
then let (a:ax) = as
(b:bx) = bs
in go ax bx ([stepLeft a, stepRight b]:rs)
else let ac = countMatchOr isLeft tr
bc = countMatchOr isRight tr
in go (drop ac as) (drop bc bs) (tr:rs)
dropTrailingMismatches =
reverse . dropWhile (not . isMatch) . reverse
countMatchOr f = length . filter (\s -> isMatch s || f s)
data MultiStep a = MultiStep
{ center :: Maybe a
, others :: [Maybe a]
}
data MultiTrace i a s = MultiTrace
{ centerIndex :: i
, otherIndices :: [i]
, multiTrace :: [MultiStep a]
}
stepOfAll :: MultiStep a -> [Maybe a]
stepOfAll MultiStep{..} = center:others
allIndices :: MultiTrace i a s -> [i]
allIndices MultiTrace{..} = centerIndex:otherIndices
debugMultiAlign :: [MultiStep Char] -> String
debugMultiAlign =
unlines . map (map charOrDash) . L.transpose . map stepOfAll
where
charOrDash = fromMaybe '-'
centerStar :: (G.Vector v a, Num s, Ord s, Ord i)
=> AlignConfig a s
-> [(i, v a)]
-> MultiTrace i a s
centerStar conf vs =
let (firstPair:rest) = centerPairs
initialTrace = MultiTrace
{ centerIndex = fst . fst $ firstPair
, otherIndices = [snd . fst $ firstPair]
, multiTrace = initialSteps . trace . snd $ firstPair
}
in foldl mergePair initialTrace rest
where
initialSteps = go []
where
go acc [] = reverse acc
go acc (s:xs) = go (conv s []:acc) xs
conv s rest = case s of
Right (c, d) -> MultiStep (Just c) (Just d:rest)
Left (Left c) -> MultiStep (Just c) (Nothing:rest)
Left (Right d) -> MultiStep Nothing (Just d:rest)
mergePair MultiTrace{..} ((_,j), tr) = MultiTrace
{ centerIndex = centerIndex
, otherIndices = j:otherIndices
, multiTrace = mergeSteps multiTrace (trace tr)
}
where
mergeSteps mss = go [] mss
where
noOthers = map (const Nothing) . others . head $ mss
go acc [] [] = reverse acc
go acc (MultiStep{..}:mss) [] =
go (MultiStep center (Nothing:others):acc) mss []
go acc [] (s:ss) = go (conv s noOthers:acc) [] ss
go acc (m@MultiStep{..}:mss) (s:ss) = case (center, s) of
(Nothing, Left (Right d)) ->
go (MultiStep center (Just d:others):acc) mss ss
(Nothing, _) ->
go (MultiStep center (Nothing:others):acc) mss (s:ss)
(Just _, Right (_, d)) ->
go (MultiStep center (Just d:others):acc) mss ss
(Just _, Left (Left _)) ->
go (MultiStep center (Nothing:others):acc) mss ss
(Just _, Left (Right d)) ->
go (MultiStep Nothing (Just d:noOthers):acc) (m:mss) ss
centerPairs
= snd
. L.maximumBy (comparing fst)
. map (\g -> (starSum g, g))
. L.groupBy ((==) `on` (fst . fst))
. L.sortBy (comparing fst)
$ pairAligns
where
pairAligns = do
((i,v):rest) <- L.tails vs
(j,w) <- rest
let tr = align conf v w
[((i,j), tr), ((j,i), tr)]
starSum = sum . map (traceScore . snd)