-- | Data types and small helpers for working with sequence alignments:
-- gap extraction/insertion on sequence data, edit lists and their textual
-- rendering, and construction of columns of per-cell values from two
-- sequences.
module Bio.Alignment.AlignData (
-- Gapped sequences and multi-sequence alignments
Dir(..), Gaps, Alignment
, extractGaps, insertGaps
-- Pairwise alignments expressed as edit lists
, Edit(..), EditList, SubstMx, Selector, Chr
-- Column construction and scoring helpers
, columns, eval, isRepl, on
-- Rendering edit lists as text
, showalign, toStrings
) where
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as BC
import Bio.Sequence.SeqData
import Data.List (unfoldr)
import Data.Word
import Data.Char (chr)
-- | Direction tag attached to each entry of an 'Alignment'
-- (presumably the orientation of the sequence; confirm against callers).
data Dir
  = Fwd
  | Rev
  deriving (Eq, Show)

-- | Positions of gap characters, as produced by 'extractGaps'.
type Gaps = [Offset]

-- | A list of entries, each a tuple of an offset, a direction, a sequence
-- and that sequence's gap positions.
type Alignment = [(Offset, Dir, Sequence, Gaps)]
-- | Separate a gapped sequence into its gap-free data and its gap positions.
--
-- Every @\'*\'@ character is treated as a gap.  The first component of the
-- result is the input with all @\'*\'@ removed; the second is the list of
-- indices (into the original, gapped input) at which a @\'*\'@ occurred.
extractGaps :: SeqData -> (SeqData,Gaps)
extractGaps gapped = (residues, gapPositions)
  where
    gapChar      = '*'
    residues     = BC.filter (/= gapChar) gapped
    gapPositions = BC.elemIndices gapChar gapped
-- | Re-insert a gap character into gap-free sequence data.
--
-- @insertGaps c (str, gaps)@ places the character @c@ at every offset listed
-- in @gaps@.  Offsets are interpreted as positions in the /resulting/ (gapped)
-- sequence and are assumed to be in ascending order, which is what
-- 'extractGaps' produces.  Consequently
-- @insertGaps \'*\' (extractGaps s)@ rebuilds @s@.
--
-- Any data remaining after the last gap is appended unchanged.
insertGaps :: Char -> (SeqData,Gaps) -> SeqData
insertGaps c (str', gaps) = BC.concat (go str' 0 gaps)
  where
    gap = BC.singleton c

    -- @p@ is the position in the output reached so far, so the next gap is
    -- preceded by @next - p@ characters of the remaining input.
    go str _ []          = [str]
    go str p (next:rest) =
      let (before, after) = BC.splitAt (next - p) str
      in  before : gap : go after (next + 1) rest
-- | Render an edit list as two lines of text separated by a newline.
--
-- The two lines are the pair of strings returned by 'toStrings', first
-- component on top.
showalign :: EditList -> String
showalign a = s1 ++ "\n" ++ s2
  where
    (s1, s2) = toStrings a
-- | Turn an edit list into a pair of equally long strings.
--
-- Each edit contributes one character to each string:
--
-- * @Ins c@ yields @\'-\'@ in the first string and @c@ in the second;
-- * @Del c@ yields @c@ in the first string and @\'-\'@ in the second;
-- * @Repl c1 c2@ yields @c1@ in the first string and @c2@ in the second.
--
-- An empty edit list gives two empty strings.
toStrings :: EditList -> (String,String)
toStrings = foldr step ("", "")
  where
    toChar = chr . fromIntegral

    -- The lazy pattern keeps the result as incremental as direct recursion
    -- with a @let@-bound pair would be.
    step edit ~(top, bottom) = case edit of
      Ins c      -> ('-' : top,       toChar c : bottom)
      Del c      -> (toChar c : top,  '-' : bottom)
      Repl c1 c2 -> (toChar c1 : top, toChar c2 : bottom)
-- | A single sequence character, stored as a byte.
type Chr = Word8

-- | One step of a pairwise alignment.
--
-- As rendered by 'toStrings': 'Ins' carries a character that appears only in
-- the second sequence, 'Del' one that appears only in the first, and 'Repl'
-- pairs a character of the first sequence with one of the second.
data Edit
  = Ins Chr
  | Del Chr
  | Repl Chr Chr
  deriving (Show, Eq)

-- | A pairwise alignment, as a list of edits.
type EditList = [Edit]
-- | 'True' exactly when the edit is a 'Repl'; 'False' for 'Ins' and 'Del'.
isRepl :: Edit -> Bool
isRepl edit = case edit of
  Repl{} -> True
  _      -> False
-- | A substitution matrix: maps a pair of characters to a value of type @a@.
type SubstMx a = (Chr,Chr) -> a

-- | Evaluate a single edit.
--
-- 'Ins' and 'Del' both evaluate to the supplied gap value @g@; a
-- @Repl x y@ is looked up in the substitution matrix as @mx (x, y)@.
eval :: SubstMx a -> a -> Edit -> a
eval _  g (Ins _)    = g
eval _  g (Del _)    = g
eval mx _ (Repl x y) = mx (x, y)
-- | Combines a list of candidate cells, each a previously computed value
-- paired with the edit leading from it, into the value for the current cell.
type Selector a = [(a,Edit)] -> a

-- | Build the list of columns for two sequences.
--
-- Takes the second field of each 'Seq' (presumably the sequence data;
-- confirm against the 'Sequence' definition) and hands both to @columns'@
-- together with the selector and the initial value.
columns :: Selector a -> a -> Sequence -> Sequence -> [[a]]
columns f z (Seq _ dataA _) (Seq _ dataB _) =
  columns' f z dataA dataB
-- | Build the columns of cell values for two pieces of sequence data.
--
-- The result starts with an initial column and then has one column per
-- character of @s1@; every column has one cell more than @s2@ has characters.
--
-- * Initial column: @zero@, followed by, for each character @y@ of @s2@,
--   @f [(above, Ins y)]@ where @above@ is the preceding cell of that column.
-- * Column for character @xi@ of @s1@: its first cell is
--   @f [(p0, Del xi)]@, with @p0@ the first cell of the previous column.
--   Each further cell, paired with character @y@ of @s2@, is
--   @f [del, ins, rep]@ where @del@ pairs the previous column's cell in the
--   same row with @Del xi@, @ins@ pairs the preceding cell of the current
--   column with @Ins y@, and @rep@ pairs the previous column's cell one row
--   up with @Repl xi y@.
columns' :: Selector a -> a -> SeqData -> SeqData -> [[a]]
columns' f zero s1 s2 = firstCol : unfoldr nextCol (firstCol, s1)
  where
    ys = B.unpack s2

    -- Defined in terms of itself: each cell looks at the one before it.
    firstCol = zero : zipWith (\above y -> f [(above, Ins y)]) firstCol ys

    -- Produce the next column from the previous one and the unconsumed
    -- remainder of the first sequence; stop when that remainder is empty.
    nextCol (p0:prev, remaining)
      | B.null remaining = Nothing
      | otherwise        = Just (col, (col, B.tail remaining))
      where
        xi  = B.head remaining
        -- 'col' refers to itself through 'inss' (the cell above).
        col = f [(p0, Del xi)] : zipWith3 cell dels inss reps
        cell del ins rep = f [del, ins, rep]
        dels = zip prev (repeat (Del xi))
        inss = zip col (map Ins ys)
        reps = zip (p0:prev) (map (Repl xi) ys)
-- | Apply a binary function to two values after mapping both through the
-- same projection: @on c f x y = c (f x) (f y)@.
--
-- For example, @on (==) fst@ compares two pairs by their first components.
on :: (b -> b -> c) -> (a -> b) -> a -> a -> c
on c f x y = c (f x) (f y)