-- | Umbrella module for sequence handling.
--
--   The export list below gathers the sequence data types and the
--   reader\/writer functions made available by the @Bio.Sequence.*@ modules
--   imported further down, so that client code only needs a single import.
--
--   Two convenience readers are defined in this module itself: 'readNuc',
--   which chooses a parser from the file name suffix, and 'readProt', which
--   reads a FASTA file as amino acid sequences.
module Bio.Sequence
(
Sequence(..), Unknown, Offset, SeqData, Qual, QualData
, seqlength, seqlabel, seqheader, seqdata, seqqual, (!)
, appendHeader, setHeader
, fromStr, toStr
, compl, revcompl, revcompl', Nuc, castToNuc
, Amino(..), translate, fromIUPAC, toIUPAC, castToAmino
, defragSeq, seqmap
, readNuc, readProt
, readFasta, hReadFasta
, writeFasta, hWriteFasta
, readQual, writeQual, hWriteQual
, readFastaQual
, writeFastaQual, hWriteFastaQual
, readFastQ, writeFastQ, hReadFastQ, hWriteFastQ
, readSangerQ, writeSangerQ, hReadSangerQ, hWriteSangerQ
, readIllumina, writeIllumina, hReadIllumina, hWriteIllumina
, readPhd, hReadPhd
, decode2Bit, read2Bit, hRead2Bit
, HashF (..)
, contigous, rcontig, rcpacked
, KWords(..), entropy
) where
import Bio.Sequence.SeqData
import Bio.Sequence.Fasta
import Bio.Sequence.FastQ
import Bio.Sequence.Phd
import Bio.Sequence.TwoBit
import Bio.Sequence.SFF
import Bio.Sequence.Entropy
import Bio.Sequence.HashWord
import Control.Monad (filterM)
import System.Directory (doesFileExist)
-- | Read nucleotide sequences from a file, selecting the parser from the
--   file name suffix.
--
--   The suffix is the text after the last @\'.\'@ in the path (the whole path
--   if it contains no dot).  Matching is case-sensitive.
--
--   * @fasta@, @fna@, @fa@, @fst@: read with 'readFasta'.  Before reading,
--     two candidate quality files are probed with 'doesFileExist': the path
--     with @.qual@ appended, and the path with its last suffix replaced by
--     @.qual@.  If exactly one exists, the pair is read with 'readFastaQual'
--     instead; if both exist, an 'error' is raised because the choice is
--     ambiguous.  The result is converted with 'castSeq'.
--
--   * @2bit@: read with 'read2Bit'.
--
--   * @sff@: read with 'readSFF' and converted with 'sffToSequence'.
--
--   * @fq@, @fastq@: read with 'readFastQ'.
--
--   * second-to-last suffix @phd@ (for example @read.phd.1@): read with
--     'readPhd'; the single result is wrapped in a one-element list.
--
--   Any other suffix raises an 'error' that names the offending path.
readNuc :: FilePath -> IO [Sequence Nuc]
readNuc fp
  | suffix `elem` ["fasta", "fna", "fa", "fst"] = readFastaWithQual
  | suffix == "2bit"                            = read2Bit fp
  | suffix == "sff"                             = sffToSequence `fmap` readSFF fp
  | suffix `elem` ["fq", "fastq"]               = readFastQ fp
  | innerSuffix == "phd"                        = (: []) `fmap` readPhd fp
  | otherwise =
      error ("readNuc: unknown file suffix for " ++ show fp ++ "!")
  where
    -- All suffix handling works on the reversed path so that the last
    -- component can be peeled off from the front.
    revPath = reverse fp

    -- Text after the last '.'.
    suffix = reverse (takeWhile (/= '.') revPath)

    -- Reversed path with the last suffix and its separating dot(s) removed.
    revStem = dropWhile (== '.') (dropWhile (/= '.') revPath)

    -- Text between the last two dots; used for names such as "x.phd.1".
    innerSuffix = reverse (takeWhile (/= '.') revStem)

    -- Quality file names to probe: "<file>.qual" and "<file minus suffix>.qual".
    qualCandidates = [fp ++ ".qual", reverse revStem ++ ".qual"]

    -- Read a FASTA file, picking up an accompanying quality file when exactly
    -- one candidate exists on disk.
    readFastaWithQual = do
      found <- filterM doesFileExist qualCandidates
      seqs <- case found of
        []  -> readFasta fp
        [q] -> readFastaQual fp q
        _   -> error ("Ambiguous quality file for " ++ show fp
                      ++ ": " ++ show found)
      return (map castSeq seqs)
-- | Read protein sequences from a FASTA file.
--
--   The file is parsed with 'readFasta' and every resulting sequence is
--   converted with 'castSeq' to give a list of 'Amino' sequences.
readProt :: FilePath -> IO [Sequence Amino]
readProt path = do
  entries <- readFasta path
  return [castSeq entry | entry <- entries]