{- | This is a meta-module importing and re-exporting sequence-related stuff.

     It encompasses the "Bio.Sequence.SeqData", "Bio.Sequence.Fasta",
     "Bio.Sequence.FastQ", "Bio.Sequence.Phd", "Bio.Sequence.TwoBit",
     "Bio.Sequence.Entropy" and "Bio.Sequence.HashWord" modules.
-}

module Bio.Sequence
    (
    -- * Data structures etc ("Bio.Sequence.SeqData")
      Sequence(..), Unknown, Offset, SeqData
    , Qual, QualData

    -- ** Accessor functions
    , seqlength, seqlabel, seqheader, seqdata, seqqual, (!)
    , appendHeader, setHeader

    -- ** Converting to and from String.
    , fromStr, toStr

    -- ** Nucleotide functionality.
    , compl, revcompl, revcompl', Nuc, castToNuc

    -- ** Protein sequence functionality
    , Amino(..), translate, fromIUPAC, toIUPAC, castToAmino

    -- ** Other utility functions
    , defragSeq, seqmap

    -- * File IO

    -- ** Generic sequence reading
    , readNuc, readProt

    -- ** The Fasta file format ("Bio.Sequence.Fasta")
    , readFasta, hReadFasta
    , writeFasta, hWriteFasta

    -- ** Quality data

    -- | Not part of the Fasta format, and treated separately.
    , readQual, writeQual, hWriteQual
    , readFastaQual
    , writeFastaQual, hWriteFastaQual

    -- ** The FastQ format ("Bio.Sequence.FastQ")

    -- | Combines sequence data and quality in one file.
    -- Warning: Solexa uses a different formula for the quality values!
    , readFastQ, writeFastQ, hReadFastQ, hWriteFastQ

    -- ** The phd file format ("Bio.Sequence.Phd")

    -- | These contain base (nucleotide) calling information,
    -- and are generated by @phred@.
    , readPhd, hReadPhd

    -- ** TwoBit file format support ("Bio.Sequence.TwoBit")

    -- | Used by @BLAT@ and related tools.
    , decode2Bit, read2Bit, hRead2Bit
--    ,encode2Bit, write2Bit, hWrite2Bit

    -- * Hashing functionality ("Bio.Sequence.HashWord")

    -- | Packing words from sequences into integral data types
    , HashF (..)
    , contigous, rcontig, rcpacked

    -- * Entropy calculations
    , KWords(..), entropy
    ) where

-- basic sequence data structures
import Bio.Sequence.SeqData

-- file formats
import Bio.Sequence.Fasta
import Bio.Sequence.FastQ
import Bio.Sequence.Phd
import Bio.Sequence.TwoBit

-- sequence-oriented stuff
import Bio.Sequence.Entropy
import Bio.Sequence.HashWord

-- | Read nucleotide sequences in any format - Fasta, SFF, FastQ, 2bit, PHD...
--
--   /Not implemented yet./  Evaluating this function raises an 'ErrorCall'
--   carrying a descriptive message.  The intended approach is to inspect the
--   file contents (e.g. a magic number) and dispatch to the matching reader.
readNuc :: FilePath -> IO [Sequence Nuc]
-- Deliberately point-free so the strictness of the placeholder is the same
-- as a bare bottom; only the error message is more informative.
readNuc = error "Bio.Sequence.readNuc: not implemented (format detection is still TODO)"

-- | Read protein sequences in any supported format (i.e. Fasta).
--
--   The file is read with 'readFasta' and every resulting record is
--   converted to an 'Amino' sequence with @castSeq@.
readProt :: FilePath -> IO [Sequence Amino]
readProt path = fmap (map castSeq) (readFasta path)