-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Library and executables for working with SFF files
--   
--   The library contains the functionality for reading and writing SFF
--   files (sequencing data from 454 and Ion Torrent). It duplicates code
--   from (and is incompatible with) the <a>bio</a> library.
@package biosff
@version 0.2


-- | Read and write the SFF file format used by Roche/454 sequencing to
--   store flowgram data.
--   
--   A flowgram is a series of values (intensities) representing
--   homopolymer runs of A,G,C, and T in a fixed cycle, and usually
--   displayed as a histogram.
--   
--   This file is based on information in the Roche FLX manual. Other
--   sources of information about the format include the Staden Package,
--   whose io_lib contains a C routine for parsing this format.
--   According to comments in the sources, the io_lib implementation is
--   based on a file called getsff.c, which I've been unable to track down.
--   Other software that parses SFF files includes QIIME, sff_extract, and
--   Celera's sffToCa.
--   
--   It is believed that all values are stored big endian.
module Bio.Sequence.SFF

-- | The data structure storing the contents of an SFF file (modulo the
--   index): one <a>CommonHeader</a> together with the file's
--   <a>ReadBlock</a>s.
data SFF

-- | Constructor. The header field carries a strictness annotation; the
--   list of read blocks does not.
SFF :: !CommonHeader -> [ReadBlock] -> SFF

-- | SFF has a 31-byte common header
--   
--   The format is open to having the index anywhere between reads; we
--   should really keep count and check for each read. In practice, it
--   seems to be placed after the reads.
--   
--   The following two fields are considered part of the header, but as
--   they are static, they are not part of the data structure
--   
--   <pre>
--      
--   magic   :: Word32   -- 0x2e736666, i.e. the string ".sff"
--   version :: Word32   -- 0x00000001
--   </pre>
data CommonHeader

-- | Constructor. The argument types line up, in order, with the field
--   accessors listed below (<a>index_offset</a> through <a>key</a>).
CommonHeader :: Int64 -> Int32 -> Int32 -> Int16 -> Int16 -> Word8 -> ByteString -> ByteString -> CommonHeader

-- | Points to a text(?) section
index_offset :: CommonHeader -> Int64

-- | Length of the index section. NOTE(review): presumably in bytes and
--   paired with <a>index_offset</a> - confirm against the implementation.
index_length :: CommonHeader -> Int32

-- | Number of reads as recorded in the header. NOTE(review): presumably
--   the count that <a>writeSFF'</a> goes back and updates - confirm.
num_reads :: CommonHeader -> Int32

-- | NOTE(review): presumably the length of the <a>key</a> field -
--   confirm.
key_length :: CommonHeader -> Int16

-- | NOTE(review): presumably the number of flows per read, i.e. the
--   length of the <a>flow</a> field - confirm.
flow_length :: CommonHeader -> Int16

-- | Flowgram format code. NOTE(review): the meaning of individual values
--   is not documented in this listing.
flowgram_fmt :: CommonHeader -> Word8

-- | NOTE(review): presumably the nucleotide flow order, one character
--   per flow - confirm.
flow :: CommonHeader -> ByteString

-- | NOTE(review): presumably the key sequence found at the start of
--   every read - confirm.
key :: CommonHeader -> ByteString

-- | Each Read has a fixed read header, containing various information.
data ReadHeader

-- | Constructor. The argument types line up, in order, with the field
--   accessors listed below (<a>name_length</a> through
--   <a>read_name</a>).
ReadHeader :: Int16 -> Int32 -> Int16 -> Int16 -> Int16 -> Int16 -> ByteString -> ReadHeader

-- | NOTE(review): presumably the length of <a>read_name</a> - confirm.
name_length :: ReadHeader -> Int16

-- | NOTE(review): presumably the number of bases called for this read -
--   confirm.
num_bases :: ReadHeader -> Int32

-- | Left quality-clipping position. NOTE(review): indexing base and
--   the meaning of a zero value are not shown in this listing.
clip_qual_left :: ReadHeader -> Int16

-- | Right quality-clipping position. NOTE(review): indexing base and
--   the meaning of a zero value are not shown in this listing.
clip_qual_right :: ReadHeader -> Int16

-- | Left adapter-clipping position. NOTE(review): indexing base and
--   the meaning of a zero value are not shown in this listing.
clip_adapter_left :: ReadHeader -> Int16

-- | Right adapter-clipping position. NOTE(review): indexing base and
--   the meaning of a zero value are not shown in this listing.
clip_adapter_right :: ReadHeader -> Int16

-- | The name of the read. See <a>decodeReadName</a> for extracting the
--   information encoded in a name.
read_name :: ReadHeader -> ByteString

-- | This contains the actual flowgram for a single read.
data ReadBlock

-- | Constructor. Every field carries a strictness annotation.
ReadBlock :: !ReadHeader -> !ByteString -> !ByteString -> !SeqData -> !QualData -> ReadBlock

-- | The <a>ReadHeader</a> belonging to this read.
read_header :: ReadBlock -> !ReadHeader

-- | The flow values in packed binary form; see <a>packFlows</a> and
--   <a>unpackFlows</a>, whose documentation refers to this field.
flow_data :: ReadBlock -> !ByteString

-- | The flow index in packed form. NOTE(review): the documentation of
--   <a>cumulative_index</a> suggests the stored values are relative
--   rather than absolute - confirm.
flow_index :: ReadBlock -> !ByteString

-- | The sequence of the read, as <a>SeqData</a>.
bases :: ReadBlock -> !SeqData

-- | The quality values of the read, as <a>QualData</a>.
quality :: ReadBlock -> !QualData

-- | Read the SFF file at the given path and return its contents as an
--   <a>SFF</a>. See <a>recoverSFF</a> for a variant documented as
--   resilient against errors.
readSFF :: FilePath -> IO SFF

-- | Write an <a>SFF</a> to the specified file name. See <a>writeSFF'</a>
--   for a variant that also goes back and updates the read count.
writeSFF :: FilePath -> SFF -> IO ()

-- | Write an <a>SFF</a> to the specified file name, but go back and update
--   the read count afterwards. Useful if you want to output a lazy stream
--   of <a>ReadBlock</a>s. The <tt>Int</tt> result is the number of reads
--   written.
writeSFF' :: FilePath -> SFF -> IO Int

-- | Read an SFF file, but be resilient against errors. Same signature as
--   <a>readSFF</a>. NOTE(review): which errors are tolerated, and what
--   is returned for the affected reads, is not visible in this listing.
recoverSFF :: FilePath -> IO SFF

-- | Trim a read according to clipping information, returning the trimmed
--   <a>ReadBlock</a>. NOTE(review): presumably the clipping information
--   is the <tt>clip_*</tt> fields of the read's <a>ReadHeader</a> -
--   confirm.
trim :: ReadBlock -> ReadBlock

-- | Trim a read to a specific range of sequence positions; both bounds
--   are inclusive. NOTE(review): going by the name, the first argument is
--   the start and the second the end of the range - confirm, along with
--   the indexing base.
trimFromTo :: Integral i => i -> i -> ReadBlock -> ReadBlock

-- | Convert a sequence position in the given read to the corresponding
--   flow position. Inverse direction of <a>flowToBasePos</a>.
--   NOTE(review): the indexing base and the behaviour for out-of-range
--   positions are not visible in this listing.
baseToFlowPos :: Integral i => ReadBlock -> i -> Int

-- | Convert a flow position in the given read to the corresponding
--   sequence position. Inverse direction of <a>baseToFlowPos</a>.
--   NOTE(review): the indexing base and the behaviour for out-of-range
--   positions are not visible in this listing.
flowToBasePos :: Integral i => ReadBlock -> i -> Int

-- | Trim a <a>ReadBlock</a>, limiting the number of flows to the given
--   value. If writing to an SFF file, make sure you update the
--   <a>CommonHeader</a> accordingly. See <tt>examples/Flx.hs</tt> for how
--   to use this.
trimFlows :: Integral i => i -> ReadBlock -> ReadBlock

-- | Test serialization by outputting the header and first two reads in
--   an SFF, and the same after a decode + encode cycle.
test :: FilePath -> IO ()

-- | Convert a file by decoding it and re-encoding it. This will lose the
--   index (which isn't really necessary). NOTE(review): where the
--   re-encoded output is written is not visible from this signature.
convert :: FilePath -> IO ()

-- | Helper function to access the flowgram of a read as a list of
--   <a>Flow</a> values.
flowgram :: ReadBlock -> [Flow]

-- | Extract the sequence of a read with masked bases in lower case.
--   NOTE(review): presumably "masked" means bases outside the clipping
--   positions of the read header - confirm.
masked_bases :: ReadBlock -> SeqData

-- | Extract the index of a read as a list of absolute coordinates, not
--   relative ones.
cumulative_index :: ReadBlock -> [Int]

-- | Pack a list of flows into the corresponding binary structure (the
--   flow_data field). Opposite direction of <a>unpackFlows</a>.
packFlows :: [Flow] -> ByteString

-- | Unpack the flow_data field into a list of flow values. Opposite
--   direction of <a>packFlows</a>.
unpackFlows :: ByteString -> [Flow]

-- | The type of a single flowgram value: a synonym for the 16-bit signed
--   integer type <tt>Int16</tt>.
type Flow = Int16

-- | A quality value is in the range 0..255. The type is listed here
--   without constructors.
data Qual :: *
-- | A synonym for the 8-bit unsigned type <tt>Word8</tt>. NOTE(review):
--   presumably the element type of a read's flow index - confirm.
type Index = Word8

-- | Sequence data are lazy bytestrings of ASCII characters. The type is
--   listed here without constructors.
data SeqData :: *

-- | Quality data are lazy bytestrings of <a>Qual</a>s. The type is
--   listed here without constructors.
data QualData :: *

-- | Read names encode various information, as per this struct. Use
--   <a>decodeReadName</a> and <a>encodeReadName</a> to convert from and
--   to the textual form.
data ReadName

-- | Constructor. The argument types line up, in order, with the field
--   accessors listed below (<a>date</a> through <a>y_loc</a>).
ReadName :: (Int, Int, Int) -> (Int, Int, Int) -> Int -> Int -> Int -> ReadName

-- | NOTE(review): presumably (year, month, day) - confirm.
date :: ReadName -> (Int, Int, Int)

-- | NOTE(review): presumably (hour, minute, second) - confirm.
time :: ReadName -> (Int, Int, Int)

-- | Region number encoded in the read name.
region :: ReadName -> Int

-- | X location encoded in the read name.
x_loc :: ReadName -> Int

-- | Y location encoded in the read name.
y_loc :: ReadName -> Int
-- | Decode a read name into a <a>ReadName</a>. NOTE(review): presumably
--   <tt>Nothing</tt> is returned when the name does not follow the
--   expected encoding - confirm.
decodeReadName :: ByteString -> Maybe ReadName
-- | Encode a <a>ReadName</a> as a read name string; the opposite
--   direction of <a>decodeReadName</a>.
encodeReadName :: ReadName -> ByteString

-- | Serialize a <a>ReadBlock</a>. A ReadBlock can't be an instance of
--   Binary directly, since it depends on information from the
--   CommonHeader. NOTE(review): presumably the <tt>Int</tt> argument is
--   that information (e.g. the flow length) - confirm.
putRB :: Int -> ReadBlock -> Put

-- | Helper function for decoding a <a>ReadBlock</a>, given an already
--   decoded <a>ReadHeader</a>. NOTE(review): presumably the <tt>Int</tt>
--   argument is the same CommonHeader-derived value that <a>putRB</a>
--   takes - confirm.
getRB :: Int -> ReadHeader -> Get ReadBlock
-- Class instances provided by this module. PartialReadHeader, RSFF and
-- RBI have no declaration visible in this listing - presumably they are
-- internal, non-exported types (TODO confirm).
instance Binary PartialReadHeader
instance Binary RSFF
instance Show ReadBlock
instance Binary ReadHeader
instance Show ReadHeader
instance Binary CommonHeader
instance Show CommonHeader
instance Binary SFF
instance Show SFF
instance Binary RBI
instance BioSeqQual ReadBlock
instance BioSeq ReadBlock