{- | Module : EvoMod.Data.Alphabet Description : Alphabets store hereditary information. Copyright : (c) Dominik Schrempf 2018 License : GPL-3 Maintainer : dominik.schrempf@gmail.com Stability : unstable Portability : portable Creation date: Thu Oct 4 18:57:08 2018. Hierarchy: 1. 'Character' type class. 2. 'Character' instances such as 'Nucleotide's or 'AminoAcid's. 3. The character types form 'Alphabet's. The different 'Code's are collected in a specific data type. New codes have to be added manually in this module. -} module EvoMod.Data.Alphabet ( Code (..) , codeNameVerbose , Alphabet (..) , alphabet ) where import qualified Data.Set as S import Data.Word (Word8) import EvoMod.Data.AminoAcid import EvoMod.Data.Character import EvoMod.Data.Nucleotide -- | The used genetic code. Could include Protein_IUPAC, CountsFile for -- population data and so on. data Code = DNA | DNA_IUPAC | Protein deriving (Show, Read, Eq, Ord, Enum, Bounded) -- | Verbose version of code name. codeNameVerbose :: Code -> String codeNameVerbose DNA = show DNA ++ " (nucleotides)" codeNameVerbose DNA_IUPAC = show DNA_IUPAC ++ " (nucleotides including IUPAC code)" codeNameVerbose Protein = show Protein ++ " (amino acids)" -- | 'Data.Set' is used because it uses an ordered, tree-like structure with -- fast queries. When parsing characters, they have to be checked for validity -- and so, the query speed is very important when reading in large data files. newtype Alphabet = Alphabet { fromAlphabet :: S.Set Word8 } deriving (Show, Read, Eq, Ord) -- | Since 'Character's are required to be enumerated and bounded, we can -- calculate the corresponding alphabet. toAlphabet :: Character a => [a] -> Alphabet toAlphabet = Alphabet . S.fromList . map toWord -- | New codes have to be added manually here. I tried to use type classes, so -- that each character has to supply an alphabet, but then the language -- extension TypeApplications has to be added. Like this, new codes have to be -- added manually, but the type handling is cleaner. alphabet :: Code -> Alphabet alphabet DNA = toAlphabet [(minBound :: Nucleotide) .. ] alphabet DNA_IUPAC = toAlphabet [(minBound :: NucleotideIUPAC) .. ] alphabet Protein = toAlphabet [(minBound :: AminoAcid) .. ]