-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Handle molecular sequences -- -- Examine, modify, and simulate molecular sequences in a reproducible -- way. Please see the README on GitHub at -- https://github.com/dschrempf/elynx. @package elynx-seq @version 0.3.2 -- | Creation date: Fri Oct 12 16:24:02 2018. -- -- See header of Alphabet. module ELynx.Data.Character.Character -- | A set of characters forms an Alphabet. At the moment, -- Word8 is used, since none of the alphabets has more than 255 -- characters. class (Show a, Read a, Eq a, Ord a, Enum a, Bounded a, Unbox a) => Character a -- | Write characters. toWord :: Character a => a -> Word8 -- | Read characters. fromWord :: Character a => Word8 -> a -- | Conversion from Char. fromChar :: Character a => Char -> a -- | Conversion to Char. toChar :: Character a => a -> Char -- | Conversion from String. fromString :: Character a => String -> [a] -- | Conversion to String. toString :: Character a => [a] -> String -- | An extended character type with gaps and unknowns. class Character a => CharacterX a gap :: CharacterX a => a -- | Is the character a gap or unknown? isGap :: CharacterX a => a -> Bool -- | IUPAC characters with a mapping to extended characters. class CharacterX a => CharacterI a unknown :: CharacterI a => a iupac :: CharacterI a => [a] toStandard :: CharacterI a => a -> [a] -- | Check if a IUPAC CharacterI is unknown (e.g., N for -- nucleotides). isUnknown :: CharacterI a => a -> Bool -- | Is the given character a IUPAC character? isIUPAC :: CharacterI a => a -> Bool -- | Is the given character a standard character? isStandard :: CharacterI a => a -> Bool -- | Convert between character classes. May throw error. convert :: (Character a, Character b) => a -> b -- | Creation date: Thu Oct 4 18:26:35 2018. -- -- See header of Alphabet. -- -- Extended amino acid with gaps. 
-- See also -- https://www.bioinformatics.org/sms/iupac.html or -- https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry. -- --
--   Amino Acid Code:  Three letter Code:  Amino Acid:
--   ----------------  ------------------  -----------
--   A                 Ala                 Alanine
--   C                 Cys                 Cysteine
--   D                 Asp                 Aspartic Acid
--   E                 Glu                 Glutamic Acid
--   F                 Phe                 Phenylalanine
--   G                 Gly                 Glycine
--   H                 His                 Histidine
--   I                 Ile                 Isoleucine
--   K                 Lys                 Lysine
--   L                 Leu                 Leucine
--   M                 Met                 Methionine
--   N                 Asn                 Asparagine
--   P                 Pro                 Proline
--   Q                 Gln                 Glutamine
--   R                 Arg                 Arginine
--   S                 Ser                 Serine
--   T                 Thr                 Threonine
--   V                 Val                 Valine
--   W                 Trp                 Tryptophan
--   Y                 Tyr                 Tyrosine
--   ----------------  ------------------  -----------
--   -                 Gap                 No amino acid
--   .                 Gap                 No amino acid
--   
module ELynx.Data.Character.AminoAcidX -- | Amino acids. data AminoAcidX A :: AminoAcidX C :: AminoAcidX D :: AminoAcidX E :: AminoAcidX F :: AminoAcidX G :: AminoAcidX H :: AminoAcidX I :: AminoAcidX K :: AminoAcidX L :: AminoAcidX M :: AminoAcidX N :: AminoAcidX P :: AminoAcidX Q :: AminoAcidX R :: AminoAcidX S :: AminoAcidX T :: AminoAcidX V :: AminoAcidX W :: AminoAcidX Y :: AminoAcidX Gap :: AminoAcidX instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Character.AminoAcidX.AminoAcidX instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Character.AminoAcidX.AminoAcidX instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Character.AminoAcidX.AminoAcidX instance ELynx.Data.Character.Character.Character ELynx.Data.Character.AminoAcidX.AminoAcidX instance ELynx.Data.Character.Character.CharacterX ELynx.Data.Character.AminoAcidX.AminoAcidX instance GHC.Enum.Bounded ELynx.Data.Character.AminoAcidX.AminoAcidX instance GHC.Enum.Enum ELynx.Data.Character.AminoAcidX.AminoAcidX instance GHC.Classes.Ord ELynx.Data.Character.AminoAcidX.AminoAcidX instance GHC.Classes.Eq ELynx.Data.Character.AminoAcidX.AminoAcidX instance GHC.Read.Read ELynx.Data.Character.AminoAcidX.AminoAcidX instance GHC.Show.Show ELynx.Data.Character.AminoAcidX.AminoAcidX -- | Creation date: Thu Oct 4 18:26:35 2018. -- -- See header of Alphabet. -- -- Amino acids with gaps and translation stops. -- --
--   Amino Acid Code:  Three letter Code:  Amino Acid:
--   ----------------  ------------------  -----------
--   A                 Ala                 Alanine
--   C                 Cys                 Cysteine
--   D                 Asp                 Aspartic Acid
--   E                 Glu                 Glutamic Acid
--   F                 Phe                 Phenylalanine
--   G                 Gly                 Glycine
--   H                 His                 Histidine
--   I                 Ile                 Isoleucine
--   K                 Lys                 Lysine
--   L                 Leu                 Leucine
--   M                 Met                 Methionine
--   N                 Asn                 Asparagine
--   P                 Pro                 Proline
--   Q                 Gln                 Glutamine
--   R                 Arg                 Arginine
--   S                 Ser                 Serine
--   T                 Thr                 Threonine
--   V                 Val                 Valine
--   W                 Trp                 Tryptophan
--   Y                 Tyr                 Tyrosine
--   ----------------  ------------------  -----------
--   *                 Stp                 No amino acid
--   ----------------  ------------------  -----------
--   -                 Gap                 No amino acid (preferred)
--   .                 Gap                 No amino acid
--   
module ELynx.Data.Character.AminoAcidS -- | Amino acids. data AminoAcidS A :: AminoAcidS C :: AminoAcidS D :: AminoAcidS E :: AminoAcidS F :: AminoAcidS G :: AminoAcidS H :: AminoAcidS I :: AminoAcidS K :: AminoAcidS L :: AminoAcidS M :: AminoAcidS N :: AminoAcidS P :: AminoAcidS Q :: AminoAcidS R :: AminoAcidS S :: AminoAcidS T :: AminoAcidS V :: AminoAcidS W :: AminoAcidS Y :: AminoAcidS Stop :: AminoAcidS Gap :: AminoAcidS instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Character.AminoAcidS.AminoAcidS instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Character.AminoAcidS.AminoAcidS instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Character.AminoAcidS.AminoAcidS instance ELynx.Data.Character.Character.Character ELynx.Data.Character.AminoAcidS.AminoAcidS instance ELynx.Data.Character.Character.CharacterX ELynx.Data.Character.AminoAcidS.AminoAcidS instance GHC.Enum.Bounded ELynx.Data.Character.AminoAcidS.AminoAcidS instance GHC.Enum.Enum ELynx.Data.Character.AminoAcidS.AminoAcidS instance GHC.Classes.Ord ELynx.Data.Character.AminoAcidS.AminoAcidS instance GHC.Classes.Eq ELynx.Data.Character.AminoAcidS.AminoAcidS instance GHC.Read.Read ELynx.Data.Character.AminoAcidS.AminoAcidS instance GHC.Show.Show ELynx.Data.Character.AminoAcidS.AminoAcidS -- | Creation date: Thu Oct 4 18:26:35 2018. -- -- See header of Alphabet. -- -- Amino acid IUPAC code. See also -- https://www.bioinformatics.org/sms/iupac.html or -- https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry. -- -- Remarks: -- -- -- --
--   Amino Acid Code:  Three letter Code:  Amino Acid:
--   ----------------  ------------------  -----------
--   A                 Ala                 Alanine
--   C                 Cys                 Cysteine
--   D                 Asp                 Aspartic Acid
--   E                 Glu                 Glutamic Acid
--   F                 Phe                 Phenylalanine
--   G                 Gly                 Glycine
--   H                 His                 Histidine
--   I                 Ile                 Isoleucine
--   K                 Lys                 Lysine
--   L                 Leu                 Leucine
--   M                 Met                 Methionine
--   N                 Asn                 Asparagine
--   P                 Pro                 Proline
--   Q                 Gln                 Glutamine
--   R                 Arg                 Arginine
--   S                 Ser                 Serine
--   T                 Thr                 Threonine
--   V                 Val                 Valine
--   W                 Trp                 Tryptophan
--   Y                 Tyr                 Tyrosine
--   ----------------  ------------------  -----------
--   J                                     Leucine or Isoleucine
--   B                 Asx                 Aspartic acid or Asparagine
--   Z                 Glx                 Glutamine or Glutamic acid
--   ----------------  ------------------  -----------
--   X                 Xaa                 Any amino acid (preferred; used for printing)
--   ?                 Xaa                 Any amino acid
--   ----------------  ------------------  -----------
--   *                 Stp                 No amino acid
--   ----------------  ------------------  -----------
--   -                 Gap                 No amino acid (preferred; used for printing)
--   .                 Gap                 No amino acid
--   
module ELynx.Data.Character.AminoAcidI -- | Amino acids. data AminoAcidI A :: AminoAcidI C :: AminoAcidI D :: AminoAcidI E :: AminoAcidI F :: AminoAcidI G :: AminoAcidI H :: AminoAcidI I :: AminoAcidI K :: AminoAcidI L :: AminoAcidI M :: AminoAcidI N :: AminoAcidI P :: AminoAcidI Q :: AminoAcidI R :: AminoAcidI S :: AminoAcidI T :: AminoAcidI V :: AminoAcidI W :: AminoAcidI Y :: AminoAcidI J :: AminoAcidI B :: AminoAcidI Z :: AminoAcidI X :: AminoAcidI Stop :: AminoAcidI Gap :: AminoAcidI instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Character.AminoAcidI.AminoAcidI instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Character.AminoAcidI.AminoAcidI instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Character.AminoAcidI.AminoAcidI instance ELynx.Data.Character.Character.Character ELynx.Data.Character.AminoAcidI.AminoAcidI instance ELynx.Data.Character.Character.CharacterX ELynx.Data.Character.AminoAcidI.AminoAcidI instance ELynx.Data.Character.Character.CharacterI ELynx.Data.Character.AminoAcidI.AminoAcidI instance GHC.Enum.Bounded ELynx.Data.Character.AminoAcidI.AminoAcidI instance GHC.Enum.Enum ELynx.Data.Character.AminoAcidI.AminoAcidI instance GHC.Classes.Ord ELynx.Data.Character.AminoAcidI.AminoAcidI instance GHC.Classes.Eq ELynx.Data.Character.AminoAcidI.AminoAcidI instance GHC.Read.Read ELynx.Data.Character.AminoAcidI.AminoAcidI instance GHC.Show.Show ELynx.Data.Character.AminoAcidI.AminoAcidI -- | Creation date: Thu Oct 4 18:26:35 2018. -- -- See header of Alphabet. -- -- Amino acids in alphabetical order. -- --
--   Amino Acid Code:  Three letter Code:  Amino Acid:
--   ----------------  ------------------  -----------
--   A                 Ala                 Alanine
--   C                 Cys                 Cysteine
--   D                 Asp                 Aspartic Acid
--   E                 Glu                 Glutamic Acid
--   F                 Phe                 Phenylalanine
--   G                 Gly                 Glycine
--   H                 His                 Histidine
--   I                 Ile                 Isoleucine
--   K                 Lys                 Lysine
--   L                 Leu                 Leucine
--   M                 Met                 Methionine
--   N                 Asn                 Asparagine
--   P                 Pro                 Proline
--   Q                 Gln                 Glutamine
--   R                 Arg                 Arginine
--   S                 Ser                 Serine
--   T                 Thr                 Threonine
--   V                 Val                 Valine
--   W                 Trp                 Tryptophan
--   Y                 Tyr                 Tyrosine
--   
module ELynx.Data.Character.AminoAcid -- | Amino acids. data AminoAcid A :: AminoAcid C :: AminoAcid D :: AminoAcid E :: AminoAcid F :: AminoAcid G :: AminoAcid H :: AminoAcid I :: AminoAcid K :: AminoAcid L :: AminoAcid M :: AminoAcid N :: AminoAcid P :: AminoAcid Q :: AminoAcid R :: AminoAcid S :: AminoAcid T :: AminoAcid V :: AminoAcid W :: AminoAcid Y :: AminoAcid instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Character.AminoAcid.AminoAcid instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Character.AminoAcid.AminoAcid instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Character.AminoAcid.AminoAcid instance ELynx.Data.Character.Character.Character ELynx.Data.Character.AminoAcid.AminoAcid instance GHC.Enum.Bounded ELynx.Data.Character.AminoAcid.AminoAcid instance GHC.Enum.Enum ELynx.Data.Character.AminoAcid.AminoAcid instance GHC.Classes.Ord ELynx.Data.Character.AminoAcid.AminoAcid instance GHC.Classes.Eq ELynx.Data.Character.AminoAcid.AminoAcid instance GHC.Read.Read ELynx.Data.Character.AminoAcid.AminoAcid instance GHC.Show.Show ELynx.Data.Character.AminoAcid.AminoAcid -- | Creation date: Sun May 19 21:06:38 2019. module ELynx.Data.Alphabet.Character -- | Alphabet characters; abstracted so that representation can be changed -- at some point. data Character -- | Conversion of Characters. toWord :: Character -> Word8 -- | Conversion of Characters. fromWord :: Word8 -> Character -- | Conversion of Characters. toChar :: Character -> Char -- | Conversion of Characters. fromChar :: Char -> Character -- | Conversion of Characters. toString :: [Character] -> String -- | Conversion of Characters. fromString :: String -> [Character] -- | Conversion of Characters. toCVec :: Character a => Vector Character -> Vector a -- | Conversion of Characters. 
fromCVec :: Character a => Vector a -> Vector Character instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Alphabet.Character.Character instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Alphabet.Character.Character instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Alphabet.Character.Character instance GHC.Enum.Bounded ELynx.Data.Alphabet.Character.Character instance GHC.Classes.Ord ELynx.Data.Alphabet.Character.Character instance GHC.Classes.Eq ELynx.Data.Alphabet.Character.Character instance GHC.Show.Show ELynx.Data.Alphabet.Character.Character instance GHC.Read.Read ELynx.Data.Alphabet.Character.Character -- | Creation date: Fri May 10 11:10:32 2019. -- -- Hierarchy: -- --
--   1. Character type.
--   2. Sets of Characters form Alphabets; each Alphabet has a
--      specification AlphabetSpec.
-- -- New alphabets have to be added manually in this module. -- -- This way of handling characters and alphabets IS NOT TYPE SAFE, but -- much, much faster. A second layer of modules such as Nucleotide -- depend on a Character type class. Hence, they provide a type -- safe way of handling alphabets. Conversion is possible, for instance, -- with fromCVec, and toCVec. module ELynx.Data.Alphabet.Alphabet -- | Available alphabets; for details see alphabetSpec. data Alphabet DNA :: Alphabet DNAX :: Alphabet DNAI :: Alphabet Protein :: Alphabet ProteinX :: Alphabet ProteinS :: Alphabet ProteinI :: Alphabet -- | Alphabet specification. Set is used because it provides fast -- lookups. data AlphabetSpec AlphabetSpec :: !Set Character -> !Set Character -> !Set Character -> !Set Character -> !Set Character -> (Character -> [Character]) -> AlphabetSpec -- | Standard characters. [std] :: AlphabetSpec -> !Set Character -- | Gap characters. [gap] :: AlphabetSpec -> !Set Character -- | Unknown characters. [unknown] :: AlphabetSpec -> !Set Character -- | Other IUPAC codes. [iupac] :: AlphabetSpec -> !Set Character -- | All characters in the alphabet. [all] :: AlphabetSpec -> !Set Character -- | Convert from IUPAC to the corresponding standard characters. [toStd] :: AlphabetSpec -> Character -> [Character] -- | Get the alphabet specification for a given alphabet. alphabetSpec :: Alphabet -> AlphabetSpec -- | Verbose alphabet name. alphabetDescription :: Alphabet -> String -- | Test if standard character. isStd :: Alphabet -> Character -> Bool -- | Test if gap. isGap :: Alphabet -> Character -> Bool -- | Test if unknown. isUnknown :: Alphabet -> Character -> Bool -- | Test if extended IUPAC character (excluding gaps and unknowns). isIUPAC :: Alphabet -> Character -> Bool -- | Test if member of alphabet. 
isMember :: Alphabet -> Character -> Bool instance GHC.Generics.Generic ELynx.Data.Alphabet.Alphabet.Alphabet instance GHC.Enum.Bounded ELynx.Data.Alphabet.Alphabet.Alphabet instance GHC.Enum.Enum ELynx.Data.Alphabet.Alphabet.Alphabet instance GHC.Classes.Ord ELynx.Data.Alphabet.Alphabet.Alphabet instance GHC.Classes.Eq ELynx.Data.Alphabet.Alphabet.Alphabet instance GHC.Read.Read ELynx.Data.Alphabet.Alphabet.Alphabet instance GHC.Show.Show ELynx.Data.Alphabet.Alphabet.Alphabet instance Data.Aeson.Types.FromJSON.FromJSON ELynx.Data.Alphabet.Alphabet.Alphabet instance Data.Aeson.Types.ToJSON.ToJSON ELynx.Data.Alphabet.Alphabet.Alphabet -- | Creation date: Mon Feb 25 13:32:56 2019. module ELynx.Data.Alphabet.DistributionDiversity -- | Entropy of vector. entropy :: Vector v Double => v Double -> Double -- | Effective number of used characters measured using entropy. The -- result only makes sense when the sum of the array is 1.0. kEffEntropy :: Vector v Double => v Double -> Double -- | Probability of homoplasy of vector. The result is the probability of -- binomially sampling the same character twice and only makes sense when -- the sum of the array is 1.0. homoplasy :: Vector v Double => v Double -> Double -- | Effective number of used characters measured using homoplasy. -- The result only makes sense when the sum of the array is 1.0. kEffHomoplasy :: Vector v Double => v Double -> Double -- | For a given code vector of characters, calculate frequency of -- characters. The input vector has arbitrary length (most often the -- number of sequences in an alignment), the length of the output vector -- is the number of characters in the alphabet. frequencyCharacters :: (Vector v Character, Vector v Int, Vector v Double) => AlphabetSpec -> v Character -> v Double -- | Creation date: Thu Oct 4 18:26:35 2018. -- -- See header of Alphabet. -- --
--   Symbol  Description  Bases represented  Complement
--   ------  -----------  -----------------  ----------
--   A       Adenine      A                  T
--   C       Cytosine        C               G
--   G       Guanine            G            C
--   T       Thymine               T         A
--   
module ELynx.Data.Character.Nucleotide -- | Nucleotides. data Nucleotide A :: Nucleotide C :: Nucleotide G :: Nucleotide T :: Nucleotide instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Character.Nucleotide.Nucleotide instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Character.Nucleotide.Nucleotide instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Character.Nucleotide.Nucleotide instance ELynx.Data.Character.Character.Character ELynx.Data.Character.Nucleotide.Nucleotide instance GHC.Enum.Bounded ELynx.Data.Character.Nucleotide.Nucleotide instance GHC.Enum.Enum ELynx.Data.Character.Nucleotide.Nucleotide instance GHC.Classes.Ord ELynx.Data.Character.Nucleotide.Nucleotide instance GHC.Classes.Eq ELynx.Data.Character.Nucleotide.Nucleotide instance GHC.Read.Read ELynx.Data.Character.Nucleotide.Nucleotide instance GHC.Show.Show ELynx.Data.Character.Nucleotide.Nucleotide -- | Creation date: Thu Oct 4 18:26:35 2018. -- -- See header of Alphabet. -- -- Nucleotide IUPAC code. See also -- https://www.bioinformatics.org/sms/iupac.html or -- https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry. -- -- Remarks: -- -- -- --
--   Symbol  Description  Bases represented  Complement
--   ------  -----------  -----------------  ----------
--   A       Adenine      A                  T
--   C       Cytosine        C               G
--   G       Guanine            G            C
--   T       Thymine               T         A
--   ------  -----------  -----------------  ----------
--   U       Uracil                U         A
--   W       Weak         A        T         W
--   S       Strong          C  G            S
--   M       aMino        A  C               K
--   K       Keto               G  T         M
--   R       puRine       A     G            Y
--   Y       pYrimidine      C     T         R
--   B       not A           C  G  T         V
--   D       not C        A     G  T         H
--   H       not G        A  C     T         D
--   V       not T        A  C  G            B
--   ------  -----------  -----------------  ----------
--   N       any          A  C  G  T         N           (preferred)
--   ?       any          A  C  G  T         N
--   ------  -----------  -----------------  ----------
--   -       Gap (Zero)                      -           (preferred)
--   .       Gap (Zero)                      -
--   
module ELynx.Data.Character.NucleotideI -- | NucleotideIs. data NucleotideI A :: NucleotideI C :: NucleotideI G :: NucleotideI T :: NucleotideI U :: NucleotideI W :: NucleotideI S :: NucleotideI M :: NucleotideI K :: NucleotideI R :: NucleotideI Y :: NucleotideI B :: NucleotideI D :: NucleotideI H :: NucleotideI V :: NucleotideI N :: NucleotideI Gap :: NucleotideI instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Character.NucleotideI.NucleotideI instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Character.NucleotideI.NucleotideI instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Character.NucleotideI.NucleotideI instance ELynx.Data.Character.Character.Character ELynx.Data.Character.NucleotideI.NucleotideI instance ELynx.Data.Character.Character.CharacterX ELynx.Data.Character.NucleotideI.NucleotideI instance ELynx.Data.Character.Character.CharacterI ELynx.Data.Character.NucleotideI.NucleotideI instance GHC.Enum.Bounded ELynx.Data.Character.NucleotideI.NucleotideI instance GHC.Enum.Enum ELynx.Data.Character.NucleotideI.NucleotideI instance GHC.Classes.Ord ELynx.Data.Character.NucleotideI.NucleotideI instance GHC.Classes.Eq ELynx.Data.Character.NucleotideI.NucleotideI instance GHC.Read.Read ELynx.Data.Character.NucleotideI.NucleotideI instance GHC.Show.Show ELynx.Data.Character.NucleotideI.NucleotideI -- | See header of Alphabet. -- -- Extended nucleotides with gaps. See also -- https://www.bioinformatics.org/sms/iupac.html or -- https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry. -- --
--   Symbol  Description  Bases represented  Complement
--   ------  -----------  -----------------  ----------
--   A       Adenine      A                  T
--   C       Cytosine        C               G
--   G       Guanine            G            C
--   T       Thymine               T         A
--   ------  -----------  -----------------  ----------
--   - or .  Gap (Zero)                      -
--   
module ELynx.Data.Character.NucleotideX -- | Extended nucleotides. data NucleotideX A :: NucleotideX C :: NucleotideX G :: NucleotideX T :: NucleotideX Gap :: NucleotideX instance Data.Vector.Unboxed.Base.Unbox ELynx.Data.Character.NucleotideX.NucleotideX instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector ELynx.Data.Character.NucleotideX.NucleotideX instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector ELynx.Data.Character.NucleotideX.NucleotideX instance ELynx.Data.Character.Character.Character ELynx.Data.Character.NucleotideX.NucleotideX instance ELynx.Data.Character.Character.CharacterX ELynx.Data.Character.NucleotideX.NucleotideX instance GHC.Enum.Bounded ELynx.Data.Character.NucleotideX.NucleotideX instance GHC.Enum.Enum ELynx.Data.Character.NucleotideX.NucleotideX instance GHC.Classes.Ord ELynx.Data.Character.NucleotideX.NucleotideX instance GHC.Classes.Eq ELynx.Data.Character.NucleotideX.NucleotideX instance GHC.Read.Read ELynx.Data.Character.NucleotideX.NucleotideX instance GHC.Show.Show ELynx.Data.Character.NucleotideX.NucleotideX -- | Creation date: Thu May 16 07:58:50 2019. -- -- The different universal codes. - -- https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c -- - http://www.bioinformatics.org/sms2/genetic_code.html - -- https://en.wikipedia.org/wiki/Genetic_code module ELynx.Data.Character.Codon -- | Codons are triplets of characters. newtype Codon a Codon :: (a, a, a) -> Codon a -- | Unsafe conversion from vector with at least three elements; only the -- first three elements are used, the rest is discarded. unsafeFromVec :: Vector v a => v a -> Codon a -- | Universal codes. data UniversalCode Standard :: UniversalCode VertebrateMitochondrial :: UniversalCode -- | Translate a codon to amino acids including translation stops. translate :: UniversalCode -> Codon Nucleotide -> AminoAcidS -- | Translate a codon to amino acids including translation stops. 
-- Translate codons including gaps to amino acid gaps. Be careful, single -- or two character gaps can lead to a reading frame shift and hence, the -- translated sequence may be bogus. translateX :: UniversalCode -> Codon NucleotideX -> AminoAcidS -- | Translate a codon to amino acids including translation stops. -- Translate gap triplets to amino acid gaps, and triplets including -- unknowns to amino acid unknowns. Be careful, also translates other -- IUPAC characters to amino acid Xs! translateI :: UniversalCode -> Codon NucleotideI -> AminoAcidI instance GHC.Generics.Generic ELynx.Data.Character.Codon.UniversalCode instance GHC.Enum.Bounded ELynx.Data.Character.Codon.UniversalCode instance GHC.Enum.Enum ELynx.Data.Character.Codon.UniversalCode instance GHC.Classes.Ord ELynx.Data.Character.Codon.UniversalCode instance GHC.Classes.Eq ELynx.Data.Character.Codon.UniversalCode instance GHC.Read.Read ELynx.Data.Character.Codon.UniversalCode instance GHC.Show.Show ELynx.Data.Character.Codon.UniversalCode instance GHC.Classes.Ord a => GHC.Classes.Ord (ELynx.Data.Character.Codon.Codon a) instance GHC.Classes.Eq a => GHC.Classes.Eq (ELynx.Data.Character.Codon.Codon a) instance GHC.Read.Read a => GHC.Read.Read (ELynx.Data.Character.Codon.Codon a) instance GHC.Show.Show a => GHC.Show.Show (ELynx.Data.Character.Codon.Codon a) instance Data.Aeson.Types.FromJSON.FromJSON ELynx.Data.Character.Codon.UniversalCode instance Data.Aeson.Types.ToJSON.ToJSON ELynx.Data.Character.Codon.UniversalCode -- | Creation date: Fri Oct 5 23:00:17 2018. module ELynx.Data.Sequence.Defaults -- | Space reserved for sequence names when printing them. nameWidth :: Int -- | The length shown when summarizing sequences. summaryLength :: Int -- | How many sequences are shown in summary. summaryNSequences :: Int -- | Field width for tables. fieldWidth :: Int -- | Creation date: Thu Oct 4 18:54:51 2018. -- -- This module is to be imported qualified. 
module ELynx.Data.Sequence.Sequence -- | For now, Names are just ByteStrings. type Name = ByteString -- | The description of a sequence. type Description = ByteString -- | The vector of characters of a sequence. type Characters = Vector Character -- | Sequences have a name, a possibly empty description, a code and -- hopefully a lot of data. data Sequence Sequence :: Name -> Description -> Alphabet -> Characters -> Sequence [name] :: Sequence -> Name [description] :: Sequence -> Description [alphabet] :: Sequence -> Alphabet [characters] :: Sequence -> Characters -- | Convert byte string to sequence characters. fromByteString :: ByteString -> Characters -- | Convert sequence characters to byte string. toByteString :: Characters -> ByteString -- | A short description of the sequence. header :: [Sequence] -> ByteString -- | Trim and show a Sequence. summarize :: Sequence -> ByteString -- | Trim and show a list of Sequences. summarizeSequences :: [Sequence] -> ByteString -- | Trim and show a list of Sequences. body :: [Sequence] -> ByteString -- | Calculate length of Sequence. length :: Sequence -> Int -- | Check if all Sequences have equal length. equalLength :: [Sequence] -> Bool -- | Find the longest Sequence in a list. longest :: [Sequence] -> Sequence -- | Trim to given length. trim :: Int -> Sequence -> Sequence -- | Concatenate two sequences. Names have to match. concat :: Sequence -> Sequence -> Sequence -- | Concatenate a list of sequences, see concat. concatSequences :: [[Sequence]] -> [Sequence] -- | Only take Sequences that are shorter than a given number. filterShorterThan :: Int -> [Sequence] -> [Sequence] -- | Only take Sequences that are longer than a given number. filterLongerThan :: Int -> [Sequence] -> [Sequence] -- | Only take Sequences that contain at least one non-IUPAC -- character. 
filterStandard :: [Sequence] -> [Sequence] instance GHC.Classes.Eq ELynx.Data.Sequence.Sequence.Sequence instance GHC.Show.Show ELynx.Data.Sequence.Sequence.Sequence -- | Creation date: Thu Oct 4 18:40:18 2018. -- -- This module is to be imported qualified. module ELynx.Data.Sequence.Alignment -- | A collection of sequences. data Alignment Alignment :: [Name] -> [Description] -> Alphabet -> Matrix Character -> Alignment [names] :: Alignment -> [Name] [descriptions] :: Alignment -> [Description] [alphabet] :: Alignment -> Alphabet [matrix] :: Alignment -> Matrix Character -- | Number of sites. length :: Alignment -> Int -- | Number of sequences. nSequences :: Alignment -> Int -- | Create Alignment from a list of Sequences. fromSequences :: [Sequence] -> Either String Alignment -- | Conversion to list of Sequences. toSequences :: Alignment -> [Sequence] -- | Similar to summarizeSequences but with a different header. summarize :: Alignment -> ByteString -- | Join two Alignments vertically. That is, add more sequences to -- an alignment. See also concat. join :: Alignment -> Alignment -> Alignment -- | Concatenate two Alignments horizontally. That is, add more -- sites to an alignment. See also join. concat :: Alignment -> Alignment -> Alignment -- | Concatenate a list of Alignments horizontally. See -- concat. concatAlignments :: [Alignment] -> Alignment -- | Only keep columns with standard characters. Alignment columns with -- IUPAC characters are removed. filterColsOnlyStd :: Alignment -> Alignment -- | Filter columns with a proportion of standard characters larger than a -- given number. filterColsStd :: Double -> Alignment -> Alignment -- | Only keep columns without gaps or unknown characters. filterColsNoGaps :: Alignment -> Alignment -- | Frequency data; do not store the actual characters, but their -- frequencies. The matrix is of size N x K, where N is -- the number of sites, and K is the number of characters. 
type FrequencyData = Matrix Double -- | Calculate the distribution of characters. distribution :: FrequencyData -> [Double] -- | Calculate frequency of characters at each site of a multi sequence -- alignment. toFrequencyData :: Alignment -> FrequencyData -- | Diversity analysis. See kEffEntropy. kEffEntropy :: FrequencyData -> [Double] -- | Diversity analysis. See kEffHomoplasy. kEffHomoplasy :: FrequencyData -> [Double] -- | Count the number of extended IUPAC (i.e., non-standard) characters in -- the alignment. countIUPACChars :: Alignment -> Int -- | Count the number of gaps in the alignment. countGaps :: Alignment -> Int -- | Count the number of unknown characters in the alignment. countUnknowns :: Alignment -> Int -- | Sample the given sites from a multi sequence alignment. subSample :: [Int] -> Alignment -> Alignment -- | Randomly sample a given number of sites of the multi sequence -- alignment. randomSubSample :: PrimMonad m => Int -> Alignment -> Gen (PrimState m) -> m Alignment instance GHC.Classes.Eq ELynx.Data.Sequence.Alignment.Alignment instance GHC.Show.Show ELynx.Data.Sequence.Alignment.Alignment -- | Creation date: Fri May 17 13:49:18 2019. module ELynx.Data.Sequence.Translate -- | Translate a sequence from DNA or DNAX to -- ProteinS. translateSeq :: UniversalCode -> Int -> Sequence -> Sequence -- | Write FASTA files. -- -- NCBI file specifications. module ELynx.Export.Sequence.Fasta -- | Convert a Sequence to Fasta format. sequenceToFasta :: Sequence -> ByteString -- | Convert a list of Sequences to Fasta format. A newline is added -- between any two Sequences. sequencesToFasta :: [Sequence] -> ByteString -- | Parse FASTA files. -- -- NCBI file specifications. module ELynx.Import.Sequence.Fasta -- | Parse a sequence of characters. fastaSequence :: Alphabet -> Parser Sequence -- | Parse a Fasta file with given Alphabet. fasta :: Alphabet -> Parser [Sequence]