-- | Provides detailed information on isostericity of RNA basepairs. All data
-- is extracted from csv files which were created from supplemental files in:
--
-- @
-- Frequency and isostericity of RNA base pairs
-- Jesse Stombaugh, Craig L. Zirbel, Eric Westhof, and Neocles B. Leontis
-- Nucl. Acids Res. (2009)
-- doi:10.1093/nar/gkp011
-- @
--

module Biobase.Secondary.Isostericity where

import           Data.ByteString.Char8 (ByteString)
import           Data.FileEmbed (makeRelativeToProject, embedFile)
import           Data.Function (on)
import           Data.List
import           Data.Tuple.Select
import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M
import           Text.CSV

import           Biobase.Primary.Nuc
import           Biobase.Secondary.Basepair



-- | Methods to determine the isostericity classes for a given basepair type,
-- or alternatively which basepair types are in a certain isostericity class.
--
-- TODO This requires a major cleanup: right now we are handling 'String's as
-- class descriptors, but we should really be newtype-wrapping or create enum
-- data constructors.

class IsostericityLookup a where
  -- | To which classes does a basepair+type belong
  getClasses :: a -> [String] -- TODO this should return [Class]
  -- | What basepairs+type are in a particular class
  inClass :: String -> [a]

-- | For extended basepairs, we take the default mapping and go from there.
--
-- TODO inClass missing

instance IsostericityLookup (ExtPair n) where
  getClasses :: ExtPair n -> [String]
getClasses ExtPair n
p
    | Just [String]
cs <- ExtPair n -> Map (ExtPair n) [String] -> Maybe [String]
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup ExtPair n
p Map (ExtPair n) [String]
forall k (n :: k). Map (ExtPair n) [String]
defaultIsostericityMap
    = [String]
cs
    | Bool
otherwise = []
  inClass :: String -> [ExtPair n]
inClass String
x = ((ExtPair n, [String]) -> ExtPair n)
-> [(ExtPair n, [String])] -> [ExtPair n]
forall a b. (a -> b) -> [a] -> [b]
map (ExtPair n, [String]) -> ExtPair n
forall a b. (a, b) -> a
fst ([(ExtPair n, [String])] -> [ExtPair n])
-> ([(ExtPair n, [String])] -> [(ExtPair n, [String])])
-> [(ExtPair n, [String])]
-> [ExtPair n]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((ExtPair n, [String]) -> Bool)
-> [(ExtPair n, [String])] -> [(ExtPair n, [String])]
forall a. (a -> Bool) -> [a] -> [a]
filter ((String
x String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`)([String] -> Bool)
-> ((ExtPair n, [String]) -> [String])
-> (ExtPair n, [String])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(ExtPair n, [String]) -> [String]
forall a b. (a, b) -> b
snd) ([(ExtPair n, [String])] -> [ExtPair n])
-> [(ExtPair n, [String])] -> [ExtPair n]
forall a b. (a -> b) -> a -> b
$ Map (ExtPair n) [String] -> [(ExtPair n, [String])]
forall k a. Map k a -> [(k, a)]
M.assocs Map (ExtPair n) [String]
forall k (n :: k). Map (ExtPair n) [String]
defaultIsostericityMap

-- | Normal basepairs are assumed to have cWW basepairing.
--
-- TODO inClass missing

instance IsostericityLookup (Pair n) where
  getClasses :: Pair n -> [String]
getClasses Pair n
p
    | Just [String]
cs <- (Pair n, (CTisomerism, Edge, Edge))
-> Map (Pair n, (CTisomerism, Edge, Edge)) [String]
-> Maybe [String]
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup (Pair n
p,(CTisomerism, Edge, Edge)
CWW) Map (Pair n, (CTisomerism, Edge, Edge)) [String]
forall k (n :: k). Map (ExtPair n) [String]
defaultIsostericityMap
    = [String]
cs
    | Bool
otherwise = []
  inClass :: String -> [Pair n]
inClass String
x = (((Pair n, (CTisomerism, Edge, Edge)), [String]) -> Pair n)
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])] -> [Pair n]
forall a b. (a -> b) -> [a] -> [b]
map ((Pair n, (CTisomerism, Edge, Edge)) -> Pair n
forall a b. Sel1 a b => a -> b
sel1 ((Pair n, (CTisomerism, Edge, Edge)) -> Pair n)
-> (((Pair n, (CTisomerism, Edge, Edge)), [String])
    -> (Pair n, (CTisomerism, Edge, Edge)))
-> ((Pair n, (CTisomerism, Edge, Edge)), [String])
-> Pair n
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Pair n, (CTisomerism, Edge, Edge)), [String])
-> (Pair n, (CTisomerism, Edge, Edge))
forall a b. (a, b) -> a
fst)            -- remove extended information
            ([((Pair n, (CTisomerism, Edge, Edge)), [String])] -> [Pair n])
-> ([((Pair n, (CTisomerism, Edge, Edge)), [String])]
    -> [((Pair n, (CTisomerism, Edge, Edge)), [String])])
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
-> [Pair n]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (((Pair n, (CTisomerism, Edge, Edge)), [String]) -> Bool)
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
forall a. (a -> Bool) -> [a] -> [a]
filter (((CTisomerism, Edge, Edge)
CWW(CTisomerism, Edge, Edge) -> (CTisomerism, Edge, Edge) -> Bool
forall a. Eq a => a -> a -> Bool
==)((CTisomerism, Edge, Edge) -> Bool)
-> (((Pair n, (CTisomerism, Edge, Edge)), [String])
    -> (CTisomerism, Edge, Edge))
-> ((Pair n, (CTisomerism, Edge, Edge)), [String])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Pair n, (CTisomerism, Edge, Edge)) -> (CTisomerism, Edge, Edge)
forall a b. (a, b) -> b
snd ((Pair n, (CTisomerism, Edge, Edge)) -> (CTisomerism, Edge, Edge))
-> (((Pair n, (CTisomerism, Edge, Edge)), [String])
    -> (Pair n, (CTisomerism, Edge, Edge)))
-> ((Pair n, (CTisomerism, Edge, Edge)), [String])
-> (CTisomerism, Edge, Edge)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Pair n, (CTisomerism, Edge, Edge)), [String])
-> (Pair n, (CTisomerism, Edge, Edge))
forall a b. (a, b) -> a
fst) -- keep only cWW pairs (baseT-ype)
            ([((Pair n, (CTisomerism, Edge, Edge)), [String])]
 -> [((Pair n, (CTisomerism, Edge, Edge)), [String])])
-> ([((Pair n, (CTisomerism, Edge, Edge)), [String])]
    -> [((Pair n, (CTisomerism, Edge, Edge)), [String])])
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (((Pair n, (CTisomerism, Edge, Edge)), [String]) -> Bool)
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
forall a. (a -> Bool) -> [a] -> [a]
filter ((String
x String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`)([String] -> Bool)
-> (((Pair n, (CTisomerism, Edge, Edge)), [String]) -> [String])
-> ((Pair n, (CTisomerism, Edge, Edge)), [String])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.((Pair n, (CTisomerism, Edge, Edge)), [String]) -> [String]
forall a b. (a, b) -> b
snd)     -- select based on class
            ([((Pair n, (CTisomerism, Edge, Edge)), [String])] -> [Pair n])
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])] -> [Pair n]
forall a b. (a -> b) -> a -> b
$ Map (Pair n, (CTisomerism, Edge, Edge)) [String]
-> [((Pair n, (CTisomerism, Edge, Edge)), [String])]
forall k a. Map k a -> [(k, a)]
M.assocs Map (Pair n, (CTisomerism, Edge, Edge)) [String]
forall k (n :: k). Map (ExtPair n) [String]
defaultIsostericityMap


-- ** default data

-- | The default isostericity mapping.

defaultIsostericityMap :: Map (ExtPair n) [String]
defaultIsostericityMap = [[[String]]] -> Map (ExtPair n) [String]
forall k (n :: k). [[[String]]] -> Map (ExtPair n) [String]
mkIsostericityMap [[[String]]]
parsedCSV

-- | Mapping of (pair,pairtype) to isostericity class.

mkIsostericityMap :: [[[String]]] -> Map (ExtPair n) [String]
mkIsostericityMap = ([String] -> [String] -> [String])
-> [(ExtPair n, [String])] -> Map (ExtPair n) [String]
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith (\[String]
x [String]
y -> [String] -> [String]
forall a. Eq a => [a] -> [a]
nub ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ [String]
x[String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++[String]
y) ([(ExtPair n, [String])] -> Map (ExtPair n) [String])
-> ([[[String]]] -> [(ExtPair n, [String])])
-> [[[String]]]
-> Map (ExtPair n) [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[[String]]] -> [(ExtPair n, [String])]
forall k (n :: k). [[[String]]] -> [(ExtPair n, [String])]
mkIsostericityList

-- | Process CSV list-of-lists to get the isostericity data.

mkIsostericityList :: [[[String]]] -> [(ExtPair n, [String])]
mkIsostericityList :: [[[String]]] -> [(ExtPair n, [String])]
mkIsostericityList [[[String]]]
gs = ((ExtPair n, [String]) -> (ExtPair n, [String]) -> Bool)
-> [(ExtPair n, [String])] -> [(ExtPair n, [String])]
forall a. (a -> a -> Bool) -> [a] -> [a]
nubBy (ExtPair n -> ExtPair n -> Bool
forall a. Eq a => a -> a -> Bool
(==) (ExtPair n -> ExtPair n -> Bool)
-> ((ExtPair n, [String]) -> ExtPair n)
-> (ExtPair n, [String])
-> (ExtPair n, [String])
-> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (ExtPair n, [String]) -> ExtPair n
forall a b. (a, b) -> a
fst) ([(ExtPair n, [String])] -> [(ExtPair n, [String])])
-> ([[[String]]] -> [(ExtPair n, [String])])
-> [[[String]]]
-> [(ExtPair n, [String])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((ExtPair n, [String]) -> [(ExtPair n, [String])])
-> [(ExtPair n, [String])] -> [(ExtPair n, [String])]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (ExtPair n, [String]) -> [(ExtPair n, [String])]
forall a a b b.
(((a, a), (a, b, b)), b) -> [(((a, a), (a, b, b)), b)]
turn ([(ExtPair n, [String])] -> [(ExtPair n, [String])])
-> ([[[String]]] -> [(ExtPair n, [String])])
-> [[[String]]]
-> [(ExtPair n, [String])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([[String]] -> [(ExtPair n, [String])])
-> [[[String]]] -> [(ExtPair n, [String])]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap [[String]] -> [(ExtPair n, [String])]
forall k k b (n :: k) (n :: k).
Read b =>
[[String]] -> [(((Letter RNA n, Letter RNA n), b), [String])]
f ([[[String]]] -> [(ExtPair n, [String])])
-> [[[String]]] -> [(ExtPair n, [String])]
forall a b. (a -> b) -> a -> b
$ [[[String]]]
gs where
  f :: [[String]] -> [(((Letter RNA n, Letter RNA n), b), [String])]
f [[String]]
g = ((String, [String])
 -> (((Letter RNA n, Letter RNA n), b), [String]))
-> [(String, [String])]
-> [(((Letter RNA n, Letter RNA n), b), [String])]
forall a b. (a -> b) -> [a] -> [b]
map (\(String, [String])
e ->  ( ( let [Char
x,Char
y] = (String, [String]) -> String
forall a b. (a, b) -> a
fst (String, [String])
e
                        in (Char -> Letter RNA n
forall k (n :: k). Char -> Letter RNA n
charRNA Char
x, Char -> Letter RNA n
forall k (n :: k). Char -> Letter RNA n
charRNA Char
y), String -> b
forall a. Read a => String -> a
read String
bpt
                      )
                    , [String] -> [String]
forall a. Eq a => [a] -> [a]
nub ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ (String, [String]) -> [String]
forall a b. (a, b) -> b
snd (String, [String])
e)
            ) ([(String, [String])]
 -> [(((Letter RNA n, Letter RNA n), b), [String])])
-> [(String, [String])]
-> [(((Letter RNA n, Letter RNA n), b), [String])]
forall a b. (a -> b) -> a -> b
$ ([String] -> (String, [String]))
-> [[String]] -> [(String, [String])]
forall a b. (a -> b) -> [a] -> [b]
map [String] -> (String, [String])
entry [[String]]
xs where
    bpt :: String
bpt = [String] -> String
forall a. [a] -> a
head ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [[String]] -> [String]
forall a. [a] -> a
head [[String]]
g
    xs :: [[String]]
xs = [[String]] -> [[String]]
forall a. [a] -> [a]
tail [[String]]
g
    entry :: [String] -> (String, [String])
entry [String]
x = ([String]
x[String] -> Int -> String
forall a. [a] -> Int -> a
!!Int
0, (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
filter (\Char
z -> Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Char
z Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
bracket)) ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char
'I' Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
2 ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ [String]
x)
  bracket :: String
  bracket :: String
bracket = String
"()"
  turn :: (((a, a), (a, b, b)), b) -> [(((a, a), (a, b, b)), b)]
turn entry :: (((a, a), (a, b, b)), b)
entry@(((a
x,a
y),(a
wc,b
tx,b
ty)), b
cs) = [(((a, a), (a, b, b)), b)
entry, (((a
y,a
x),(a
wc,b
ty,b
tx)), b
cs)]

-- | Simple parsing of raw CSV data.

parsedCSV :: [[[Field]]]
parsedCSV :: [[[String]]]
parsedCSV = ([[String]] -> Bool) -> [[[String]]] -> [[[String]]]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> ([[String]] -> Bool) -> [[String]] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[String]] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null) [[[String]]]
gs where
  gs :: [[[String]]]
gs = ([[String]] -> [[String]]) -> [[[String]]] -> [[[String]]]
forall a b. (a -> b) -> [a] -> [b]
map (([String] -> Bool) -> [[String]] -> [[String]]
forall a. (a -> Bool) -> [a] -> [a]
filter ((String
""String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/=)(String -> Bool) -> ([String] -> String) -> [String] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.[String] -> String
forall a. [a] -> a
head)) ([[[String]]] -> [[[String]]])
-> ([[String]] -> [[[String]]]) -> [[String]] -> [[[String]]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([String] -> [String] -> Bool) -> [[String]] -> [[[String]]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (\[String]
x [String]
y -> String
""String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/= ([String] -> String
forall a. [a] -> a
head [String]
y)) ([[String]] -> [[[String]]]) -> [[String]] -> [[[String]]]
forall a b. (a -> b) -> a -> b
$ [[String]]
csv
  Right [[String]]
csv = String -> String -> Either ParseError [[String]]
parseCSV String
"isostericity/detailed" (String -> Either ParseError [[String]])
-> String -> Either ParseError [[String]]
forall a b. (a -> b) -> a -> b
$ ByteString -> String
BS.unpack ByteString
detailedCSV



-- ** Raw embeddings

-- | Raw CSV data, embedded into the library.

detailedCSV :: ByteString
detailedCSV :: ByteString
detailedCSV = $(makeRelativeToProject "sources/isostericity-detailed.csv" >>= embedFile)