{- TooManyCells.MakeTree.Preprocess
Gregory W. Schwartz

Collects functions pertaining to preprocessing the data.
-}

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}

module TooManyCells.Matrix.Preprocess
    ( scaleRMat
    , scaleDenseMat
    , scaleSparseMat
    , totalScaleSparseMat
    , logCPMSparseMat
    , uqScaleSparseMat
    , medScaleSparseMat
    , minMaxNormSparseMat
    , transposeSparseMat
    , quantileScaleSparseMat
    , centerScaleSparseCell
    , tfidfScaleSparseMat
    , filterRMat
    , filterDenseMat
    , filterNumSparseMat
    , filterWhitelistSparseMat
    , getCellWhitelist
    , featureSelectionRandomForest
    , removeCorrelated
    , pcaRMat
    , pcaDenseSc
    , pcaSparseSc
    , lsaSparseSc
    , svdSparseSc
    , shiftPositiveSc
    , pcaDenseMat
    , pcaSparseMat
    , lsaSparseMat
    , svdSparseMat
    , shiftPositiveMat
    , emptyMatErr
    , labelRows
    , labelCols
    , fastBinJoinRows
    , fastBinJoinCols
    , transformChrRegions
    ) where

-- Remote
import BirchBeer.Types (LabelMap (..), Id (..), Label (..), Feature (..))
import Data.Bool (bool)
import Data.List (sort, foldl', transpose)
import Data.Monoid ((<>))
import Data.Maybe (fromMaybe, isJust)
import H.Prelude (io)
import Language.R as R
import Language.R.QQ (r)
import Math.Clustering.Spectral.Sparse (B1 (..), B2 (..), b1ToB2)
import Statistics.Quantile (quantile, s)
import Statistics.Sample (mean, stdDev)
import TextShow (showt)
import qualified Control.Foldl as Fold
import qualified Control.Lens as L
import qualified Data.HashMap.Strict as HMap
import qualified Data.HashSet as HSet
import qualified Data.IntMap as IMap
import qualified Data.IntSet as ISet
import qualified Data.IntervalMap.Interval as IntervalMap
import qualified Data.IntervalMap.Strict as IntervalMap
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Sparse.Common as S
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Radix as V
import qualified Data.Vector.Storable as VS
import qualified Math.Clustering.Spectral.Sparse as S
import qualified Numeric.LinearAlgebra as H
import qualified Numeric.LinearAlgebra.Devel as H
import qualified Numeric.LinearAlgebra.HMatrix as H
import qualified Numeric.LinearAlgebra.SVD.SVDLIBC as SVD

-- Local
import TooManyCells.File.Types
import TooManyCells.Matrix.Types
import TooManyCells.Matrix.Utility

-- | Empty matrix error.
emptyMatErr :: String -> String
emptyMatErr checkType = "Matrix is empty in " <> checkType <> ". Check --filter-thresholds, --normalization, or the input matrix for over filtering or incorrect input format."

-- | Check if valid features or cells are empty, error if so.
emptyMatCheckErr :: String -> [a] -> [a]
emptyMatCheckErr checkType xs = bool xs (error $ emptyMatErr checkType) . null $ xs

-- | Scale a matrix.
scaleRMat :: RMatObsRow s -> R s (RMatObsRow s)
scaleRMat (RMatObsRow mat) = do
    fmap
        RMatObsRow
        [r| mat = scale(t(mat_hs));
            t(mat[,colSums(!is.na(mat)) > 0])
        |]

-- | Scale a matrix based on the library size for each cell and median feature
-- size.
scaleDenseMat :: MatObsRow -> MatObsRow
scaleDenseMat (MatObsRow mat) = MatObsRow
                              . hToSparseMat
                              . H.fromColumns
                              . fmap scaleDenseMol
                              . H.toColumns
                              . H.fromRows
                              . fmap scaleDenseCell
                              . H.toRows
                              . sparseToHMat
                              $ mat

-- | Scale a matrix based on the library size for each cell and median feature
-- size.
scaleSparseMat :: MatObsRow -> MatObsRow
scaleSparseMat (MatObsRow mat) = MatObsRow
                               . S.sparsifySM
                               . S.fromColsL
                               . fmap scaleSparseMol
                               . S.toRowsL -- A bit confusing, but this returns the original columns due to the earlier fromColsL . toRowsL.
                               . S.fromColsL
                               . fmap scaleSparseCell
                               . S.toRowsL
                               $ mat

-- | Scale a matrix based on the library size.
totalScaleSparseMat :: MatObsRow -> MatObsRow
totalScaleSparseMat = MatObsRow
                    . S.sparsifySM
                    . S.fromRowsL
                    . fmap scaleSparseCell
                    . S.toRowsL
                    . unMatObsRow

-- | Scale a matrix based on the upper quartile.
uqScaleSparseMat :: MatObsRow -> MatObsRow
uqScaleSparseMat (MatObsRow mat) = MatObsRow
                                 . S.sparsifySM
                                 . S.fromRowsL
                                 . fmap uqScaleSparseCell
                                 . S.toRowsL
                                 $ mat

-- | Scale a matrix based on log(CPM + 1).
logCPMSparseMat :: Double -> MatObsRow -> MatObsRow
logCPMSparseMat b (MatObsRow mat) = MatObsRow
                                  . S.sparsifySM
                                  . S.fromRowsL
                                  . fmap (logCPMSparseCell b)
                                  . S.toRowsL
                                  $ mat

-- | Scale a matrix based on the median.
medScaleSparseMat :: MatObsRow -> MatObsRow
medScaleSparseMat (MatObsRow mat) = MatObsRow
                                 . S.sparsifySM
                                 . S.fromRowsL
                                 . fmap medScaleSparseCell
                                 . S.toRowsL
                                 $ mat

-- | Scale a matrix based quantile normalization. Will ignore 0s if missing from
-- input.
quantileScaleSparseMat :: MatObsRow -> MatObsRow
quantileScaleSparseMat (MatObsRow mat) =
  MatObsRow
    . S.sparsifySM
    . fmap (flip (IMap.findWithDefault 0) totalRankMap)
    $ rankMat
  where
    totalRankMap = IMap.fromList
                 . zip [1,2..]
                 . fmap (Fold.fold avg)
                 . transpose
                 . fmap (sort . fmap snd . S.toListSV)
                 . S.toRowsL
                 $ mat
    avg = (/) <$> Fold.sum <*> Fold.genericLength
    rankMat :: S.SpMatrix Int
    rankMat = S.fromRowsL . fmap rankTransform . S.toRowsL $ mat
    rankTransform :: S.SpVector Double -> S.SpVector Int
    rankTransform xs = fmap (flip (Map.findWithDefault 0) rankMap) xs
      where
        rankMap :: Map.Map Double Int
        rankMap = Map.fromList
                . flip zip [1,2..]
                . Set.toList
                . Set.fromList
                . sort
                . fmap snd
                . S.toListSV
                $ xs

-- | TF-IDF normalization.
tfidfScaleSparseMat :: MatObsRow -> MatObsRow
tfidfScaleSparseMat = MatObsRow . unB2 . b1ToB2 . B1 . unMatObsRow

-- | Scale matrix rows based on min-max normalization.
minMaxNormSparseMat :: MatObsRow -> MatObsRow
minMaxNormSparseMat (MatObsRow mat) =
  MatObsRow
    . S.sparsifySM
    . S.fromRowsL
    . fmap (\x -> fromMaybe x $ minMaxNormSV x)
    . S.toRowsL
    $ mat

-- | Transpose matrix (helper to apply normalizations to columns instead, for instance).
transposeSparseMat :: MatObsRow -> MatObsRow
transposeSparseMat (MatObsRow mat) = MatObsRow . S.transposeSM $ mat

-- | Scale a cell by the library size.
scaleDenseCell :: H.Vector H.R -> H.Vector H.R
scaleDenseCell xs = H.cmap (/ total) xs
  where
    total = H.sumElements xs

-- | Scale a cell by the library size.
scaleSparseCell :: S.SpVector Double -> S.SpVector Double
scaleSparseCell xs = fmap (/ total) xs
  where
    total = foldl' (+) 0 xs

-- | log(CPM + 1) normalization for cell.
logCPMSparseCell :: Double -> S.SpVector Double -> S.SpVector Double
logCPMSparseCell b xs = fmap (logBase b . (+ 1) . cpm) xs
  where
    cpm x = x / tpm
    tpm = foldl' (+) 0 xs / 1000000

-- | Upper quartile scale cells.
uqScaleSparseCell :: S.SpVector Double -> S.SpVector Double
uqScaleSparseCell xs = fmap (/ uq) xs
  where
    uq = quantile s 3 4
       . VS.fromList
       . filter (/= 0)
       . fmap snd
       . S.toListSV
       $ xs

-- | Median scale cells.
medScaleSparseCell :: S.SpVector Double -> S.SpVector Double
medScaleSparseCell xs = fmap (/ med) xs
  where
    med = quantile s 2 4
        . VS.fromList
        . filter (/= 0)
        . fmap snd
        . S.toListSV
        $ xs

-- | Center and scale a cell.
centerScaleSparseCell :: S.SpVector Double -> S.SpVector Double
centerScaleSparseCell xs = fmap standard xs
  where
    standard x
      | sigma == 0 = 0
      | otherwise  = (x - mu) / sigma
    mu = mean v
    sigma = stdDev v
    v = S.toVector xs

-- | Center a sparse vector.
centerSparseVector :: S.SpVector Double -> S.SpVector Double
centerSparseVector xs = fmap center xs
  where
    center x = x - mu
    mu = mean $ S.toVector xs

-- | Median scale molecules across cells.
scaleDenseMol :: H.Vector H.R -> H.Vector H.R
scaleDenseMol xs = H.cmap (/ med) xs
  where
    med = quantile s 2 4 . VS.filter (/= 0) $ xs

-- | Median scale molecules across cells.
scaleSparseMol :: S.SpVector Double -> S.SpVector Double
scaleSparseMol xs = fmap (/ med) xs
  where
    med = quantile s 2 4
        . VS.fromList
        . filter (/= 0)
        . fmap snd
        . S.toListSV
        $ xs

-- | Min-max normalization of sparse vector.
minMaxNormSV :: S.SpVector Double -> Maybe (S.SpVector Double)
minMaxNormSV xs = do
  (mi, ma) <-
    L.sequenceOf L.both $ Fold.fold ((,) <$> Fold.minimum <*> Fold.maximum) xs
  pure $ fmap (\x -> (x - mi) / (ma - mi)) xs

-- | Filter a matrix to remove low count cells and features.
filterDenseMat :: FilterThresholds -> SingleCells -> SingleCells
filterDenseMat (FilterThresholds (rowThresh, colThresh)) sc =
    SingleCells { _matrix   = m
                , _rowNames = r
                , _colNames = c
                }
  where
    m = MatObsRow . hToSparseMat $ filteredMat
    rowFilter = (>= rowThresh) . H.sumElements
    colFilter = (>= colThresh) . H.sumElements
    mat            = sparseToHMat . unMatObsRow . _matrix $ sc
    validRows = ISet.fromList
              . emptyMatCheckErr "cells"
              . fmap fst
              . filter (rowFilter . snd)
              . zip [0..]
              . H.toRows
              $ mat
    validCols = ISet.fromList
              . emptyMatCheckErr "features"
              . fmap fst
              . filter (colFilter . snd)
              . zip [0..]
              . H.toColumns
              $ mat
    filteredMat = mat
             H.?? ( H.Pos (H.idxs (ISet.toAscList validRows))
                  , H.Pos (H.idxs (ISet.toAscList validCols))
                  )
    r = V.ifilter (\i _ -> ISet.member i validRows)
      . _rowNames
      $ sc
    c = V.ifilter (\i _ -> ISet.member i validCols)
      . _colNames
      $ sc

-- | Filter a matrix to remove low count cells and features.
filterNumSparseMat :: FilterThresholds -> SingleCells -> SingleCells
filterNumSparseMat (FilterThresholds (rowThresh, colThresh)) sc =
    SingleCells { _matrix   = m
                , _rowNames = r
                , _colNames = c
                }
  where
    m = MatObsRow colFilteredMat
    rowFilter = (>= rowThresh) . foldl' (+) 0
    colFilter = (>= colThresh) . foldl' (+) 0
    mat            = unMatObsRow . _matrix $ sc
    validRows = ISet.fromList
              . emptyMatCheckErr "cells"
              . fmap fst
              . filter (rowFilter . snd)
              . zip [0..]
              . S.toRowsL
              $ mat
    validCols = ISet.fromList
              . emptyMatCheckErr "features"
              . fmap fst
              . filter (colFilter . snd)
              . zip [0..]
              . S.toRowsL
              . S.transposeSM -- toRowsL is much faster.
              $ mat
    rowFilteredMat = S.fromRowsL -- fromRowsL fixed.
                   . fmap snd
                   . filter (flip ISet.member validRows . fst)
                   . zip [0..]
                   . S.toRowsL
                   $ mat
    colFilteredMat = S.fromColsL
                   . fmap snd
                   . filter (flip ISet.member validCols . fst)
                   . zip [0..]
                   . S.toRowsL -- Rows of transpose are faster.
                   . S.transposeSM
                   $ rowFilteredMat
    r = V.ifilter (\i _ -> ISet.member i validRows)
      . _rowNames
      $ sc
    c = V.ifilter (\i _ -> ISet.member i validCols)
      . _colNames
      $ sc

-- | Filter a matrix to keep whitelist cells.
filterWhitelistSparseMat :: CellWhitelist
                         -> SingleCells
                         -> SingleCells
filterWhitelistSparseMat (CellWhitelist wl) sc =
    sc { _matrix   = m
       , _rowNames = r
       }
  where
    m = MatObsRow rowFilteredMat
    mat            = unMatObsRow . _matrix $ sc
    validIdx       = V.modify V.sort
                   . V.map fst
                   . V.filter (\(_, (Cell !c)) -> HSet.member c wl)
                   . V.imap (\i v -> (i, v))
                   . _rowNames
                   $ sc
    rowFilteredMat = S.fromRowsL -- fromRowsL fixed.
                   . fmap (S.extractRow mat)
                   . emptyMatCheckErr "cells"
                   . V.toList
                   $ validIdx
    r = V.map (\x -> fromMaybe (error $ "\nWhitelist row index out of bounds (do the whitelist barcodes match the data?): " <> show x <> " out of " <> (show . length . _rowNames $ sc))
                   . (V.!?) (_rowNames sc)
                   $ x
              )
      $ validIdx

-- | Get a cell white list from a file.
getCellWhitelist :: CellWhitelistFile -> IO CellWhitelist
getCellWhitelist (CellWhitelistFile file) = do
    contents <- T.readFile file

    let whiteList = CellWhitelist
                  . HSet.fromList
                  . filter (not . T.null)
                  . T.lines
                  $ contents

    return whiteList

-- | Filter a matrix to remove low count cells. R version.
filterRMat :: RMatObsRow s -> R s (RMatObsRow s)
filterRMat (RMatObsRow mat) =
    fmap RMatObsRow [r| mat = mat_hs[,colSums(mat_hs) >= 250] |]

-- | Perform feature selection on a matrix.
featureSelectionRandomForest :: RMatObsRow s -> R s (RMatObsRow s)
featureSelectionRandomForest (RMatObsRow mat) = do
    [r| suppressPackageStartupMessages(library(randomForest)) |]

    importance   <- [r| randomForest(mat_hs, na.action = na.omit)$importance |]
    importantMat <- [r| mat_hs[,importance_hs > sort(importance_hs, decreasing = TRUE)[10]] |]

    return . RMatObsRow $ importantMat

-- | Remove highly correlated (> 0.6) variables in a matrix.
removeCorrelated :: RMatObsRow s -> R s (RMatObsRow s)
removeCorrelated (RMatObsRow mat) = do
    [r| suppressPackageStartupMessages(library(caret)) |]

    cor   <- [r| cor(mat_hs) |]
    importantMat <- [r| mat_hs[,-findCorrelation(cor_hs, cutoff = 0.6, exact = FALSE)] |]

    return . RMatObsRow $ importantMat

-- | Conduct PCA on a matrix, using components > 5% of variance.
pcaRMat :: RMatObsRow s -> R s (RMatObsRow s)
pcaRMat (RMatObsRow mat) = do
    fmap
        RMatObsRow
        [r| mat = prcomp(t(mat_hs), tol = 0.95)$x
        |]

-- | Conduct PCA on a SingleCells, taking the first principal components.
pcaDenseSc :: DropDimensionFlag -> PCADim -> SingleCells -> SingleCells
pcaDenseSc df p@(PCADim n) sc =
  L.set colNames (V.fromList . fmap (\x -> Feature $ "PCA " <> showt x) $ [begin..end])
    . L.over matrix (pcaDenseMat df p)
    $ sc
  where
    begin = bool 1 (n + 1) . unDropDimensionFlag $ df
    end   = bool n ((S.ncols . unMatObsRow . L.view matrix $ sc) - n + 1)
          . unDropDimensionFlag
          $ df

-- | Conduct PCA on a SingleCells, taking the first principal components.
pcaSparseSc :: DropDimensionFlag -> PCADim -> SingleCells -> SingleCells
pcaSparseSc df p@(PCADim n) sc =
  L.set colNames (V.fromList . fmap (\x -> Feature $ "PCA " <> showt x) $ [begin..end])
    . L.over matrix (pcaSparseMat df p)
    $ sc
  where
    begin = bool 1 (n + 1) . unDropDimensionFlag $ df
    end   = bool n ((S.ncols . unMatObsRow . L.view matrix $ sc) - n + 1)
          . unDropDimensionFlag
          $ df

-- | Conduct LSA on a SingleCells, taking the first singular values.
lsaSparseSc :: DropDimensionFlag -> LSADim -> SingleCells -> SingleCells
lsaSparseSc df l@(LSADim n) sc =
  L.set colNames (V.fromList . fmap (\x -> Feature $ "LSA " <> showt x) $ [begin..end])
    . L.over matrix (lsaSparseMat df l)
    $ sc
  where
    begin = bool 1 (n + 1) . unDropDimensionFlag $ df
    end   = bool n ((S.ncols . unMatObsRow . L.view matrix $ sc) - n + 1)
          . unDropDimensionFlag
          $ df

-- | Conduct SVD on a SingleCells, taking the first singular values.
svdSparseSc :: DropDimensionFlag -> SVDDim -> SingleCells -> SingleCells
svdSparseSc df s@(SVDDim n) sc =
  L.set colNames (V.fromList . fmap (\x -> Feature $ "SVD " <> showt x) $ [begin..end])
    . L.over matrix (svdSparseMat df s)
    $ sc
  where
    begin = bool 1 (n + 1) . unDropDimensionFlag $ df
    end   = bool n ((S.ncols . unMatObsRow . L.view matrix $ sc) - n + 1)
          . unDropDimensionFlag
          $ df

-- | Obtain the right singular vectors from N to E on of a sparse
-- matrix.
sparseRightSVD :: Int -> Int -> S.SpMatrix Double -> [S.SpVector Double]
sparseRightSVD n e =
  fmap (S.sparsifySV . S.fromListDenseSV e . drop (n - 1) . H.toList)
    . H.toColumns
    . (\(_, _, !z) -> z)
    . SVD.sparseSvd (e + (n - 1))
    . H.mkCSR
    . fmap (\(!i, !j, !x) -> ((i, j), x))
    . S.toListSM

-- | Conduct PCA on a matrix, retaining the specified number of dimensions.
pcaDenseMat :: DropDimensionFlag -> PCADim -> MatObsRow -> MatObsRow
pcaDenseMat df (PCADim pcaDim) (MatObsRow matObs) =
  MatObsRow
    . hToSparseMat
    . H.takeColumns pcaDim
    . H.mul mat
    . (\(u, _, _) -> u)
    . H.svd
    . H.unSym
    . snd
    . H.meanCov
    $ mat
  where
    mat = sparseToHMat matObs
    begin = bool 1 (pcaDim + 1) . unDropDimensionFlag $ df
    end   = bool pcaDim (S.ncols matObs - pcaDim)
          . unDropDimensionFlag
          $ df

-- | Conduct SVD on a matrix, retaining the specified number of dimensions.
pcaSparseMat :: DropDimensionFlag -> PCADim -> MatObsRow -> MatObsRow
pcaSparseMat df (PCADim pcaDim) (MatObsRow mat) =
  MatObsRow
    . (S.#~#) scaled
    . S.fromRowsL
    . sparseRightSVD begin end
    $ scaled
  where
    scaled = S.fromColsL
           . fmap centerScaleSparseCell
           . S.toRowsL -- Scale features for PCA
           . S.transpose
           $ mat
    begin = bool 1 (pcaDim + 1) . unDropDimensionFlag $ df
    end   = bool pcaDim (S.ncols mat - pcaDim) . unDropDimensionFlag $ df

-- | Conduct LSA on a matrix, retaining the specified number of dimensions.
lsaSparseMat :: DropDimensionFlag -> LSADim -> MatObsRow -> MatObsRow
lsaSparseMat df (LSADim lsaDim) (MatObsRow mat) =
  MatObsRow
    . S.fromRowsL
    . S.secondLeft begin end
    . unB2
    . b1ToB2
    . B1
    $ mat
  where
    begin = bool 1 (lsaDim + 1) . unDropDimensionFlag $ df
    end   = bool lsaDim (S.ncols mat - lsaDim) . unDropDimensionFlag $ df

-- | Conduct SVD on a matrix, retaining the specified number of dimensions.
svdSparseMat :: DropDimensionFlag -> SVDDim -> MatObsRow -> MatObsRow
svdSparseMat df (SVDDim svdDim) (MatObsRow mat) =
  MatObsRow
    . S.fromRowsL
    . S.secondLeft begin end
    . S.fromRowsL
    . fmap centerScaleSparseCell
    . S.toRowsL
    $ mat
  where
    begin = bool 1 (svdDim + 1) . unDropDimensionFlag $ df
    end   = bool svdDim (S.ncols mat - svdDim) . unDropDimensionFlag $ df


-- | Shift features to positive values.
shiftPositiveMat :: MatObsRow -> MatObsRow
shiftPositiveMat = MatObsRow
              . S.fromColsL
              . fmap (\ xs -> bool xs (shift . S.toDenseListSV $ xs)
                            . any (< 0)
                            . S.toDenseListSV
                            $ xs
                     )
              . S.toRowsL -- toRowsL much faster than toColsL
              . S.transpose
              . unMatObsRow
  where
    shift xs = S.sparsifySV . S.vr . fmap (+ (abs $ minimum xs)) $ xs

-- | Shift features to positive values for SingleCells.
shiftPositiveSc :: SingleCells -> SingleCells
shiftPositiveSc = L.over matrix shiftPositiveMat

-- | Optionally give the filepath as a label to the rows.
labelRows :: Maybe CustomLabel -> SingleCells -> LabelMat
labelRows Nothing sc = LabelMat (sc, Nothing)
labelRows (Just (CustomLabel l)) sc =
  LabelMat (L.set rowNames newRowNames sc, Just labelMap)
  where
    labelMap = LabelMap
             . Map.fromList
             . flip zip (repeat (Label l))
             . fmap (Id . unCell)
             . V.toList
             $ newRowNames
    newRowNames = fmap labelRow . L.view rowNames $ sc
    labelRow (Cell x) = Cell $ x <> "-" <> l

-- | Optionally give the filepath as a label to the columns when transposing
-- matrix.
labelCols :: Maybe CustomLabel -> SingleCells -> LabelMat
labelCols Nothing sc = LabelMat (sc, Nothing)
labelCols (Just (CustomLabel l)) sc =
  LabelMat (L.set colNames newColNames sc, Just labelMap)
  where
    labelMap = LabelMap
             . Map.fromList
             . flip zip (repeat (Label l))
             . fmap (Id . unFeature)
             . V.toList
             $ newColNames
    newColNames = fmap labelCol . L.view colNames $ sc
    labelCol (Feature x) = Feature $ x <> "-" <> l

-- | Add or remove a single character to speed up joining if region data is
-- binned across rows before transposition.
fastBinJoinRows :: Maybe BinWidth -> Bool -> SingleCells -> SingleCells
fastBinJoinRows binWidth joining =
  bool id (L.over rowNames (fmap (Cell . j . unCell))) $ isJust binWidth
    where
      j = if joining then T.cons 'B' else T.drop 1

-- | Add or remove a single character to speed up joining if region data is
-- binned across columns.
fastBinJoinCols :: Maybe BinWidth -> Bool -> SingleCells -> SingleCells
fastBinJoinCols binWidth joining =
  bool id (L.over colNames (fmap (Feature . j . unFeature)))
    $ isJust binWidth
    where
      j = if joining then T.cons 'B' else T.drop 1

-- | Transform a list of chromosome region features to a list of custom features.
transformChrRegions :: CustomRegions -> SingleCells -> SingleCells
transformChrRegions (CustomRegions customRegions) sc =
  L.set matrix mat . L.set colNames features $ sc
  where
    features = V.fromList . fmap (Feature . showt) $ customRegions
    knownFeatureMap = foldl' (HMap.unionWith IntervalMap.union) HMap.empty
                    . fmap (\ (!i, (!c, !interval))
                           -> HMap.singleton c
                            $ IntervalMap.singleton interval i
                           )
                    . zip ([0..] :: [Int])
                    . fmap unChrRegion
                    $ customRegions
    findErrKnown :: T.Text -> [Int]
    findErrKnown x = fromMaybe []
                   . (\ (!c, !i) -> HMap.lookup c knownFeatureMap
                                >>= Just
                                  . IntervalMap.elems
                                  . flip IntervalMap.intersecting i
                     )
                   . either error unChrRegion
                   . parseChrRegion
                   $ x
    mat = MatObsRow
        . foldl' addToMat init
        . concatMap (\ (!i, !j, !v)
                    -> fmap (i, , v)
                     . findErrKnown
                     . unFeature
                     . fromMaybe (error $ "transformChrRegions/mat: No indices found: " <> show j)
                     $ (V.!?) (L.view colNames sc) j
                    )
        . S.toListSM
        $ oldMat
    init = S.zeroSM (S.nrows oldMat) $ V.length features
    addToMat !m x = m S.^+^ S.fromListSM (S.dim init) [x]
    oldMat = unMatObsRow . L.view matrix $ sc