{- birch-beer Gregory W. Schwartz Displays a hierarchical tree of clusters with colors, scaling, and more. -} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeOperators #-} module Main where -- Remote import Control.Monad (when) import Data.Char (ord) import Data.Colour.SRGB (sRGB24read) import Data.List (isSuffixOf) import Data.Maybe (fromMaybe) import Math.Clustering.Hierarchical.Spectral.Load (readSparseAdjMatrix) import Options.Generic import System.IO (openFile, hClose, IOMode (..)) import Text.Read (readMaybe, readEither) import TextShow (showt) import qualified Control.Lens as L import qualified Data.Aeson as A import qualified Data.ByteString.Lazy.Char8 as B import qualified Data.Csv as CSV import qualified Data.Set as Set import qualified Data.Sparse.Common as S import qualified Data.Text as T import qualified Data.Vector as V import qualified Diagrams.Backend.Cairo as D import qualified Diagrams.Prelude as D -- Local import BirchBeer.ColorMap import BirchBeer.Interactive import BirchBeer.LeafGraph import BirchBeer.Load import BirchBeer.MainDiagram import BirchBeer.Types import BirchBeer.Utility -- | Command line arguments data Options = Options { input :: String "(FILE) The input JSON file." , inputMatrix :: Maybe String "([Nothing] | FILE) The input adjacency matrix file for CollectionGraph (matrix market format if ends in .mtx, \"i,j,value\" without header otherwise and text labels will be sorted when converting indices)." , output :: Maybe String "([dendrogram.svg] | FILE) The filename for the dendrogram. Supported formats are PNG, PS, PDF, and SVG." , jsonOutput :: Maybe String "([Nothing] | FILE) The filename for the output json tree. The input tree can change based on pruning, so this option provides a way to output the new tree as a json." , delimiter :: Maybe Char "([,] | CHAR) The delimiter for csv files." , labelsFile :: Maybe String "([Nothing] | FILE) The input file containing the label for each item, with \"item,label\" header." , minSize :: Maybe Int "([1] | INT) The minimum size of a cluster. Defaults to 1." , maxStep :: Maybe Int "([Nothing] | INT) Only keep clusters that are INT steps from the root. Defaults to all steps." , maxProportion :: Maybe Double "([Nothing] | DOUBLE) Stopping criteria to stop at the node immediate after a node with DOUBLE proportion split. So a node N with L and R children will stop with this criteria at 0.5 if |L| / |R| < 0.5 or > 2 (absolute log2 transformed), that is, if one child has over twice as many items as the other child. Includes L and R in the final result." , minDistance :: Maybe Double "([Nothing] | DOUBLE) Stopping criteria to stop at the node immediate after a node with DOUBLE distance. So a node N with L and R children will stop with this criteria the distance at N to L and R is < DOUBLE. Includes L and R in the final result." , minDistanceSearch :: Maybe Double "([Nothing] | DOUBLE) Similar to --min-distance, but searches from the leaves to the root -- if a path from a subtree contains a distance of at least DOUBLE, keep that path, otherwise prune it. This argument assists in finding distant nodes." , smartCutoff :: Maybe Double "([Nothing] | DOUBLE) Whether to set the cutoffs for --min-size, --max-proportion, --min-distance, and --min-distance-search based off of the distributions (median + (DOUBLE * MAD)) of all nodes. To use smart cutoffs, use this argument and then set one of the three arguments to an arbitrary number, whichever cutoff type you want to use. --min-size distribution is log2 transformed." , customCut :: [Int] "([Nothing] | NODE) List of nodes to prune (make these nodes leaves). Invoked by --custom-cut 34 --custom-cut 65 etc." , order :: Maybe Double "([1] | DOUBLE) The order of diversity for DrawItem DrawDiversity." , drawLeaf :: Maybe String "([DrawText] | DrawItem DrawItemType) How to draw leaves in the dendrogram. DrawText is the number of items in that leaf. DrawItem is the collection of items represented by circles, consisting of: DrawItem DrawLabel, where each item is colored by its label, DrawItem (DrawContinuous [FEATURE]), where each item is colored by the expression of FEATURE (corresponding to a feature name in the input matrix, [FEATURE] is a list, so if more than one FEATURE is listed, uses the average of the feature values), DrawItem (DrawThresholdContinuous [(FEATURE, DOUBLE)]), where each item is colored by the binary high / low expression of FEATURE based on DOUBLE and multiple FEATUREs can be used to combinatorically label items (FEATURE1 high / FEATURE2 low, etc.), DrawItem DrawSumContinuous, where each item is colored by the sum of the post-normalized columns (use --normalization NoneNorm for UMI counts, default), and DrawItem DrawDiversity, where each node is colored by the diversity based on the labels of each item and the color is normalized separately for the leaves and the inner nodes. The default is DrawText, unless --labels-file is provided, in which DrawItem DrawLabel is the default. If the label or feature cannot be found, the default color will be black (check your spelling!)." , drawCollection :: Maybe String "([PieChart] | PieRing | PieNone | CollectionGraph MAXWEIGHT THRESHOLD [NODE]) How to draw item leaves in the dendrogram. PieRing draws a pie chart ring around the items. PieChart only draws a pie chart instead of items. PieNone only draws items, no pie rings or charts. (CollectionGraph MAXWEIGHT THRESHOLD [NODE]) draws the nodes and edges within leaves that are descendents of NODE (empty list [] indicates draw all leaf networks) based on the input matrix, normalizes edges based on the MAXWEIGHT, and removes edges for display less than THRESHOLD (after normalization, so for CollectionGraph 2 0.5 [26], draw the leaf graphs for all leaves under node 26, then a edge of 0.7 would be removed because (0.7 / 2) < 0.5). For CollectionGraph with no colors, use --draw-leaf \"DrawItem DrawLabel\" and all nodes will be black. If you don't specify this option, DrawText from --draw-leaf overrides this argument and only the number of cells will be plotted." , drawMark :: Maybe String "([MarkNone] | MarkModularity) How to draw annotations around each inner node in the tree. MarkNone draws nothing and MarkModularity draws a black circle representing the modularity at that node, darker black means higher modularity for that next split." , drawNodeNumber :: Bool "Draw the node numbers on top of each node in the graph." , drawMaxNodeSize :: Maybe Double "([72] | DOUBLE) The max node size when drawing the graph. 36 is the theoretical default, but here 72 makes for thicker branches." , drawMaxLeafNodeSize :: Maybe Double "([--draw-max-node-size] | DOUBLE) The max leaf node size when drawing the graph. Defaults to the value of --draw-max-node-size." , drawNoScaleNodes :: Bool "Do not scale inner node size when drawing the graph. Instead, uses draw-max-node-size as the size of each node and is highly recommended to change as the default may be too large for this option." , drawLegendSep :: Maybe Double "([1] | DOUBLE) The amount of space between the legend and the tree." , drawLegendAllLabels :: Bool "Whether to show all the labels in the label file instead of only showing labels within the current tree. The program generates colors from all labels in the label file first in order to keep consistent colors. By default, this value is false, meaning that only the labels present in the tree are shown (even though the colors are the same). The subset process occurs after --draw-colors, so when using that argument make sure to account for all labels." , drawPalette :: Maybe String "([Set1] | Hsv | Ryb) Palette to use for legend colors. With high saturation in --draw-scale-saturation, consider using Hsv to better differentiate colors." , drawColors :: Maybe String "([Nothing] | COLORS) Custom colors for the labels or continuous features. Will repeat if more labels than provided colors. For continuous feature plots, uses first two colors [high, low], defaults to [red, gray]. For instance: --draw-colors \"[\\\"#e41a1c\\\", \\\"#377eb8\\\"]\"" , drawDiscretize :: Maybe String "([Nothing] | COLORS | INT) Discretize colors by finding the nearest color for each item and node. For instance, --draw-discretize \"[\\\"#e41a1c\\\", \\\"#377eb8\\\"]\" will change all node and item colors to one of those two colors, based on Euclidean distance. If using \"--draw-discretize INT\", will instead take the default map and segment (or interpolate) it into INT colors, rather than a more continuous color scheme. May have unintended results when used with --draw-scale-saturation." , drawScaleSaturation :: Maybe Double "([Nothing] | DOUBLE) Multiply the saturation value all nodes by this number in the HSV model. Useful for seeing more visibly the continuous colors by making the colors deeper against a gray scale." , interactive :: Bool "Display interactive tree." } deriving (Generic) modifiers :: Modifiers modifiers = lispCaseModifiers { shortNameModifier = short } where short "customCut" = Nothing short "inputMatrix" = Just 'X' short "minSize" = Just 'M' short "maxStep" = Just 'S' short "maxProportion" = Just 'P' short "minDistance" = Just 'T' short "minDistanceSearch" = Nothing short "drawLeaf" = Just 'L' short "drawCollection" = Just 'D' short "drawDiscretize" = Nothing short "drawNodeNumber" = Just 'N' short "drawMark" = Just 'K' short "drawColors" = Just 'R' short "drawNoScaleNodes" = Just 'W' short "drawMaxNodeSize" = Just 'A' short "drawMaxLeafNodeSize" = Nothing short "drawLegendSep" = Just 'Q' short "drawLegendAllLabels" = Just 'J' short "drawPalette" = Just 'Y' short "drawScaleSaturation" = Just 'V' short "order" = Just 'O' short "interactive" = Just 'I' short x = firstLetter x instance ParseRecord Options where parseRecord = parseRecordWithModifiers modifiers main :: IO () main = do opts <- getRecord "birch-beer, Gregory W. Schwartz.\ \ Displays a hierarchical tree of clusters with colors,\ \ scaling, and more." let input' = unHelpful . input $ opts inputMatrix' = unHelpful . inputMatrix $ opts delimiter' = Delimiter . fromMaybe ',' . unHelpful . delimiter $ opts labelsFile' = fmap LabelFile . unHelpful . labelsFile $ opts minSize' = fmap MinClusterSize . unHelpful . minSize $ opts maxStep' = fmap MaxStep . unHelpful . maxStep $ opts maxProportion' = fmap MaxProportion . unHelpful . maxProportion $ opts minDistance' = fmap MinDistance . unHelpful . minDistance $ opts minDistanceSearch' = fmap MinDistanceSearch . unHelpful . minDistanceSearch $ opts smartCutoff' = fmap SmartCutoff . unHelpful . smartCutoff $ opts customCut' = CustomCut . Set.fromList . unHelpful . customCut $ opts order' = fmap Order . unHelpful . order $ opts drawLeaf' = maybe (maybe DrawText (const (DrawItem DrawLabel)) labelsFile') (fromMaybe (error "Cannot read draw-leaf.") . readMaybe) . unHelpful . drawLeaf $ opts drawCollection' = maybe PieChart (fromMaybe (error "Cannot read draw-collection.") . readMaybe) . unHelpful . drawCollection $ opts drawMark' = maybe MarkNone (fromMaybe (error "Cannot read draw-mark.") . readMaybe) . unHelpful . drawMark $ opts drawNodeNumber' = DrawNodeNumber . unHelpful . drawNodeNumber $ opts drawMaxNodeSize' = DrawMaxNodeSize . fromMaybe 72 . unHelpful . drawMaxNodeSize $ opts drawMaxLeafNodeSize' = DrawMaxLeafNodeSize . fromMaybe (unDrawMaxNodeSize drawMaxNodeSize') . unHelpful . drawMaxLeafNodeSize $ opts drawNoScaleNodes' = DrawNoScaleNodesFlag . unHelpful . drawNoScaleNodes $ opts drawLegendSep' = DrawLegendSep . fromMaybe 1 . unHelpful . drawLegendSep $ opts drawLegendAllLabels' = DrawLegendAllLabels . unHelpful . drawLegendAllLabels $ opts drawPalette' = maybe Set1 (fromMaybe (error "Cannot read palette") . readMaybe) . unHelpful . drawPalette $ opts drawColors' = fmap ( CustomColors . fmap sRGB24read . (\x -> read x :: [String]) ) . unHelpful . drawColors $ opts drawDiscretize' = (=<<) (\x -> either error Just . either (\ err -> either (\y -> Left $ finalError err y) (Right . SegmentColorMap) (readEither x :: Either String Int) ) (Right . CustomColorMap . fmap sRGB24read) $ (readEither x :: Either String [String]) ) . unHelpful . drawDiscretize $ opts where finalError err x = "Error in draw-discretize: " <> err <> " " <> x drawScaleSaturation' = fmap DrawScaleSaturation . unHelpful . drawScaleSaturation $ opts output' = fromMaybe "dendrogram.svg" . unHelpful . output $ opts tree <- loadTreeOrDendFromFile input' -- Get the label map from either a file or from expression thresholds. labelMap <- case drawLeaf' of (DrawItem (DrawThresholdContinuous gs)) -> error "Threshold not supported here." -- fmap -- ( Just -- . getLabelMapThresholdContinuous -- (fmap (L.over L._1 Feature) gs) -- . fromMaybe (error "Requires matrix.") -- ) -- mat _ -> sequence . fmap (loadLabelData delimiter') $ labelsFile' let readMat file = if isSuffixOf ".mtx" file then do mat <- loadMatrix file let items = V.fromList . fmap showt $ [1..S.nrows mat] return . SimilarityMatrix $ NamedMatrix mat items items else do let decodeOpt = CSV.defaultDecodeOptions { CSV.decDelimiter = fromIntegral (ord . unDelimiter $ delimiter') } h <- openFile file ReadMode (items, mat) <- readSparseAdjMatrix decodeOpt h hClose h return . SimilarityMatrix $ NamedMatrix mat items items simMat <- sequence . fmap readMat $ inputMatrix' let config :: Config T.Text NamedMatrix config = Config { _birchLabelMap = labelMap , _birchMinSize = minSize' , _birchMaxStep = maxStep' , _birchMaxProportion = maxProportion' , _birchMinDistance = minDistance' , _birchMinDistanceSearch = minDistanceSearch' , _birchSmartCutoff = smartCutoff' , _birchCustomCut = customCut' , _birchOrder = order' , _birchDrawLeaf = drawLeaf' , _birchDrawCollection = drawCollection' , _birchDrawMark = drawMark' , _birchDrawNodeNumber = drawNodeNumber' , _birchDrawMaxNodeSize = drawMaxNodeSize' , _birchDrawMaxLeafNodeSize = drawMaxLeafNodeSize' , _birchDrawNoScaleNodes = drawNoScaleNodes' , _birchDrawLegendSep = drawLegendSep' , _birchDrawLegendAllLabels = drawLegendAllLabels' , _birchDrawPalette = drawPalette' , _birchDrawColors = drawColors' , _birchDrawDiscretize = drawDiscretize' , _birchDrawScaleSaturation = drawScaleSaturation' , _birchTree = tree , _birchMat = Nothing , _birchSimMat = simMat } (plot, _, _, _, tree', _) <- mainDiagram config -- Plot tree. D.renderCairo output' (D.mkHeight 1000) plot -- Write new tree if necessary. mapM_ (\x -> B.writeFile x . A.encode $ tree') . unHelpful . jsonOutput $ opts when (unHelpful . interactive $ opts) $ interactiveDiagram tree labelMap (Nothing :: Maybe NamedMatrix) simMat return ()