{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} module DataFrame.Display.Web.Plot where import Control.Monad import Data.Char import qualified Data.List as L import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Text.IO as T import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl)) import Data.Typeable (Typeable) import qualified Data.Vector as V import qualified Data.Vector.Generic as VG import qualified Data.Vector.Unboxed as VU import GHC.Stack (HasCallStack) import System.Random (newStdGen, randomRs) import Type.Reflection (typeRep) import DataFrame.Internal.Column (Column (..), isNumeric) import qualified DataFrame.Internal.Column as D import DataFrame.Internal.DataFrame (DataFrame (..), getColumn) import DataFrame.Operations.Core import qualified DataFrame.Operations.Subset as D import System.Directory import System.Info import System.Process ( StdStream (NoStream), createProcess, proc, std_err, std_in, std_out, waitForProcess, ) newtype HtmlPlot = HtmlPlot T.Text deriving (Show) data PlotConfig = PlotConfig { plotType :: PlotType , plotTitle :: T.Text , plotWidth :: Int , plotHeight :: Int , plotFile :: Maybe FilePath } data PlotType = Histogram | Scatter | Line | Bar | BoxPlot | Pie | StackedBar | Heatmap deriving (Eq, Show) defaultPlotConfig :: PlotType -> PlotConfig defaultPlotConfig ptype = PlotConfig { plotType = ptype , plotTitle = "" , plotWidth = 600 , plotHeight = 400 , plotFile = Nothing } chartJsScript :: T.Text chartJsScript = "\n" generateChartId :: IO T.Text generateChartId = do gen <- newStdGen let randomWords = filter (\c -> c `elem` ([49 .. 57] ++ [65 .. 90] ++ [97 .. 122])) (take 64 (randomRs (49, 126) gen :: [Int])) return $ "chart_" <> T.pack (map chr randomWords) wrapInHTML :: T.Text -> T.Text -> Int -> Int -> T.Text wrapInHTML chartId content width height = T.concat [ "\n" , "\n" , "\n" ] plotHistogram :: (HasCallStack) => T.Text -> DataFrame -> IO HtmlPlot plotHistogram colName = plotHistogramWith colName (defaultPlotConfig Histogram) plotHistogramWith :: (HasCallStack) => T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotHistogramWith colName config df = do chartId <- generateChartId let values = extractNumericColumn colName df (minVal, maxVal) = if null values then (0, 1) else (minimum values, maximum values) numBins = 30 binWidth = (maxVal - minVal) / fromIntegral numBins bins = [minVal + fromIntegral i * binWidth | i <- [0 .. numBins - 1]] counts = calculateHistogram values bins binWidth labels = T.intercalate "," ["\"" <> T.pack (show (round b :: Int)) <> "\"" | b <- bins] dataPoints = T.intercalate "," [T.pack (show c) | c <- counts] chartTitle = if T.null (plotTitle config) then "Histogram of " <> colName else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"bar\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " label: \"" , colName , "\",\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: \"rgba(75, 192, 192, 0.6)\",\n" , " borderColor: \"rgba(75, 192, 192, 1)\",\n" , " borderWidth: 1\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " yAxes: [{ ticks: { beginAtZero: true } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) calculateHistogram :: [Double] -> [Double] -> Double -> [Int] calculateHistogram values bins binWidth = let countBin b = length [v | v <- values, v >= b && v < b + binWidth] in map countBin bins plotScatter :: (HasCallStack) => T.Text -> T.Text -> DataFrame -> IO HtmlPlot plotScatter xCol yCol = plotScatterWith xCol yCol (defaultPlotConfig Scatter) plotScatterWith :: (HasCallStack) => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotScatterWith xCol yCol config df = do chartId <- generateChartId let xVals = extractNumericColumn xCol df yVals = extractNumericColumn yCol df points = zip xVals yVals dataPoints = T.intercalate "," [ "{x:" <> T.pack (show x) <> ", y:" <> T.pack (show y) <> "}" | (x, y) <- points ] chartTitle = if T.null (plotTitle config) then xCol <> " vs " <> yCol else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"scatter\",\n" , " data: {\n" , " datasets: [{\n" , " label: \"" , chartTitle , "\",\n" , " data: [" , dataPoints , "],\n" , " pointRadius: 4,\n" , " pointBackgroundColor: \"rgb(75, 192, 192)\"\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " xAxes: [{ scaleLabel: { display: true, labelString: \"" , xCol , "\" } }],\n" , " yAxes: [{ scaleLabel: { display: true, labelString: \"" , yCol , "\" } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) plotScatterBy :: (HasCallStack) => T.Text -> T.Text -> T.Text -> DataFrame -> IO HtmlPlot plotScatterBy xCol yCol grouping = plotScatterByWith xCol yCol grouping (defaultPlotConfig Scatter) plotScatterByWith :: (HasCallStack) => T.Text -> T.Text -> T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotScatterByWith xCol yCol grouping config df = do chartId <- generateChartId let vals = extractStringColumn grouping df df' = insertColumn grouping (D.fromList vals) df uniqueVals = L.nub vals colors = cycle [ "rgb(255, 99, 132)" , "rgb(54, 162, 235)" , "rgb(255, 206, 86)" , "rgb(75, 192, 192)" , "rgb(153, 102, 255)" , "rgb(255, 159, 64)" ] datasets <- forM (zip uniqueVals colors) $ \(val, color) -> do let filtered = D.filter grouping (== val) df' xVals = extractNumericColumn xCol filtered yVals = extractNumericColumn yCol filtered points = zip xVals yVals dataPoints = T.intercalate "," [ "{x:" <> T.pack (show x) <> ", y:" <> T.pack (show y) <> "}" | (x, y) <- points ] return $ T.concat [ " {\n" , " label: \"" , val , "\",\n" , " data: [" , dataPoints , "],\n" , " pointRadius: 4,\n" , " pointBackgroundColor: \"" , color , "\"\n" , " }" ] let datasetsStr = T.intercalate ",\n" datasets chartTitle = if T.null (plotTitle config) then xCol <> " vs " <> yCol <> " by " <> grouping else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"scatter\",\n" , " data: {\n" , " datasets: [\n" , datasetsStr , "\n ]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " xAxes: [{ scaleLabel: { display: true, labelString: \"" , xCol , "\" } }],\n" , " yAxes: [{ scaleLabel: { display: true, labelString: \"" , yCol , "\" } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) plotLines :: (HasCallStack) => T.Text -> [T.Text] -> DataFrame -> IO HtmlPlot plotLines xAxis colNames = plotLinesWith xAxis colNames (defaultPlotConfig Line) plotLinesWith :: (HasCallStack) => T.Text -> [T.Text] -> PlotConfig -> DataFrame -> IO HtmlPlot plotLinesWith xAxis colNames config df = do chartId <- generateChartId let xValues = extractNumericColumn xAxis df labels = T.intercalate "," [T.pack (show x) | x <- xValues] colors = cycle [ "rgb(255, 99, 132)" , "rgb(54, 162, 235)" , "rgb(255, 206, 86)" , "rgb(75, 192, 192)" , "rgb(153, 102, 255)" , "rgb(255, 159, 64)" ] datasets <- forM (zip colNames colors) $ \(col, color) -> do let values = extractNumericColumn col df dataPoints = T.intercalate "," [T.pack (show v) | v <- values] return $ T.concat [ " {\n" , " label: \"" , col , "\",\n" , " data: [" , dataPoints , "],\n" , " fill: false,\n" , " borderColor: \"" , color , "\",\n" , " tension: 0.1\n" , " }" ] let datasetsStr = T.intercalate ",\n" datasets chartTitle = if T.null (plotTitle config) then "Line Chart" else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"line\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [\n" , datasetsStr , "\n ]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " xAxes: [{ scaleLabel: { display: true, labelString: \"" , xAxis , "\" } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) plotBars :: (HasCallStack) => T.Text -> DataFrame -> IO HtmlPlot plotBars colName = plotBarsWith colName Nothing (defaultPlotConfig Bar) plotBarsWith :: (HasCallStack) => T.Text -> Maybe T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotBarsWith colName groupByCol config df = case groupByCol of Nothing -> plotSingleBars colName config df Just grpCol -> plotGroupedBarsWith grpCol colName config df plotSingleBars :: (HasCallStack) => T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotSingleBars colName config df = do chartId <- generateChartId let barData = getCategoricalCounts colName df case barData of Just counts -> do let grouped = groupWithOther 10 counts labels = T.intercalate "," ["\"" <> label <> "\"" | (label, _) <- grouped] dataPoints = T.intercalate "," [T.pack (show val) | (_, val) <- grouped] chartTitle = if T.null (plotTitle config) then colName else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"bar\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " label: \"Count\",\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: \"rgba(54, 162, 235, 0.6)\",\n" , " borderColor: \"rgba(54, 162, 235, 1)\",\n" , " borderWidth: 1\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " yAxes: [{ ticks: { beginAtZero: true } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) Nothing -> do let values = extractNumericColumn colName df labels' = if length values > 20 then take 20 ["Item " <> T.pack (show i) | i <- [1 ..]] else ["Item " <> T.pack (show i) | i <- [1 .. length values]] vals = if length values > 20 then take 20 values else values labels = T.intercalate "," ["\"" <> label <> "\"" | label <- labels'] dataPoints = T.intercalate "," [T.pack (show val) | val <- vals] chartTitle = if T.null (plotTitle config) then colName else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"bar\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " label: \"Value\",\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: \"rgba(54, 162, 235, 0.6)\",\n" , " borderColor: \"rgba(54, 162, 235, 1)\",\n" , " borderWidth: 1\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " yAxes: [{ ticks: { beginAtZero: true } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) plotPie :: (HasCallStack) => T.Text -> Maybe T.Text -> DataFrame -> IO HtmlPlot plotPie valCol labelCol = plotPieWith valCol labelCol (defaultPlotConfig Pie) plotPieWith :: (HasCallStack) => T.Text -> Maybe T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotPieWith valCol labelCol config df = do chartId <- generateChartId let categoricalData = getCategoricalCounts valCol df case categoricalData of Just counts -> do let grouped = groupWithOtherForPie 8 counts labels = T.intercalate "," ["\"" <> label <> "\"" | (label, _) <- grouped] dataPoints = T.intercalate "," [T.pack (show val) | (_, val) <- grouped] colors = T.intercalate "," ["\"" <> c <> "\"" | c <- take (length grouped) pieColors] chartTitle = if T.null (plotTitle config) then valCol else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"pie\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: [" , colors , "]\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) Nothing -> do let values = extractNumericColumn valCol df labels' = case labelCol of Nothing -> map (\i -> "Item " <> T.pack (show i)) [1 .. length values] Just lCol -> extractStringColumn lCol df pieData = zip labels' values grouped = if length pieData > 10 then groupWithOtherForPie 8 pieData else pieData labels = T.intercalate "," ["\"" <> label <> "\"" | (label, _) <- grouped] dataPoints = T.intercalate "," [T.pack (show val) | (_, val) <- grouped] colors = T.intercalate "," ["\"" <> c <> "\"" | c <- take (length grouped) pieColors] chartTitle = if T.null (plotTitle config) then valCol else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"pie\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: [" , colors , "]\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) pieColors :: [T.Text] pieColors = [ "rgb(255, 99, 132)" , "rgb(54, 162, 235)" , "rgb(255, 206, 86)" , "rgb(75, 192, 192)" , "rgb(153, 102, 255)" , "rgb(255, 159, 64)" , "rgb(201, 203, 207)" , "rgb(255, 99, 71)" , "rgb(60, 179, 113)" , "rgb(238, 130, 238)" ] plotStackedBars :: (HasCallStack) => T.Text -> [T.Text] -> DataFrame -> IO HtmlPlot plotStackedBars categoryCol valueColumns = plotStackedBarsWith categoryCol valueColumns (defaultPlotConfig StackedBar) plotStackedBarsWith :: (HasCallStack) => T.Text -> [T.Text] -> PlotConfig -> DataFrame -> IO HtmlPlot plotStackedBarsWith categoryCol valueColumns config df = do chartId <- generateChartId let categories = extractStringColumn categoryCol df uniqueCategories = L.nub categories colors = cycle [ "rgb(255, 99, 132)" , "rgb(54, 162, 235)" , "rgb(255, 206, 86)" , "rgb(75, 192, 192)" , "rgb(153, 102, 255)" , "rgb(255, 159, 64)" ] datasets <- forM (zip valueColumns colors) $ \(col, color) -> do dataVals <- forM uniqueCategories $ \cat -> do let indices = [i | (i, c) <- zip [0 ..] categories, c == cat] allValues = extractNumericColumn col df values = [allValues !! i | i <- indices, i < length allValues] return $ sum values let dataPoints = T.intercalate "," [T.pack (show v) | v <- dataVals] return $ T.concat [ " {\n" , " label: \"" , col , "\",\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: \"" , color , "\"\n" , " }" ] let datasetsStr = T.intercalate ",\n" datasets labels = T.intercalate "," ["\"" <> cat <> "\"" | cat <- uniqueCategories] chartTitle = if T.null (plotTitle config) then "Stacked Bar Chart" else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"bar\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [\n" , datasetsStr , "\n ]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " xAxes: [{ stacked: true }],\n" , " yAxes: [{ stacked: true, ticks: { beginAtZero: true } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) plotBoxPlots :: (HasCallStack) => [T.Text] -> DataFrame -> IO HtmlPlot plotBoxPlots colNames = plotBoxPlotsWith colNames (defaultPlotConfig BoxPlot) plotBoxPlotsWith :: (HasCallStack) => [T.Text] -> PlotConfig -> DataFrame -> IO HtmlPlot plotBoxPlotsWith colNames config df = do chartId <- generateChartId boxData <- forM colNames $ \col -> do let values = extractNumericColumn col df sorted = L.sort values n = length values q1 = sorted !! (n `div` 4) median = sorted !! (n `div` 2) q3 = sorted !! (3 * n `div` 4) minVal = minimum values maxVal = maximum values return (col, minVal, q1, median, q3, maxVal) let labels = T.intercalate "," ["\"" <> col <> "\"" | (col, _, _, _, _, _) <- boxData] medians = T.intercalate "," [T.pack (show med) | (_, _, _, med, _, _) <- boxData] chartTitle = if T.null (plotTitle config) then "Box Plot" else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"bar\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " label: \"Median\",\n" , " data: [" , medians , "],\n" , " backgroundColor: \"rgba(75, 192, 192, 0.6)\",\n" , " borderColor: \"rgba(75, 192, 192, 1)\",\n" , " borderWidth: 1\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , " (showing medians)\" },\n" , " scales: {\n" , " yAxes: [{ ticks: { beginAtZero: true } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) plotGroupedBarsWith :: (HasCallStack) => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotGroupedBarsWith = plotGroupedBarsWithN 10 plotGroupedBarsWithN :: (HasCallStack) => Int -> T.Text -> T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotGroupedBarsWithN n groupCol valCol config df = do chartId <- generateChartId let colIsNumeric = isNumericColumnCheck valCol df if colIsNumeric then do let groups = extractStringColumn groupCol df values = extractNumericColumn valCol df m = M.fromListWith (+) (zip groups values) grouped = map (\v -> (v, m M.! v)) groups labels = T.intercalate "," ["\"" <> label <> "\"" | (label, _) <- grouped] dataPoints = T.intercalate "," [T.pack (show val) | (_, val) <- grouped] chartTitle = if T.null (plotTitle config) then groupCol <> " by " <> valCol else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"bar\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " label: \"" , valCol , "\",\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: \"rgba(54, 162, 235, 0.6)\",\n" , " borderColor: \"rgba(54, 162, 235, 1)\",\n" , " borderWidth: 1\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " yAxes: [{ ticks: { beginAtZero: true } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) else do let groups = extractStringColumn groupCol df vals = extractStringColumn valCol df pairs = zip groups vals counts = M.toList $ M.fromListWith (+) [(g <> " - " <> v, 1) | (g, v) <- pairs] finalCounts = groupWithOther n [(k, fromIntegral v) | (k, v) <- counts] labels = T.intercalate "," ["\"" <> label <> "\"" | (label, _) <- finalCounts] dataPoints = T.intercalate "," [T.pack (show val) | (_, val) <- finalCounts] chartTitle = if T.null (plotTitle config) then groupCol <> " by " <> valCol else plotTitle config jsCode = T.concat [ "new Chart(\"" , chartId , "\", {\n" , " type: \"bar\",\n" , " data: {\n" , " labels: [" , labels , "],\n" , " datasets: [{\n" , " label: \"Count\",\n" , " data: [" , dataPoints , "],\n" , " backgroundColor: \"rgba(54, 162, 235, 0.6)\",\n" , " borderColor: \"rgba(54, 162, 235, 1)\",\n" , " borderWidth: 1\n" , " }]\n" , " },\n" , " options: {\n" , " title: { display: true, text: \"" , chartTitle , "\" },\n" , " scales: {\n" , " yAxes: [{ ticks: { beginAtZero: true } }]\n" , " }\n" , " }\n" , "});" ] return $ HtmlPlot $ wrapInHTML chartId jsCode (plotWidth config) (plotHeight config) -- TODO: Move these helpers to a common module. isNumericColumn :: DataFrame -> T.Text -> Bool isNumericColumn df colName = maybe False isNumeric (getColumn colName df) isNumericColumnCheck :: T.Text -> DataFrame -> Bool isNumericColumnCheck colName df = isNumericColumn df colName extractStringColumn :: (HasCallStack) => T.Text -> DataFrame -> [T.Text] extractStringColumn colName df = case M.lookup colName (columnIndices df) of Nothing -> error $ "Column " ++ T.unpack colName ++ " not found" Just idx -> let col = columns df V.! idx in case col of BoxedColumn (vec :: V.Vector a) -> case testEquality (typeRep @a) (typeRep @T.Text) of Just Refl -> V.toList vec Nothing -> V.toList $ V.map (T.pack . show) vec UnboxedColumn vec -> V.toList $ VG.map (T.pack . show) (VG.convert vec) OptionalColumn (vec :: V.Vector (Maybe a)) -> case testEquality (typeRep @a) (typeRep @T.Text) of Nothing -> V.toList $ V.map (T.pack . show) vec Just Refl -> V.toList $ V.map (maybe "Nothing" ("Just " <>)) vec extractNumericColumn :: (HasCallStack) => T.Text -> DataFrame -> [Double] extractNumericColumn colName df = case M.lookup colName (columnIndices df) of Nothing -> error $ "Column " ++ T.unpack colName ++ " not found" Just idx -> let col = columns df V.! idx in case col of BoxedColumn vec -> vectorToDoubles vec UnboxedColumn vec -> unboxedVectorToDoubles vec _ -> [] vectorToDoubles :: forall a. (Typeable a, Show a) => V.Vector a -> [Double] vectorToDoubles vec = case testEquality (typeRep @a) (typeRep @Double) of Just Refl -> V.toList vec Nothing -> case testEquality (typeRep @a) (typeRep @Int) of Just Refl -> V.toList $ V.map fromIntegral vec Nothing -> case testEquality (typeRep @a) (typeRep @Integer) of Just Refl -> V.toList $ V.map fromIntegral vec Nothing -> case testEquality (typeRep @a) (typeRep @Float) of Just Refl -> V.toList $ V.map realToFrac vec Nothing -> error $ "Column is not numeric (type: " ++ show (typeRep @a) ++ ")" unboxedVectorToDoubles :: forall a. (Typeable a, VU.Unbox a, Show a) => VU.Vector a -> [Double] unboxedVectorToDoubles vec = case testEquality (typeRep @a) (typeRep @Double) of Just Refl -> VU.toList vec Nothing -> case testEquality (typeRep @a) (typeRep @Int) of Just Refl -> VU.toList $ VU.map fromIntegral vec Nothing -> case testEquality (typeRep @a) (typeRep @Float) of Just Refl -> VU.toList $ VU.map realToFrac vec Nothing -> error $ "Column is not numeric (type: " ++ show (typeRep @a) ++ ")" getCategoricalCounts :: (HasCallStack) => T.Text -> DataFrame -> Maybe [(T.Text, Double)] getCategoricalCounts colName df = case M.lookup colName (columnIndices df) of Nothing -> error $ "Column " ++ T.unpack colName ++ " not found" Just idx -> let col = columns df V.! idx in case col of BoxedColumn (vec :: V.Vector a) -> let counts = countValues vec in case testEquality (typeRep @a) (typeRep @T.Text) of Nothing -> Just [(T.pack (show k), fromIntegral v) | (k, v) <- counts] Just Refl -> Just [(k, fromIntegral v) | (k, v) <- counts] UnboxedColumn vec -> let counts = countValuesUnboxed vec in Just [(T.pack (show k), fromIntegral v) | (k, v) <- counts] OptionalColumn (vec :: V.Vector (Maybe a)) -> let counts = countValues vec in case testEquality (typeRep @a) (typeRep @T.Text) of Nothing -> Just [((T.pack . show) k, fromIntegral v) | (k, v) <- counts] Just Refl -> Just [(maybe "Nothing" ("Just " <>) k, fromIntegral v) | (k, v) <- counts] where countValues :: (Ord a, Show a) => V.Vector a -> [(a, Int)] countValues vec = M.toList $ V.foldr' (\x acc -> M.insertWith (+) x 1 acc) M.empty vec countValuesUnboxed :: (Ord a, Show a, VU.Unbox a) => VU.Vector a -> [(a, Int)] countValuesUnboxed vec = M.toList $ VU.foldr' (\x acc -> M.insertWith (+) x 1 acc) M.empty vec groupWithOther :: Int -> [(T.Text, Double)] -> [(T.Text, Double)] groupWithOther n items = let sorted = L.sortOn (negate . snd) items (topN, rest) = splitAt n sorted otherSum = sum (map snd rest) result = if null rest || otherSum == 0 then topN else topN ++ [("Other (" <> T.pack (show (length rest)) <> " items)", otherSum)] in result groupWithOtherForPie :: Int -> [(T.Text, Double)] -> [(T.Text, Double)] groupWithOtherForPie n items = let total = sum (map snd items) sorted = L.sortOn (negate . snd) items (topN, rest) = splitAt n sorted otherSum = sum (map snd rest) otherPct = round (100 * otherSum / total) :: Int result = if null rest || otherSum == 0 then topN else topN ++ [ ( "Other (" <> T.pack (show (length rest)) <> " items, " <> T.pack (show otherPct) <> "%)" , otherSum ) ] in result plotBarsTopN :: (HasCallStack) => Int -> T.Text -> DataFrame -> IO HtmlPlot plotBarsTopN n colName = plotBarsTopNWith n colName (defaultPlotConfig Bar) plotBarsTopNWith :: (HasCallStack) => Int -> T.Text -> PlotConfig -> DataFrame -> IO HtmlPlot plotBarsTopNWith n colName config df = do let config' = config{plotTitle = plotTitle config <> " (Top " <> T.pack (show n) <> ")"} plotBarsWith colName Nothing config' df plotValueCounts :: (HasCallStack) => T.Text -> DataFrame -> IO HtmlPlot plotValueCounts colName = plotValueCountsWith colName 10 (defaultPlotConfig Bar) plotValueCountsWith :: (HasCallStack) => T.Text -> Int -> PlotConfig -> DataFrame -> IO HtmlPlot plotValueCountsWith colName maxBars config df = do let config' = config{plotTitle = "Value counts for " <> colName} plotBarsTopNWith maxBars colName config' df plotAllHistograms :: (HasCallStack) => DataFrame -> IO HtmlPlot plotAllHistograms df = do let numericCols = filter (isNumericColumn df) (columnNames df) xs <- forM numericCols $ \col -> do plotHistogram col df let allPlots = L.foldl' (\acc (HtmlPlot contents) -> acc <> "\n" <> contents) "" xs return (HtmlPlot allPlots) plotCategoricalSummary :: (HasCallStack) => DataFrame -> IO HtmlPlot plotCategoricalSummary df = do let cols = columnNames df xs <- forM cols $ \col -> do let counts = getCategoricalCounts col df case counts of Just c -> do if length c > 1 then ( do let numUnique = length c putStrLn $ "\n" if numUnique > 15 then plotBarsTopN 10 col df else plotBars col df ) else return (HtmlPlot "") Nothing -> return (HtmlPlot "") let allPlots = L.foldl' (\acc (HtmlPlot contents) -> acc <> "\n" <> contents) "" xs return (HtmlPlot allPlots) plotBarsWithPercentages :: (HasCallStack) => T.Text -> DataFrame -> IO HtmlPlot plotBarsWithPercentages colName df = do let config = (defaultPlotConfig Bar){plotTitle = "Distribution of " <> colName} plotBarsWith colName Nothing config df smartPlotBars :: (HasCallStack) => T.Text -> DataFrame -> IO HtmlPlot smartPlotBars colName df = do let counts = getCategoricalCounts colName df case counts of Just c -> do let numUnique = length c config = (defaultPlotConfig Bar) { plotTitle = colName <> " (" <> T.pack (show numUnique) <> " unique values)" } if numUnique <= 12 then plotBarsWith colName Nothing config df else plotBarsTopNWith 10 colName config df Nothing -> plotBars colName df showInDefaultBrowser :: HtmlPlot -> IO () showInDefaultBrowser (HtmlPlot p) = do plotId <- generateChartId home <- getHomeDirectory let operatingSystem = os let path = "plot-" <> T.unpack plotId <> ".html" let fullPath = if operatingSystem == "mingw32" then home <> "\\" <> path else home <> "/" <> path putStr "Saving plot to: " putStrLn fullPath T.writeFile fullPath p if operatingSystem == "mingw32" then openFileSilently "start" fullPath else openFileSilently "xdg-open" fullPath pure () openFileSilently :: FilePath -> FilePath -> IO () openFileSilently program path = do (_, _, _, ph) <- createProcess (proc program [path]) { std_in = NoStream , std_out = NoStream , std_err = NoStream } void (waitForProcess ph)