{-
	Copyright (C) 2013-2015 Dr. Alistair Ward

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@]	Dr. Alistair Ward

 [@DESCRIPTION@]	Profiles lists of file-sizes.
-}

module FishFood.Profiler(
-- * Types
-- ** Type-synonyms
--	Probability,
--	Result,
--	FileSizeDistribution,
-- * Functions
        calculateFileSizeDistribution,
        formatFileSizeDistribution,
-- ** Accessors
        getFileSize,
        getValue
) where

import                  Control.Arrow((&&&),(***))
import qualified        Control.Monad.Writer
import qualified        Data.Default
import qualified        Data.List
import qualified        Data.Map
import qualified        Data.Maybe
import qualified        FishFood.Data.CommandOptions    as Data.CommandOptions
import qualified        FishFood.Data.File              as Data.File
import                  FishFood.Data.Verbosity()
import qualified        Text.Printf

-- | A probability, represented by a 'Double'; values are intended to lie within the closed unit-interval [0, 1], though the type itself doesn't enforce this.
type Probability        = Double

-- | Pairs a file-size with either the number of files ('Left') or the probability ('Right') that a file has that size.
type Result     = (Data.File.FileSize, Either Int {-file-count-} Probability)

-- | Extracts the file-size, i.e. the first component, from a 'Result'.
getFileSize :: Result -> Data.File.FileSize
getFileSize (fileSize, _)       = fileSize

-- | Extracts the file-count or probability, i.e. the second component, from a 'Result'.
getValue :: Result -> Either Int {-file-count-} Probability
getValue (_, value)     = value

-- | Represents either a /Probability Mass Function/ or a /Frequency-distribution/, as a list of 'Result's (one per file-size bin).
type FileSizeDistribution       = [Result]

-- | Calculates either the /Probability Mass Function/ or the /Frequency-distribution/ (as selected by the command-options) for the specified file-sizes.
--
-- * The file-count, mean & standard-deviation are logged as a single message via the 'Control.Monad.Writer.Writer'.
--
-- * Where no file-sizes are supplied, the distribution is empty.
--
-- * Where the standard-deviation is zero, the distribution is a single 'Result' keyed by the first file-size (presumably all the file-sizes are equal), carrying either certainty or the total file-count.
--
-- * Otherwise each file-size is allocated to a bin, whose size is governed either by a constant increment (defaulting to the rounded standard-deviation, but at least one) or by a constant ratio;
-- each bin is then represented in the result by the minimum file-size it can accept.
--
-- NOTE(review): the logged statistics are always derived from 'Data.File.getFileSizeStatistics', whose behaviour for an empty list isn't visible here.
calculateFileSizeDistribution :: (Floating ratio, RealFrac ratio) => Data.CommandOptions.CommandOptions ratio -> [Data.File.FileSize] -> Control.Monad.Writer.Writer [String] FileSizeDistribution
calculateFileSizeDistribution commandOptions fileSizes  = let
        binSizeDelta                    = Data.CommandOptions.getBinSizeDelta commandOptions
        deriveProbabilityMassFunction   = Data.CommandOptions.getDeriveProbabilityMassFunction commandOptions
        nDecimalDigits                  = Data.CommandOptions.getNDecimalDigits commandOptions

        mean, standardDeviation :: Double
        (nFiles, mean, standardDeviation)       = Data.File.getFileSizeStatistics fileSizes

        -- Resolves an optional bin-size increment, falling back to the rounded standard-deviation.
        getDefaultedBinSizeIncrement :: Maybe Data.File.FileSize -> Data.File.FileSize
        getDefaultedBinSizeIncrement    = Data.Maybe.fromMaybe $ round standardDeviation `max` 1 {-minimum increment-}  -- CAVEAT: guard against subsequent division by zero or infinite iteration.

        -- The index of the bin to which each file-size is allocated; either a quotient by the increment, or a logarithm to the base of the ratio.
        calculatedBinSizes :: [Data.File.FileSize]
        calculatedBinSizes      = map (
                \fileSize       -> either (
                        div {-round down-} fileSize . getDefaultedBinSizeIncrement {-non-zero-}
                ) (
                        floor {-round down-} . (`logBase` fromIntegral fileSize)        -- CAVEAT: converts file-size 0, to bin-size -infinity.
                ) binSizeDelta
         ) fileSizes    -- Each bin spans the semi-closed integral interval [size, succ size), so round down fractional values to match the lower bin.

        -- Seeds bins with a zero file-count, so that unpopulated bins between the extremes can appear in the result.
        -- NOTE(review): the candidate keys generated here are multiples of the increment (or rounded powers of the ratio),
        -- yet they're filtered against & merged with the bin-indices of 'calculatedBinSizes', which are quotients (or logarithms);
        -- so unless the increment is one, empty bins appear to be seeded at only a subset of the indices between the minimum & maximum. Confirm whether this is intended.
        initialFrequencyDistribution :: Data.Map.Map Data.File.FileSize Int
        initialFrequencyDistribution    = Data.Map.fromAscList . (
                `zip` repeat 0  -- The initial file-count.
         ) . takeWhile (
                <= maximum calculatedBinSizes
         ) . dropWhile (
                < minimum calculatedBinSizes
         ) $ either (
                \maybeBinSizeIncrement  -> iterate (+ getDefaultedBinSizeIncrement {-non-zero-} maybeBinSizeIncrement) 0
         ) (
                \binRatio               -> map round {-file-sizes are integral-} $ iterate (* binRatio) 1       -- The sequence could be started at fractional values in the open unit-interval, but the only value less than 1 which may be required is 0 (which isn't a sequence-member), which will be created later on demand.
         ) binSizeDelta

        -- Re-keys a map from bin-index to the minimum file-size which that bin can accept.
        mapBinSizeToFileSize :: Data.Map.Map Data.File.FileSize value -> Data.Map.Map Data.File.FileSize value
        mapBinSizeToFileSize    = Data.Map.mapKeys $ \binSize -> either (
                (* binSize) . getDefaultedBinSizeIncrement
         ) (
                ceiling {-round up-} . (^^ binSize)     -- Converts binSize -infinity, back to file-size 0.
         ) binSizeDelta -- Represent each bin by the minimum file-size it can accept.

        -- The distribution for file-sizes which aren't all identical; only evaluated on demand.
        binnedDistribution :: FileSizeDistribution
        binnedDistribution      = Data.Map.toList . (
                if deriveProbabilityMassFunction
                        then Data.Map.map Right . mapBinSizeToFileSize . Data.Map.map ((/ fromIntegral nFiles {-non-zero-}) . fromIntegral)
                        else Data.Map.map Left . mapBinSizeToFileSize
         ) $ Data.List.foldl' (
                \frequencyDistribution binSize  -> Data.Map.insertWith (+) binSize 1 frequencyDistribution      -- Count the files allocated to each bin; the order of insertion doesn't affect the totals.
         ) initialFrequencyDistribution calculatedBinSizes
 in do
        Control.Monad.Writer.tell [Text.Printf.printf "Files=%d, mean=%.*f, standard-deviation=%.*f" nFiles nDecimalDigits mean nDecimalDigits standardDeviation]

        return {-to Writer-monad-} $ case fileSizes of
                []      -> []   -- There's nothing to profile; this also avoids taking the head, minimum or maximum of an empty list.
                firstFileSize : _
                        | standardDeviation == 0        -> return {-to List-monad-} . (,) firstFileSize $ if deriveProbabilityMassFunction
                                then Right 1            -- i.e. certainty.
                                else Left nFiles        -- i.e. all.
                        | otherwise                     -> binnedDistribution

-- | Renders a file-size distribution as text; one line per 'Result', composed from the file-size column, a single space, & the value column, with lines separated (but not terminated) by a new-line.
--
-- * Where the requested verbosity exceeds the default, two lines are prepended; the column-headers & a separator-bar composed from @=@.
--
-- * File-sizes & file-counts are formatted to the width of their column, whereas probabilities are merely formatted to the requested number of decimal digits.
formatFileSizeDistribution :: Data.CommandOptions.CommandOptions ratio -> FileSizeDistribution -> String
formatFileSizeDistribution commandOptions fileSizeDistribution  = Data.List.intercalate "\n" [
        fileSizeColumn ++ " " ++ valueColumn |
                (fileSizeColumn, valueColumn)   <- preamble ++ map formatResult fileSizeDistribution
 ] where
        -- The optional heading, expressed as column-pairs like the data beneath it.
        preamble :: [(String, String)]
        preamble
                | Data.CommandOptions.getVerbosity commandOptions > Data.Default.def    = [
                        (Text.Printf.printf "%*s" fileSizeWidth fileSizeHeader, Text.Printf.printf "%*s" valueWidth valueHeader),       -- Column-headers.
                        (replicate fileSizeWidth '=', replicate valueWidth '=') -- Separator-bar.
                 ]
                | otherwise                                                             = []

        -- Formats both columns of one result.
        formatResult :: Result -> (String, String)
        formatResult (fileSize, value)  = (Text.Printf.printf "%*d" fileSizeWidth fileSize, formatValue value)

        -- Formats either a file-count or a probability.
        formatValue :: Either Int {-file-count-} Probability -> String
        formatValue (Left fileCount)    = Text.Printf.printf "%*d" valueWidth fileCount
        formatValue (Right probability) = Text.Printf.printf "%.*f" nDecimalDigits probability

        nDecimalDigits  = Data.CommandOptions.getNDecimalDigits commandOptions

        fileSizeHeader, valueHeader :: String
        fileSizeHeader  = "Bin-size"
        valueHeader
                | Data.CommandOptions.getDeriveProbabilityMassFunction commandOptions   = "Probability"
                | otherwise                                                             = "Frequency"

        fileSizeWidth, valueWidth :: Int
        fileSizeWidth   = length fileSizeHeader `max` 10        -- CAVEAT: the data-length may exceed the header-length, so define a minimum.
        valueWidth      = length valueHeader