{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}

{-|
Module      : Graphics.Vega.VegaLite.Input
Copyright   : (c) Douglas Burke, 2018-2020
License     : BSD3

Maintainer  : dburke.gw@gmail.com
Stability   : unstable
Portability : CPP, OverloadedStrings, TupleSections

Data input (from file or created inline).

-}

module Graphics.Vega.VegaLite.Input
       ( Data
       , DataColumn
       , DataRow

       , Format(..)
       , DataType(..)

       , dataFromUrl
       , dataFromColumns
       , dataFromRows
       , dataFromJson
       , dataFromSource
       , dataName
       , datasets
       , dataColumn
       , dataRow
       , noData

       , dataSequence
       , dataSequenceAs

       ) where

import qualified Data.Aeson as A
import qualified Data.HashMap.Strict as HM
import qualified Data.Text as T

import Control.Arrow (second)

import Data.Aeson ((.=), Value, decode, encode, object, toJSON)
import Data.Maybe (fromMaybe, mapMaybe)

#if !(MIN_VERSION_base(4, 12, 0))
import Data.Monoid ((<>))
#endif

import Graphics.Vega.VegaLite.Data
  ( DataValue(..)
  , DataValues(..)
  , dataValueSpec
  )
import Graphics.Vega.VegaLite.Foundation
  ( FieldName
  )
import Graphics.Vega.VegaLite.Specification
  ( VLProperty(VLData, VLDatasets)
  , VLSpec
  , LabelledSpec
  )
import Graphics.Vega.VegaLite.Time (dateTimeProperty)


{-|

Indicates the type of data to be parsed when reading input data. For @FoDate@
and @FoUtc@, the formatting specification can be specified using
<https://vega.github.io/vega-lite/docs/data.html#format D3's formatting specifiers>
or left as an empty string if default date formatting is to be applied. Care should
be taken when assuming default parsing of dates because different browsers can
parse dates differently. Being explicit about the date format is usually safer.

-}
data DataType
    = FoNumber
      -- ^ Indicate numeric data type to be parsed when reading input data.
    | FoBoolean
      -- ^ Indicate Boolean data type to be parsed when reading input data.
    | FoDate T.Text
      -- ^ Date format for parsing input data using
      --   [D3's formatting specifiers](https://vega.github.io/vega-lite/docs/data.html#format)
      --   or left as an empty string for default formatting.
    | FoUtc T.Text
      -- ^ Similar to 'FoDate' but for UTC format dates.


{-|

Specifies the type of format a data source uses. If the format is indicated by
the file name extension (@".tsv"@, @".csv"@, @".json"@) there is no need to indicate the
format explicitly. However this can be useful if the filename extension does not
indicate type (e.g. @".txt"@) or you wish to customise the parsing of a file. For
example, when specifying the @JSON@ format, its parameter indicates the name of
property field containing the attribute data to extract. For details see the
<https://vega.github.io/vega-lite/docs/data.html#format Vega-Lite documentation>.
-}
data Format
    = JSON T.Text
      -- ^ Property to be extracted from some JSON when it has some surrounding structure.
      --   e.g., specifying the property @values.features@ is equivalent to retrieving
      --   @json.values.features@ from a JSON object with a custom delimiter.
    | CSV
      -- ^ Comma-separated (CSV) data file format.
    | TSV
      -- ^ Tab-separated (TSV) data file format
    | DSV Char
      -- ^ The fields are separated by the given character (which must be a
      --   single 16-bit code unit).
      --
      --   @since 0.4.0.0
      {- This isn't in the current vega-lite v3 schema as far as I can see
    | Arrow
      -- ^ <https://observablehq.com/@theneuralbit/introduction-to-apache-arrow Apache Arrow> format.
      --
      -- @since 0.4.0.0
      -}
    | TopojsonFeature T.Text
      -- ^ A topoJSON feature format containing an object with the given name. For example:
      --
      -- @
      -- 'dataFromUrl' \"londonBoroughs.json\" ['TopojsonFeature' \"boroughs\"]
      -- @
    | TopojsonMesh T.Text
      -- ^ A topoJSON mesh format containing an object with the given name. Unlike
      --   'TopojsonFeature', the corresponding geo data are returned as a single unified mesh,
      --   not as individual GeoJSON features.
    | Parse [(FieldName, DataType)]
      -- ^ Parsing rules when processing some data text, specified as
      --   a list of tuples in the form @(fieldname,
      --   datatype)@. Useful when automatic type inference needs to
      --   be overridden, for example when converting integers representing
      --   years into dates and strings into numbers:
      --
      -- @
      -- 'dataFromUrl' \"myDataFile.csv\"
      --    [ 'Parse' [ ( \"year\", 'FoDate' \"%Y\" ), ( "\y\", 'FoNumber' ) ] ]
      -- @


{-|

Represents a single column of data. Used when generating inline data with
'dataColumn' and 'dataFromColumns'.
-}
type DataColumn = [LabelledSpec]

{-|

Represents a single row of data. Used when generating inline data with
'dataRow' and 'dataFromRows'.
-}
type DataRow = VLSpec

{-|

Convenience type-annotation label for use with data generation functions.

@
myRegion : ['DataColumn'] -> 'Data'
myRegion =
    'dataFromColumns' []
        . 'dataColumn' "easting" ('Graphics.Vega.VegaLite.Numbers' [ -3, 4, 4, -3, -3 ])
        . 'dataColumn' "northing" ('Graphics.Vega.VegaLite.Numbers' [ 52, 52, 45, 45, 52 ])
@

It is the same as 'Graphics.Vega.VegaLite.PropertySpec' (which was added in @0.4.0.0@),
but kept separate to help better-document code.

-}
type Data = (VLProperty, VLSpec)


formatProperty :: Format -> [LabelledSpec]
formatProperty (JSON js) =
  let ps = [("type", "json")]
           <> if T.null (T.strip js) then [] else [("property", js)]
  in map (second toJSON) ps

formatProperty CSV = [("type", "csv")]
formatProperty TSV = [("type", "tsv")]
formatProperty (DSV delim) = [("type", "dsv"), "delimiter" .= delim]
-- formatProperty Arrow = [("type", "arrow")]
formatProperty (TopojsonFeature os) = [("type", "topojson")
                                      , "feature" .= os
                                      ]
formatProperty (TopojsonMesh os) = [("type", "topojson")
                                   , "mesh" .= os
                                   ]
formatProperty (Parse fmts) =
  let pObj = object (map (second dataTypeSpec) fmts)
  in [("parse", pObj)]


dataTypeSpec :: DataType -> VLSpec
dataTypeSpec dType =
  let s = case dType of
        FoNumber -> "number"
        FoBoolean -> "boolean"
        FoDate fmt | T.null fmt -> "date"
                   | otherwise -> "date:'" <> fmt <> "'"
        FoUtc fmt | T.null fmt -> "utc"
                  | otherwise -> "utc:'" <> fmt <> "'"
  in toJSON s


{-|

Create a row of data. A row comprises a list of (columnName, value) pairs.
The final parameter is the list of any other rows to which this is added.

This is expected to be used with 'dataFromRows'.

@
'dataRow' [(\"Animal\", 'Graphics.Vega.VegaLite.Str' \"Fish\"), (\"Age\", 'Graphics.Vega.VegaLite.Number' 28), (\"Year\", 'Graphics.Vega.VegaLite.Str' "2010")] []
@
-}
dataRow :: [(FieldName, DataValue)] -> [DataRow] -> [DataRow]
dataRow rw = (object (map (second dataValueSpec) rw) :)


{-|

Create a dataset comprising a collection of named 'Data' items. Each data item
can be created with normal data generating functions such as 'dataFromRows' or
'dataFromJson'. These can be later referred to using 'dataFromSource'.

@
let toJS = Data.Aeson.toJSON
    obj = Data.Aeson.object

    dvals = 'dataFromRows' []
            . 'dataRow' [ ( "cat", 'Graphics.Vega.VegaLite.Str' "a" ), ( "val", 'Graphics.Vega.VegaLite.Number' 10 ) ]
            . 'dataRow' [ ( "cat", 'Graphics.Vega.VegaLite.Str' "b" ), ( "val", 'Graphics.Vega.VegaLite.Number' 18 ) ]
    json = toJS
            [ obj [ ( "cat", toJS "a" ), ( "val", toJS 120 ) ]
            , obj [ ( "cat", toJS "b" ), ( "val", toJS 180 ) ]
            ]

    enc = ...

in 'Graphics.Vega.VegaLite.toVegaLite'
    [ 'datasets' [ ( \"myData\", dvals [] ),  ( \"myJson\", 'dataFromJson' json [] ) ]
    , 'dataFromSource' \"myData\" []
    , 'Graphics.Vega.VegaLite.mark' 'Graphics.Vega.VegaLite.Bar' []
    , enc []
    ]
@
-}
datasets :: [(T.Text, Data)] -> Data
datasets namedData =
  -- Follow Elm in parsing the structure to get what we want, but the code is
  -- written rather differently.
  --
  -- The input is expected to be a singleton list containing a pair.
  let converted = extract . snd
      specs = map (second converted) namedData

      convert :: Value -> Maybe [(T.Text, Value)]
      convert v = HM.toList <$> decode (encode v)

      extract din =
        let extract' [(_, v)] = Just v
            extract' _ = Nothing

        in fromMaybe din (convert din >>= extract')

  in (VLDatasets, object specs)


-- | This is for composed specifications, and it tells the visualization to
--   ignore the data from the parent.
--
--   @since 0.4.0.0
noData :: Data
noData = (VLData, A.Null)


{-|

Name to give a data source. Useful when a specification needs to reference a
data source, such as one generated via an API call.

@
dvals = 'dataName' \"myName\" ('dataFromUrl' \"myData.json\" [])
@

@since 0.4.0.0

-}

-- TODO: can we restrict this to only those with a VLProperty of VLData?

dataName ::
  T.Text
  -- ^ The name to give the data source
  -> Data
  -- ^ The data source to be named.
  -> Data
  -- ^ If the input @Data@ argument is not a data source then
  --   this is just the input value.
dataName s odata@(_, dataSpec) =
  -- follow Elm in parsing the structure to get what we want, but the code is
  -- written rather differently. This is based on the code used in datasets.
  --
  -- The input is expected to be a singleton list containing a pair.
  --
  let converted = convert >>= extract

      -- Aeson's objects are wrappers around a hash map, so this should be
      -- a relatively easy conversion. The type annotation isn't needed
      -- but left in for reference.
      --
      convert :: Maybe [(T.Text, Value)]
      convert = HM.toList <$> decode (encode dataSpec)

      extract [v] = Just v
      extract _ = Nothing

  in case converted of
       Just v -> (VLData, object [ "name" .= s, v ])
       _ -> odata


{-|

Declare a data source from a list of column values. Each column has a
specific type (e.g. 'Number' or 'String'), but different columns can have
different types.

Note that the columns are truncated to match the length of the shortest column.

@
'dataFromColumns' [ 'Parse' [ ( \"Year\", 'FoDate' "%Y" ) ] ]
  . 'dataColumn' \"Animal\" ('Strings' [ \"Fish\", \"Dog\", \"Cat\" ])
  . 'dataColumn' \"Age\" ('Numbers' [ 28, 12, 6 ])
  . 'dataColumn' \"Year\" ('Strings' [ "2010", "2014", "2015" ])
@
-}
dataFromColumns ::
  [Format]
  -- ^ An optional list of formatting instructions for the columns.
  --
  --   Simple numbers and strings do not normally need formatting, but it is
  --   good practice to explicitly declare date-time formats as handling of
  --   these values can vary between different viewers (e.g. browsers).
  --
  --   See the
  --   <https://vega.github.io/vega-lite/docs/data.html#format Vega-Lite documentation>
  --   for more details.
  -> [DataColumn]
  -- ^ The columns to add. This is expected to be created with one or more
  --   calls to 'dataColumn'.
  -> Data
dataFromColumns fmts cols =
  let dataArray = map object (transpose cols)

      vals = [("values", toJSON dataArray)]
             <> if null fmts
                then []
                else [("format", toJSON fmtObject)]

      fmtObject = object (concatMap formatProperty fmts)

  in (VLData, object vals)


transpose :: [[a]] -> [[a]]
transpose [] = []
transpose ([]:xss) = transpose xss
transpose ((x:xs) : xss) =
  let heads = filterMap elmHead xss --
      tails = filterMap elmTail xss

      elmHead (h:_) = Just h
      elmHead [] = Nothing

      elmTail [] = Nothing
      elmTail (_:ts) = Just ts

      filterMap = mapMaybe

  in (x : heads) : transpose (xs : tails)


{-|

Declare a data source from a provided json specification. The most likely use-case
for specifying json inline is when creating <http://geojson.org geojson> objects,
when 'Graphics.Vega.VegaLite.geometry', 'Graphics.Vega.VegaLite.geometryCollection', and 'Graphics.Vega.VegaLite.geoFeatureCollection' functions
may be used. For more general cases of json creation, consider 'Data.Aeson.encode'.

@
let geojson =
        'Graphics.Vega.VegaLite.geometry' ('Graphics.Vega.VegaLite.GeoPolygon' [ [ ( -3, 59 ), ( 4, 59 ), ( 4, 52 ), ( -3, 59 ) ] ]) []
in 'Graphics.Vega.VegaLite.toVegaLite'
    [ 'Graphics.Vega.VegaLite.width' 200
    , 'Graphics.Vega.VegaLite.height' 200
    , 'dataFromJson' geojson []
    , 'Graphics.Vega.VegaLite.projection' [ 'Graphics.Vega.VegaLite.PrType' 'Graphics.Vega.VegaLite.Orthographic' ]
    , 'Graphics.Vega.VegaLite.mark' 'Graphics.Vega.VegaLite.Geoshape' []
    ]
@
-}
dataFromJson :: VLSpec -> [Format] -> Data
dataFromJson vlspec fmts =
  let js = if null fmts
           then object [("values", vlspec)]
           else object [ ("values", vlspec)
                       , ("format",
                          object (concatMap formatProperty fmts)) ]
  in (VLData, js)


{-|

Create a column of data. A column has a name and a list of values. The final
parameter is the list of any other columns to which this is added.

This is expected to be used with 'dataFromColumns'.

@
'dataColumn' \"Animal\" ('Strings' [ \"Cat\", \"Dog\", \"Mouse\"]) []
@
-}
dataColumn :: FieldName -> DataValues -> [DataColumn] -> [DataColumn]
dataColumn colName dVals xs =
  let col = case dVals of
        Booleans cs -> map toJSON cs
        DateTimes cs -> map dtToJSON cs
        Numbers cs -> map toJSON cs
        Strings cs -> map toJSON cs

      dtToJSON = object . map dateTimeProperty
      x = map (colName,) col

  in x : xs


{-|

Declare a data source from a provided list of row values. Each row
contains a list of tuples where the first value is a string
representing the column name, and the second the column value for that
row. Each column can have a value of a different type but
__you must ensure__
that when subsequent rows are added, they match the types of
previous values with shared column names.

Note though that generally if you are creating data inline (as opposed
to reading from a file), adding data by column is more efficient and
less error-prone.

@
dataFromRows [ 'Parse' [ ( \"Year\", 'FoDate' "%Y" ) ] ]
  . 'dataRow' [ ( \"Animal\", 'Str' \"Fish\" ), ( \"Age\", 'Number' 28 ), ( \"Year\", 'Str' "2010" ) ]
  . 'dataRow' [ ( \"Animal\", 'Str' \"Dog\" ), ( \"Age\", 'Number' 12 ), ( \"Year\", 'Str' "2014" ) ]
  . 'dataRow' [ ( \"Animal\", 'Str' \"Cat\" ), ( \"Age\", 'Number' 6 ), ( \"Year\", 'Str' "2015" ) ]
@
-}
dataFromRows ::
  [Format]
  -- ^ An optional list of formatting instructions for the rows.
  --
  --   Simple numbers and strings do not normally need formatting, but it is
  --   good practice to explicitly declare date-time formats as handling of
  --   these values can vary between different viewers (e.g. browsers).
  --
  --   See the
  --   <https://vega.github.io/vega-lite/docs/data.html#format Vega-Lite documentation>
  --   for more details.
  -> [DataRow]
  -- ^ The rows to add. This is expected to be created with one or more
  --   calls to 'dataRow'.
  -> Data
dataFromRows fmts rows =
  let kvs = ("values", toJSON rows)
            : if null fmts
              then []
              else [("format", object (concatMap formatProperty fmts))]
  in (VLData, object kvs)


{-|

Declare data from a named source. The source may be from named 'datasets' within
a specification or a named data source created via the
<https://vega.github.io/vega/docs/api/view/#data Vega View API>.
An optional list of field formatting instructions can be provided as the second
parameter or an empty list to use the default formatting. See the
<https://vega.github.io/vega-lite/docs/data.html#named Vega-Lite documentation>
for details.

@
'Graphics.Vega.VegaLite.toVegaLite'
    [ 'datasets' [ ( "myData", dvals [] ),  ( "myJson", 'dataFromJson' json [] ) ]
    , 'dataFromSource' "myData" []
    , 'Graphics.Vega.VegaLite.mark' 'Graphics.Vega.VegaLite.Bar' []
    , ...
    ]
@
-}
dataFromSource :: T.Text -> [Format] -> Data
dataFromSource sourceName fmts =
  let kvs = ("name" .= sourceName)
            : if null fmts
              then []
              else [("format", object (concatMap formatProperty fmts))]
  in (VLData, object kvs)


{-|

Declare data source from a url. The url can be a local path on a web server
or an external http(s) url. Used to create a data ( property, specification ) pair.
An optional list of field formatting instructions can be provided as the second
parameter or an empty list to use the default formatting. See the
<https://vega.github.io/vega-lite/docs/data.html#format Vega-Lite documentation>
for details.

@
'dataFromUrl' "data/weather.csv" [ 'Parse' [ ( "date", 'FoDate' "%Y-%m-%d %H:%M" ) ] ]
@
-}
-- TODO: should this use a URL type?
dataFromUrl :: T.Text -> [Format] -> Data
dataFromUrl url fmts =
  let kvs = ("url" .= url)
            : if null fmts
              then []
              else [("format", object (concatMap formatProperty fmts))]
  in (VLData, object kvs)


{-|

Generate a sequence of numbers as a data source. The resulting
sequence will have the name @\"data\"@. To give it an alternative name use
'dataSequenceAs'.

@
myData = 'dataSequence' 0 6.28 0.1
@

@since 0.4.0.0

-}
dataSequence ::
  Double     -- ^ start of the sequence (inclusive)
  -> Double  -- ^ end of the sequence (exclusive)
  -> Double  -- ^ step size
  -> Data
dataSequence start stop step =
  let vals = [("sequence", object svals)]
      svals = [ "start" .= start
              , "stop" .= stop
              , "step" .= step
              ]

  in (VLData, object vals)


{-|

Generate a sequence of numbers as a named data source. This extends
'dataSequence' by allowing you to name the data source.

@
myTheta = 'dataSequenceAs' 0 6.28 0.1 \"theta\"
@

@since 0.4.0.0

-}
dataSequenceAs ::
  Double     -- ^ start of the sequence (inclusive)
  -> Double  -- ^ end of the sequence (exclusive)
  -> Double  -- ^ step size
  -> FieldName  -- ^ The name of the data source
  -> Data
dataSequenceAs start stop step outName =
  let vals = [ "sequence" .= object svals ]
      svals = [ "start" .= start
              , "stop" .= stop
              , "step" .= step
              , "as" .= outName
              ]

  in (VLData, object vals)