{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}

import Data.Time
import qualified Data.Vector.Unboxed as VU
import qualified DataFrame as D
import qualified DataFrame.Functions as F
import System.Random.Stateful

import Data.Text (Text)
import DataFrame ((|>))
import DataFrame.DecisionTree
import DataFrame.Functions ((.=))

-- Compile-time splice over the training CSV. The identifier @loan_paid_back@
-- used in 'main' is not defined anywhere else in this file, so it is
-- presumably generated here from the CSV header (one binding per column --
-- TODO confirm against the DataFrame.Functions documentation).
-- The path has to be spelled out inline: a binding from this module cannot
-- be referenced inside a top-level splice.
$(F.declareColumnsFromCsvFile "../../Downloads/playground-series-s5e11/train.csv")

-- | Fit a decision tree on the training CSV, print how its predictions line
-- up with the known labels, then score the test CSV and write the result to
-- @predictions.csv@.
main :: IO ()
main = do
    trainingFrame <- D.readCsv "../../Downloads/playground-series-s5e11/train.csv"

    let
        -- The label column is rounded from Double to Int. Because the element
        -- type changes, the expression handed back by 'D.deriveWithExpr' is
        -- used from here on in place of the generated @loan_paid_back@.
        (paidBackLabel, labelledTrain) =
            D.deriveWithExpr
                (F.name loan_paid_back)
                (F.lift (round @Double @Int) loan_paid_back)
                trainingFrame

        -- The "id" column is dropped from the frame the tree is fitted on.
        -- NOTE(review): the meaning of the two 'TreeConfig' arguments (15, 2)
        -- is not visible in this file -- check DataFrame.DecisionTree.
        treeModel =
            fitDecisionTree
                (TreeConfig 15 2)
                paidBackLabel
                (D.exclude ["id"] labelledTrain)

        -- Training rows with the model's output attached as "prediction".
        scoredTrain = D.derive "prediction" treeModel labelledTrain

        -- Row count for every (label, prediction) pair, both keys descending.
        labelVsPrediction =
            scoredTrain
                |> D.groupBy [F.name paidBackLabel, "prediction"]
                |> D.aggregate ["count" .= F.count paidBackLabel]
                |> D.sortBy [D.Desc "prediction", D.Desc (F.name paidBackLabel)]

    print labelVsPrediction

    testFrame <- D.readCsv "../../Downloads/playground-series-s5e11/test.csv"

    let
        scoredTest = D.derive "prediction" treeModel testFrame

        -- Keep only "id" and the prediction, and give the prediction column
        -- the same name as the training label column.
        submission =
            scoredTest
                |> D.select ["id", "prediction"]
                |> D.rename "prediction" (F.name loan_paid_back)

    D.writeCsv "predictions.csv" submission