module Data.CSV.Table.Ops (
join, joinBy
, padJoin, padJoinBy
, diff, diffBy
, index
, indexBy
, sortBy
, project
, project1
, hide
, moveColL
, mapRows
, newColumn
) where
import Text.CSV
import System.FilePath
import Control.Applicative ((<$>))
import Data.Maybe
import qualified Data.List as L
import qualified Data.Map.Strict as M
import Data.CSV.Table.Types
sortBy :: Col -> TField -> Order -> Table -> Table
sortBy c t o tab = tab { body = L.sortBy (cmpRow i t o) (body tab)}
where
i = columnIndex tab c
cmpRow :: Int -> TField -> Order -> Row -> Row -> Ordering
cmpRow i t o (R fs) (R fs') = orient o $ compareAs t (fs !! i) (fs' !! i)
compareAs :: TField -> Field -> Field -> Ordering
compareAs FStr x y = compare x y
compareAs FInt x y = compare (read x :: Int) (read y :: Int)
compareAs FDbl x y = compare (read x :: Double) (read y :: Double)
orient :: Order -> Ordering -> Ordering
orient Dsc LT = GT
orient Dsc GT = LT
orient _ x = x
moveColL :: Table -> Col -> Table
moveColL t@(T n cs b) c
| i == 0 = t
| otherwise = T n (pluck i cs) [R $ pluck i r | R r <- b]
where
i = columnIndex t c
pluck = go []
where
go ls 0 (r:rs) = r : (ls +++ rs)
go ls n (r:rs) = go (r:ls) (n1) rs
[] +++ ys = ys
(x:xs) +++ ys = xs +++ (x:ys)
join :: Table -> Table -> Table
join t1 t2 = T n' cs b'
where
m1 = index t1
m2 = index t2
m' = M.intersectionWith (\r1 r2 -> r1 ++ tail r2) m1 m2
b' = R <$> M.elems m'
n' = dim t1 + dim t2 1
cs = cols t1 ++ tail (cols t2)
index :: Table -> M.Map Field [Field]
index t = M.fromList [(head r, r) | r <- getRows t]
joinBy :: Col -> Table -> Table -> Table
joinBy c t1 t2 = join t1' t2'
where
t1' = moveColL t1 c
t2' = moveColL t2 c
diff :: Table -> Table -> Table
diff t1@(T n1 c1 b1) t2 = T n1 c1 b1'
where
m1 = index t1
m2 = index t2
m1' = M.difference m1 m2
b1' = R <$> M.elems m1'
diffBy :: Col -> Table -> Table -> Table
diffBy c t1 t2 = diff t1' t2'
where
t1' = moveColL t1 c
t2' = moveColL t2 c
padJoin :: Row -> Table -> Table -> Table
padJoin (R xs) t1 t2 = T n' cs b'
where
m1 = index t1
m2 = index t2
m1' = M.difference m1 m2
m2' = M.mapWithKey (\k _ -> k:xs) m1
m' = M.intersectionWith (\r1 r2 -> r1 ++ tail r2) m1 (M.union m2 m2')
b' = R <$> M.elems m'
n' = dim t1 + dim t2 1
cs = (cols t1) ++ tail (cols t2)
padJoinBy c r t1 t2 = padJoin r t1' t2'
where
t1' = moveColL t1 c
t2' = moveColL t2 c
indexBy :: Table -> Col -> Table
indexBy t c = t { body = [ R fs | (_, fs) <- M.toList $ indexBy' t c] }
indexBy' :: Table -> Col -> M.Map Field [Field]
indexBy' t c
| ok = M.fromList [ (r !! i, r) | r <- getRows t]
| otherwise = error $ "indexBy: " ++ show c ++ " is not a unique column!"
where
i = columnIndex t c
ok = isUnique t c
isUnique :: Table -> Col -> Bool
isUnique t c = not $ isDup $ project1 t c
isDup xs = length xs /= length xs' where xs' = nubOrd xs
nubOrd = go . L.sort
where
go [] = []
go (x:xs) = x : go ys where (_,ys) = span (x ==) xs
project1 :: Table -> Col -> [Field]
project1 t c = [ r !! i | r <- getRows t]
where
i = columnIndex t c
project :: Table -> [Col] -> Table
project t cs = T n cs body'
where
body' = projRow is' <$> body t
is = (t `columnIndex`) <$> cs
n = length is
is' = L.sort is
projRow is (R xs) = R [ xs !! i | i <- is ]
hide :: Table -> [Col] -> Table
hide t cs = project t (cols t L.\\ cs)
mapRows :: (RowInfo -> a) -> Table -> [a]
mapRows f = map f . rowCols
rowCols :: Table -> [RowInfo]
rowCols t = [zip cs r | r <- rs]
where
cs = cols t
rs = getRows t
newColumn :: Col -> (RowInfo -> Field) -> Table -> Table
newColumn c f t = addColumn t c (mapRows f t)
addColumn :: Table -> Col -> [Field] -> Table
addColumn t c fs = T n' cs' b'
where
n' = 1 + dim t
cs' = cols t ++ [c]
b' = zipWith addField (body t) fs
addField :: Row -> Field -> Row
addField (R fs) f = R (fs ++ [f])
columnIndex :: Table -> Col -> Int
columnIndex t c = fromJust $ L.elemIndex c $ cols t