import Data.Char (ord)
import System.Environment (getArgs)
import System.IO (IOMode(..), hSeek, SeekMode(..))

import qualified Streamly.Data.Fold as FL
import qualified Streamly.FileSystem.Handle as FH
import qualified System.IO as FH
import qualified Streamly.Memory.Array as A
import qualified Streamly.Prelude as S
-- import qualified Streamly.FileSystem.FD as FH

import qualified Streamly.Internal.Data.Fold as FL
import qualified Streamly.Internal.Data.Unicode.Stream as US
import qualified Streamly.Internal.Memory.ArrayStream as AS
import qualified Streamly.Internal.Prelude as S

-- Write the contents of a file to stdout.
--
-- Byte stream version: FH.read reads the file in 32KB chunks and flattens
-- those chunks into a byte stream; FH.write takes that byte stream, regroups
-- it into 32KB chunks and writes the chunks to stdout. See 'cat' for the
-- chunked variant.
--
_cat :: FH.Handle -> IO ()
_cat = S.fold (FH.write FH.stdout) . S.unfold FH.read

-- Chunked version of '_cat', more efficient than the byte stream version:
-- the file is read in 256KB chunks and each chunk is written to stdout as
-- is, without being flattened into bytes in between.
cat :: FH.Handle -> IO ()
cat src = S.fold sink chunks
    where
    -- Size of the read buffer, in bytes (256KB).
    chunkSize = 256 * 1024
    chunks = S.unfold FH.readChunksWithBufferOf (chunkSize, src)
    sink = FH.writeChunks FH.stdout

-- Copy a source file to a destination file.
--
-- Byte stream version: FH.read reads the source in 32KB chunks and flattens
-- those chunks into a byte stream; FH.write takes that byte stream, regroups
-- it into 32KB chunks and writes the chunks to the destination handle.
_cp :: FH.Handle -> FH.Handle -> IO ()
_cp src dst = S.fold sink bytes
    where
    bytes = S.unfold FH.read src
    sink = FH.write dst

-- Chunked version of '_cp', more efficient than the byte stream version.
-- Reads the source in 256KB chunks and writes those chunks to the
-- destination handle.
cp :: FH.Handle -> FH.Handle -> IO ()
cp src dst = S.fold (FH.writeChunks dst) chunks
    where
    chunks = S.unfold FH.readChunksWithBufferOf (256 * 1024, src)

-- Code point of a character, converted to any numeric type with
-- fromIntegral. Used below to compare characters against stream elements.
ord' :: Num a => Char -> a
ord' c = fromIntegral (ord c)

-- Count lines like wc -l.
--
-- Char stream version. Reads the input as a byte stream, decodes it as
-- Latin-1, splits the resulting characters into lines (discarding the
-- content of each line with FL.drain) and prints the number of lines.
_wcl :: FH.Handle -> IO ()
_wcl src = S.length textLines >>= print
    where
    textLines = US.lines FL.drain (US.decodeLatin1 (S.unfold FH.read src))

-- More efficient chunked version of '_wcl'. Reads chunks from the input
-- handle and splits the chunks directly instead of converting them into a
-- byte stream first, then prints the number of resulting pieces.
wcl :: FH.Handle -> IO ()
wcl src = do
    -- 10 is the newline byte ('\n').
    count <- S.length (AS.splitOn 10 (S.unfold FH.readChunks src))
    print count

-- grep -c
--
-- Count the occurrences of a pattern in a file. The byte stream is split on
-- the pattern and the printed count is the number of resulting segments
-- minus one.
--
-- NOTE(review): ord' narrows each Char of the pattern with fromIntegral, so
-- this presumably only matches patterns made of single-byte characters --
-- confirm.
grepc :: String -> FH.Handle -> IO ()
grepc pat src = S.length segments >>= print . subtract 1
    where
    needle = A.fromList (map ord' pat)
    segments = S.splitOnSeq needle FL.drain (S.unfold FH.read src)

-- Compute the average line length in a file.
--
-- The byte stream is split into newline-terminated lines, each line is
-- reduced to its length with FL.length, and the stream of lengths is folded
-- into (sum of lengths) / (number of lines), printed as a Double.
avgll :: FH.Handle -> IO ()
avgll src = S.fold mean lineLengths >>= print
    where
    lineLengths =
        S.splitWithSuffix (== ord' '\n') FL.length (S.unfold FH.read src)
    -- Both folds run over the same stream of Int lengths in a single pass.
    mean = (/) <$> total <*> count
    total = asDouble FL.sum
    count = asDouble FL.length
    asDouble = fmap (fromIntegral :: Int -> Double)

-- Histogram of line lengths in a file.
--
-- The byte stream is split into newline-terminated lines and each line is
-- reduced to its length with FL.length. Lengths are then grouped into
-- buckets of ten (bucket 0 for lengths 0-9, bucket 1 for 10-19, ...), with
-- every length of 90 or more collected in bucket 9. The per-bucket line
-- counts produced by FL.classify FL.length are printed.
llhisto :: FH.Handle -> IO ()
llhisto src = print =<< (S.fold (FL.classify FL.length)
    $ S.map bucket
    $ S.splitWithSuffix (== ord' '\n') FL.length
    $ S.unfold FH.read src)
    where
    -- Pair a line length with its bucket key. The key is the length divided
    -- by ten, capped at 9; using `mod` here would group lines by the last
    -- digit of their length and make the cap unreachable.
    bucket n = (min 9 (n `div` 10), n)

-- Run every example against the file named by the first command line
-- argument. The handle is rewound before each example so that all of them
-- read the file from the start.
--
-- Both handles are opened with withFile so that they are closed, and the
-- copy written to "dst-xyz.txt" is flushed, even if one of the examples
-- throws. A missing argument is reported as an IOError with an explicit
-- message instead of failing inside 'head'.
main :: IO ()
main = do
    args <- getArgs
    case args of
        [] -> ioError (userError "missing argument: path of the input file")
        (name : _) -> FH.withFile name ReadMode $ \src -> do
            let rewind = hSeek src AbsoluteSeek 0

            rewind >> putStrLn "cat"    >> cat src          -- Unix cat program
            rewind >> putStr "wcl "     >> wcl src          -- Unix wc -l program
            rewind >> putStr "grepc "   >> grepc "aaaa" src -- Unix grep -c program
            rewind >> putStr "avgll "   >> avgll src        -- get average line length
            rewind >> putStr "llhisto " >> llhisto src      -- get line length histogram

            -- Unix cp program; the destination is closed as soon as the copy
            -- finishes.
            FH.withFile "dst-xyz.txt" WriteMode $ \dst ->
                rewind >> putStr "cp " >> cp src dst