{-|
Module      : Language.Rust.Data.InputStream
Description : Interface to the underlying input of parsing
Copyright   : (c) Alec Theriault, 2017-2018
License     : BSD-style
Maintainer  : alec.theriault@gmail.com
Stability   : experimental
Portability : portable

These are the only functions that need to be implemented in order to use the parser. Whether this
wraps 'BS.ByteString' or 'String' depends on whether the @useByteStrings@ option is on or not (it is
by default). Using 'BS.ByteString' means better handling of weird characters ('takeByte' for plain
'String' fails badly if you try to take a byte that doesn't fall on a character boundary), but it
means incurring a dependency on the [utf8-string](https://hackage.haskell.org/package/utf8-string)
package.
-}
{-# LANGUAGE CPP #-}

module Language.Rust.Data.InputStream (
  -- * InputStream type
  InputStream,
  countLines,
  inputStreamEmpty,
  
  -- * Introduction forms
  readInputStream,
  hReadInputStream,
  inputStreamFromString,
  
  -- * Elimination forms
  inputStreamToString,
  takeByte,
  takeChar,
  peekChars,
) where

import Data.Word   ( Word8 )
import Data.Coerce ( coerce )
import Data.String ( IsString(..) )
import System.IO

#ifdef USE_BYTESTRING
import qualified Data.ByteString as BS
import qualified Data.ByteString.UTF8 as BE
#else
import qualified Data.Char as Char
#endif

-- | Read an encoded file into an 'InputStream'.
--
-- The argument is the path of the file to read. How the contents are read depends on the backend
-- selected by the @USE_BYTESTRING@ CPP flag.
readInputStream :: FilePath -> IO InputStream
{-# INLINE readInputStream #-}

-- | Read an 'InputStream' from a 'Handle'.
--
-- The stream is built from the contents of the handle as returned by the backend's
-- @hGetContents@, so the handle should not be used again afterwards.
hReadInputStream :: Handle -> IO InputStream
{-# INLINE hReadInputStream #-}

-- | Convert 'InputStream' to 'String'.
--
-- With the 'BS.ByteString' backend the underlying bytes are decoded as UTF-8; with the 'String'
-- backend this merely unwraps the newtype.
inputStreamToString :: InputStream -> String
{-# INLINE inputStreamToString #-}

-- | Convert a 'String' to an 'InputStream'.
--
-- With the 'BS.ByteString' backend the string is encoded as UTF-8; with the 'String' backend it is
-- merely wrapped in the newtype.
inputStreamFromString :: String -> InputStream
{-# INLINE inputStreamFromString #-}

-- | Lets string literals stand for an 'InputStream' (with @OverloadedStrings@); the conversion is
-- delegated to 'inputStreamFromString'.
instance IsString InputStream where
  fromString = inputStreamFromString

-- | Read the first byte from an 'InputStream' and return that byte with what remains of the
-- 'InputStream'. Behaviour is undefined when 'inputStreamEmpty' returns 'True'.
--
-- With the 'String' backend the byte is the code point of the first character, and an error is
-- raised when that character is not Latin-1. The second example below therefore only applies to
-- the 'BS.ByteString' backend, where the remainder is displayed as the leftover UTF-8 bytes.
--
-- >>> takeByte "foo bar"
-- (102, "oo bar")
--
-- >>> takeByte "Ĥăƨĸëļļ"
-- (196, "\164\196\131\198\168\196\184\195\171\196\188\196\188")
--
takeByte :: InputStream -> (Word8, InputStream)
{-# INLINE takeByte #-}

-- | Read the first character from an 'InputStream' and return that 'Char' with what remains of the
-- 'InputStream'. Behaviour is undefined when 'inputStreamEmpty' returns 'True'.
--
-- Unlike 'takeByte', this always consumes a whole character: with the 'BS.ByteString' backend one
-- UTF-8 encoded character is decoded from the front of the bytes.
--
-- >>> takeChar "foo bar"
-- ('f', "oo bar")
--
-- >>> takeChar "Ĥăƨĸëļļ"
-- ('Ĥ', "ăƨĸëļļ")
--
takeChar :: InputStream -> (Char, InputStream)
{-# INLINE takeChar #-}

-- | Return @True@ if the given input stream is empty. Check this before calling 'takeByte' or
-- 'takeChar', whose behaviour is undefined on an empty stream.
--
-- >>> inputStreamEmpty ""
-- True
--
-- >>> inputStreamEmpty "foo"
-- False
--
inputStreamEmpty :: InputStream -> Bool
{-# INLINE inputStreamEmpty #-}

-- | Returns the first @n@ characters of the given input stream, without removing them. If the
-- stream holds fewer than @n@ characters, all of them are returned.
--
-- >>> peekChars 5 "foo bar"
-- "foo b"
--
-- >>> peekChars 5 "foo"
-- "foo"
--
-- >>> peekChars 3 "Ĥăƨĸëļļ"
-- "Ĥăƨ"
--
peekChars :: Int -> InputStream -> String
{-# INLINE peekChars #-}

-- | Returns the number of text lines in the given 'InputStream'. Lines are delimited by newline
-- characters; as the examples show, a trailing newline ends the last line rather than starting a
-- new, empty one.
--
-- >>> countLines ""
-- 0
--
-- >>> countLines "foo"
-- 1
--
-- >>> countLines "foo\n\nbar"
-- 3
--
-- >>> countLines "foo\n\nbar\n"
-- 3
--
countLines :: InputStream -> Int
{-# INLINE countLines #-}

#ifdef USE_BYTESTRING

-- | Opaque input type.
--
-- Backed by a strict 'BS.ByteString' when @USE_BYTESTRING@ is defined. The @IS@ constructor is not
-- exported, so streams are built and consumed only through the functions of this module.
newtype InputStream = IS BS.ByteString deriving (Eq, Ord)
-- Split off the leading byte with a single 'BS.uncons'. An empty stream fails with an explicit
-- message (mirroring 'takeChar') instead of the generic failures of 'BS.head' / 'BS.tail'.
takeByte bs = maybe (error "takeByte: no byte left") coerce (BS.uncons (coerce bs))
-- Decode one character off the front of the bytes; fail when 'BE.uncons' yields nothing.
takeChar (IS bytes) =
  case BE.uncons bytes of
    Just (ch, rest) -> (ch, IS rest)
    Nothing         -> error "takeChar: no char left"
-- The stream is empty exactly when the wrapped bytes are.
inputStreamEmpty (IS bytes) = BS.null bytes
-- Keep the first @n@ characters (not bytes) and decode just that prefix.
peekChars n (IS bytes) = BE.toString (BE.take n bytes)
-- Read the raw bytes of the file and wrap them.
readInputStream path = fmap IS (BS.readFile path)
-- Read the raw bytes available from the handle and wrap them.
hReadInputStream handle = fmap IS (BS.hGetContents handle)
-- Decode the wrapped bytes as UTF-8 text.
inputStreamToString (IS bytes) = BE.toString bytes
-- Encode the text as UTF-8 bytes and wrap them.
inputStreamFromString str = IS (BE.fromString str)
-- Split the bytes into lines and count them.
countLines (IS bytes) = length (BE.lines bytes)

-- | Shows the wrapped 'BS.ByteString' directly, without the @IS@ constructor.
instance Show InputStream where
  show (IS bs) = show bs

#else

-- | Opaque input type.
--
-- Backed by a plain 'String' when @USE_BYTESTRING@ is not defined. The @IS@ constructor is not
-- exported, so streams are built and consumed only through the functions of this module.
newtype InputStream = IS String deriving (Eq, Ord)
-- A byte can only be taken when the leading character fits in Latin-1; its code point is forced
-- before the pair is returned. The lazy pattern defers the failure on an empty stream until the
-- character is actually inspected.
takeByte (IS ~(ch : rest))
  | Char.isLatin1 ch = byte `seq` (byte, IS rest)
  | otherwise        = error "takeByte: not a latin-1 character"
  where
    byte = fromIntegral (Char.ord ch)
-- The lazy pattern means the pair is returned without inspecting the string; on an empty stream
-- the failure only surfaces once one of the components is demanded.
takeChar (IS ~(c:str)) = (c, IS str)
-- The stream is empty exactly when the wrapped string has no characters.
inputStreamEmpty (IS []) = True
inputStreamEmpty (IS _)  = False
-- Unwrap the string and keep its first @n@ characters.
peekChars n = take n . coerce
-- Decode the file as UTF-8 regardless of the current locale: plain 'readFile' would use the
-- locale's encoding, whereas the 'BS.ByteString' backend always interprets its input as UTF-8.
-- The contents are still read lazily through 'hGetContents', just as 'readFile' does.
readInputStream f = do
  h <- openFile f ReadMode
  hSetEncoding h utf8
  IS <$> hGetContents h
-- Switch the handle to UTF-8 before reading so that decoding does not depend on the locale and
-- agrees with the 'BS.ByteString' backend. Note that this changes the encoding set on the
-- caller's handle; the handle is consumed by 'hGetContents' in any case.
hReadInputStream h = do
  hSetEncoding h utf8
  IS <$> hGetContents h
-- Unwrapping the newtype is free: the stream already is a 'String' underneath.
inputStreamToString stream = coerce stream
-- Wrapping is free as well: the string is used as the stream unchanged.
inputStreamFromString = coerce
-- Unwrap the string, split it into lines and count them.
countLines = length . lines . coerce

-- | Shows the wrapped 'String' directly, without the @IS@ constructor.
instance Show InputStream where
  show (IS bs) = show bs

#endif