{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ViewPatterns #-}

-- | Specify newline character sequences and algorithms generic over them.
--
-- Note that this module only understands simple character strings as newlines,
-- whereas some encodings have complex algorithmically-defined newlines.
-- Of course, most users will be interested in only Unix- or Windows-style newline.
--
-- Information for the pattern synonyms of this module comes from
-- https://en.wikipedia.org/wiki/Newline
module Text.Newline
  ( -- * Newline Type
    Newline(..)
  -- ** Common Newlines
  , pattern Unix
  , pattern Windows
  , pattern ClassicMac
  , pattern PrePosixQnx
  , pattern RiscOsSpool
  , pattern IbmMainframe
  -- * Text Operations
  -- ** Split Lines
  , breakLine
  -- ** Join Lines
  , linesUnix
  , linesBy
  , unlinesBy
  -- ** Conversion
  , pattern NlText
  , toText
  , fromText
  , toString
  ) where

import Data.Foldable (toList)
import Data.Maybe (catMaybes,fromJust)
import Data.String (IsString(..))
import Data.Text (Text)

import qualified Data.Text as T


-- | Specification for a newline character sequence
data Newline = OtherNl {-# UNPACK #-} !Char {-# UNPACK #-} !Text
  deriving (Newline -> Newline -> Bool
(Newline -> Newline -> Bool)
-> (Newline -> Newline -> Bool) -> Eq Newline
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Newline -> Newline -> Bool
$c/= :: Newline -> Newline -> Bool
== :: Newline -> Newline -> Bool
$c== :: Newline -> Newline -> Bool
Eq)

instance Show Newline where
  show :: Newline -> [Char]
show Newline
Unix = [Char]
"Unix"
  show Newline
Windows = [Char]
"Windows"
  show Newline
ClassicMac = [Char]
"ClassicMac"
  show Newline
PrePosixQnx = [Char]
"PrePosixQnx"
  show Newline
RiscOsSpool = [Char]
"RiscOsSpool"
  show Newline
IbmMainframe = [Char]
"IbmMainframe"
  show Newline
nl = Newline -> [Char]
toString Newline
nl

-- TODO a Read instance

------------ Text Operations ------------

-- | Equivalent to 'linesBy [Unix]'.
linesUnix :: Text -> [Text]
linesUnix :: Text -> [Text]
linesUnix Text
"" = []
linesUnix Text
str =
  let (Text
pre, Text
atpost) = (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') Text
str
   in case Text -> Maybe (Char, Text)
T.uncons Text
atpost of
        Maybe (Char, Text)
Nothing -> [Text
pre]
        Just (Char
'\n', Text
post) -> Text
pre Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Text -> [Text]
linesUnix Text
post
        Just (Char
c, Text
_) -> [Char] -> [Text]
forall a. [Char] -> a
errorWithoutStackTrace ([Char] -> [Text]) -> [Char] -> [Text]
forall a b. (a -> b) -> a -> b
$
          [Char]
"internal error in Text.Newline.linesUnix (expecting '\\n', but found" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> [Char]
forall a. Show a => a -> [Char]
show Char
c [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
")"

-- | Split text into lines where any of the given 'Newline' values are considered as a newline.
-- Returns not just the text of each line, but also the matched newline itself.
-- Returns 'Nothing' for the 'Newline' for a line that was ended by the end of input.
linesBy :: [Newline] -> Text -> [(Text, Maybe Newline)]
linesBy :: [Newline] -> Text -> [(Text, Maybe Newline)]
linesBy [Newline]
valid = Text -> [(Text, Maybe Newline)]
go
  where
  go :: Text -> [(Text, Maybe Newline)]
go Text
str = case [Newline] -> Text -> (Text, Maybe (Newline, Text))
breakLine [Newline]
valid Text
str of
    (Text
str', Maybe (Newline, Text)
Nothing) -> [(Text
str', Maybe Newline
forall a. Maybe a
Nothing)]
    (Text
str', Just (Newline
nl, Text
rest)) -> (Text
str', Newline -> Maybe Newline
forall a. a -> Maybe a
Just Newline
nl) (Text, Maybe Newline)
-> [(Text, Maybe Newline)] -> [(Text, Maybe Newline)]
forall a. a -> [a] -> [a]
: Text -> [(Text, Maybe Newline)]
go Text
rest

-- | Split one line from the input.
-- Also returns the newline that was matched and any following text.
-- If no newline was matched, then all the input is placed in the first return value.
breakLine :: [Newline] -> Text -> (Text, Maybe (Newline, Text))
breakLine :: [Newline] -> Text -> (Text, Maybe (Newline, Text))
breakLine [Newline]
valid Text
str =
  let (Text
pre, Text
atpost) = (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
nlStarts) Text
str
   in case Text -> Maybe (Char, Text)
T.uncons Text
atpost of
        Maybe (Char, Text)
Nothing -> (Text
str, Maybe (Newline, Text)
forall a. Maybe a
Nothing)
        Just (Char
at, Text
post) -> case [Newline] -> Text -> Maybe (Newline, Text)
takeSomeNl [Newline]
valid Text
atpost of
          Maybe (Newline, Text)
Nothing ->
            let preAt :: Text
preAt = Text
pre Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Char -> Text
T.singleton Char
at
                (Text
pre', Maybe (Newline, Text)
post') = [Newline] -> Text -> (Text, Maybe (Newline, Text))
breakLine [Newline]
valid Text
post
             in (Text
preAt Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
pre', Maybe (Newline, Text)
post')
          Just (Newline
nl, Text
post') -> (Text
pre, (Newline, Text) -> Maybe (Newline, Text)
forall a. a -> Maybe a
Just (Newline
nl, Text
post'))
  where
  nlStarts :: [Char]
nlStarts = (\(OtherNl Char
c Text
_) -> Char
c) (Newline -> Char) -> [Newline] -> [Char]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Newline]
valid

takeSomeNl :: [Newline] -> Text -> Maybe (Newline, Text)
takeSomeNl :: [Newline] -> Text -> Maybe (Newline, Text)
takeSomeNl [Newline]
valid Text
str = case [Maybe (Newline, Text)] -> [(Newline, Text)]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Newline, Text)] -> [(Newline, Text)])
-> [Maybe (Newline, Text)] -> [(Newline, Text)]
forall a b. (a -> b) -> a -> b
$ (Newline -> Text -> Maybe (Newline, Text))
-> Text -> Newline -> Maybe (Newline, Text)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Newline -> Text -> Maybe (Newline, Text)
takeNl Text
str (Newline -> Maybe (Newline, Text))
-> [Newline] -> [Maybe (Newline, Text)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Newline]
valid of
  [] -> Maybe (Newline, Text)
forall a. Maybe a
Nothing
  ((Newline, Text)
it:[(Newline, Text)]
_) -> (Newline, Text) -> Maybe (Newline, Text)
forall a. a -> Maybe a
Just (Newline, Text)
it

takeNl :: Newline -> Text -> Maybe (Newline, Text)
takeNl :: Newline -> Text -> Maybe (Newline, Text)
takeNl nl :: Newline
nl@(NlText Text
t) Text
str = if Text
t Text -> Text -> Bool
`T.isPrefixOf` Text
str
  then (Newline, Text) -> Maybe (Newline, Text)
forall a. a -> Maybe a
Just (Newline
nl, Int -> Text -> Text
T.drop (Text -> Int
T.length Text
t) Text
str)
  else Maybe (Newline, Text)
forall a. Maybe a
Nothing

-- | Join lines by inserting newlines between them.
--
-- Mirrors 'Prelude.unlines', but allows different line endings.
unlinesBy :: (IsString str, Monoid str, Foldable f) => Newline -> f str -> str
unlinesBy :: forall str (f :: * -> *).
(IsString str, Monoid str, Foldable f) =>
Newline -> f str -> str
unlinesBy Newline
nl = [str] -> str
forall {a}. (Monoid a, IsString a) => [a] -> a
go ([str] -> str) -> (f str -> [str]) -> f str -> str
forall b c a. (b -> c) -> (a -> b) -> a -> c
. f str -> [str]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
  where
  go :: [a] -> a
go [] = a
forall a. Monoid a => a
mempty
  go [a]
ts = (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 (\a
x a
xs -> a
x a -> a -> a
forall a. Semigroup a => a -> a -> a
<> [Char] -> a
forall a. IsString a => [Char] -> a
fromString (Newline -> [Char]
toString Newline
nl) a -> a -> a
forall a. Semigroup a => a -> a -> a
<> a
xs) [a]
ts

{-# COMPLETE NlText #-}
-- | Construct/deconstruct 'Newline' as a whole 'Text'.
-- The constructor is partial: it is undefined constructed with an empty string.
pattern NlText :: Text -> Newline
pattern $bNlText :: Text -> Newline
$mNlText :: forall {r}. Newline -> (Text -> r) -> ((# #) -> r) -> r
NlText t <- (toText -> t)
  where
  NlText Text
t = Text -> Newline
unsafeFromText Text
t

-- | Convert a newline specification to the text it matches.
toText :: Newline -> Text
toText :: Newline -> Text
toText (OtherNl Char
c Text
t) = Char -> Text -> Text
T.cons Char
c Text
t

-- | Convert text into a newline specification that matches it.
fromText :: Text -> Maybe Newline
fromText :: Text -> Maybe Newline
fromText Text
t = (Char -> Text -> Newline) -> (Char, Text) -> Newline
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Char -> Text -> Newline
OtherNl ((Char, Text) -> Newline) -> Maybe (Char, Text) -> Maybe Newline
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Maybe (Char, Text)
T.uncons Text
t

-- | As 'fromText', but is partial in the case of empty input.
unsafeFromText :: Text -> Newline
unsafeFromText :: Text -> Newline
unsafeFromText = Maybe Newline -> Newline
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Newline -> Newline)
-> (Text -> Maybe Newline) -> Text -> Newline
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Maybe Newline
fromText

-- | As 'toText', but targeting the 'String' type.
toString :: Newline -> String
toString :: Newline -> [Char]
toString (OtherNl Char
c Text
str) = Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: Text -> [Char]
T.unpack Text
str

------------ Shortcuts ------------

-- | For Unix and Unix-like systems.
-- It's by far the most common (and easy-to-recognize) newline,
--   so when in doubt, don't generate anything else.
pattern Unix :: Newline
pattern $bUnix :: Newline
$mUnix :: forall {r}. Newline -> ((# #) -> r) -> ((# #) -> r) -> r
Unix = OtherNl '\n' ""

-- | For DOS and DOS-like systems, including Microsoft Windows.
-- Still common, and in being so, is a pain.
pattern Windows :: Newline
pattern $bWindows :: Newline
$mWindows :: forall {r}. Newline -> ((# #) -> r) -> ((# #) -> r) -> r
Windows = OtherNl '\r' "\n"


-- | For a variety of older machines, such as
--   Commodore 8-bit machines, ZX Spectrum, TRS-80,
--   Apple II series, the classic Mac OS,
--   and the MIT Lisp Machine.
pattern ClassicMac :: Newline
pattern $bClassicMac :: Newline
$mClassicMac :: forall {r}. Newline -> ((# #) -> r) -> ((# #) -> r) -> r
ClassicMac = OtherNl '\r' ""

-- | For QNX version <4
pattern PrePosixQnx :: Newline
pattern $bPrePosixQnx :: Newline
$mPrePosixQnx :: forall {r}. Newline -> ((# #) -> r) -> ((# #) -> r) -> r
PrePosixQnx = OtherNl '\RS' ""

-- | For RISC OS spoolet text output.
--
-- Reportedly also used for Acorn BBC, but that machine is also listed as using `"\r"`.
-- The manual (http://stardot.org.uk/mirrors/www.bbcdocs.com/filebase/essentials/BBC%20Microcomputer%20Advanced%20User%20Guide.pdf)
-- does back up this assertion, though.
pattern RiscOsSpool :: Newline
pattern $bRiscOsSpool :: Newline
$mRiscOsSpool :: forall {r}. Newline -> ((# #) -> r) -> ((# #) -> r) -> r
RiscOsSpool = OtherNl '\n' "\r"

-- | EBCDIC systems
-- — mainly IBM mainframe systems, including z/OS (OS/390) and IBM i (OS/400) —
-- use NL (New Line, 0x15)[8] as the character combining the functions of line feed and carriage return.
-- The equivalent Unicode character (0x85) is called NEL (Next Line).
--
-- Citation: IBM System/360 Reference Data Card, Publication GX20-1703, IBM Data Processing Division, White Plains, NY
pattern IbmMainframe :: Newline
pattern $bIbmMainframe :: Newline
$mIbmMainframe :: forall {r}. Newline -> ((# #) -> r) -> ((# #) -> r) -> r
IbmMainframe = OtherNl '\x85' ""