{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}

-- Module      : Data.Text.Manipulate
-- Copyright   : (c) 2014-2020 Brendan Hay <brendan.g.hay@gmail.com>
-- License     : This Source Code Form is subject to the terms of
--               the Mozilla Public License, v. 2.0.
--               A copy of the MPL can be found in the LICENSE file or
--               you can obtain it at http://mozilla.org/MPL/2.0/.
-- Maintainer  : Brendan Hay <brendan.g.hay@gmail.com>
-- Stability   : experimental
-- Portability : non-portable (GHC extensions)

-- | Manipulate identifiers and structurally non-complex pieces
-- of text by delimiting word boundaries via a combination of whitespace,
-- control-characters, and case-sensitivity.
--
-- Assumptions have been made about word boundary characteristics inherint
-- in predominantely English text, please see individual function documentation
-- for further details and behaviour.
module Data.Text.Manipulate
  ( -- * Strict vs lazy types
    -- $strict

    -- * Unicode
    -- $unicode

    -- * Fusion
    -- $fusion

    -- * Subwords

    -- ** Removing words
    takeWord,
    dropWord,
    stripWord,

    -- ** Breaking on words
    breakWord,
    splitWords,

    -- * Character manipulation
    lowerHead,
    upperHead,
    mapHead,

    -- * Line manipulation
    indentLines,
    prependLines,

    -- * Ellipsis
    toEllipsis,
    toEllipsisWith,

    -- * Acronyms
    toAcronym,

    -- * Ordinals
    toOrdinal,

    -- * Casing
    toTitle,
    toCamel,
    toPascal,
    toSnake,
    toSpinal,
    toTrain,

    -- * Boundary predicates
    isBoundary,
    isWordBoundary,
  )
where

import qualified Data.Char as Char
import Data.List (intersperse)
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LText
import qualified Data.Text.Lazy.Manipulate as LMan
import Data.Text.Manipulate.Internal.Fusion (strict)
import qualified Data.Text.Manipulate.Internal.Fusion as Fusion
import Data.Text.Manipulate.Internal.Types

-- $strict
-- This library provides functions for manipulating both strict and lazy Text types.
-- The strict functions are provided by the "Data.Text.Manipulate" module, while the lazy
-- functions are provided by the "Data.Text.Lazy.Manipulate" module.

-- $unicode
-- While this library supports Unicode in a similar fashion to the
-- underlying <http://hackage.haskell.org/package/text text> library,
-- more explicit Unicode handling of word boundaries can be found in the
-- <http://hackage.haskell.org/package/text-icu text-icu> library.

-- $fusion
-- Many functions in this module are subject to fusion, meaning that
-- a pipeline of such functions will usually allocate at most one Text value.
--
-- Functions that can be fused by the compiler are documented with the
-- phrase /Subject to fusion/.

-- DEBUG:
-- import Data.Text.Internal.Fusion        (stream)
-- import Data.Text.Internal.Fusion.Common (unstreamList)
-- tokens = unstreamList . Fusion.tokenise . stream

-- FIXME:
-- dropWord "ALong" == ""

-- | Lowercase the first character of a piece of text.
--
-- >>> lowerHead "Title Cased"
-- "title Cased"
lowerHead :: Text -> Text
lowerHead :: Text -> Text
lowerHead = (Char -> Char) -> Text -> Text
mapHead Char -> Char
Char.toLower

-- | Uppercase the first character of a piece of text.
--
-- >>> upperHead "snake_cased"
-- "Snake_cased"
upperHead :: Text -> Text
upperHead :: Text -> Text
upperHead = (Char -> Char) -> Text -> Text
mapHead Char -> Char
Char.toUpper

-- | Apply a function to the first character of a piece of text.
mapHead :: (Char -> Char) -> Text -> Text
mapHead :: (Char -> Char) -> Text -> Text
mapHead Char -> Char
f Text
x =
  case Text -> Maybe (Char, Text)
Text.uncons Text
x of
    Just (Char
c, Text
cs) -> Char -> Text
Text.singleton (Char -> Char
f Char
c) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
cs
    Maybe (Char, Text)
Nothing -> Text
x

-- | Indent newlines by the given number of spaces.
--
-- /See:/ 'prependLines'
indentLines :: Int -> Text -> Text
indentLines :: Int -> Text -> Text
indentLines Int
n = Text -> Text -> Text
prependLines (Int -> Text -> Text
Text.replicate Int
n Text
" ")

-- | Prepend newlines with the given separator
prependLines :: Text -> Text -> Text
prependLines :: Text -> Text -> Text
prependLines Text
sep = Text -> Text -> Text
forall a. Monoid a => a -> a -> a
mappend Text
sep (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
Text.unlines ([Text] -> Text) -> (Text -> [Text]) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
intersperse Text
sep ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
Text.lines

-- | O(n) Truncate text to a specific length.
-- If the text was truncated the ellipsis sign "..." will be appended.
--
-- /See:/ 'toEllipsisWith'
toEllipsis :: Int -> Text -> Text
toEllipsis :: Int -> Text -> Text
toEllipsis Int
n = Int -> Text -> Text -> Text
toEllipsisWith Int
n Text
"..."

-- | O(n) Truncate text to a specific length.
-- If the text was truncated the given ellipsis sign will be appended.
toEllipsisWith ::
  -- | Length.
  Int ->
  -- | Ellipsis.
  Text ->
  Text ->
  Text
toEllipsisWith :: Int -> Text -> Text -> Text
toEllipsisWith Int
n Text
suf Text
x
  | Text -> Int
Text.length Text
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
n = Int -> Text -> Text
Text.take Int
n Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
suf
  | Bool
otherwise = Text
x

-- | O(n) Returns the first word, or the original text if no word
-- boundary is encountered. /Subject to fusion./
takeWord :: Text -> Text
takeWord :: Text -> Text
takeWord = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.takeWord

-- | O(n) Return the suffix after dropping the first word. If no word
-- boundary is encountered, the result will be empty. /Subject to fusion./
dropWord :: Text -> Text
dropWord :: Text -> Text
dropWord = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.dropWord

-- | Break a piece of text after the first word boundary is encountered.
--
-- >>> breakWord "PascalCasedVariable"
-- ("Pacal", "CasedVariable")
--
-- >>> breakWord "spinal-cased-variable"
-- ("spinal", "cased-variable")
breakWord :: Text -> (Text, Text)
breakWord :: Text -> (Text, Text)
breakWord Text
x = (Text -> Text
takeWord Text
x, Text -> Text
dropWord Text
x)

-- | O(n) Return the suffix after removing the first word, or 'Nothing'
-- if no word boundary is encountered.
--
-- >>> stripWord "HTML5Spaghetti"
-- Just "Spaghetti"
--
-- >>> stripWord "noboundaries"
-- Nothing
stripWord :: Text -> Maybe Text
stripWord :: Text -> Maybe Text
stripWord Text
x
  | Text -> Int
Text.length Text
y Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Text -> Int
Text.length Text
x = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
y
  | Bool
otherwise = Maybe Text
forall a. Maybe a
Nothing
  where
    y :: Text
y = Text -> Text
dropWord Text
x

-- | O(n) Split into a list of words delimited by boundaries.
--
-- >>> splitWords "SupercaliFrag_ilistic"
-- ["Supercali","Frag","ilistic"]
splitWords :: Text -> [Text]
splitWords :: Text -> [Text]
splitWords = Text -> [Text]
go
  where
    go :: Text -> [Text]
go Text
x = case Text -> (Text, Text)
breakWord Text
x of
      (Text
h, Text
t)
        | Text -> Bool
Text.null Text
h -> Text -> [Text]
go Text
t
        | Text -> Bool
Text.null Text
t -> [Text
h]
        | Bool
otherwise -> Text
h Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Text -> [Text]
go Text
t

-- | O(n) Create an adhoc acronym from a piece of cased text.
--
-- >>> toAcronym "AmazonWebServices"
-- Just "AWS"
--
-- >>> toAcronym "Learn-You Some_Haskell"
-- Just "LYSH"
--
-- >>> toAcronym "this_is_all_lowercase"
-- Nothing
toAcronym :: Text -> Maybe Text
toAcronym :: Text -> Maybe Text
toAcronym ((Char -> Bool) -> Text -> Text
Text.filter Char -> Bool
Char.isUpper -> Text
x)
  | Text -> Int
Text.length Text
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
x
  | Bool
otherwise = Maybe Text
forall a. Maybe a
Nothing

-- | Render an ordinal used to denote the position in an ordered sequence.
--
-- >>> toOrdinal (101 :: Int)
-- "101st"
--
-- >>> toOrdinal (12 :: Int)
-- "12th"
toOrdinal :: Integral a => a -> Text
toOrdinal :: a -> Text
toOrdinal = Text -> Text
LText.toStrict (Text -> Text) -> (a -> Text) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Text
forall a. Integral a => a -> Text
LMan.toOrdinal

-- | O(n) Convert casing to @Title Cased Phrase@. /Subject to fusion./
toTitle :: Text -> Text
toTitle :: Text -> Text
toTitle = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.toTitle

-- | O(n) Convert casing to @camelCasedPhrase@. /Subject to fusion./
toCamel :: Text -> Text
toCamel :: Text -> Text
toCamel = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.toCamel

-- | O(n) Convert casing to @PascalCasePhrase@. /Subject to fusion./
toPascal :: Text -> Text
toPascal :: Text -> Text
toPascal = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.toPascal

-- | O(n) Convert casing to @snake_cased_phrase@. /Subject to fusion./
toSnake :: Text -> Text
toSnake :: Text -> Text
toSnake = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.toSnake

-- | O(n) Convert casing to @spinal-cased-phrase@. /Subject to fusion./
toSpinal :: Text -> Text
toSpinal :: Text -> Text
toSpinal = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.toSpinal

-- | O(n) Convert casing to @Train-Cased-Phrase@. /Subject to fusion./
toTrain :: Text -> Text
toTrain :: Text -> Text
toTrain = (Stream Char -> Stream Char) -> Text -> Text
strict Stream Char -> Stream Char
Fusion.toTrain