{-# LANGUAGE OverloadedStrings #-}

{-|
Module      : Text.Slugify
Description : The module to convert text into /slug/s. A slug is a hyphen separated string of words.
Maintainer  : hapytexeu+gh@gmail.com
Stability   : experimental
Portability : POSIX

The module to convert text into /slug/s. A slug is a hyphen separated string of words. Slugs are often used
to make visually pleasant URLs by transforming for example the title of an article into a slug, and use this
in the URL, for more information see the <https://en.wikipedia.org/wiki/Clean_URL#Slug Wikipedia section>.
-}

module Text.Slugify (
  -- * Slug modes
    SlugMode(SlugAscii, SlugUnicode)
  -- * Character tests
  , isWordSeparator, isWordChar, isRetainChar
  -- * Slug algorithms
  , slugify, slugifyUnicode, slugifyWith
  ) where

import Data.Char(GeneralCategory(LowercaseLetter, ModifierLetter, OtherLetter, UppercaseLetter, TitlecaseLetter, DecimalNumber, LetterNumber, OtherNumber, LineSeparator, ParagraphSeparator, Space), isAscii, generalCategory)
import Data.Text(Text, dropAround, intercalate, split, toLower)
import qualified Data.Text as T
import Data.Text.Normalize(NormalizationMode(NFKC, NFKD), normalize)

-- | The given mode to slugify a 'Text' object.
data SlugMode
  = SlugAscii  -- ^ Slugify by removing diacritics and only retain ASCII characters.
  | SlugUnicode  -- ^ Slugify by allowing unicode characters.
  deriving (SlugMode
SlugMode -> SlugMode -> Bounded SlugMode
forall a. a -> a -> Bounded a
maxBound :: SlugMode
$cmaxBound :: SlugMode
minBound :: SlugMode
$cminBound :: SlugMode
Bounded, Int -> SlugMode
SlugMode -> Int
SlugMode -> [SlugMode]
SlugMode -> SlugMode
SlugMode -> SlugMode -> [SlugMode]
SlugMode -> SlugMode -> SlugMode -> [SlugMode]
(SlugMode -> SlugMode)
-> (SlugMode -> SlugMode)
-> (Int -> SlugMode)
-> (SlugMode -> Int)
-> (SlugMode -> [SlugMode])
-> (SlugMode -> SlugMode -> [SlugMode])
-> (SlugMode -> SlugMode -> [SlugMode])
-> (SlugMode -> SlugMode -> SlugMode -> [SlugMode])
-> Enum SlugMode
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: SlugMode -> SlugMode -> SlugMode -> [SlugMode]
$cenumFromThenTo :: SlugMode -> SlugMode -> SlugMode -> [SlugMode]
enumFromTo :: SlugMode -> SlugMode -> [SlugMode]
$cenumFromTo :: SlugMode -> SlugMode -> [SlugMode]
enumFromThen :: SlugMode -> SlugMode -> [SlugMode]
$cenumFromThen :: SlugMode -> SlugMode -> [SlugMode]
enumFrom :: SlugMode -> [SlugMode]
$cenumFrom :: SlugMode -> [SlugMode]
fromEnum :: SlugMode -> Int
$cfromEnum :: SlugMode -> Int
toEnum :: Int -> SlugMode
$ctoEnum :: Int -> SlugMode
pred :: SlugMode -> SlugMode
$cpred :: SlugMode -> SlugMode
succ :: SlugMode -> SlugMode
$csucc :: SlugMode -> SlugMode
Enum, SlugMode -> SlugMode -> Bool
(SlugMode -> SlugMode -> Bool)
-> (SlugMode -> SlugMode -> Bool) -> Eq SlugMode
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SlugMode -> SlugMode -> Bool
$c/= :: SlugMode -> SlugMode -> Bool
== :: SlugMode -> SlugMode -> Bool
$c== :: SlugMode -> SlugMode -> Bool
Eq, Eq SlugMode
Eq SlugMode
-> (SlugMode -> SlugMode -> Ordering)
-> (SlugMode -> SlugMode -> Bool)
-> (SlugMode -> SlugMode -> Bool)
-> (SlugMode -> SlugMode -> Bool)
-> (SlugMode -> SlugMode -> Bool)
-> (SlugMode -> SlugMode -> SlugMode)
-> (SlugMode -> SlugMode -> SlugMode)
-> Ord SlugMode
SlugMode -> SlugMode -> Bool
SlugMode -> SlugMode -> Ordering
SlugMode -> SlugMode -> SlugMode
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: SlugMode -> SlugMode -> SlugMode
$cmin :: SlugMode -> SlugMode -> SlugMode
max :: SlugMode -> SlugMode -> SlugMode
$cmax :: SlugMode -> SlugMode -> SlugMode
>= :: SlugMode -> SlugMode -> Bool
$c>= :: SlugMode -> SlugMode -> Bool
> :: SlugMode -> SlugMode -> Bool
$c> :: SlugMode -> SlugMode -> Bool
<= :: SlugMode -> SlugMode -> Bool
$c<= :: SlugMode -> SlugMode -> Bool
< :: SlugMode -> SlugMode -> Bool
$c< :: SlugMode -> SlugMode -> Bool
compare :: SlugMode -> SlugMode -> Ordering
$ccompare :: SlugMode -> SlugMode -> Ordering
$cp1Ord :: Eq SlugMode
Ord, ReadPrec [SlugMode]
ReadPrec SlugMode
Int -> ReadS SlugMode
ReadS [SlugMode]
(Int -> ReadS SlugMode)
-> ReadS [SlugMode]
-> ReadPrec SlugMode
-> ReadPrec [SlugMode]
-> Read SlugMode
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [SlugMode]
$creadListPrec :: ReadPrec [SlugMode]
readPrec :: ReadPrec SlugMode
$creadPrec :: ReadPrec SlugMode
readList :: ReadS [SlugMode]
$creadList :: ReadS [SlugMode]
readsPrec :: Int -> ReadS SlugMode
$creadsPrec :: Int -> ReadS SlugMode
Read, Int -> SlugMode -> ShowS
[SlugMode] -> ShowS
SlugMode -> String
(Int -> SlugMode -> ShowS)
-> (SlugMode -> String) -> ([SlugMode] -> ShowS) -> Show SlugMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SlugMode] -> ShowS
$cshowList :: [SlugMode] -> ShowS
show :: SlugMode -> String
$cshow :: SlugMode -> String
showsPrec :: Int -> SlugMode -> ShowS
$cshowsPrec :: Int -> SlugMode -> ShowS
Show)

-- | Check if the given 'Char'acter is a word separator, for the given slugify
-- algorithm.
isWordSeparator
  :: Char  -- ^ The given 'Char'acter to check.
  -> Bool  -- ^ 'True' if the given character is a separator; 'False' otherwise.
isWordSeparator :: Char -> Bool
isWordSeparator Char
'-' = Bool
True
isWordSeparator Char
'\x0b' = Bool
True
isWordSeparator Char
'\x0c' = Bool
True
isWordSeparator Char
'\x1c' = Bool
True
isWordSeparator Char
'\x85' = Bool
True
isWordSeparator Char
'\t' = Bool
True
isWordSeparator Char
'\r' = Bool
True
isWordSeparator Char
'\x1d' = Bool
True
isWordSeparator Char
'\x1f' = Bool
True
isWordSeparator Char
'\x1e' = Bool
True
isWordSeparator Char
'\n' = Bool
True
isWordSeparator Char
c = case Char -> GeneralCategory
generalCategory Char
c of
    GeneralCategory
LineSeparator -> Bool
True         -- Zl
    GeneralCategory
ParagraphSeparator -> Bool
True    -- Zp
    GeneralCategory
Space -> Bool
True                 -- Zs
    GeneralCategory
_ -> Bool
False

-- | Check if the given 'Char'acter is considered a word character for the
-- slugify algorithm.
isWordChar
  :: Char  -- ^ The given 'Char'acter to check.
  -> Bool  -- ^ 'True' if it is a word character; 'False' otherwise.
isWordChar :: Char -> Bool
isWordChar Char
'_' = Bool
True
isWordChar Char
'\125259' = Bool
True
isWordChar Char
'\72162' = Bool
False
isWordChar Char
'\123215' = Bool
False
isWordChar Char
c = case Char -> GeneralCategory
generalCategory Char
c of
    GeneralCategory
LowercaseLetter -> Bool
True       -- Ll
    GeneralCategory
ModifierLetter -> Bool
True        -- Lm
    GeneralCategory
OtherLetter -> Bool
True           -- Lo
    GeneralCategory
UppercaseLetter -> Bool
True       -- Lu
    GeneralCategory
TitlecaseLetter -> Bool
True       -- Lt
    GeneralCategory
DecimalNumber -> Bool
True         -- Nd
    GeneralCategory
LetterNumber -> Bool
True          -- Nl
    GeneralCategory
OtherNumber -> Bool
True           -- No
    GeneralCategory
_ -> Bool
False

_postDrop :: Char -> Bool
_postDrop :: Char -> Bool
_postDrop Char
'_' = Bool
True
_postDrop Char
'-' = Bool
True
_postDrop Char
_ = Bool
False

-- | Check if the given character is retained by the slugify algorithm.
isRetainChar
  :: Char  -- ^ The given 'Char'acter to check.
  -> Bool  -- ^ True if the given 'Char'acter will be retained; 'False' otherwise. Some of these characters will however be converted to a hyphen (@-@), and thus eventually will only produce a different character in the slug.
isRetainChar :: Char -> Bool
isRetainChar Char
c = Char -> Bool
isWordChar Char
c Bool -> Bool -> Bool
|| Char -> Bool
isWordSeparator Char
c

_slugify' :: Text -> Text
_slugify' :: Text -> Text
_slugify' = (Char -> Bool) -> Text -> Text
dropAround Char -> Bool
_postDrop (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text] -> Text
intercalate Text
"-" ([Text] -> Text) -> (Text -> [Text]) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> [Text]
split Char -> Bool
isWordSeparator (Text -> [Text]) -> (Text -> Text) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter Char -> Bool
isRetainChar (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
toLower

-- | Slugify the given 'Text' object and retain Unicode characters.
slugifyUnicode
  :: Text  -- ^ The given text to convert to a slug.
  -> Text  -- ^ The corresponding /slug/.
slugifyUnicode :: Text -> Text
slugifyUnicode = Text -> Text
_slugify' (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NormalizationMode -> Text -> Text
normalize NormalizationMode
NFKC

-- | Slugify the given 'Text' object and remove diacritics and convert
-- characters to the corresponding ASCII equivalent.
slugify
  :: Text  -- ^ The given text to convert to a slug.
  -> Text  -- ^ The corresponding /slug/.
slugify :: Text -> Text
slugify = Text -> Text
_slugify' (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter Char -> Bool
isAscii (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NormalizationMode -> Text -> Text
normalize NormalizationMode
NFKD

-- | Slugify the given 'Text' with the given 'SlugMode'.
slugifyWith
  :: SlugMode  -- ^ The given mode to slugify.
  -> Text  -- ^ The given text to convert to a slug.
  -> Text  -- ^ The corresponding /slug/.
slugifyWith :: SlugMode -> Text -> Text
slugifyWith SlugMode
SlugAscii = Text -> Text
slugify
slugifyWith SlugMode
SlugUnicode = Text -> Text
slugifyUnicode