{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-}
module Brassica.SoundChange.Tokenise
(
Component(..)
, getWords
, splitMultipleResults
, tokeniseWord
, tokeniseWords
, detokeniseWords'
, detokeniseWords
, concatWithBoundary
, findFirstCategoriesDecl
, withFirstCategoriesDecl
, wordParser
, componentsParser
, sortByDescendingLength
) where
import Data.Char (isSpace)
import Data.Function (on)
import Data.Functor.Identity
import Data.List (intersperse, sortBy)
import Data.Maybe (mapMaybe)
import Data.Ord (Down(..))
import Data.Void (Void)
import GHC.Generics (Generic)
import Control.DeepSeq (NFData)
import Text.Megaparsec hiding (State)
import Text.Megaparsec.Char
import Brassica.SoundChange.Types
data Component a = Word a | Separator String | Gloss String
deriving (Component a -> Component a -> Bool
forall a. Eq a => Component a -> Component a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Component a -> Component a -> Bool
$c/= :: forall a. Eq a => Component a -> Component a -> Bool
== :: Component a -> Component a -> Bool
$c== :: forall a. Eq a => Component a -> Component a -> Bool
Eq, Int -> Component a -> ShowS
forall a. Show a => Int -> Component a -> ShowS
forall a. Show a => [Component a] -> ShowS
forall a. Show a => Component a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Component a] -> ShowS
$cshowList :: forall a. Show a => [Component a] -> ShowS
show :: Component a -> String
$cshow :: forall a. Show a => Component a -> String
showsPrec :: Int -> Component a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Component a -> ShowS
Show, forall a b. a -> Component b -> Component a
forall a b. (a -> b) -> Component a -> Component b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> Component b -> Component a
$c<$ :: forall a b. a -> Component b -> Component a
fmap :: forall a b. (a -> b) -> Component a -> Component b
$cfmap :: forall a b. (a -> b) -> Component a -> Component b
Functor, forall a. Eq a => a -> Component a -> Bool
forall a. Num a => Component a -> a
forall a. Ord a => Component a -> a
forall m. Monoid m => Component m -> m
forall a. Component a -> Bool
forall a. Component a -> Int
forall a. Component a -> [a]
forall a. (a -> a -> a) -> Component a -> a
forall m a. Monoid m => (a -> m) -> Component a -> m
forall b a. (b -> a -> b) -> b -> Component a -> b
forall a b. (a -> b -> b) -> b -> Component a -> b
forall (t :: * -> *).
(forall m. Monoid m => t m -> m)
-> (forall m a. Monoid m => (a -> m) -> t a -> m)
-> (forall m a. Monoid m => (a -> m) -> t a -> m)
-> (forall a b. (a -> b -> b) -> b -> t a -> b)
-> (forall a b. (a -> b -> b) -> b -> t a -> b)
-> (forall b a. (b -> a -> b) -> b -> t a -> b)
-> (forall b a. (b -> a -> b) -> b -> t a -> b)
-> (forall a. (a -> a -> a) -> t a -> a)
-> (forall a. (a -> a -> a) -> t a -> a)
-> (forall a. t a -> [a])
-> (forall a. t a -> Bool)
-> (forall a. t a -> Int)
-> (forall a. Eq a => a -> t a -> Bool)
-> (forall a. Ord a => t a -> a)
-> (forall a. Ord a => t a -> a)
-> (forall a. Num a => t a -> a)
-> (forall a. Num a => t a -> a)
-> Foldable t
product :: forall a. Num a => Component a -> a
$cproduct :: forall a. Num a => Component a -> a
sum :: forall a. Num a => Component a -> a
$csum :: forall a. Num a => Component a -> a
minimum :: forall a. Ord a => Component a -> a
$cminimum :: forall a. Ord a => Component a -> a
maximum :: forall a. Ord a => Component a -> a
$cmaximum :: forall a. Ord a => Component a -> a
elem :: forall a. Eq a => a -> Component a -> Bool
$celem :: forall a. Eq a => a -> Component a -> Bool
length :: forall a. Component a -> Int
$clength :: forall a. Component a -> Int
null :: forall a. Component a -> Bool
$cnull :: forall a. Component a -> Bool
toList :: forall a. Component a -> [a]
$ctoList :: forall a. Component a -> [a]
foldl1 :: forall a. (a -> a -> a) -> Component a -> a
$cfoldl1 :: forall a. (a -> a -> a) -> Component a -> a
foldr1 :: forall a. (a -> a -> a) -> Component a -> a
$cfoldr1 :: forall a. (a -> a -> a) -> Component a -> a
foldl' :: forall b a. (b -> a -> b) -> b -> Component a -> b
$cfoldl' :: forall b a. (b -> a -> b) -> b -> Component a -> b
foldl :: forall b a. (b -> a -> b) -> b -> Component a -> b
$cfoldl :: forall b a. (b -> a -> b) -> b -> Component a -> b
foldr' :: forall a b. (a -> b -> b) -> b -> Component a -> b
$cfoldr' :: forall a b. (a -> b -> b) -> b -> Component a -> b
foldr :: forall a b. (a -> b -> b) -> b -> Component a -> b
$cfoldr :: forall a b. (a -> b -> b) -> b -> Component a -> b
foldMap' :: forall m a. Monoid m => (a -> m) -> Component a -> m
$cfoldMap' :: forall m a. Monoid m => (a -> m) -> Component a -> m
foldMap :: forall m a. Monoid m => (a -> m) -> Component a -> m
$cfoldMap :: forall m a. Monoid m => (a -> m) -> Component a -> m
fold :: forall m. Monoid m => Component m -> m
$cfold :: forall m. Monoid m => Component m -> m
Foldable, Functor Component
Foldable Component
forall (t :: * -> *).
Functor t
-> Foldable t
-> (forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> t a -> f (t b))
-> (forall (f :: * -> *) a. Applicative f => t (f a) -> f (t a))
-> (forall (m :: * -> *) a b.
Monad m =>
(a -> m b) -> t a -> m (t b))
-> (forall (m :: * -> *) a. Monad m => t (m a) -> m (t a))
-> Traversable t
forall (m :: * -> *) a.
Monad m =>
Component (m a) -> m (Component a)
forall (f :: * -> *) a.
Applicative f =>
Component (f a) -> f (Component a)
forall (m :: * -> *) a b.
Monad m =>
(a -> m b) -> Component a -> m (Component b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> Component a -> f (Component b)
sequence :: forall (m :: * -> *) a.
Monad m =>
Component (m a) -> m (Component a)
$csequence :: forall (m :: * -> *) a.
Monad m =>
Component (m a) -> m (Component a)
mapM :: forall (m :: * -> *) a b.
Monad m =>
(a -> m b) -> Component a -> m (Component b)
$cmapM :: forall (m :: * -> *) a b.
Monad m =>
(a -> m b) -> Component a -> m (Component b)
sequenceA :: forall (f :: * -> *) a.
Applicative f =>
Component (f a) -> f (Component a)
$csequenceA :: forall (f :: * -> *) a.
Applicative f =>
Component (f a) -> f (Component a)
traverse :: forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> Component a -> f (Component b)
$ctraverse :: forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> Component a -> f (Component b)
Traversable, forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall a x. Rep (Component a) x -> Component a
forall a x. Component a -> Rep (Component a) x
$cto :: forall a x. Rep (Component a) x -> Component a
$cfrom :: forall a x. Component a -> Rep (Component a) x
Generic, forall a. NFData a => Component a -> ()
forall a. (a -> ()) -> NFData a
rnf :: Component a -> ()
$crnf :: forall a. NFData a => Component a -> ()
NFData)
getWords :: [Component a] -> [a]
getWords :: forall a. [Component a] -> [a]
getWords = forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe forall a b. (a -> b) -> a -> b
$ \case
Word a
a -> forall a. a -> Maybe a
Just a
a
Component a
_ -> forall a. Maybe a
Nothing
splitMultipleResults :: String -> Component [a] -> [Component a]
splitMultipleResults :: forall a. String -> Component [a] -> [Component a]
splitMultipleResults String
wh (Word [a]
as) = forall a. a -> [a] -> [a]
intersperse (forall a. String -> Component a
Separator String
wh) forall a b. (a -> b) -> a -> b
$ forall a. a -> Component a
Word forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [a]
as
splitMultipleResults String
_ (Separator String
w) = [forall a. String -> Component a
Separator String
w]
splitMultipleResults String
_ (Gloss String
g) = [forall a. String -> Component a
Gloss String
g]
wordParser :: [Char] -> [String] -> ParsecT Void String Identity PWord
wordParser :: String -> [String] -> ParsecT Void String Identity PWord
wordParser String
excludes [String]
gs = forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some forall a b. (a -> b) -> a -> b
$
(Grapheme
GBoundary forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
'#')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
choice (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Grapheme
GMulti forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
gs)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (String -> Grapheme
GMulti forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a. Applicative f => a -> f a
pure forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
exclude))
where
exclude :: Char -> Bool
exclude = Bool -> Bool -> Bool
(||) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Bool
isSpace forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
excludes)
componentsParser
:: ParsecT Void String Identity a
-> ParsecT Void String Identity [Component a]
componentsParser :: forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity [Component a]
componentsParser ParsecT Void String Identity a
p = forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$
(forall a. String -> Component a
Separator forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing Char -> Bool
isSpace) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(forall a. String -> Component a
Gloss forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall {s} {e} {m :: * -> *}.
(Tokens s ~ String, Token s ~ Char, MonadParsec e s m) =>
Bool -> m String
gloss Bool
False) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(forall a. a -> Component a
Word forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void String Identity a
p)
where
gloss :: Bool -> m String
gloss Bool
returnBracketed = do
Token s
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'['
[String]
contents <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$ Bool -> m String
gloss Bool
True forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"[]"))
Token s
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
']'
forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ if Bool
returnBracketed
then Char
'[' forall a. a -> [a] -> [a]
: forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
contents forall a. [a] -> [a] -> [a]
++ String
"]"
else forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
contents
sortByDescendingLength :: [[a]] -> [[a]]
sortByDescendingLength :: forall a. [[a]] -> [[a]]
sortByDescendingLength = forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` forall a. a -> Down a
Down forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Int
length)
tokeniseWord :: [String] -> String -> Either (ParseErrorBundle String Void) PWord
tokeniseWord :: [String] -> String -> Either (ParseErrorBundle String Void) PWord
tokeniseWord (forall a. [[a]] -> [[a]]
sortByDescendingLength -> [String]
gs) = forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
parse (String -> [String] -> ParsecT Void String Identity PWord
wordParser String
"[" [String]
gs) String
""
tokeniseWords :: [String] -> String -> Either (ParseErrorBundle String Void) [Component PWord]
tokeniseWords :: [String]
-> String
-> Either (ParseErrorBundle String Void) [Component PWord]
tokeniseWords (forall a. [[a]] -> [[a]]
sortByDescendingLength -> [String]
gs) =
forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
parse (forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity [Component a]
componentsParser forall a b. (a -> b) -> a -> b
$ String -> [String] -> ParsecT Void String Identity PWord
wordParser String
"[" [String]
gs) String
""
detokeniseWords' :: (a -> String) -> [Component a] -> String
detokeniseWords' :: forall a. (a -> String) -> [Component a] -> String
detokeniseWords' a -> String
f = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap forall a b. (a -> b) -> a -> b
$ \case
Word a
gs -> a -> String
f a
gs
Separator String
w -> String
w
Gloss String
g -> Char
'['forall a. a -> [a] -> [a]
:(String
g forall a. [a] -> [a] -> [a]
++ String
"]")
detokeniseWords :: [Component PWord] -> String
detokeniseWords :: [Component PWord] -> String
detokeniseWords = forall a. (a -> String) -> [Component a] -> String
detokeniseWords' PWord -> String
concatWithBoundary
findFirstCategoriesDecl :: SoundChanges -> [String]
findFirstCategoriesDecl :: SoundChanges -> [String]
findFirstCategoriesDecl (CategoriesDeclS (CategoriesDecl PWord
gs):SoundChanges
_) =
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe
(\case Grapheme
GBoundary -> forall a. Maybe a
Nothing; GMulti String
m -> forall a. a -> Maybe a
Just String
m)
PWord
gs
findFirstCategoriesDecl (Statement
_:SoundChanges
ss) = SoundChanges -> [String]
findFirstCategoriesDecl SoundChanges
ss
findFirstCategoriesDecl [] = []
withFirstCategoriesDecl :: ([String] -> t) -> SoundChanges -> t
withFirstCategoriesDecl :: forall t. ([String] -> t) -> SoundChanges -> t
withFirstCategoriesDecl [String] -> t
tok SoundChanges
ss = [String] -> t
tok (SoundChanges -> [String]
findFirstCategoriesDecl SoundChanges
ss)