{-# LANGUAGE ConstraintKinds       #-}
{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE DeriveAnyClass        #-}
{-# LANGUAGE DeriveGeneric         #-}
{-# LANGUAGE DerivingVia           #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE GADTs                 #-}
{-# LANGUAGE LambdaCase            #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE PolyKinds             #-}
{-# LANGUAGE PatternSynonyms       #-}
{-# LANGUAGE RankNTypes            #-}
{-# LANGUAGE StandaloneDeriving    #-}
{-# LANGUAGE TypeFamilies          #-}
{-# LANGUAGE TypeOperators         #-}
{-# LANGUAGE UndecidableInstances  #-}

module Brassica.SoundChange.Types
       (
       -- * Words and graphemes
         Grapheme(..)
       , PWord
       , addBoundaries
       , removeBoundaries
       , concatWithBoundary
       -- * Lexemes
       , Lexeme(..)
       , pattern Boundary
       , LexemeType(..)
       -- * Rules
       , Rule(..)
       , Environment
       , Direction(..)
       , Flags(..)
       , defFlags
       -- * Categories and statements
       , CategoriesDecl(..)
       , Statement(..)
       , plaintext'
       , SoundChanges
       -- * Utility
       , OneOf
       ) where

import Control.DeepSeq (NFData(..))
import Data.Kind (Constraint)
import GHC.Generics (Generic)
import GHC.OldList (dropWhileEnd)
import GHC.TypeLits

-- | The constraint @OneOf a x y@ is satisfied if @a ~ x@ or @a ~ y@.
--
-- (Note: the strange @() ~ Bool@ constraint is just a simple
-- unsatisfiable constraint, so as to not give ‘non-exhaustive pattern
-- match’ errors everywhere.)
type family OneOf a x y :: Constraint where
    OneOf a a y = ()
    OneOf a x a = ()
    OneOf a b c =
        ( () ~ Bool
        , TypeError ('Text "Couldn't match type "
                     ':<>: 'ShowType a
                     ':<>: 'Text " with "
                     ':<>: 'ShowType b
                     ':<>: 'Text " or "
                     ':<>: 'ShowType c))

-- | The type of graphemes within a word.
data Grapheme
    = GMulti [Char]  -- ^ A multigraph: for instance @GMulti "a", GMulti "ch", GMulti "c̓" :: t'Grapheme'@.
    | GBoundary      -- ^ A non-letter element representing a word boundary which sound changes can manipulate
    deriving (Grapheme -> Grapheme -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Grapheme -> Grapheme -> Bool
$c/= :: Grapheme -> Grapheme -> Bool
== :: Grapheme -> Grapheme -> Bool
$c== :: Grapheme -> Grapheme -> Bool
Eq, Eq Grapheme
Grapheme -> Grapheme -> Bool
Grapheme -> Grapheme -> Ordering
Grapheme -> Grapheme -> Grapheme
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Grapheme -> Grapheme -> Grapheme
$cmin :: Grapheme -> Grapheme -> Grapheme
max :: Grapheme -> Grapheme -> Grapheme
$cmax :: Grapheme -> Grapheme -> Grapheme
>= :: Grapheme -> Grapheme -> Bool
$c>= :: Grapheme -> Grapheme -> Bool
> :: Grapheme -> Grapheme -> Bool
$c> :: Grapheme -> Grapheme -> Bool
<= :: Grapheme -> Grapheme -> Bool
$c<= :: Grapheme -> Grapheme -> Bool
< :: Grapheme -> Grapheme -> Bool
$c< :: Grapheme -> Grapheme -> Bool
compare :: Grapheme -> Grapheme -> Ordering
$ccompare :: Grapheme -> Grapheme -> Ordering
Ord, Int -> Grapheme -> ShowS
[Grapheme] -> ShowS
Grapheme -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Grapheme] -> ShowS
$cshowList :: [Grapheme] -> ShowS
show :: Grapheme -> String
$cshow :: Grapheme -> String
showsPrec :: Int -> Grapheme -> ShowS
$cshowsPrec :: Int -> Grapheme -> ShowS
Show, forall x. Rep Grapheme x -> Grapheme
forall x. Grapheme -> Rep Grapheme x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Grapheme x -> Grapheme
$cfrom :: forall x. Grapheme -> Rep Grapheme x
Generic, Grapheme -> ()
forall a. (a -> ()) -> NFData a
rnf :: Grapheme -> ()
$crnf :: Grapheme -> ()
NFData)

-- | A word (or a subsequence of one) can be viewed as a list of
-- @Grapheme@s: e.g. Portuguese "filha" becomes
-- @["f", "i", "lh", "a"] :: 'PWord'@.
--
-- (The name 'PWord' is from ‘phonological word’, these being what a
-- SCA typically manipulates; this name was chosen to avoid a clash
-- with @Prelude.'Prelude.Word'@.)
type PWord = [Grapheme]

-- Add a 'GBoundary' at the beginning and end of the 'PWord'.
addBoundaries :: PWord -> PWord
addBoundaries :: [Grapheme] -> [Grapheme]
addBoundaries [Grapheme]
w = Grapheme
GBoundary forall a. a -> [a] -> [a]
: [Grapheme]
w forall a. [a] -> [a] -> [a]
++ [Grapheme
GBoundary]

-- Remove 'GBoundary's from the beginning and end of the 'PWord'.
removeBoundaries :: PWord -> PWord
removeBoundaries :: [Grapheme] -> [Grapheme]
removeBoundaries = forall a. (a -> Bool) -> [a] -> [a]
dropWhile (forall a. Eq a => a -> a -> Bool
==Grapheme
GBoundary) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
dropWhileEnd (forall a. Eq a => a -> a -> Bool
==Grapheme
GBoundary)

-- | Render a 'PWord' as a 'String'. Very much like 'concat', but
-- treating 'GBoundary's specially. Word-external boundaries are
-- deleted, while word-internal boundaries are converted to @"#"@.
concatWithBoundary :: PWord -> String
concatWithBoundary :: [Grapheme] -> String
concatWithBoundary = [Grapheme] -> String
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Grapheme] -> [Grapheme]
removeBoundaries
  where
    go :: [Grapheme] -> String
go = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap forall a b. (a -> b) -> a -> b
$ \case
        GMulti String
g -> String
g
        Grapheme
GBoundary -> String
"#"

-- | The part of a 'Rule' in which a 'Lexeme' may occur: either the
-- target, the replacement or the environment.
data LexemeType = Target | Replacement | Env

-- | A 'Lexeme' is the smallest part of a sound change. Both matches
-- and replacements are made up of 'Lexeme's: the phantom type
-- variable specifies where each different variety of 'Lexeme' may
-- occur.
data Lexeme (a :: LexemeType) where
    -- | In Brassica sound-change syntax, one or more letters without intervening whitespace,
    -- or a word boundary specified as @#@
    Grapheme :: Grapheme -> Lexeme a
    -- | In Brassica sound-change syntax, delimited by square brackets
    Category :: [Grapheme] -> Lexeme a
    -- | In Brassica sound-change syntax, delimited by parentheses
    Optional :: [Lexeme a] -> Lexeme a
    -- | In Brassica sound-change syntax, specified as @\@
    Metathesis :: Lexeme 'Replacement
    -- | In Brassica sound-change syntax, specified as @>@
    Geminate :: Lexeme a
    -- | In Brassica sound-change syntax, specified as @^@ before another 'Lexeme'
    Wildcard :: OneOf a 'Target 'Env => Lexeme a -> Lexeme a
    -- | In Brassica sound-change syntax, specified as @*@ after another 'Lexeme'
    Kleene   :: OneOf a 'Target 'Env => Lexeme a -> Lexeme a
    -- | In Brassica sound-change syntax, specified as @~@
    Discard  :: Lexeme 'Replacement
    -- | In Brassica sound-change syntax, specified as \@i before a category
    Backreference :: OneOf a 'Target 'Replacement => Int -> [Grapheme] -> Lexeme a
    -- | In Brassica sound-change syntax, specified as \@? before a category
    Multiple :: [Grapheme] -> Lexeme 'Replacement

-- | A 'Lexeme' matching a single word boundary, specified as @#@ in Brassica syntax.
pattern Boundary :: Lexeme a
pattern $bBoundary :: forall (a :: LexemeType). Lexeme a
$mBoundary :: forall {r} {a :: LexemeType}.
Lexeme a -> ((# #) -> r) -> ((# #) -> r) -> r
Boundary = Grapheme GBoundary

deriving instance Show (Lexeme a)

instance NFData (Lexeme a) where
    rnf :: Lexeme a -> ()
rnf (Grapheme Grapheme
g) = forall a. NFData a => a -> ()
rnf Grapheme
g
    rnf (Category [Grapheme]
cs) = forall a. NFData a => a -> ()
rnf [Grapheme]
cs
    rnf (Optional [Lexeme a]
ls) = forall a. NFData a => a -> ()
rnf [Lexeme a]
ls
    rnf Lexeme a
Metathesis = ()
    rnf Lexeme a
Geminate = ()
    rnf (Wildcard Lexeme a
l) = forall a. NFData a => a -> ()
rnf Lexeme a
l
    rnf (Kleene Lexeme a
l) = forall a. NFData a => a -> ()
rnf Lexeme a
l
    rnf Lexeme a
Discard = ()
    rnf (Backreference Int
i [Grapheme]
l) = seq :: forall a b. a -> b -> b
seq Int
i forall a b. (a -> b) -> a -> b
$ forall a. NFData a => a -> ()
rnf [Grapheme]
l
    rnf (Multiple [Grapheme]
l) = forall a. NFData a => a -> ()
rnf [Grapheme]
l

-- | An 'Environment' is a tuple of @(before, after)@ components,
-- corresponding to a ‘/ before _ after’ component of a sound change.
--
-- Note that an empty environment is just @([], [])@.
type Environment = ([Lexeme 'Env], [Lexeme 'Env])

-- | Specifies application direction of rule — either left-to-right or right-to-left.
data Direction = LTR | RTL
    deriving (Direction -> Direction -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Direction -> Direction -> Bool
$c/= :: Direction -> Direction -> Bool
== :: Direction -> Direction -> Bool
$c== :: Direction -> Direction -> Bool
Eq, Int -> Direction -> ShowS
[Direction] -> ShowS
Direction -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Direction] -> ShowS
$cshowList :: [Direction] -> ShowS
show :: Direction -> String
$cshow :: Direction -> String
showsPrec :: Int -> Direction -> ShowS
$cshowsPrec :: Int -> Direction -> ShowS
Show, forall x. Rep Direction x -> Direction
forall x. Direction -> Rep Direction x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Direction x -> Direction
$cfrom :: forall x. Direction -> Rep Direction x
Generic, Direction -> ()
forall a. (a -> ()) -> NFData a
rnf :: Direction -> ()
$crnf :: Direction -> ()
NFData)

-- | Flags which can be enabled, disabled or altered on a 'Rule' to
-- change how it is applied.
data Flags = Flags
  { Flags -> Bool
highlightChanges :: Bool
  , Flags -> Direction
applyDirection   :: Direction
  , Flags -> Bool
applyOnceOnly    :: Bool
  , Flags -> Bool
sporadic         :: Bool
  } deriving (Int -> Flags -> ShowS
[Flags] -> ShowS
Flags -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Flags] -> ShowS
$cshowList :: [Flags] -> ShowS
show :: Flags -> String
$cshow :: Flags -> String
showsPrec :: Int -> Flags -> ShowS
$cshowsPrec :: Int -> Flags -> ShowS
Show, forall x. Rep Flags x -> Flags
forall x. Flags -> Rep Flags x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Flags x -> Flags
$cfrom :: forall x. Flags -> Rep Flags x
Generic, Flags -> ()
forall a. (a -> ()) -> NFData a
rnf :: Flags -> ()
$crnf :: Flags -> ()
NFData)

-- | A default selection of flags which are appropriate for most
-- rules:
--
-- @
-- 'defFlags' = 'Flags'
--     { 'highlightChanges' = 'True'
--     , 'applyDirection' = 'LTR'
--     , 'applyOnceOnly' = 'False'
--     , 'sporadic' = 'False'
--     }
-- @
--
-- That is: highlight changes, apply from left to right, apply
-- repeatedly, and don’t apply sporadically.
defFlags :: Flags
defFlags :: Flags
defFlags = Flags
    { highlightChanges :: Bool
highlightChanges = Bool
True
    , applyDirection :: Direction
applyDirection = Direction
LTR
    , applyOnceOnly :: Bool
applyOnceOnly = Bool
False
    , sporadic :: Bool
sporadic = Bool
False
    }

-- | A single sound change rule: in Brassica sound-change syntax with all elements specified,
-- @-flags target / replacement \/ environment1 | environment2 | … \/ exception@.
-- (And usually the 'plaintext' of the rule will contain a 'String' resembling that pattern.)
data Rule = Rule
  { Rule -> [Lexeme 'Target]
target      :: [Lexeme 'Target]
  , Rule -> [Lexeme 'Replacement]
replacement :: [Lexeme 'Replacement]
  , Rule -> [Environment]
environment :: [Environment]
  , Rule -> Maybe Environment
exception   :: Maybe Environment
  , Rule -> Flags
flags       :: Flags
  , Rule -> String
plaintext   :: String
  } deriving (Int -> Rule -> ShowS
[Rule] -> ShowS
Rule -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Rule] -> ShowS
$cshowList :: [Rule] -> ShowS
show :: Rule -> String
$cshow :: Rule -> String
showsPrec :: Int -> Rule -> ShowS
$cshowsPrec :: Int -> Rule -> ShowS
Show, forall x. Rep Rule x -> Rule
forall x. Rule -> Rep Rule x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Rule x -> Rule
$cfrom :: forall x. Rule -> Rep Rule x
Generic, Rule -> ()
forall a. (a -> ()) -> NFData a
rnf :: Rule -> ()
$crnf :: Rule -> ()
NFData)

-- | Corresponds to a category declaration in a set of sound
-- changes. Category declarations are mostly desugared away by the
-- parser, but for rule application we still need to be able to filter
-- out all unknown t'Grapheme's; thus, a 'CategoriesDecl' lists the
-- t'Grapheme's which are available at a given point.
newtype CategoriesDecl = CategoriesDecl { CategoriesDecl -> [Grapheme]
graphemes :: [Grapheme] }
  deriving (Int -> CategoriesDecl -> ShowS
[CategoriesDecl] -> ShowS
CategoriesDecl -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CategoriesDecl] -> ShowS
$cshowList :: [CategoriesDecl] -> ShowS
show :: CategoriesDecl -> String
$cshow :: CategoriesDecl -> String
showsPrec :: Int -> CategoriesDecl -> ShowS
$cshowsPrec :: Int -> CategoriesDecl -> ShowS
Show, forall x. Rep CategoriesDecl x -> CategoriesDecl
forall x. CategoriesDecl -> Rep CategoriesDecl x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep CategoriesDecl x -> CategoriesDecl
$cfrom :: forall x. CategoriesDecl -> Rep CategoriesDecl x
Generic, CategoriesDecl -> ()
forall a. (a -> ()) -> NFData a
rnf :: CategoriesDecl -> ()
$crnf :: CategoriesDecl -> ()
NFData)

-- | A 'Statement' can be either a single sound change rule, or a
-- category declaration.
data Statement = RuleS Rule | CategoriesDeclS CategoriesDecl
    deriving (Int -> Statement -> ShowS
[Statement] -> ShowS
Statement -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Statement] -> ShowS
$cshowList :: [Statement] -> ShowS
show :: Statement -> String
$cshow :: Statement -> String
showsPrec :: Int -> Statement -> ShowS
$cshowsPrec :: Int -> Statement -> ShowS
Show, forall x. Rep Statement x -> Statement
forall x. Statement -> Rep Statement x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Statement x -> Statement
$cfrom :: forall x. Statement -> Rep Statement x
Generic, Statement -> ()
forall a. (a -> ()) -> NFData a
rnf :: Statement -> ()
$crnf :: Statement -> ()
NFData)

-- | A simple wrapper around 'plaintext' for 'Statement's. Returns
-- @"categories … end"@ for all 'CategoriesDecl' inputs.
plaintext' :: Statement -> String
plaintext' :: Statement -> String
plaintext' (RuleS Rule
r) = Rule -> String
plaintext Rule
r
plaintext' (CategoriesDeclS CategoriesDecl
_) = String
"categories … end"

-- | A set of 'SoundChanges' is simply a list of 'Statement's.
type SoundChanges = [Statement]