{-# LINE 1 "Data/Text/ICU/Break.hsc" #-}
{-# LANGUAGE BangPatterns, ForeignFunctionInterface, RecordWildCards #-}
-- |
-- Module      : Data.Text.ICU.Break
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- String breaking functions for Unicode, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
--
-- The text boundary positions are found according to the rules described in
-- Unicode Standard Annex #29, Text Boundaries, and Unicode Standard Annex
-- #14, Line Breaking Properties.  These are available at
-- <http://www.unicode.org/reports/tr14/> and
-- <http://www.unicode.org/reports/tr29/>.

module Data.Text.ICU.Break
    (
    -- * Types
      BreakIterator
    , Line(..)
    , Data.Text.ICU.Break.Word(..)
    -- * Breaking functions
    , breakCharacter
    , breakLine
    , breakSentence
    , breakWord
    , clone
    , setText
    -- * Iteration functions
    -- $indices
    , current
    , first
    , last
    , next
    , previous
    , preceding
    , following
    , isBoundary
    -- * Iterator status
    , getStatus
    , getStatuses
    -- * Locales
    , available
    ) where



import Control.DeepSeq (NFData(..))
import Control.Monad (forM)
import Data.IORef (newIORef, writeIORef)
import Data.Int (Int32)
import Data.Text (Text)
import Data.Text.ICU.Break.Types (BreakIterator(..), UBreakIterator)
import Data.Text.ICU.Error.Internal (UErrorCode, handleError)
import Data.Text.ICU.Internal (LocaleName(..), UBool, UChar, asBool, withLocaleName, TextI, UText, asUTextPtr, withUTextPtr, newICUPtr)
import Foreign.C.String (CString, peekCString)
import Foreign.C.Types (CInt(..))
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Array (allocaArray, peekArray)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (FunPtr, Ptr, nullPtr)
import Prelude hiding (last)
import System.IO.Unsafe (unsafePerformIO)

-- $indices
--
-- /Important note/: All of the indices accepted and returned by
-- functions in this module are offsets into the raw UTF-16 text
-- array, /not/ a count of code points.

-- | Line break status.
data Line = Soft                -- ^ A soft line break is a position at
                                -- which a line break is acceptable, but not
                                -- required.
          | Hard
            deriving (Line -> Line -> Bool
(Line -> Line -> Bool) -> (Line -> Line -> Bool) -> Eq Line
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Line -> Line -> Bool
$c/= :: Line -> Line -> Bool
== :: Line -> Line -> Bool
$c== :: Line -> Line -> Bool
Eq, Int -> Line -> ShowS
[Line] -> ShowS
Line -> String
(Int -> Line -> ShowS)
-> (Line -> String) -> ([Line] -> ShowS) -> Show Line
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Line] -> ShowS
$cshowList :: [Line] -> ShowS
show :: Line -> String
$cshow :: Line -> String
showsPrec :: Int -> Line -> ShowS
$cshowsPrec :: Int -> Line -> ShowS
Show, Int -> Line
Line -> Int
Line -> [Line]
Line -> Line
Line -> Line -> [Line]
Line -> Line -> Line -> [Line]
(Line -> Line)
-> (Line -> Line)
-> (Int -> Line)
-> (Line -> Int)
-> (Line -> [Line])
-> (Line -> Line -> [Line])
-> (Line -> Line -> [Line])
-> (Line -> Line -> Line -> [Line])
-> Enum Line
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: Line -> Line -> Line -> [Line]
$cenumFromThenTo :: Line -> Line -> Line -> [Line]
enumFromTo :: Line -> Line -> [Line]
$cenumFromTo :: Line -> Line -> [Line]
enumFromThen :: Line -> Line -> [Line]
$cenumFromThen :: Line -> Line -> [Line]
enumFrom :: Line -> [Line]
$cenumFrom :: Line -> [Line]
fromEnum :: Line -> Int
$cfromEnum :: Line -> Int
toEnum :: Int -> Line
$ctoEnum :: Int -> Line
pred :: Line -> Line
$cpred :: Line -> Line
succ :: Line -> Line
$csucc :: Line -> Line
Enum)

instance NFData Line where
    rnf :: Line -> ()
rnf !Line
_ = ()

-- | Word break status.
data Word = Uncategorized       -- ^ A \"word\" that does not fit into another
                                -- category.  Includes spaces and most
                                -- punctuation.
          | Number              -- ^ A word that appears to be a number.
          | Letter              -- ^ A word containing letters, excluding
                                -- hiragana, katakana or ideographic
                                -- characters.
          | Kana                -- ^ A word containing kana characters.
          | Ideograph           -- ^ A word containing ideographic characters.
            deriving (Word -> Word -> Bool
(Word -> Word -> Bool) -> (Word -> Word -> Bool) -> Eq Word
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Word -> Word -> Bool
$c/= :: Word -> Word -> Bool
== :: Word -> Word -> Bool
$c== :: Word -> Word -> Bool
Eq, Int -> Word -> ShowS
[Word] -> ShowS
Word -> String
(Int -> Word -> ShowS)
-> (Word -> String) -> ([Word] -> ShowS) -> Show Word
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Word] -> ShowS
$cshowList :: [Word] -> ShowS
show :: Word -> String
$cshow :: Word -> String
showsPrec :: Int -> Word -> ShowS
$cshowsPrec :: Int -> Word -> ShowS
Show, Int -> Word
Word -> Int
Word -> [Word]
Word -> Word
Word -> Word -> [Word]
Word -> Word -> Word -> [Word]
(Word -> Word)
-> (Word -> Word)
-> (Int -> Word)
-> (Word -> Int)
-> (Word -> [Word])
-> (Word -> Word -> [Word])
-> (Word -> Word -> [Word])
-> (Word -> Word -> Word -> [Word])
-> Enum Word
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: Word -> Word -> Word -> [Word]
$cenumFromThenTo :: Word -> Word -> Word -> [Word]
enumFromTo :: Word -> Word -> [Word]
$cenumFromTo :: Word -> Word -> [Word]
enumFromThen :: Word -> Word -> [Word]
$cenumFromThen :: Word -> Word -> [Word]
enumFrom :: Word -> [Word]
$cenumFrom :: Word -> [Word]
fromEnum :: Word -> Int
$cfromEnum :: Word -> Int
toEnum :: Int -> Word
$ctoEnum :: Int -> Word
pred :: Word -> Word
$cpred :: Word -> Word
succ :: Word -> Word
$csucc :: Word -> Word
Enum)

instance NFData Data.Text.ICU.Break.Word where
    rnf :: Word -> ()
rnf !Word
_ = ()

-- | Break a string on character boundaries.
--
-- Character boundary analysis identifies the boundaries of \"Extended
-- Grapheme Clusters\", which are groupings of codepoints that should be
-- treated as character-like units for many text operations.  Please see
-- Unicode Standard Annex #29, Unicode Text Segmentation,
-- <http://www.unicode.org/reports/tr29/> for additional information on
-- grapheme clusters and guidelines on their use.
breakCharacter :: LocaleName -> Text -> IO (BreakIterator ())
breakCharacter :: LocaleName -> Text -> IO (BreakIterator ())
breakCharacter = UBreakIteratorType
-> (Int32 -> ()) -> LocaleName -> Text -> IO (BreakIterator ())
forall a.
UBreakIteratorType
-> (Int32 -> a) -> LocaleName -> Text -> IO (BreakIterator a)
open (UBreakIteratorType
0) (() -> Int32 -> ()
forall a b. a -> b -> a
const ())
{-# LINE 110 "Data/Text/ICU/Break.hsc" #-}

-- | Break a string on line boundaries.
--
-- Line boundary analysis determines where a text string can be broken when
-- line wrapping. The mechanism correctly handles punctuation and hyphenated
-- words.
breakLine :: LocaleName -> Text -> IO (BreakIterator Line)
breakLine :: LocaleName -> Text -> IO (BreakIterator Line)
breakLine = UBreakIteratorType
-> (Int32 -> Line) -> LocaleName -> Text -> IO (BreakIterator Line)
forall a.
UBreakIteratorType
-> (Int32 -> a) -> LocaleName -> Text -> IO (BreakIterator a)
open (UBreakIteratorType
2) Int32 -> Line
forall {a}. (Ord a, Num a, Show a) => a -> Line
asLine
{-# LINE 118 "Data/Text/ICU/Break.hsc" #-}
  where
    asLine :: a -> Line
asLine a
i
      | a
i a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< (a
100) = Line
Soft
{-# LINE 121 "Data/Text/ICU/Break.hsc" #-}
      | i < (200) = Hard
{-# LINE 122 "Data/Text/ICU/Break.hsc" #-}
      | otherwise = error $ "unknown line break status " ++ show i

-- | Break a string on sentence boundaries.
--
-- Sentence boundary analysis allows selection with correct interpretation
-- of periods within numbers and abbreviations, and trailing punctuation
-- marks such as quotation marks and parentheses.
breakSentence :: LocaleName -> Text -> IO (BreakIterator ())
breakSentence :: LocaleName -> Text -> IO (BreakIterator ())
breakSentence = UBreakIteratorType
-> (Int32 -> ()) -> LocaleName -> Text -> IO (BreakIterator ())
forall a.
UBreakIteratorType
-> (Int32 -> a) -> LocaleName -> Text -> IO (BreakIterator a)
open (UBreakIteratorType
3) (() -> Int32 -> ()
forall a b. a -> b -> a
const ())
{-# LINE 131 "Data/Text/ICU/Break.hsc" #-}

-- | Break a string on word boundaries.
--
-- Word boundary analysis is used by search and replace functions, as well
-- as within text editing applications that allow the user to select words
-- with a double click. Word selection provides correct interpretation of
-- punctuation marks within and following words. Characters that are not
-- part of a word, such as symbols or punctuation marks, have word breaks on
-- both sides.
breakWord :: LocaleName -> Text -> IO (BreakIterator Data.Text.ICU.Break.Word)
breakWord :: LocaleName -> Text -> IO (BreakIterator Word)
breakWord = UBreakIteratorType
-> (Int32 -> Word) -> LocaleName -> Text -> IO (BreakIterator Word)
forall a.
UBreakIteratorType
-> (Int32 -> a) -> LocaleName -> Text -> IO (BreakIterator a)
open (UBreakIteratorType
1) Int32 -> Word
forall {a}. (Ord a, Num a, Show a) => a -> Word
asWord
{-# LINE 142 "Data/Text/ICU/Break.hsc" #-}
  where
    asWord :: a -> Word
asWord a
i
      | a
i a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< (a
100) = Word
Uncategorized
{-# LINE 145 "Data/Text/ICU/Break.hsc" #-}
      | i < (200) = Number
{-# LINE 146 "Data/Text/ICU/Break.hsc" #-}
      | i < (300) = Letter
{-# LINE 147 "Data/Text/ICU/Break.hsc" #-}
      | i < (400) = Kana
{-# LINE 148 "Data/Text/ICU/Break.hsc" #-}
      | i < (500) = Ideograph
{-# LINE 149 "Data/Text/ICU/Break.hsc" #-}
      | otherwise = error $ "unknown word break status " ++ show i

-- | Create a new 'BreakIterator' for locating text boundaries in the
-- specified locale.
open :: UBreakIteratorType -> (Int32 -> a) -> LocaleName -> Text
     -> IO (BreakIterator a)
open :: forall a.
UBreakIteratorType
-> (Int32 -> a) -> LocaleName -> Text -> IO (BreakIterator a)
open UBreakIteratorType
brk Int32 -> a
f LocaleName
loc Text
t = LocaleName
-> (CString -> IO (BreakIterator a)) -> IO (BreakIterator a)
forall a. LocaleName -> (CString -> IO a) -> IO a
withLocaleName LocaleName
loc ((CString -> IO (BreakIterator a)) -> IO (BreakIterator a))
-> (CString -> IO (BreakIterator a)) -> IO (BreakIterator a)
forall a b. (a -> b) -> a -> b
$ \CString
locale -> do
  IORef UTextPtr
r <- UTextPtr -> IO (IORef UTextPtr)
forall a. a -> IO (IORef a)
newIORef UTextPtr
forall a. HasCallStack => a
undefined
  BreakIterator a
b <- (ForeignPtr UBreakIterator -> BreakIterator a)
-> FinalizerPtr UBreakIterator
-> IO (Ptr UBreakIterator)
-> IO (BreakIterator a)
forall a i.
(ForeignPtr a -> i) -> FinalizerPtr a -> IO (Ptr a) -> IO i
newICUPtr (IORef UTextPtr
-> (Int32 -> a) -> ForeignPtr UBreakIterator -> BreakIterator a
forall a.
IORef UTextPtr
-> (Int32 -> a) -> ForeignPtr UBreakIterator -> BreakIterator a
BR IORef UTextPtr
r Int32 -> a
f) FinalizerPtr UBreakIterator
ubrk_close (IO (Ptr UBreakIterator) -> IO (BreakIterator a))
-> IO (Ptr UBreakIterator) -> IO (BreakIterator a)
forall a b. (a -> b) -> a -> b
$
    (Ptr UBreakIteratorType -> IO (Ptr UBreakIterator))
-> IO (Ptr UBreakIterator)
forall a. (Ptr UBreakIteratorType -> IO a) -> IO a
handleError ((Ptr UBreakIteratorType -> IO (Ptr UBreakIterator))
 -> IO (Ptr UBreakIterator))
-> (Ptr UBreakIteratorType -> IO (Ptr UBreakIterator))
-> IO (Ptr UBreakIterator)
forall a b. (a -> b) -> a -> b
$ UBreakIteratorType
-> CString
-> Ptr UChar
-> Int32
-> Ptr UBreakIteratorType
-> IO (Ptr UBreakIterator)
ubrk_open UBreakIteratorType
brk CString
locale Ptr UChar
forall a. Ptr a
nullPtr Int32
0
  BreakIterator a -> Text -> IO ()
forall a. BreakIterator a -> Text -> IO ()
setText BreakIterator a
b Text
t
  BreakIterator a -> IO (BreakIterator a)
forall (m :: * -> *) a. Monad m => a -> m a
return BreakIterator a
b

-- | Point an existing 'BreakIterator' at a new piece of text.
setText :: BreakIterator a -> Text -> IO ()
setText :: forall a. BreakIterator a -> Text -> IO ()
setText BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
..} Text
t = do
  UTextPtr
fp <- Text -> IO UTextPtr
asUTextPtr Text
t
  UTextPtr -> (Ptr UText -> IO ()) -> IO ()
forall a. UTextPtr -> (Ptr UText -> IO a) -> IO a
withUTextPtr UTextPtr
fp ((Ptr UText -> IO ()) -> IO ()) -> (Ptr UText -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \ Ptr UText
ptr -> do
    ForeignPtr UBreakIterator -> (Ptr UBreakIterator -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter ((Ptr UBreakIterator -> IO ()) -> IO ())
-> (Ptr UBreakIterator -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr UBreakIterator
p -> (Ptr UBreakIteratorType -> IO ()) -> IO ()
forall a. (Ptr UBreakIteratorType -> IO a) -> IO a
handleError ((Ptr UBreakIteratorType -> IO ()) -> IO ())
-> (Ptr UBreakIteratorType -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ Ptr UBreakIterator -> Ptr UText -> Ptr UBreakIteratorType -> IO ()
ubrk_setUText Ptr UBreakIterator
p Ptr UText
ptr
    IORef UTextPtr -> UTextPtr -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef UTextPtr
brText UTextPtr
fp

-- | Thread safe cloning operation.  This is substantially faster than
-- creating a new 'BreakIterator' from scratch.
clone :: BreakIterator a -> IO (BreakIterator a)
clone :: forall a. BreakIterator a -> IO (BreakIterator a)
clone BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
..} = (ForeignPtr UBreakIterator -> BreakIterator a)
-> FinalizerPtr UBreakIterator
-> IO (Ptr UBreakIterator)
-> IO (BreakIterator a)
forall a i.
(ForeignPtr a -> i) -> FinalizerPtr a -> IO (Ptr a) -> IO i
newICUPtr (IORef UTextPtr
-> (Int32 -> a) -> ForeignPtr UBreakIterator -> BreakIterator a
forall a.
IORef UTextPtr
-> (Int32 -> a) -> ForeignPtr UBreakIterator -> BreakIterator a
BR IORef UTextPtr
brText Int32 -> a
brStatus) FinalizerPtr UBreakIterator
ubrk_close (IO (Ptr UBreakIterator) -> IO (BreakIterator a))
-> IO (Ptr UBreakIterator) -> IO (BreakIterator a)
forall a b. (a -> b) -> a -> b
$
  ForeignPtr UBreakIterator
-> (Ptr UBreakIterator -> IO (Ptr UBreakIterator))
-> IO (Ptr UBreakIterator)
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter ((Ptr UBreakIterator -> IO (Ptr UBreakIterator))
 -> IO (Ptr UBreakIterator))
-> (Ptr UBreakIterator -> IO (Ptr UBreakIterator))
-> IO (Ptr UBreakIterator)
forall a b. (a -> b) -> a -> b
$ \Ptr UBreakIterator
p ->
    Int32
-> (Ptr Int32 -> IO (Ptr UBreakIterator))
-> IO (Ptr UBreakIterator)
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with Int32
1 ((Ptr Int32 -> IO (Ptr UBreakIterator)) -> IO (Ptr UBreakIterator))
-> (Ptr Int32 -> IO (Ptr UBreakIterator))
-> IO (Ptr UBreakIterator)
forall a b. (a -> b) -> a -> b
$ (Ptr UBreakIteratorType -> IO (Ptr UBreakIterator))
-> IO (Ptr UBreakIterator)
forall a. (Ptr UBreakIteratorType -> IO a) -> IO a
handleError ((Ptr UBreakIteratorType -> IO (Ptr UBreakIterator))
 -> IO (Ptr UBreakIterator))
-> (Ptr Int32 -> Ptr UBreakIteratorType -> IO (Ptr UBreakIterator))
-> Ptr Int32
-> IO (Ptr UBreakIterator)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr UBreakIterator
-> Ptr Any
-> Ptr Int32
-> Ptr UBreakIteratorType
-> IO (Ptr UBreakIterator)
forall a.
Ptr UBreakIterator
-> Ptr a
-> Ptr Int32
-> Ptr UBreakIteratorType
-> IO (Ptr UBreakIterator)
ubrk_safeClone Ptr UBreakIterator
p Ptr Any
forall a. Ptr a
nullPtr

asIndex :: (Ptr UBreakIterator -> IO Int32) -> BreakIterator a -> IO (Maybe TextI)
asIndex :: forall a.
(Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
asIndex Ptr UBreakIterator -> IO Int32
act BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
..} = do
  Int32
i <- ForeignPtr UBreakIterator
-> (Ptr UBreakIterator -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter Ptr UBreakIterator -> IO Int32
act
  Maybe TextI -> IO (Maybe TextI)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe TextI -> IO (Maybe TextI))
-> Maybe TextI -> IO (Maybe TextI)
forall a b. (a -> b) -> a -> b
$! if Int32
i Int32 -> Int32 -> Bool
forall a. Eq a => a -> a -> Bool
== (-Int32
1)
{-# LINE 181 "Data/Text/ICU/Break.hsc" #-}
            then Maybe TextI
forall a. Maybe a
Nothing
            else TextI -> Maybe TextI
forall a. a -> Maybe a
Just (TextI -> Maybe TextI) -> TextI -> Maybe TextI
forall a b. (a -> b) -> a -> b
$! Int32 -> TextI
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
i

-- | Reset the breaker to the beginning of the text to be scanned.
first :: BreakIterator a -> IO TextI
first :: forall a. BreakIterator a -> IO TextI
first BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
..} = Int32 -> TextI
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> TextI) -> IO Int32 -> IO TextI
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ForeignPtr UBreakIterator
-> (Ptr UBreakIterator -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter Ptr UBreakIterator -> IO Int32
ubrk_first

-- | Reset the breaker to the end of the text to be scanned.
last :: BreakIterator a -> IO TextI
last :: forall a. BreakIterator a -> IO TextI
last BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
..} = Int32 -> TextI
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> TextI) -> IO Int32 -> IO TextI
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ForeignPtr UBreakIterator
-> (Ptr UBreakIterator -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter Ptr UBreakIterator -> IO Int32
ubrk_last

-- | Advance the iterator and break at the text boundary that follows the
-- current text boundary.
next :: BreakIterator a -> IO (Maybe TextI)
next :: forall a. BreakIterator a -> IO (Maybe TextI)
next = (Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
forall a.
(Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
asIndex Ptr UBreakIterator -> IO Int32
ubrk_next

-- | Advance the iterator and break at the text boundary that precedes the
-- current text boundary.
previous :: BreakIterator a -> IO (Maybe TextI)
previous :: forall a. BreakIterator a -> IO (Maybe TextI)
previous = (Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
forall a.
(Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
asIndex Ptr UBreakIterator -> IO Int32
ubrk_previous

-- | Determine the text boundary preceding the specified offset.
preceding :: BreakIterator a -> Int -> IO (Maybe TextI)
preceding :: forall a. BreakIterator a -> Int -> IO (Maybe TextI)
preceding BreakIterator a
bi Int
i = (Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
forall a.
(Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
asIndex ((Ptr UBreakIterator -> Int32 -> IO Int32)
-> Int32 -> Ptr UBreakIterator -> IO Int32
forall a b c. (a -> b -> c) -> b -> a -> c
flip Ptr UBreakIterator -> Int32 -> IO Int32
ubrk_preceding (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i)) BreakIterator a
bi

-- | Determine the text boundary following the specified offset.
following :: BreakIterator a -> Int -> IO (Maybe TextI)
following :: forall a. BreakIterator a -> Int -> IO (Maybe TextI)
following BreakIterator a
bi Int
i = (Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
forall a.
(Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
asIndex ((Ptr UBreakIterator -> Int32 -> IO Int32)
-> Int32 -> Ptr UBreakIterator -> IO Int32
forall a b c. (a -> b -> c) -> b -> a -> c
flip Ptr UBreakIterator -> Int32 -> IO Int32
ubrk_following (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i)) BreakIterator a
bi

-- | Return the character index most recently returned by 'next',
-- 'previous', 'first', or 'last'.
current :: BreakIterator a -> IO (Maybe TextI)
current :: forall a. BreakIterator a -> IO (Maybe TextI)
current = (Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
forall a.
(Ptr UBreakIterator -> IO Int32)
-> BreakIterator a -> IO (Maybe TextI)
asIndex Ptr UBreakIterator -> IO Int32
ubrk_current

-- | Return the status from the break rule that determined the most recently
-- returned break position.  For rules that do not specify a status, a
-- default value of @()@ is returned.
getStatus :: BreakIterator a -> IO a
getStatus :: forall a. BreakIterator a -> IO a
getStatus BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
..} = Int32 -> a
brStatus (Int32 -> a) -> IO Int32 -> IO a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ForeignPtr UBreakIterator
-> (Ptr UBreakIterator -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter Ptr UBreakIterator -> IO Int32
ubrk_getRuleStatus

-- | Return statuses from all of the break rules that determined the most
-- recently returned break position.
getStatuses :: BreakIterator a -> IO [a]
getStatuses :: forall a. BreakIterator a -> IO [a]
getStatuses BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
..} =
  ForeignPtr UBreakIterator
-> (Ptr UBreakIterator -> IO [a]) -> IO [a]
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter ((Ptr UBreakIterator -> IO [a]) -> IO [a])
-> (Ptr UBreakIterator -> IO [a]) -> IO [a]
forall a b. (a -> b) -> a -> b
$ \Ptr UBreakIterator
brk -> do
    Int32
n <- (Ptr UBreakIteratorType -> IO Int32) -> IO Int32
forall a. (Ptr UBreakIteratorType -> IO a) -> IO a
handleError ((Ptr UBreakIteratorType -> IO Int32) -> IO Int32)
-> (Ptr UBreakIteratorType -> IO Int32) -> IO Int32
forall a b. (a -> b) -> a -> b
$ Ptr UBreakIterator
-> Ptr Int32 -> Int32 -> Ptr UBreakIteratorType -> IO Int32
ubrk_getRuleStatusVec Ptr UBreakIterator
brk Ptr Int32
forall a. Ptr a
nullPtr Int32
0
    Int -> (Ptr Int32 -> IO [a]) -> IO [a]
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n) ((Ptr Int32 -> IO [a]) -> IO [a])
-> (Ptr Int32 -> IO [a]) -> IO [a]
forall a b. (a -> b) -> a -> b
$ \Ptr Int32
ptr -> do
      Int32
_ <- (Ptr UBreakIteratorType -> IO Int32) -> IO Int32
forall a. (Ptr UBreakIteratorType -> IO a) -> IO a
handleError ((Ptr UBreakIteratorType -> IO Int32) -> IO Int32)
-> (Ptr UBreakIteratorType -> IO Int32) -> IO Int32
forall a b. (a -> b) -> a -> b
$ Ptr UBreakIterator
-> Ptr Int32 -> Int32 -> Ptr UBreakIteratorType -> IO Int32
ubrk_getRuleStatusVec Ptr UBreakIterator
brk Ptr Int32
ptr Int32
n
      (Int32 -> a) -> [Int32] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map Int32 -> a
brStatus ([Int32] -> [a]) -> IO [Int32] -> IO [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Int -> Ptr Int32 -> IO [Int32]
forall a. Storable a => Int -> Ptr a -> IO [a]
peekArray (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n) Ptr Int32
ptr

-- | Determine whether the specfied position is a boundary position.
-- As a side effect, leaves the iterator pointing to the first
-- boundary position at or after the given offset.
isBoundary :: BreakIterator a -> Int -> IO Bool
isBoundary :: forall a. BreakIterator a -> Int -> IO Bool
isBoundary BR{ForeignPtr UBreakIterator
IORef UTextPtr
Int32 -> a
brIter :: ForeignPtr UBreakIterator
brStatus :: Int32 -> a
brText :: IORef UTextPtr
brIter :: forall a. BreakIterator a -> ForeignPtr UBreakIterator
brStatus :: forall a. BreakIterator a -> Int32 -> a
brText :: forall a. BreakIterator a -> IORef UTextPtr
..} Int
i = UBool -> Bool
forall a. Integral a => a -> Bool
asBool (UBool -> Bool) -> IO UBool -> IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ForeignPtr UBreakIterator
-> (Ptr UBreakIterator -> IO UBool) -> IO UBool
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr UBreakIterator
brIter ((Ptr UBreakIterator -> Int32 -> IO UBool)
-> Int32 -> Ptr UBreakIterator -> IO UBool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Ptr UBreakIterator -> Int32 -> IO UBool
ubrk_isBoundary (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i))

-- | Locales for which text breaking information is available.  A
-- 'BreakIterator' in a locale in this list will perform the correct
-- text breaking for the locale.
available :: [LocaleName]
available :: [LocaleName]
available = IO [LocaleName] -> [LocaleName]
forall a. IO a -> a
unsafePerformIO (IO [LocaleName] -> [LocaleName])
-> IO [LocaleName] -> [LocaleName]
forall a b. (a -> b) -> a -> b
$ do
  Int32
n <- IO Int32
ubrk_countAvailable
  [Int32] -> (Int32 -> IO LocaleName) -> IO [LocaleName]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Int32
0..Int32
nInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
-Int32
1] ((Int32 -> IO LocaleName) -> IO [LocaleName])
-> (Int32 -> IO LocaleName) -> IO [LocaleName]
forall a b. (a -> b) -> a -> b
$ \Int32
i -> Int32 -> IO CString
ubrk_getAvailable Int32
i IO CString -> (CString -> IO LocaleName) -> IO LocaleName
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (String -> LocaleName) -> IO String -> IO LocaleName
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> LocaleName
Locale (IO String -> IO LocaleName)
-> (CString -> IO String) -> CString -> IO LocaleName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CString -> IO String
peekCString
{-# NOINLINE available #-}

type UBreakIteratorType = CInt

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_open" ubrk_open
    :: UBreakIteratorType -> CString -> Ptr UChar -> Int32 -> Ptr UErrorCode
    -> IO (Ptr UBreakIterator)

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_setUText" ubrk_setUText
    :: Ptr UBreakIterator -> Ptr UText -> Ptr UErrorCode
    -> IO ()

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_safeClone" ubrk_safeClone
    :: Ptr UBreakIterator -> Ptr a -> Ptr Int32 -> Ptr UErrorCode
    -> IO (Ptr UBreakIterator)

foreign import ccall unsafe "hs_text_icu.h &__hs_ubrk_close" ubrk_close
    :: FunPtr (Ptr UBreakIterator -> IO ())

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_current" ubrk_current
    :: Ptr UBreakIterator -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_first" ubrk_first
    :: Ptr UBreakIterator -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_last" ubrk_last
    :: Ptr UBreakIterator -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_next" ubrk_next
    :: Ptr UBreakIterator -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_previous" ubrk_previous
    :: Ptr UBreakIterator -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_preceding" ubrk_preceding
    :: Ptr UBreakIterator -> Int32 -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_following" ubrk_following
    :: Ptr UBreakIterator -> Int32 -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getRuleStatus" ubrk_getRuleStatus
    :: Ptr UBreakIterator -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getRuleStatusVec" ubrk_getRuleStatusVec
    :: Ptr UBreakIterator -> Ptr Int32 -> Int32 -> Ptr UErrorCode -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_isBoundary" ubrk_isBoundary
    :: Ptr UBreakIterator -> Int32 -> IO UBool

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_countAvailable" ubrk_countAvailable
    :: IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getAvailable" ubrk_getAvailable
    :: Int32 -> IO CString