{-# LANGUAGE PatternGuards #-}
module Text.CharRanges
  ( Range(..)
  , range
  , single
  , CharSet
  , toSet
  , member
  ) where

import           Data.List
import           Data.Set (Set)
import qualified Data.Set as Set

data Range = Single {-# UNPACK #-} !Char
           | Range  {-# UNPACK #-} !Char {-# UNPACK #-} !Char
             deriving (Eq, Show)

newtype CharRange = CR { unCR :: Range }

-- | A rather hacked-up instance.
--   This is to support fast lookups using 'Data.Set' (see 'toSet').
--   x == y iff x and y overlap
instance Eq CharRange where
    CR (Single x)    == CR (Single y) = x == y
    CR (Single a)    == CR (Range x y) = x <= a && a <= y
    CR (Range x y)   == CR (Single a) = x <= a && a <= y
    CR (Range lx ux) == CR (Range ly uy) = (lx <= uy && ly <= ux)
                                        || (lx <= uy && ly <= ux) -- INTENTIONAL
                                           -- For some strange reason GHC
                                           -- (7.6.3) seems to have problems
                                           -- optimizing this expressions
                                           -- without the additional or

instance Ord CharRange where
    CR (Single x)  <= CR (Single y) = x <= y
    CR (Single x)  <= CR (Range y _) = x <= y
    CR (Range _ x) <= CR (Single y) = x <= y
    CR (Range _ x) <= CR (Range y _) = x <= y

newtype CharSet = CharSet (Set CharRange)

-- | Allows quick lookups using ranges.
toSet :: [Range] -> CharSet
toSet = CharSet . Set.fromDistinctAscList . prepareRanges
  where prepareRanges :: [Range] -> [CharRange]
        prepareRanges =  go . sort . map CR -- we could use unsafeCoerce to
                                            -- avoid the cost of mapping
        go (r1:r2:rs) | Just r' <- maybeMergeRanges r1 r2 = go (r':rs)
                      | rss@(r3:rs') <- go (r2:rs) =
            case maybeMergeRanges r1 r3 of
                Nothing -> r1:rss
                Just r' -> r':rs'
        go rs = rs

maybeMergeRanges :: CharRange -> CharRange -> Maybe CharRange
maybeMergeRanges x y = if x == y -- overlap
                       then Just . CR $ minMax (unCR x) (unCR y)
                       else Nothing
{-# INLINE maybeMergeRanges #-}

minMax :: Range -> Range -> Range
minMax (Range lx ux) (Range ly uy) = Range (min lx ly) (max ux uy)
minMax (Single _) y = y
minMax x (Single _) = x
{-# INLINE minMax #-}

range :: Char -> Char -> Range
range x y = if x < y then Range x y
            else error "range: x not smaller than y"
{-# INLINE range #-}

single :: Char -> Range
single = Single
{-# INLINE single #-}

member :: Char -> CharSet -> Bool
member x (CharSet cs) = Set.member (CR $ Single x) cs