-- GenI surface realiser
-- Copyright (C) 2005 Carlos Areces and Eric Kow
--
-- This program is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License
-- as published by the Free Software Foundation; either version 2
-- of the License, or (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

-- | This module provides some very generic, non-GenI specific functions on strings,
--   trees and other miscellaneous odds and ends.  Whenever possible, one should try
--   to replace these functions with versions that are available in the standard
--   libraries, or the Haskell platform ones, or on hackage.

{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DeriveDataTypeable, TypeSynonymInstances #-}
{-# LANGUAGE OverloadedStrings #-}
module NLP.GenI.General (
        -- * IO
        ePutStr, ePutStrLn, eFlush,
        -- * Strings
        isGeniIdentLetter,
        dropTillIncluding,
        trim,
        toUpperHead, toLowerHead,
        toAlphaNum,
        quoteString, quoteText,
        clumpBy,
        -- * Triples
        first3, second3, third3,
        fst3, snd3, thd3,
        -- * Lists
        map',
        buckets,
        isEmptyIntersect,
        groupByFM,
        insertToListMap,
        histogram,
        combinations,
        mapMaybeM,
        repList,
        -- * Trees
        mapTree', filterTree,
        treeLeaves, preTerminals,
        repNode, repAllNode, listRepNode, repNodeByNode,
        -- * Intervals
        Interval,
        (!+!), ival, showInterval,
        -- * Bit vectors
        BitVector,
        showBitVector,
        -- * Errors, logging and exceptions
        geniBug,
        prettyException,
        mkLogname,
        )
        where

import Control.Arrow (first)
import Control.Exception (IOException)
import Control.Monad (liftM)
import Data.Bits (shiftR, (.&.))
import Data.Char (isAlphaNum, isDigit, isSpace, toUpper, toLower)
import Data.Function ( on )
import Data.List (foldl', intersect, inits, intersperse, groupBy, sortBy)
import Data.Typeable ( typeOf, Typeable )
import Data.Tree
import System.IO (hPutStrLn, hPutStr, hFlush, stderr)
import System.IO.Error (isUserError, ioeGetErrorString)
import qualified Data.Map as Map
import Prelude hiding ( catch )
import Data.Text ( Text )
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Binary
import Text.JSON

-- ----------------------------------------------------------------------
-- IO
-- ----------------------------------------------------------------------

-- | putStr on stderr
ePutStr :: String -> IO ()
ePutStr   = hPutStr stderr

ePutStrLn :: String -> IO()
ePutStrLn = hPutStrLn stderr

eFlush :: IO()
eFlush    = hFlush stderr

-- ----------------------------------------------------------------------
-- Strings
-- ----------------------------------------------------------------------

instance Binary Text where
  put = put . T.encodeUtf8
  get = liftM T.decodeUtf8 get

isGeniIdentLetter :: Char -> Bool
isGeniIdentLetter x = isAlphaNum x || x `elem` "_'+-."

trim :: String -> String
trim = reverse . (dropWhile isSpace) . reverse . (dropWhile isSpace) 

-- | Drop all characters up to and including the one in question
dropTillIncluding :: Char -> String -> String
dropTillIncluding c = drop 1 . (dropWhile (/= c))

-- | Make the first character of a string upper case
toUpperHead :: String -> String
toUpperHead []    = []
toUpperHead (h:t) = (toUpper h):t

-- | Make the first character of a string lower case
toLowerHead :: String -> String
toLowerHead []    = []
toLowerHead(h:t)  = (toLower h):t

quoteString :: String -> String
quoteString xs = "\"" ++ concatMap helper xs ++ "\""
  where
   helper '"'  = [ '\\', '\"' ]
   helper '\\' = [ '\\', '\\' ]
   helper x    = [ x ]

quoteText :: Text -> Text
quoteText t =
    q `T.append` escape t `T.append` q
  where
    escape = T.replace q escQ . T.replace s escS
    q = "\""
    s = "\\"
    escQ = s `T.append` q
    escS = s `T.append` s

-- | break a list of items into sublists of length < the clump
--   size, taking into consideration that each item in the clump
--   will have a single gap of padding interspersed
--
--   any item whose length is greater than the clump size
--   is put into a clump by itself
--
--   given a length function
--   @clumpBy (length.show) 8 ["hello", "this", "is", "a", "list"]@
clumpBy :: (a -> Int) -> Int -> [a] -> [[a]]
clumpBy f l items = iter [] items
 where
  iter acc [] = reverse acc
  iter acc cs =
   case break toobig (drop 1 $ inits cs) of
        ([],_)    -> next 1           -- first too big
        (_,[])    -> iter (cs:acc) [] -- none too big
        (_,(x:_)) -> next (length x - 1)
   where next n = iter (take n cs : acc) (drop n cs)
  toobig x = (sum . intersperse 1 . map f) x > l

-- ----------------------------------------------------------------------
-- Alphanumeric sort
-- ----------------------------------------------------------------------

-- | Intermediary type used for alphanumeric sort
data AlphaNum = A String | N Int deriving Eq

-- we don't derive this, because we want num < alpha
instance Ord AlphaNum where
 compare (A s1)  (A s2) = compare s1 s2
 compare (N s1)  (N s2) = compare s1 s2
 compare (A _)   (N _)  = GT
 compare (N _)   (A _)  = LT

-- | An alphanumeric sort is one where you treat the numbers in the string
--   as actual numbers.  An alphanumeric sort would put x2 before x100,
--   because 2 < 10, wheraeas a naive sort would put it the other way
--   around because the characters 1 < 2.  To sort alphanumerically, just
--   'sortBy (comparing toAlphaNum)'
toAlphaNum :: String -> [AlphaNum]
toAlphaNum = map readOne . groupBy ((==) `on` isDigit)
 where
   readOne s
     | all isDigit s = N (read s)
     | otherwise      = A s

-- ----------------------------------------------------------------------
-- Triples
-- ----------------------------------------------------------------------

first3 :: (a -> a2) -> (a, b, c) -> (a2, b, c)
first3 f (x,y,z) = (f x, y, z)

second3 :: (b -> b2) -> (a, b, c) -> (a, b2, c)
second3 f (x,y,z) = (x, f y, z)

third3 :: (c -> c2) -> (a, b, c) -> (a, b, c2)
third3 f (x,y,z) = (x, y, f z)

fst3 :: (a,b,c) -> a
fst3 (x,_,_) = x

snd3 :: (a,b,c) -> b
snd3 (_,x,_) = x

thd3 :: (a,b,c) -> c
thd3 (_,_,x) = x

-- ----------------------------------------------------------------------
-- Lists
-- ----------------------------------------------------------------------

-- | A strict version of 'map'
map' :: (a->b) -> [a] -> [b]
map' _ [] = []
map' f (x:xs) = let a = f x in a `seq` (a:(map' f xs))

-- | True if the intersection of two lists is empty.
isEmptyIntersect :: (Eq a) => [a] -> [a] -> Bool
isEmptyIntersect a b = null $ intersect a b

-- ----------------------------------------------------------------------
-- Grouping
-- ----------------------------------------------------------------------

-- | Serves the same function as 'Data.List.groupBy'.  It groups together
--   items by some property they have in common. The difference is that the
--   property is used as a key to a Map that you can lookup.
groupByFM :: (Ord b) => (a -> b) -> [a] -> (Map.Map b [a])
groupByFM fn list = 
  let addfn  x acc key = insertToListMap key x acc
      helper acc x = addfn x acc (fn x)
  in foldl' helper Map.empty list

{-# INLINE insertToListMap #-}
insertToListMap :: (Ord b) => b -> a -> Map.Map b [a] -> Map.Map b [a]
insertToListMap k i m =
  case Map.lookup k m of
  Nothing -> Map.insert k [i] m
  Just p  -> Map.insert k (i:p) m

histogram :: Ord a => [a] -> Map.Map a Int
histogram xs = Map.fromListWith (+) $ zip xs (repeat 1)

buckets :: Ord b => (a -> b) -> [a] -> [ (b,[a]) ]
buckets f = map (first head . unzip)
          . groupBy ((==) `on` fst)
          . sortBy (compare `on` fst)
          . map (\x -> (f x, x))

-- Given a list of lists, return all lists such that one item from each sublist is chosen.
-- If returns the empty list if there are any empty sublists.
combinations :: [[a]] -> [[a]]
combinations = sequence

mapMaybeM :: (Monad m) => (a -> m (Maybe b)) -> [a] -> m [b]
mapMaybeM _ [] = return []
mapMaybeM f (x:xs) =
 f x >>=
 (\my -> case my of
          Nothing -> mapMaybeM f xs
	  Just y  -> liftM (y:) (mapMaybeM f xs))

-- | Return the list, modifying only the first matching item.
repList :: (a->Bool) -> (a->a) -> [a] -> [a]
repList _ _ [] = []
repList pr fn (x:xs)
  | pr x = fn x : xs
  | otherwise = x : (repList pr fn xs)

-- ----------------------------------------------------------------------
-- Trees
-- ----------------------------------------------------------------------

-- | Strict version of 'mapTree' (for non-strict, just use fmap)
mapTree' :: (a->b) -> Tree a -> Tree b
mapTree' fn (Node a []) = let b = fn a in b `seq` Node b []
mapTree' fn (Node a l)  = let b = fn a
                              bs = map' (mapTree' fn) l
                          in b `seq` bs `seq` Node b bs

-- | Like 'filter', except on Trees.  Filter might not be a good name, though,
--   because we return a list of nodes, not a tree.
filterTree :: (a->Bool) -> Tree a -> [a]
filterTree fn = (filter fn) . flatten

-- | The leaf nodes of a Tree
treeLeaves :: Tree a -> [a]
treeLeaves (Node n []) = [n]
treeLeaves (Node _ l ) = concatMap treeLeaves l

-- | Return pairs of (parent, terminal)
preTerminals :: Tree a -> [(a,a)]
preTerminals (Node r xs) = concatMap (helper r) xs
 where
  helper p (Node k []) = [ (p,k) ]
  helper _ (Node p ys) = concatMap (helper p) ys

-- | 'repNode' @fn filt t@ returns a version of @t@ in which the first
--   node which @filt@ matches is transformed using @fn@.
repNode :: (Tree a -> Tree a) -- ^ replacement function
        -> (Tree a -> Bool)   -- ^ filtering function
        -> Tree a -> Maybe (Tree a)
repNode fn filt t =
 case listRepNode fn filt [t] of
 (_, False)   -> Nothing
 ([t2], True) -> Just t2
 _            -> geniBug "Either repNode or listRepNode are broken"

-- | Like 'repNode' except that it performs the operations on
--   all nodes that match and doesn't care if any nodes match
--   or not
repAllNode :: (Tree a -> Tree a) -> (Tree a -> Bool)
           -> Tree a -> Tree a
repAllNode fn filt n | filt n = fn n
repAllNode fn filt (Node p ks) = Node p $ map (repAllNode fn filt) ks

-- | Like 'repNode' but on a list of tree nodes
listRepNode :: (Tree a -> Tree a) -- ^ replacement function
            -> (Tree a -> Bool)   -- ^ filtering function
            -> [Tree a]           -- ^ nodes
            -> ([Tree a], Bool)
listRepNode _ _ [] = ([], False)
listRepNode fn filt (n:l2) | filt n = (fn n : l2, True)
listRepNode fn filt ((n@(Node a l1)):l2) =
  case listRepNode fn filt l1 of
  (lt1, True) -> ((Node a lt1):l2, True)
  _ -> case listRepNode fn filt l2 of
       (lt2, flag2) -> (n:lt2, flag2)

-- | Replace a node in the tree in-place with another node; keep the
--   children the same.  If the node is not found in the tree, or if
--   there are multiple instances of the node, this is treated as an
--   error.
repNodeByNode :: (a -> Bool) -- ^ which node?
              -> a -> Tree a -> Tree a
repNodeByNode nfilt rep t =
 let tfilt (Node n _) = nfilt n
     replaceFn (Node _ k) = Node rep k
 in case listRepNode replaceFn tfilt [t] of
    ([t2], True) -> t2
    (_ ,  False) -> geniBug "Node not found in repNode"
    _            -> geniBug "Unexpected result in repNode"

-- ----------------------------------------------------------------------
-- Errors, exceptions and logging
-- ----------------------------------------------------------------------

-- | errors specifically in GenI, which is very likely NOT the user's fault.
geniBug :: String -> a
geniBug s = error $ "Bug in GenI!\n" ++ s ++
                    "\nPlease file a report on http://trac.haskell.org/GenI/newticket"

-- stolen from Darcs
prettyException :: IOException -> String
prettyException e | isUserError e = ioeGetErrorString e
prettyException e = show e

-- | The module name for an arbitrary data type
mkLogname :: Typeable a => a -> String
mkLogname = reverse . drop 1 . dropWhile (/= '.') . reverse
          . show . typeOf
-- ----------------------------------------------------------------------
-- Intervals
-- ----------------------------------------------------------------------

type Interval = (Int,Int)

-- | Add two intervals
(!+!) :: Interval -> Interval -> Interval
(!+!) (a1,a2) (b1,b2) = (a1+b1, a2+b2)

-- | 'ival' @x@ builds a trivial interval from 'x' to 'x'
ival :: Int -> Interval
ival i = (i,i)

showInterval :: Interval -> String
showInterval (x,y) =
 let sign i = if i > 0 then "+" else ""
     --
 in if (x==y) 
    then (sign x) ++ (show x) 
    else show (x,y)

-- ----------------------------------------------------------------------
-- Bit vectors
-- ----------------------------------------------------------------------

type BitVector = Integer

-- | displays a bit vector, using a minimum number of bits
showBitVector :: Int -> BitVector -> String
showBitVector min_ 0 = replicate min_ '0'
showBitVector min_ x = showBitVector (min_ - 1) (shiftR x 1) ++ (show $ x .&. 1)

-- ----------------------------------------------------------------------
-- JSON
-- ----------------------------------------------------------------------

instance JSON Text where
    readJSON = fmap T.pack . readJSON
    showJSON = showJSON . T.unpack