-- GenI surface realiser
-- Copyright (C) 2005 Carlos Areces and Eric Kow
--
-- This program is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License
-- as published by the Free Software Foundation; either version 2
-- of the License, or (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

-- | This module provides some very generic, non-GenI specific functions on strings,
--   trees and other miscellaneous odds and ends.  Whenever possible, one should try
--   to replace these functions with versions that are available in the standard
--   libraries, or the Haskell platform ones, or on hackage.

module NLP.GenI.General (
        -- * IO
        ePutStr, ePutStrLn, eFlush,
        -- ** Strict readFile
        readFile',
        lazySlurp,
        -- ** Timeouts
        withTimeout,
        exitTimeout,
        -- * Strings
        dropTillIncluding,
        trim,
        toUpperHead, toLowerHead,
        toAlphaNum,
        -- * Triples
        fst3, snd3, thd3,
        -- * Lists
        map',
        boundsCheck,
        isEmptyIntersect,
        groupByFM,
        multiGroupByFM,
        insertToListMap,
        groupAndCount,
        combinations,
        mapMaybeM,
        repList,
        -- * Trees
        mapTree', filterTree,
        treeLeaves, preTerminals,
        repNode, repAllNode, listRepNode, repNodeByNode,
        -- * Intervals
        Interval,
        (!+!), ival, showInterval,
        -- * Bit vectors
        BitVector,
        showBitVector,
       -- * Bugs
        geniBug,
        )
        where

import Control.Monad (liftM)
import Data.Bits (shiftR, (.&.))
import Data.Char (isDigit, isSpace, toUpper, toLower)
import Data.Function ( on )
import Data.List (foldl', intersect, groupBy, group, sort)
import Data.Tree
import System.IO (hPutStrLn, hPutStr, hFlush, stderr)
import qualified Data.Map as Map

-- for timeout
import Control.Concurrent
import Control.Exception
import Data.Dynamic(Typeable, typeOf, TyCon, mkTyCon, mkTyConApp, toDyn)
import Data.Unique
import System.Exit(exitWith, ExitCode(ExitFailure))

-- for non-lazy IO
import System.IO (openFile, IOMode(ReadMode), hFileSize, hGetBuf)
import System.IO.Unsafe (unsafeInterleaveIO)
import Foreign (mallocForeignPtrBytes, withForeignPtr, ForeignPtr, Ptr, peekElemOff, plusPtr, Word8)
import Data.Char (chr)

-- ----------------------------------------------------------------------
-- IO
-- ----------------------------------------------------------------------

-- | putStr on stderr
ePutStr :: String -> IO ()
ePutStr   = hPutStr stderr

ePutStrLn :: String -> IO()
ePutStrLn = hPutStrLn stderr

eFlush :: IO()
eFlush    = hFlush stderr

-- ----------------------------------------------------------------------
-- Strings
-- ----------------------------------------------------------------------

trim :: String -> String
trim = reverse . (dropWhile isSpace) . reverse . (dropWhile isSpace) 

-- | Drop all characters up to and including the one in question
dropTillIncluding :: Char -> String -> String
dropTillIncluding c = drop 1 . (dropWhile (/= c))

-- | Make the first character of a string upper case
toUpperHead :: String -> String
toUpperHead []    = []
toUpperHead (h:t) = (toUpper h):t

-- | Make the first character of a string lower case
toLowerHead :: String -> String
toLowerHead []    = []
toLowerHead(h:t)  = (toLower h):t

-- ----------------------------------------------------------------------
-- Alphanumeric sort
-- ----------------------------------------------------------------------

-- | Intermediary type used for alphanumeric sort
data AlphaNum = A String | N Int deriving Eq

-- we don't derive this, because we want num < alpha
instance Ord AlphaNum where
 compare (A s1)  (A s2) = compare s1 s2
 compare (N s1)  (N s2) = compare s1 s2
 compare (A _)   (N _)  = GT
 compare (N _)   (A _)  = LT

-- | An alphanumeric sort is one where you treat the numbers in the string
--   as actual numbers.  An alphanumeric sort would put x2 before x100,
--   because 2 < 10, wheraeas a naive sort would put it the other way
--   around because the characters 1 < 2.  To sort alphanumerically, just
--   'sortBy (comparing toAlphaNum)'
toAlphaNum :: String -> [AlphaNum]
toAlphaNum = map readOne . groupBy ((==) `on` isDigit)
 where
   readOne s
     | all isDigit s = N (read s)
     | otherwise      = A s

-- ----------------------------------------------------------------------
-- Triples
-- ----------------------------------------------------------------------

fst3 :: (a,b,c) -> a
fst3 (x,_,_) = x

snd3 :: (a,b,c) -> b
snd3 (_,x,_) = x

thd3 :: (a,b,c) -> c
thd3 (_,_,x) = x

-- ----------------------------------------------------------------------
-- Lists
-- ----------------------------------------------------------------------

-- | A strict version of 'map'
map' :: (a->b) -> [a] -> [b]
map' _ [] = []
map' f (x:xs) = let a = f x in a `seq` (a:(map' f xs))

-- | Makes sure that index s is in the bounds of list l.  
--   Surely there must be some more intelligent way to deal with this.
boundsCheck :: Int -> [a] -> Bool
boundsCheck s l = s >= 0 && s < length l

-- | True if the intersection of two lists is empty.
isEmptyIntersect :: (Eq a) => [a] -> [a] -> Bool
isEmptyIntersect a b = null $ intersect a b

-- ----------------------------------------------------------------------
-- Grouping
-- ----------------------------------------------------------------------

-- | Serves the same function as 'Data.List.groupBy'.  It groups together
--   items by some property they have in common. The difference is that the
--   property is used as a key to a Map that you can lookup.
groupByFM :: (Ord b) => (a -> b) -> [a] -> (Map.Map b [a])
groupByFM fn list = 
  let addfn  x acc key = insertToListMap key x acc
      helper acc x = addfn x acc (fn x)
  in foldl' helper Map.empty list

-- | Same as 'groupByFM', except that we let an item appear in
--   multiple groups.  The fn extracts the property from the item,
--   and returns multiple results in the form of a list
multiGroupByFM :: (Ord b) => (a -> [b]) -> [a] -> (Map.Map b [a])
multiGroupByFM fn list = 
  let addfn  x acc key = insertToListMap key x acc
      helper acc x = foldl' (addfn x) acc (fn x)
  in foldl' helper Map.empty list

{-# INLINE insertToListMap #-}
insertToListMap :: (Ord b) => b -> a -> Map.Map b [a] -> Map.Map b [a]
insertToListMap k i m =
  case Map.lookup k m of
  Nothing -> Map.insert k [i] m
  Just p  -> Map.insert k (i:p) m

-- | Convert a list of items into a list of tuples (a,b) where
--   a is an item in the list and b is the number of times a
--   in occurs in the list.
groupAndCount :: (Eq a, Ord a) => [a] -> [(a, Int)]
groupAndCount xs = 
  map (\x -> (head x, length x)) grouped
  where grouped = (group.sort) xs

-- Given a list of lists, return all lists such that one item from each sublist is chosen.
-- If returns the empty list if there are any empty sublists.
combinations :: [[a]] -> [[a]]
combinations = sequence

mapMaybeM :: (Monad m) => (a -> m (Maybe b)) -> [a] -> m [b]
mapMaybeM _ [] = return []
mapMaybeM f (x:xs) =
 f x >>=
 (\my -> case my of
          Nothing -> mapMaybeM f xs
	  Just y  -> liftM (y:) (mapMaybeM f xs))

-- | Return the list, modifying only the first matching item.
repList :: (a->Bool) -> (a->a) -> [a] -> [a]
repList _ _ [] = []
repList pr fn (x:xs)
  | pr x = fn x : xs
  | otherwise = x : (repList pr fn xs)

-- ----------------------------------------------------------------------
-- Trees
-- ----------------------------------------------------------------------

-- | Strict version of 'mapTree' (for non-strict, just use fmap)
mapTree' :: (a->b) -> Tree a -> Tree b
mapTree' fn (Node a []) = let b = fn a in b `seq` Node b []
mapTree' fn (Node a l)  = let b = fn a
                              bs = map' (mapTree' fn) l
                          in b `seq` bs `seq` Node b bs

-- | Like 'filter', except on Trees.  Filter might not be a good name, though,
--   because we return a list of nodes, not a tree.
filterTree :: (a->Bool) -> Tree a -> [a]
filterTree fn = (filter fn) . flatten

-- | The leaf nodes of a Tree
treeLeaves :: Tree a -> [a]
treeLeaves (Node n []) = [n]
treeLeaves (Node _ l ) = concatMap treeLeaves l

-- | Return pairs of (parent, terminal)
preTerminals :: Tree a -> [(a,a)]
preTerminals (Node _ []) = []
preTerminals (Node x ks) =
 [ (x,y) | (Node y ys) <- ks, null ys ] ++ concatMap preTerminals ks

-- | 'repNode' @fn filt t@ returns a version of @t@ in which the first
--   node which @filt@ matches is transformed using @fn@.
repNode :: (Tree a -> Tree a) -- ^ replacement function
        -> (Tree a -> Bool)   -- ^ filtering function
        -> Tree a -> Maybe (Tree a)
repNode fn filt t =
 case listRepNode fn filt [t] of
 (_, False)   -> Nothing
 ([t2], True) -> Just t2
 _            -> geniBug "Either repNode or listRepNode are broken"

-- | Like 'repNode' except that it performs the operations on
--   all nodes that match and doesn't care if any nodes match
--   or not
repAllNode :: (Tree a -> Tree a) -> (Tree a -> Bool)
           -> Tree a -> Tree a
repAllNode fn filt n | filt n = fn n
repAllNode fn filt (Node p ks) = Node p $ map (repAllNode fn filt) ks

-- | Like 'repNode' but on a list of tree nodes
listRepNode :: (Tree a -> Tree a) -- ^ replacement function
            -> (Tree a -> Bool)   -- ^ filtering function
            -> [Tree a]           -- ^ nodes
            -> ([Tree a], Bool)
listRepNode _ _ [] = ([], False)
listRepNode fn filt (n:l2) | filt n = (fn n : l2, True)
listRepNode fn filt ((n@(Node a l1)):l2) =
  case listRepNode fn filt l1 of
  (lt1, True) -> ((Node a lt1):l2, True)
  _ -> case listRepNode fn filt l2 of
       (lt2, flag2) -> (n:lt2, flag2)

-- | Replace a node in the tree in-place with another node; keep the
--   children the same.  If the node is not found in the tree, or if
--   there are multiple instances of the node, this is treated as an
--   error.
repNodeByNode :: (a -> Bool) -- ^ which node?
              -> a -> Tree a -> Tree a
repNodeByNode nfilt rep t =
 let tfilt (Node n _) = nfilt n
     replaceFn (Node _ k) = Node rep k
 in case listRepNode replaceFn tfilt [t] of
    ([t2], True) -> t2
    (_ ,  False) -> geniBug "Node not found in repNode"
    _            -> geniBug "Unexpected result in repNode"

-- ----------------------------------------------------------------------
-- Errors
-- ----------------------------------------------------------------------

-- | errors specifically in GenI, which is very likely NOT the user's fault.
geniBug :: String -> a
geniBug s = error $ "Bug in GenI!\n" ++ s ++
                    "\nPlease file a report on http://trac.haskell.org/GenI/newticket"

-- ----------------------------------------------------------------------
-- Intervals
-- ----------------------------------------------------------------------

type Interval = (Int,Int)

-- | Add two intervals
(!+!) :: Interval -> Interval -> Interval
(!+!) (a1,a2) (b1,b2) = (a1+b1, a2+b2)

-- | 'ival' @x@ builds a trivial interval from 'x' to 'x'
ival :: Int -> Interval
ival i = (i,i)

showInterval :: Interval -> String
showInterval (x,y) =
 let sign i = if i > 0 then "+" else ""
     --
 in if (x==y) 
    then (sign x) ++ (show x) 
    else show (x,y)

-- ----------------------------------------------------------------------
-- Bit vectors
-- ----------------------------------------------------------------------

type BitVector = Integer

-- | displays a bit vector, using a minimum number of bits
showBitVector :: Int -> BitVector -> String
showBitVector min_ 0 = replicate min_ '0'
showBitVector min_ x = showBitVector (min_ - 1) (shiftR x 1) ++ (show $ x .&. 1)

-- ----------------------------------------------------------------------
-- Strict readfile
-- Simon Marlow wrote this code on the Haskell mailing list 2005-08-02.
-- ----------------------------------------------------------------------

-- | Using readFile' can be a good idea if you're dealing with not-so-huge
-- files (i.e. where you don't want lazy evaluation), because it ensures
-- that the handles are closed. No more ``too many open files''
readFile' :: FilePath -> IO String
readFile' f = do
  h <- openFile f ReadMode
  s <- hFileSize h
  fp <- mallocForeignPtrBytes (fromIntegral s)
  len <- withForeignPtr fp $ \buf -> hGetBuf h buf (fromIntegral s)
  lazySlurp fp 0 len

buf_size :: Int
buf_size = 4096 :: Int

lazySlurp :: ForeignPtr Word8 -> Int -> Int -> IO String
lazySlurp fp ix len
  | fp `seq` False = undefined
  | ix >= len = return []
  | otherwise = do
      cs <- unsafeInterleaveIO (lazySlurp fp (ix + buf_size) len)
      ws <- withForeignPtr fp $ \p -> loop (min (len-ix) buf_size - 1)
					((p :: Ptr Word8) `plusPtr` ix) cs
      return ws
 where
  loop :: Int -> Ptr Word8 -> String -> IO String
  loop sublen p acc
    | sublen `seq` p `seq` False = undefined
    | sublen < 0 = return acc
    | otherwise = do
       w <- peekElemOff p sublen
       loop (sublen-1) p (chr (fromIntegral w):acc)

-- ----------------------------------------------------------------------
-- Timeouts
-- ----------------------------------------------------------------------

data TimeOut = TimeOut Unique

timeOutTc :: TyCon
timeOutTc = mkTyCon "TimeOut"

instance Typeable TimeOut where
    typeOf _ = mkTyConApp timeOutTc []

withTimeout :: Integer
            -> IO a -- ^ action to run upon timing out
            -> IO a -- ^ main action to run
            -> IO a
withTimeout secs on_timeout action =
 do parent  <- myThreadId
    i       <- newUnique
    block $ do
      timeout <- forkIO (timeout_thread secs parent i)
      Control.Exception.catchDyn
        ( unblock $ do result <- action
                       killThread timeout
                       return result )
        ( \ex -> case ex of
                 TimeOut u | u == i -> unblock on_timeout
                 _ -> killThread timeout >>= throwDyn ex )
 where
  timeout_thread secs_ parent i =
   do threadDelay $ (fromInteger secs_) * 1000000
      throwTo parent (DynException $ toDyn $ TimeOut i)

-- | Like 'exitFailure', except that we return with a code that we reserve for timing out
exitTimeout :: IO ()
exitTimeout = exitWith $ ExitFailure 2