% GenI surface realiser
% Copyright (C) 2005 Carlos Areces and Eric Kow
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

\chapter{General}

This module provides some very generic, non-Geni specific functions on strings,
trees and other miscellaneous odds and ends.

\begin{code}
module NLP.GenI.General
where
\end{code}

\ignore{
\begin{code}
import Control.Monad (liftM)
import Data.Bits (shiftR, (.&.))
import Data.Char (isSpace, toUpper, toLower)
import Data.List (intersect, groupBy, group, sort, intersperse)
import Data.Tree
import System.IO (hPutStrLn, hPutStr, hFlush, stderr)
import qualified Data.Map as Map

-- for timeout
import Control.Concurrent
import Control.Exception
import Data.Dynamic(Typeable, typeOf, TyCon, mkTyCon, mkTyConApp, toDyn)
import Data.Unique
import System.Exit(exitWith, ExitCode(ExitFailure))

-- for non-lazy IO
import System.IO (openFile, IOMode(ReadMode), hFileSize, hGetBuf)
import System.IO.Unsafe (unsafeInterleaveIO)
import Foreign (mallocForeignPtrBytes, withForeignPtr, ForeignPtr, Ptr, peekElemOff, plusPtr, Word8)
import Data.Char (chr)
\end{code}
}

\section{IO}

\fnlabel{stderr} PutStr and PutStrLn an stderr
\begin{code}
ePutStr, ePutStrLn :: String -> IO()
ePutStr   = hPutStr stderr
ePutStrLn = hPutStrLn stderr

eFlush :: IO()
eFlush    = hFlush stderr
\end{code}

\section{Strings}

Haskell seems to be missing a string library.  Here are some functions I had to
implement.

\begin{code}
trim :: String -> String
trim = reverse . (dropWhile isSpace) . reverse . (dropWhile isSpace) 

-- | Drop all characters up to and including the one in question
dropTillIncluding :: Char -> String -> String
dropTillIncluding c = tail_ . (dropWhile (/= c))
\end{code}

\fnlabel{toUpperHead and toLowerHead} make the first character of a
string upper and lower case, respectively.  

\begin{code}
toUpperHead, toLowerHead :: String -> String
toUpperHead []    = []
toUpperHead (h:t) = (toUpper h):t
toLowerHead []    = []
toLowerHead(h:t)  = (toLower h):t
\end{code}

An alphanumeric sort is one where you treat the numbers in the string
as actual numbers.  An alphanumeric sort would put x2 before x100,
because 2 < 10, wheraeas a naive sort would put it the other way
around because the characters 1 < 2.  To sort alphanumerically, just
use \fnreflite{toAlphanum} as in \verb!sortBy (comparing toAlphanum)!

\begin{code}
data AlphaNum = A String | N Int deriving Eq

-- we don't derive this, because we want num < alpha
instance Ord AlphaNum where
 compare (A s1)  (A s2) = compare s1 s2
 compare (N s1)  (N s2) = compare s1 s2
 compare (A _)   (N _)  = GT
 compare (N _)   (A _)  = LT

toAlphaNum :: String -> [AlphaNum]
toAlphaNum = map readOne . groupBy (equating isNumber)
 where
   readOne s
     | all isNumber s = N (read s)
     | otherwise      = A s
\end{code}

\begin{code}
-- Crappy version until GHC 6.6 becomes the norm
isNumber :: Char -> Bool
isNumber c = c `elem` "1234567890"
\end{code}

\section{Three-tuples}

\begin{code}
fst3 :: (a,b,c) -> a
fst3 (x,_,_) = x

snd3 :: (a,b,c) -> b
snd3 (_,x,_) = x

thd3 :: (a,b,c) -> c
thd3 (_,_,x) = x
\end{code}

\section{Lists}

\begin{code}
equating :: Eq b => (a -> b) -> (a -> a -> Bool)
equating f a b = f a == f b

comparing :: Ord b => (a -> b) -> (a -> a -> Ordering)
comparing f a b = compare (f a) (f b)
\end{code}

\begin{code}
-- | A forgiving version of tail : if you give it the empty list, it returns the empty list
tail_ :: [a] -> [a]
tail_ [] = []
tail_ (_:t) = t

-- | A strict version of 'map'
map' :: (a->b) -> [a] -> [b]
map' _ [] = []
map' f (x:xs) = let a = f x in a `seq` (a:(map' f xs))
\end{code}

A generic version of the Data.List.words

\begin{code}
wordsBy :: (Eq a) => a -> [a] -> [[a]]
wordsBy c xs = filter (/= [c]) $ groupBy (\x y -> x /= c && y /= c) xs

-- | Makes sure that index s is in the bounds of list l.  
--   Surely there must be some more intelligent way to deal with this.
boundsCheck :: Int -> [a] -> Bool
boundsCheck s l = s >= 0 && s < length l
\end{code}

\fnlabel{isEmptyIntersect} is true if the intersection of two lists is
empty.

\begin{code}
isEmptyIntersect :: (Eq a) => [a] -> [a] -> Bool
isEmptyIntersect a b = null $ intersect a b
\end{code}

\paragraph{Grouping things together}

\begin{code}
-- | Serves the same function as 'Data.List.groupBy'.  It groups together
--   items by some property they have in common. The difference is that the
--   property is used as a key to a Map that you can lookup.
groupByFM :: (Ord b) => (a -> b) -> [a] -> (Map.Map b [a])
groupByFM fn list = 
  let addfn  x acc key = insertToListMap key x acc
      helper acc x = addfn x acc (fn x)
  in foldl helper Map.empty list

-- | Same as 'groupByFM', except that we let an item appear in
--   multiple groups.  The fn extracts the property from the item,
--   and returns multiple results in the form of a list
multiGroupByFM :: (Ord b) => (a -> [b]) -> [a] -> (Map.Map b [a])
multiGroupByFM fn list = 
  let addfn  x acc key = insertToListMap key x acc
      helper acc x = foldl (addfn x) acc (fn x)
  in foldl helper Map.empty list

{-# INLINE insertToListMap #-}
insertToListMap :: (Ord b) => b -> a -> Map.Map b [a] -> Map.Map b [a]
insertToListMap k i m =
  case Map.lookup k m of
  Nothing -> Map.insert k [i] m
  Just p  -> Map.insert k (i:p) m
\end{code}

\fnlabel{groupAndCount} is a generic list-processing function.
It converts a list of items into a list of tuples (a,b) where 
a is an item in the list and b is the number of times a in occurs 
in the list.

\begin{code}
groupAndCount :: (Eq a, Ord a) => [a] -> [(a, Int)]
groupAndCount xs = 
  map (\x -> (head x, length x)) grouped
  where grouped = (group.sort) xs
\end{code}

\begin{code}
-- Given a list of lists, return all lists such that one item from each sublist is chosen.
-- If returns the empty list if there are any empty sublists.
combinations :: [[a]] -> [[a]]
combinations = sequence
\end{code}

\begin{code}
mapMaybeM :: (Monad m) => (a -> m (Maybe b)) -> [a] -> m [b]
mapMaybeM _ [] = return []
mapMaybeM f (x:xs) =
 f x >>=
 (\my -> case my of
          Nothing -> mapMaybeM f xs
	  Just y  -> liftM (y:) (mapMaybeM f xs))
\end{code}

\begin{code}
-- | Return the list, modifying only the first matching item.
repList :: (a->Bool) -> (a->a) -> [a] -> [a]
repList _ _ [] = []
repList pr fn (x:xs)
  | pr x = fn x : xs
  | otherwise = x : (repList pr fn xs)
\end{code}

\section{Trees}

\begin{code}
-- | Strict version of 'mapTree' (for non-strict, just use fmap)
mapTree' :: (a->b) -> Tree a -> Tree b
mapTree' fn (Node a []) = let b = fn a in b `seq` Node b []
mapTree' fn (Node a l)  = let b = fn a
                              bs = map' (mapTree' fn) l
                          in b `seq` bs `seq` Node b bs
\end{code}

\fnlabel{filterTree} is like filter, except on Trees.  Filter 
might not be a good name, though, because we return a list of 
nodes, not a tree.

\begin{code}
filterTree :: (a->Bool) -> Tree a -> [a]
filterTree fn = (filter fn) . flatten
\end{code}

\fnlabel{treeLeaves} returns the leaf nodes of a Tree.

\begin{code}
treeLeaves :: Tree a -> [a]
treeLeaves (Node n []) = [n]
treeLeaves (Node _ l ) = concatMap treeLeaves l

-- | Return pairs of (parent, terminal)
preTerminals :: Tree a -> [(a,a)]
preTerminals (Node _ []) = []
preTerminals (Node x ks) =
 [ (x,y) | (Node y ys) <- ks, null ys ] ++ concatMap preTerminals ks
\end{code}

\paragraph{repNode and listRepNode} are a generic tree-walking/editing
function and its equivalent for lists of trees.  It
takes a replacement function, a filtering function and a tree.  It
returns the tree, except that the first node for which the filtering
function returns True is transformed with the replacement function.

\begin{code}
repNode :: (Tree a -> Tree a) -> (Tree a -> Bool)
        -> Tree a -> Maybe (Tree a)
repNode fn filt t =
 case listRepNode fn filt [t] of
 (_, False)   -> Nothing
 ([t2], True) -> Just t2
 _            -> geniBug "Either repNode or listRepNode are broken"

-- | Like 'repNode' except that it performs the operations on
--   all nodes that match and doesn't care if any nodes match
--   or not
repAllNode :: (Tree a -> Tree a) -> (Tree a -> Bool)
           -> Tree a -> Tree a
repAllNode fn filt n | filt n = fn n
repAllNode fn filt (Node p ks) = Node p $ map (repAllNode fn filt) ks

listRepNode :: (Tree a -> Tree a) -> (Tree a -> Bool) 
              -> [Tree a] -> ([Tree a], Bool)
listRepNode _ _ [] = ([], False)
listRepNode fn filt (n:l2) | filt n = (fn n : l2, True)
listRepNode fn filt ((n@(Node a l1)):l2) =
  case listRepNode fn filt l1 of
  (lt1, True) -> ((Node a lt1):l2, True)
  _ -> case listRepNode fn filt l2 of
       (lt2, flag2) -> (n:lt2, flag2)

-- | Replace a node in the tree in-place with another node; keep the
--   children the same.  If the node is not found in the tree, or if
--   there are multiple instances of the node, this is treated as an
--   error.
repNodeByNode :: (a -> Bool) -- ^ which node?
              -> a -> Tree a -> Tree a
repNodeByNode nfilt rep t =
 let tfilt (Node n _) = nfilt n
     replaceFn (Node _ k) = Node rep k
 in case listRepNode replaceFn tfilt [t] of
    ([t2], True) -> t2
    (_ ,  False) -> geniBug "Node not found in repNode"
    _            -> geniBug "Unexpected result in repNode"
\end{code}

\section{Errors}

\begin{code}
-- | errors specifically in GenI, which is very likely NOT the user's fault.
geniBug :: String -> a
geniBug s = error $ "Bug in GenI!\n" ++ s ++
                    "\nPlease file a report on http://wiki.loria.fr/wiki/GenI/Complaints" 
\end{code}

\section{Files}

\begin{code}
basename :: FilePath -> FilePath
basename x
 | '.' `elem` x = reverse . tail . dropWhile (/= '.') . reverse $ x
 | otherwise    = x

-- from darcs
(///) :: FilePath -> FilePath -> FilePath
""///b = b
a///"" = a
a///b  = a ++ "/" ++ b
\end{code}

\section{Intervals}

We represent polarities as intervals $(x,y)$, meaning from $x$ to $y$ including
both $x$ and $y$.

\begin{code}
type Interval = (Int,Int)
\end{code}

\fnlabel{(!+!)} adds two intervals

\begin{code}
(!+!) :: Interval -> Interval -> Interval
(!+!) (a1,a2) (b1,b2) = (a1+b1, a2+b2)
\end{code}

\fnlabel{ival} builds a (trivial) interval from $x$ to $x$
\begin{code}
ival :: Int -> Interval
ival i = (i,i)
\end{code}

Hmm... do i really feel like instantiating a datatype for this?

\begin{code}
showInterval :: Interval -> String
showInterval (x,y) =
 let sign i = if i > 0 then "+" else ""
     --
 in if (x==y) 
    then (sign x) ++ (show x) 
    else show (x,y)
\end{code}

\section{Other}

\begin{code}
type BitVector = Integer

-- | displays a bit vector, using a minimum number of bits
showBitVector :: Int -> BitVector -> String
showBitVector min_ 0 = take min_ $ repeat '0'
showBitVector min_ x = showBitVector (min_ - 1) (shiftR x 1) ++ (show $ x .&. 1)
\end{code}

\paragraph{showTable} pretty-prints an ASCII table from a list of items.
More precisely, it builds this from 
\begin{enumerate}
\item \fnparam{header} a list of headers, 
\item \fnparam{items}  a list of items and
\item \fnparam{displayfn} which converts the items to list of pretty-printed strings.
\end{enumerate}
Each item corresponds to a row.  The list returned by \fnparam{displayfn} ought
to be the same length as \fnparam{header}, since each item in the list
corresponds to a column.  Note that this function tries to make the table
pretty by padding each column to be same length as the header 
(so to adjust the size of columns, just pad the header with spaces).

\begin{code}
showTable :: [String] -> [a] -> (a -> [String]) -> String
showTable header items displayfn = 
  let showIt l = concat $ intersperse " | " $ l
      showLine = concat $ intersperse "-+-" $ map linestr header
      resStr r = zipWith pad (displayfn r) header
      -- a list of "-" with the same length as l 
      linestr str2 = map (const '-') str2
      -- pad str to be as long as str2
      pad str str2 = if (diff > 0) then padding ++ str else str
                     where padding = map (const ' ') [1..diff]
                           diff = (length str2) - (length str)   
      --
      headerStr = showIt header ++ "\n" ++ showLine ++ "\n" 
      bodyStr   = concat $ intersperse "\n" $ map (showIt.resStr) items 
  in headerStr ++ bodyStr
\end{code}

\section{Non-lazy IO}

Simon Marlow wrote this code on the Haskell mailing list 2005-08-02.
Using readFile' can be a good idea if you're dealing with not-so-huge
files (i.e. where you don't want lazy evaluation), because it ensures
that the handles are closed. No more ``too many open files''

\begin{code}
readFile' :: FilePath -> IO String
readFile' f = do
  h <- openFile f ReadMode
  s <- hFileSize h
  fp <- mallocForeignPtrBytes (fromIntegral s)
  len <- withForeignPtr fp $ \buf -> hGetBuf h buf (fromIntegral s)
  lazySlurp fp 0 len

buf_size :: Int
buf_size = 4096 :: Int

lazySlurp :: ForeignPtr Word8 -> Int -> Int -> IO String
lazySlurp fp ix len
  | fp `seq` False = undefined
  | ix >= len = return []
  | otherwise = do
      cs <- unsafeInterleaveIO (lazySlurp fp (ix + buf_size) len)
      ws <- withForeignPtr fp $ \p -> loop (min (len-ix) buf_size - 1)
					((p :: Ptr Word8) `plusPtr` ix) cs
      return ws
 where
  loop :: Int -> Ptr Word8 -> String -> IO String
  loop sublen p acc
    | sublen `seq` p `seq` False = undefined
    | sublen < 0 = return acc
    | otherwise = do
       w <- peekElemOff p sublen
       loop (sublen-1) p (chr (fromIntegral w):acc)
\end{code}

\section{Timeouts}

This code is taken directly from HyLoRes.  I have just reformatted it to be a bit more
readable (in my eyes).

\begin{code}
data TimeOut = TimeOut Unique

timeOutTc :: TyCon
timeOutTc = mkTyCon "TimeOut"

instance Typeable TimeOut where
    typeOf _ = mkTyConApp timeOutTc []

withTimeout :: Integer
            -> IO a -- ^ action to run upon timing out
            -> IO a -- ^ main action to run
            -> IO a
withTimeout secs on_timeout action =
 do parent  <- myThreadId
    i       <- newUnique
    block $ do
      timeout <- forkIO (timeout_thread secs parent i)
      Control.Exception.catchDyn
        ( unblock $ do result <- action
                       killThread timeout
                       return result )
        ( \ex -> case ex of
                 TimeOut u | u == i -> unblock on_timeout
                 _ -> killThread timeout >>= throwDyn ex )
 where
  timeout_thread secs_ parent i =
   do threadDelay $ (fromInteger secs_) * 1000000
      throwTo parent (DynException $ toDyn $ TimeOut i)

-- | Like 'exitFailure', except that we return with a code that we reserve for timing out
exitTimeout :: IO ()
exitTimeout = exitWith $ ExitFailure 2
\end{code}