{-# LANGUAGE ForeignFunctionInterface
  #-}


{-| A binding for the native 'wcwidth'. It's important that you 'setLocale'
    before using it, like this:

 >  #!/usr/bin/env runhaskell
 >
 >  import Text.Printf
 >  
 >  import System.Locale.SetLocale
 >  import Data.Char.WCWidth
 >
 >  main                     =  do
 >    setLocale LC_ALL (Just "")
 >    sequence_ [ display c | c <- chars ]
 >   where
 >    chars                  =  [minBound..'A']
 >    display c = printf "%04x  %2d  %s\n" (fromEnum c) (wcwidth c) (show c)

    The program file @WCWidthTableaux.hs@ contains a more extensive example of
    using 'wcwidth'.

    Note that this binding to the native implementation gets certain
    characters wrong in obvious ways, as well as in ways that are problematic
    for indentation-based languages. The ASCII tab should be assigned a width
    of 8, not -1; and one is likely to find -1 assigned to numerous obscure
    characters (for example, symbols from the Book of Changes).

 -}


module Data.Char.WCWidth
  ( wcwidth
  , widths
  , ranges
  ) where

import Foreign.C
import Data.List




{-| Every 'Char', from 'minBound' up to 'maxBound', paired with the width
    that 'wcwidth' reports for it.
 -}
widths                      ::  [ (Char, Int) ]
widths                       =  map withWidth [minBound .. maxBound]
 where
  -- Tag a character with its width.
  withWidth ch               =  (ch, wcwidth ch)


{-| Characters broken into contiguous ranges with the same width. Each entry
    is @((first, last), width)@ with both bounds inclusive, and entries appear
    in the same order as the characters in 'widths'.

    The fold starts from an empty accumulator, so it is total even for an
    empty input list.
 -}
ranges                      ::  [ ((Char, Char), Int) ]
ranges                       =  reverse (foldl' aggregate [] widths)
 where
  -- The accumulator holds the ranges found so far, most recent first.
  aggregate [] (c, w)        =  [((c, c), w)]
  aggregate (((a, z), i) : t) (c, w)
    -- Same width as the most recent range: extend its upper bound to 'c'.
    | i == w                 =  ((a, c), i) : t
    -- Width changed: open a new single-character range.
    | otherwise              =  ((c, c), w) : ((a, z), i) : t


{-| Width of a character as reported by the native 'wcwidth'. The character
    is converted to the native argument type through its 'fromEnum' value,
    and the native result is converted back to an 'Int'.
 -}
wcwidth                     ::  Char -> Int
wcwidth ch                   =  fromEnum (native wide)
 where
  -- The character re-encoded as the argument type expected by 'native'.
  wide                       =  toEnum (fromEnum ch)


{-| Raw foreign import of the C function @wcwidth@ from @wchar.h@, taking a
    'CWchar' and returning a 'CInt'. It is imported as an @unsafe@ call and
    given a pure type.

    NOTE(review): the module header asks callers to set the locale before
    use, so the result presumably depends on the process locale even though
    the type is pure -- confirm against the platform's @wcwidth@.
 -}
foreign import ccall unsafe "wchar.h wcwidth" native :: CWchar -> CInt