{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}

#if (__GLASGOW_HASKELL__ >= 802)
{-# LANGUAGE DerivingStrategies #-}
#endif

--------------------------------------------------------------------------------
--  See end of this file for licence information.
--------------------------------------------------------------------------------
-- |
--  Module      :  QName
--  Copyright   :  (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011, 2012, 2013, 2020, 2022 Douglas Burke
--  License     :  GPL V2
--
--  Maintainer  :  Douglas Burke
--  Stability   :  experimental
--  Portability :  CPP, DerivingStrategies, OverloadedStrings
--
--  This module defines an algebraic datatype for qualified names (QNames),
--  which represents a 'URI' as the combination of a namespace 'URI'
--  and a local component ('LName'), which can be empty.
--
--  Although RDF supports using IRIs, the use of 'URI' here precludes this,
--  which means that, for instance, 'LName' only accepts a subset of valid
--  characters. There is currently no attempt to convert from an IRI into a URI.
--
--------------------------------------------------------------------------------

-- At present we support using URI references rather than forcing an absolute
-- URI. This is partly to support the existing tests (too lazy to resolve whether
-- the tests really should be using relative URIs in this case).

module Swish.QName
    ( QName
    , LName
    , emptyLName
    , newLName
    , getLName
    , newQName
    , qnameFromURI
    , getNamespace
    , getLocalName
    , getQNameURI
    , qnameFromFilePath
    )
    where

import Data.Char (isAscii)
import Data.Maybe (fromMaybe)
import Data.Interned (intern, unintern)
import Data.Interned.URI (InternedURI)
import Data.Ord (comparing)
import Data.String (IsString(..))

import Network.URI (URI(..), URIAuth(..), parseURIReference)

import System.Directory (canonicalizePath)
import System.FilePath (splitFileName)

import qualified Data.Text as T

------------------------------------------------------------
--  Qualified name
------------------------------------------------------------
--
--  These are RDF QNames rather than XML ones (as much as
--  RDF can claim to have them).
--


{-| A local name, which can be empty.

At present, the local name can not contain a space character and can only
contain ascii characters (those that match 'Data.Char.isAscii').

In version @0.9.0.3@ and earlier, the following characters were not
allowed in local names: \'#\', \':\', or \'/\' characters.

This is all rather experimental.
-}
newtype LName = LName T.Text
    deriving
#if (__GLASGOW_HASKELL__ >= 802)
      stock
#endif
      (LName -> LName -> Bool
(LName -> LName -> Bool) -> (LName -> LName -> Bool) -> Eq LName
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LName -> LName -> Bool
== :: LName -> LName -> Bool
$c/= :: LName -> LName -> Bool
/= :: LName -> LName -> Bool
Eq, Eq LName
Eq LName =>
(LName -> LName -> Ordering)
-> (LName -> LName -> Bool)
-> (LName -> LName -> Bool)
-> (LName -> LName -> Bool)
-> (LName -> LName -> Bool)
-> (LName -> LName -> LName)
-> (LName -> LName -> LName)
-> Ord LName
LName -> LName -> Bool
LName -> LName -> Ordering
LName -> LName -> LName
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: LName -> LName -> Ordering
compare :: LName -> LName -> Ordering
$c< :: LName -> LName -> Bool
< :: LName -> LName -> Bool
$c<= :: LName -> LName -> Bool
<= :: LName -> LName -> Bool
$c> :: LName -> LName -> Bool
> :: LName -> LName -> Bool
$c>= :: LName -> LName -> Bool
>= :: LName -> LName -> Bool
$cmax :: LName -> LName -> LName
max :: LName -> LName -> LName
$cmin :: LName -> LName -> LName
min :: LName -> LName -> LName
Ord)

instance Show LName where
    show :: LName -> [Char]
show (LName Text
t) = Text -> [Char]
forall a. Show a => a -> [Char]
show Text
t

-- | This is not total since attempting to convert a string
--   containing invalid characters will cause an error.
instance IsString LName where
    fromString :: [Char] -> LName
fromString [Char]
s = 
        LName -> Maybe LName -> LName
forall a. a -> Maybe a -> a
fromMaybe ([Char] -> LName
forall a. HasCallStack => [Char] -> a
error ([Char]
"Invalid local name: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
s)) (Maybe LName -> LName) -> Maybe LName -> LName
forall a b. (a -> b) -> a -> b
$
                  Text -> Maybe LName
newLName ([Char] -> Text
T.pack [Char]
s)

-- | The empty local name.
emptyLName :: LName
emptyLName :: LName
emptyLName = Text -> LName
LName Text
""

-- | Create a local name.
newLName :: T.Text -> Maybe LName
-- newLName l = if T.any (`elem` " #:/") l then Nothing else Just (LName l) -- 0.7.0.1 and earlier
-- newLName l = if T.any (\c -> c `elem` " #:/" || not (isAscii c)) l then Nothing else Just (LName l) -- 0.9.0.3 and earlier
newLName :: Text -> Maybe LName
newLName Text
l = if (Char -> Bool) -> Text -> Bool
T.any (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Bool -> Bool
not (Char -> Bool
isAscii Char
c)) Text
l then Maybe LName
forall a. Maybe a
Nothing else LName -> Maybe LName
forall a. a -> Maybe a
Just (Text -> LName
LName Text
l)

-- | Extract the local name.
getLName :: LName -> T.Text
getLName :: LName -> Text
getLName (LName Text
l) = Text
l

{-| 

A qualified name, consisting of a namespace URI
and the local part of the identifier, which can be empty.
The serialisation of a QName is formed by concatanating the
two components.

> Prelude> :set prompt "swish> "
> swish> :set -XOverloadedStrings
> swish> :m + Swish.QName
> swish> let qn1 = "http://example.com/" :: QName
> swish> let qn2 = "http://example.com/bob" :: QName
> swish> let qn3 = "http://example.com/bob/fred" :: QName
> swish> let qn4 = "http://example.com/bob/fred#x" :: QName
> swish> let qn5 = "http://example.com/bob/fred:joe" :: QName
> swish> map getLocalName [qn1, qn2, qn3, qn4, qn5]
> ["","bob","fred","x","fred:joe"]
> swish> getNamespace qn1
> http://example.com/
> swish> getNamespace qn2
> http://example.com/
> swish> getNamespace qn3
> http://example.com/bob/
> swish> getNamespace qn4
> http://example.com/bob/fred#

-}

{-
For now I have added in storing the actual URI
as well as the namespace component. This may or
may not be a good idea (space vs time saving).
-}

data QName = QName !InternedURI URI LName

-- | This is not total since it will fail if the input string is not a valid URI.
instance IsString QName where
  fromString :: [Char] -> QName
fromString [Char]
s = 
      QName -> Maybe QName -> QName
forall a. a -> Maybe a -> a
fromMaybe ([Char] -> QName
forall a. HasCallStack => [Char] -> a
error ([Char]
"QName conversion given an invalid URI: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
s))
      ([Char] -> Maybe URI
parseURIReference [Char]
s Maybe URI -> (URI -> Maybe QName) -> Maybe QName
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= URI -> Maybe QName
qnameFromURI)

-- | Equality is determined by a case sensitive comparison of the               
-- URI.
instance Eq QName where
    QName
u1 == :: QName -> QName -> Bool
== QName
u2 = QName -> URI
getQNameURI QName
u1 URI -> URI -> Bool
forall a. Eq a => a -> a -> Bool
== QName -> URI
getQNameURI QName
u2

-- | In @0.8.0.0@ the ordering now uses the ordering defined in
--   "Network.URI.Ord" rather than the @Show@
--   instance. This should make no difference unless a password
--   was included in the URI when using basic access authorization.
--
instance Ord QName where
    compare :: QName -> QName -> Ordering
compare = (QName -> URI) -> QName -> QName -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing QName -> URI
getQNameURI
  
-- | The format used to display the URI is @\<uri\>@, and does not
--   include the password if using basic access authorization.
instance Show QName where
    show :: QName -> [Char]
show (QName InternedURI
u URI
_ LName
_) = [Char]
"<" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ InternedURI -> [Char]
forall a. Show a => a -> [Char]
show InternedURI
u [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
">"

{-
The assumption in QName is that the validation done in creating
the local name is sufficient to ensure that the combined 
URI is syntactically valid. Is this true?
-}

-- | Create a new qualified name with an explicit local component.
--
newQName ::
    URI            -- ^ Namespace
    -> LName       -- ^ Local component
    -> QName
newQName :: URI -> LName -> QName
newQName URI
ns l :: LName
l@(LName Text
local) = 
  -- Until profiling shows that this is a time/space issue, we use
  -- the following code rather than trying to deconstruct the URI
  -- directly
  let lstr :: [Char]
lstr   = Text -> [Char]
T.unpack Text
local
      uristr :: [Char]
uristr = URI -> [Char]
forall a. Show a => a -> [Char]
show URI
ns [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
lstr
  in case [Char] -> Maybe URI
parseURIReference [Char]
uristr of
       Just URI
uri -> InternedURI -> URI -> LName -> QName
QName (Uninterned InternedURI -> InternedURI
forall t. Interned t => Uninterned t -> t
intern Uninterned InternedURI
URI
uri) URI
ns LName
l
       Maybe URI
_ -> [Char] -> QName
forall a. HasCallStack => [Char] -> a
error ([Char] -> QName) -> [Char] -> QName
forall a b. (a -> b) -> a -> b
$ [Char]
"Unable to combine " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ URI -> [Char]
forall a. Show a => a -> [Char]
show URI
ns [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" with " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
lstr
  
{-

old behavior

 splitQname "http://example.org/aaa#bbb" = ("http://example.org/aaa#","bbb")
 splitQname "http://example.org/aaa/bbb" = ("http://example.org/aaa/","bbb")
 splitQname "http://example.org/aaa/"    = ("http://example.org/aaa/","")

Should "urn:foo:bar" have a local name of "" or "foo:bar"? For now go
with the first option.

-}

-- | Create a new qualified name.
qnameFromURI :: 
    URI      -- ^ The URI will be deconstructed to find if it contains a local component.
    -> Maybe QName -- ^ The failure case may be removed.
qnameFromURI :: URI -> Maybe QName
qnameFromURI URI
uri =
  let uf :: [Char]
uf = URI -> [Char]
uriFragment URI
uri
      up :: [Char]
up = URI -> [Char]
uriPath URI
uri
      q0 :: Maybe QName
q0 = QName -> Maybe QName
forall a. a -> Maybe a
Just (QName -> Maybe QName) -> QName -> Maybe QName
forall a b. (a -> b) -> a -> b
$ URI -> LName -> QName
start URI
uri LName
emptyLName
      start :: URI -> LName -> QName
start = InternedURI -> URI -> LName -> QName
QName (Uninterned InternedURI -> InternedURI
forall t. Interned t => Uninterned t -> t
intern Uninterned InternedURI
URI
uri)
  in case [Char]
uf of
       [Char]
"#"    -> Maybe QName
q0
       Char
'#':[Char]
xs -> URI -> LName -> QName
start (URI
uri {uriFragment = "#"}) (LName -> QName) -> Maybe LName -> Maybe QName
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Text -> Maybe LName
newLName ([Char] -> Text
T.pack [Char]
xs)
       [Char]
""     -> case (Char -> Bool) -> [Char] -> ([Char], [Char])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/') (ShowS
forall a. [a] -> [a]
reverse [Char]
up) of
                   ([Char]
"",[Char]
_) -> Maybe QName
q0 -- path ends in / or is empty
                   ([Char]
_,[Char]
"") -> Maybe QName
q0 -- path contains no /
                   ([Char]
rlname,[Char]
rpath) -> 
                       URI -> LName -> QName
start (URI
uri {uriPath = reverse rpath}) (LName -> QName) -> Maybe LName -> Maybe QName
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` 
                       Text -> Maybe LName
newLName ([Char] -> Text
T.pack (ShowS
forall a. [a] -> [a]
reverse [Char]
rlname))

       -- e -> error $ "Unexpected: uri=" ++ show uri ++ " has fragment='" ++ show e ++ "'" 
       [Char]
_ -> Maybe QName
forall a. Maybe a
Nothing

-- | Return the URI of the namespace stored in the QName.
-- This does not contain the local component.
--
getNamespace :: QName -> URI
getNamespace :: QName -> URI
getNamespace (QName InternedURI
_ URI
ns LName
_) = URI
ns

-- | Return the local component of the QName.
getLocalName :: QName -> LName
getLocalName :: QName -> LName
getLocalName (QName InternedURI
_ URI
_ LName
l) = LName
l

-- | Returns the full URI of the QName (ie the combination of the
-- namespace and local components).
getQNameURI :: QName -> URI
getQNameURI :: QName -> URI
getQNameURI (QName InternedURI
u URI
_ LName
_) = InternedURI -> Uninterned InternedURI
forall t. Uninternable t => t -> Uninterned t
unintern InternedURI
u

{-|
Convert a filepath to a file: URI stored in a QName. If the
input file path is relative then the current working directory is used
to convert it into an absolute path.

If the input represents a directory then it *must* end in 
the directory separator - so for Posix systems use 
@\"\/foo\/bar\/\"@ rather than 
@\"\/foo\/bar\"@.

This has not been tested on Windows.
-}

{-
NOTE: not sure why I say directories should end in the path
seperator since

ghci> System.Directory.canonicalizePath "/Users/dburke/haskell/swish-text"
"/Users/dburke/haskell/swish-text"
ghci> System.Directory.canonicalizePath "/Users/dburke/haskell/swish-text/"
"/Users/dburke/haskell/swish-text"

-}

qnameFromFilePath :: FilePath -> IO QName
qnameFromFilePath :: [Char] -> IO QName
qnameFromFilePath [Char]
fname = do
  [Char]
ipath <- [Char] -> IO [Char]
canonicalizePath [Char]
fname
  let ([Char]
dname, [Char]
lname) = [Char] -> ([Char], [Char])
splitFileName [Char]
ipath
      nsuri :: URI
nsuri = [Char] -> Maybe URIAuth -> [Char] -> [Char] -> [Char] -> URI
URI [Char]
"file:" Maybe URIAuth
emptyAuth [Char]
dname [Char]
"" [Char]
""
      uri :: URI
uri = [Char] -> Maybe URIAuth -> [Char] -> [Char] -> [Char] -> URI
URI [Char]
"file:" Maybe URIAuth
emptyAuth [Char]
ipath [Char]
"" [Char]
""
  case [Char]
lname of
    [Char]
"" -> QName -> IO QName
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> IO QName) -> QName -> IO QName
forall a b. (a -> b) -> a -> b
$ InternedURI -> URI -> LName -> QName
QName (Uninterned InternedURI -> InternedURI
forall t. Interned t => Uninterned t -> t
intern Uninterned InternedURI
URI
nsuri) URI
nsuri LName
emptyLName
    [Char]
_  -> QName -> IO QName
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> IO QName) -> QName -> IO QName
forall a b. (a -> b) -> a -> b
$ InternedURI -> URI -> LName -> QName
QName (Uninterned InternedURI -> InternedURI
forall t. Interned t => Uninterned t -> t
intern Uninterned InternedURI
URI
uri) URI
nsuri (Text -> LName
LName ([Char] -> Text
T.pack [Char]
lname))

emptyAuth :: Maybe URIAuth
emptyAuth :: Maybe URIAuth
emptyAuth = URIAuth -> Maybe URIAuth
forall a. a -> Maybe a
Just (URIAuth -> Maybe URIAuth) -> URIAuth -> Maybe URIAuth
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> [Char] -> URIAuth
URIAuth [Char]
"" [Char]
"" [Char]
""

--------------------------------------------------------------------------------
--
--  Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin,
--    2011, 2012, 2013, 2020, 2022 Douglas Burke
--  All rights reserved.
--
--  This file is part of Swish.
--
--  Swish is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  Swish is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--  You should have received a copy of the GNU General Public License
--  along with Swish; if not, write to:
--    The Free Software Foundation, Inc.,
--    59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--
--------------------------------------------------------------------------------