{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-|
'Source' models source code, represented as a thin wrapper around a 'B.ByteString' with conveniences for splitting by line, slicing, etc.

This module is intended to be imported qualified to avoid name clashes with 'Prelude':

> import qualified Source.Source as Source
-}
module Source.Source
( Source
, bytes
, fromUTF8
-- * Measurement
, Source.Source.length
, Source.Source.null
, totalRange
, totalSpan
-- * En/decoding
, fromText
, toText
-- * Slicing
, slice
, drop
, take
-- * Splitting
, Source.Source.lines
, lineRanges
, lineRangesWithin
, newlineIndices
) where

import Prelude hiding (drop, take)

import           Control.Arrow ((&&&))
import           Control.DeepSeq (NFData)
import           Data.Aeson (FromJSON (..), withText)
import qualified Data.ByteString as B
import           Data.Char (ord)
import           Data.Maybe (fromMaybe)
import           Data.Monoid (Last (..))
import           Data.Semilattice.Lower
import           Data.String (IsString (..))
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import           Data.Text.Encoding.Error (lenientDecode)
import           GHC.Generics (Generic)
import           Source.Range
import           Source.Span (Pos (..), Span (Span))


-- | The contents of a source file. This is represented as a UTF-8
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
newtype Source = Source { Source -> ByteString
bytes :: B.ByteString }
  deriving (Source -> Source -> Bool
(Source -> Source -> Bool)
-> (Source -> Source -> Bool) -> Eq Source
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Source -> Source -> Bool
$c/= :: Source -> Source -> Bool
== :: Source -> Source -> Bool
$c== :: Source -> Source -> Bool
Eq, b -> Source -> Source
NonEmpty Source -> Source
Source -> Source -> Source
(Source -> Source -> Source)
-> (NonEmpty Source -> Source)
-> (forall b. Integral b => b -> Source -> Source)
-> Semigroup Source
forall b. Integral b => b -> Source -> Source
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: b -> Source -> Source
$cstimes :: forall b. Integral b => b -> Source -> Source
sconcat :: NonEmpty Source -> Source
$csconcat :: NonEmpty Source -> Source
<> :: Source -> Source -> Source
$c<> :: Source -> Source -> Source
Semigroup, Semigroup Source
Source
Semigroup Source
-> Source
-> (Source -> Source -> Source)
-> ([Source] -> Source)
-> Monoid Source
[Source] -> Source
Source -> Source -> Source
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [Source] -> Source
$cmconcat :: [Source] -> Source
mappend :: Source -> Source -> Source
$cmappend :: Source -> Source -> Source
mempty :: Source
$cmempty :: Source
$cp1Monoid :: Semigroup Source
Monoid, String -> Source
(String -> Source) -> IsString Source
forall a. (String -> a) -> IsString a
fromString :: String -> Source
$cfromString :: String -> Source
IsString, Int -> Source -> ShowS
[Source] -> ShowS
Source -> String
(Int -> Source -> ShowS)
-> (Source -> String) -> ([Source] -> ShowS) -> Show Source
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Source] -> ShowS
$cshowList :: [Source] -> ShowS
show :: Source -> String
$cshow :: Source -> String
showsPrec :: Int -> Source -> ShowS
$cshowsPrec :: Int -> Source -> ShowS
Show, (forall x. Source -> Rep Source x)
-> (forall x. Rep Source x -> Source) -> Generic Source
forall x. Rep Source x -> Source
forall x. Source -> Rep Source x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Source x -> Source
$cfrom :: forall x. Source -> Rep Source x
Generic, Source -> ()
(Source -> ()) -> NFData Source
forall a. (a -> ()) -> NFData a
rnf :: Source -> ()
$crnf :: Source -> ()
NFData)

fromUTF8 :: B.ByteString -> Source
fromUTF8 :: ByteString -> Source
fromUTF8 = ByteString -> Source
Source

instance FromJSON Source where
  parseJSON :: Value -> Parser Source
parseJSON = String -> (Text -> Parser Source) -> Value -> Parser Source
forall a. String -> (Text -> Parser a) -> Value -> Parser a
withText String
"Source" (Source -> Parser Source
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Source -> Parser Source)
-> (Text -> Source) -> Text -> Parser Source
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Source
fromText)


-- Measurement

length :: Source -> Int
length :: Source -> Int
length = ByteString -> Int
B.length (ByteString -> Int) -> (Source -> ByteString) -> Source -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Source -> ByteString
bytes

null :: Source -> Bool
null :: Source -> Bool
null = ByteString -> Bool
B.null (ByteString -> Bool) -> (Source -> ByteString) -> Source -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Source -> ByteString
bytes

-- | Return a 'Range' that covers the entire text.
totalRange :: Source -> Range
totalRange :: Source -> Range
totalRange = Int -> Int -> Range
Range Int
0 (Int -> Range) -> (Source -> Int) -> Source -> Range
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int
B.length (ByteString -> Int) -> (Source -> ByteString) -> Source -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Source -> ByteString
bytes

-- | Return a 'Span' that covers the entire text.
totalSpan :: Source -> Span
totalSpan :: Source -> Span
totalSpan Source
source = Pos -> Pos -> Span
Span (Int -> Int -> Pos
Pos Int
1 Int
1) (Int -> Int -> Pos
Pos ([Range] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
Prelude.length [Range]
ranges) (Int -> Int
forall a. Enum a => a -> a
succ (Range -> Int
end Range
lastRange Int -> Int -> Int
forall a. Num a => a -> a -> a
- Range -> Int
start Range
lastRange))) where
  ranges :: [Range]
ranges = Source -> [Range]
lineRanges Source
source
  lastRange :: Range
lastRange = Range -> Maybe Range -> Range
forall a. a -> Maybe a -> a
fromMaybe Range
forall s. Lower s => s
lowerBound (Last Range -> Maybe Range
forall a. Last a -> Maybe a
getLast ((Range -> Last Range) -> [Range] -> Last Range
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (Maybe Range -> Last Range
forall a. Maybe a -> Last a
Last (Maybe Range -> Last Range)
-> (Range -> Maybe Range) -> Range -> Last Range
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Range -> Maybe Range
forall a. a -> Maybe a
Just) [Range]
ranges))


-- En/decoding

-- | Return a 'Source' from a 'Text'.
fromText :: T.Text -> Source
fromText :: Text -> Source
fromText = ByteString -> Source
Source (ByteString -> Source) -> (Text -> ByteString) -> Text -> Source
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8

-- | Return the Text contained in the 'Source'.
toText :: Source -> T.Text
toText :: Source -> Text
toText = OnDecodeError -> ByteString -> Text
T.decodeUtf8With OnDecodeError
lenientDecode (ByteString -> Text) -> (Source -> ByteString) -> Source -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Source -> ByteString
bytes


-- Slicing

-- | Return a 'Source' that contains a slice of the given 'Source'.
slice :: Source -> Range -> Source
slice :: Source -> Range -> Source
slice Source
source Range
range = Source -> Source
taking (Source -> Source) -> Source -> Source
forall a b. (a -> b) -> a -> b
$ Source -> Source
dropping Source
source where
  dropping :: Source -> Source
dropping = Int -> Source -> Source
drop (Range -> Int
start Range
range)
  taking :: Source -> Source
taking   = Int -> Source -> Source
take (Range -> Int
rangeLength Range
range)

drop :: Int -> Source -> Source
drop :: Int -> Source -> Source
drop Int
i = ByteString -> Source
Source (ByteString -> Source)
-> (Source -> ByteString) -> Source -> Source
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
B.drop Int
i (ByteString -> ByteString)
-> (Source -> ByteString) -> Source -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Source -> ByteString
bytes

take :: Int -> Source -> Source
take :: Int -> Source -> Source
take Int
i = ByteString -> Source
Source (ByteString -> Source)
-> (Source -> ByteString) -> Source -> Source
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
B.take Int
i (ByteString -> ByteString)
-> (Source -> ByteString) -> Source -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Source -> ByteString
bytes


-- Splitting

-- | Split the contents of the source after newlines.
lines :: Source -> [Source]
lines :: Source -> [Source]
lines Source
source = Source -> Range -> Source
slice Source
source (Range -> Source) -> [Range] -> [Source]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Source -> [Range]
lineRanges Source
source

-- | Compute the 'Range's of each line in a 'Source'.
lineRanges :: Source -> [Range]
lineRanges :: Source -> [Range]
lineRanges Source
source = Source -> Range -> [Range]
lineRangesWithin Source
source (Source -> Range
totalRange Source
source)

-- | Compute the 'Range's of each line in a 'Range' of a 'Source'.
lineRangesWithin :: Source -> Range -> [Range]
lineRangesWithin :: Source -> Range -> [Range]
lineRangesWithin Source
source Range
range
  = ([Int] -> [Int] -> [Range]) -> ([Int], [Int]) -> [Range]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry ((Int -> Int -> Range) -> [Int] -> [Int] -> [Range]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> Int -> Range
Range)
  (([Int], [Int]) -> [Range])
-> (Source -> ([Int], [Int])) -> Source -> [Range]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Range -> Int
start Range
rangeInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:) ([Int] -> [Int]) -> ([Int] -> [Int]) -> [Int] -> ([Int], [Int])
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& ([Int] -> [Int] -> [Int]
forall a. Semigroup a => a -> a -> a
<> [ Range -> Int
end Range
range ]))
  ([Int] -> ([Int], [Int]))
-> (Source -> [Int]) -> Source -> ([Int], [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int) -> [Int] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int
forall a. Enum a => a -> a
succ (Range -> Int
start Range
range))
  ([Int] -> [Int]) -> (Source -> [Int]) -> Source -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Int]
newlineIndices
  (ByteString -> [Int]) -> (Source -> ByteString) -> Source -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Source -> ByteString
bytes
  (Source -> [Range]) -> Source -> [Range]
forall a b. (a -> b) -> a -> b
$ Source -> Range -> Source
slice Source
source Range
range

-- | Return all indices of newlines ('\n', '\r', and '\r\n') in the 'ByteString'.
newlineIndices :: B.ByteString -> [Int]
newlineIndices :: ByteString -> [Int]
newlineIndices = Int -> ByteString -> [Int]
go Int
0 where
  go :: Int -> ByteString -> [Int]
go Int
n ByteString
bs
    | ByteString -> Bool
B.null ByteString
bs = []
    | Bool
otherwise = case (ByteString -> Maybe Int
searchCR ByteString
bs, ByteString -> Maybe Int
searchLF ByteString
bs) of
      (Maybe Int
Nothing, Maybe Int
Nothing)  -> []
      (Just Int
i, Maybe Int
Nothing)   -> Int -> Int -> ByteString -> [Int]
recur Int
n Int
i ByteString
bs
      (Maybe Int
Nothing, Just Int
i)   -> Int -> Int -> ByteString -> [Int]
recur Int
n Int
i ByteString
bs
      (Just Int
crI, Just Int
lfI)
        | Int -> Int
forall a. Enum a => a -> a
succ Int
crI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
lfI -> Int -> Int -> ByteString -> [Int]
recur Int
n Int
lfI ByteString
bs
        | Bool
otherwise       -> Int -> Int -> ByteString -> [Int]
recur Int
n (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
crI Int
lfI) ByteString
bs
  recur :: Int -> Int -> ByteString -> [Int]
recur Int
n Int
i ByteString
bs = let j :: Int
j = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i in Int
j Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> ByteString -> [Int]
go (Int -> Int
forall a. Enum a => a -> a
succ Int
j) (Int -> ByteString -> ByteString
B.drop (Int -> Int
forall a. Enum a => a -> a
succ Int
i) ByteString
bs)
  searchLF :: ByteString -> Maybe Int
searchLF = Word8 -> ByteString -> Maybe Int
B.elemIndex (Int -> Word8
forall a. Enum a => Int -> a
toEnum (Char -> Int
ord Char
'\n'))
  searchCR :: ByteString -> Maybe Int
searchCR = Word8 -> ByteString -> Maybe Int
B.elemIndex (Int -> Word8
forall a. Enum a => Int -> a
toEnum (Char -> Int
ord Char
'\r'))
{-# INLINE newlineIndices #-}