-- -*- coding: utf-8; mode: haskell; -*- -- File: library/Language/Ninja/Misc/Located.hs -- -- License: -- Copyright 2017 Awake Security -- -- Licensed under the Apache License, Version 2.0 (the "License"); -- you may not use this file except in compliance with the License. -- You may obtain a copy of the License at -- -- http://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, software -- distributed under the License is distributed on an "AS IS" BASIS, -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- See the License for the specific language governing permissions and -- limitations under the License. {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE UndecidableInstances #-} -- | -- Module : Language.Ninja.Misc.Located -- Copyright : Copyright 2017 Awake Security -- License : Apache-2.0 -- Maintainer : opensource@awakesecurity.com -- Stability : experimental -- -- Tokenize text into a list of non-whitespace chunks, each of which is -- annotated with its source location. -- -- @since 0.1.0 module Language.Ninja.Misc.Located ( -- * @Located@ Located , tokenize, tokenizeFile, tokenizeText -- , untokenize , locatedPos, locatedVal -- * @Spans@ , Spans, makeSpans, spansSet -- * @Span@ , Span, makeSpan, spanPath, spanRange , spanStart, spanEnd, spanStartPos, spanEndPos -- * @Position@ , Position, makePosition , positionFile, positionOffset, positionLine, positionCol , comparePosition -- * @Offset@ , Offset, compareOffset, offsetLine, offsetColumn -- * Miscellaneous , Line, Column ) where import Control.Arrow (second, (&&&), (***)) import qualified Control.Lens as Lens import Control.Monad.ST (ST) import qualified Control.Monad.ST as ST import Data.STRef (STRef) import qualified Data.STRef as ST import Data.Char (isSpace) import qualified Data.Maybe import Data.Semigroup (Semigroup ((<>))) import Data.Text (Text) import qualified Data.Text as Text import Data.HashSet (HashSet) import qualified Data.HashSet as HS import Control.DeepSeq (NFData) import Data.Hashable (Hashable) import GHC.Generics (Generic) import qualified Test.SmallCheck.Series as SC import Data.Aeson ((.:), (.=)) import qualified Data.Aeson as Aeson import qualified Data.Aeson.Types as Aeson import Flow ((.>), (|>)) import qualified Language.Ninja.Misc.Path as Misc import qualified Language.Ninja.Mock as Mock -------------------------------------------------------------------------------- -- | This datatype represents a value annotated with a source location. -- -- @since 0.1.0 data Located t = MkLocated { _locatedPos :: {-# UNPACK #-} !Position , _locatedVal :: !t } deriving (Eq, Show, Generic, Functor, Foldable, Traversable) -- | Construct a 'Located' value directly. -- -- @since 0.1.0 {-# INLINE makeLocated #-} makeLocated :: Position -> t -> Located t makeLocated = MkLocated -- | Given @path :: 'Maybe' 'Misc.Path'@ and a @text :: 'Text'@, do the -- following: -- -- * Remove all @'\r'@ characters from the @text@. -- * Split the @text@ into chunks that are guaranteed not to contain newlines -- or whitespace, and which are annotated with their location. -- -- @since 0.1.0 tokenize :: Maybe Misc.Path -> Text -> [Located Text] tokenize mpath = removeWhitespace (mpath, 0, 0) -- | Read the file at the given 'Misc.Path' and then run 'tokenize' on the -- resulting 'Text'. -- -- @since 0.1.0 tokenizeFile :: (Mock.MonadReadFile m) => Misc.Path -> m [Located Text] tokenizeFile path = tokenize (Just path) <$> Mock.readFile path -- | This function is equivalent to @tokenize Nothing@. -- -- @since 0.1.0 tokenizeText :: Text -> [Located Text] tokenizeText = tokenize Nothing -- TODO: uncomment this and implement it -- -- -- | This function takes the output of 'tokenize' and returns a map from -- -- paths to the contents of the associated files. -- -- -- -- prop> untokenize [] == mempty -- -- prop> untokenize (xs <> ys) == untokenize xs <> untokenize ys -- -- prop> untokenize (tokenize (Just path) t) == Map.singleton path t -- -- -- -- @since 0.1.0 -- untokenize :: [Located Text] -> Map Misc.Path Text -- untokenize = error "untokenize is not yet written" -- | The position of this located value. -- -- @since 0.1.0 {-# INLINE locatedPos #-} locatedPos :: Lens.Lens' (Located t) Position locatedPos = Lens.lens _locatedPos $ \(MkLocated {..}) x -> MkLocated { _locatedPos = x, .. } -- | The value underlying this located value. -- -- @since 0.1.0 {-# INLINE locatedVal #-} locatedVal :: Lens.Lens' (Located t) t locatedVal = Lens.lens _locatedVal $ \(MkLocated {..}) x -> MkLocated { _locatedVal = x, .. } -- | Converts to @{position: …, value: …}@. -- -- @since 0.1.0 instance (Aeson.ToJSON t) => Aeson.ToJSON (Located t) where toJSON (MkLocated {..}) = [ "pos" .= _locatedPos , "val" .= _locatedVal ] |> Aeson.object -- | Inverse of the 'Aeson.ToJSON' instance. -- -- @since 0.1.0 instance (Aeson.FromJSON t) => Aeson.FromJSON (Located t) where parseJSON = (Aeson.withObject "Located" $ \o -> do _locatedPos <- (o .: "pos") >>= pure _locatedVal <- (o .: "val") >>= pure pure (MkLocated {..})) -- | Default 'Hashable' instance via 'Generic'. -- -- @since 0.1.0 instance (Hashable t) => Hashable (Located t) -- | Default 'NFData' instance via 'Generic'. -- -- @since 0.1.0 instance (NFData t) => NFData (Located t) -- | Default 'SC.Serial' instance via 'Generic'. -- -- @since 0.1.0 instance ( Monad m, SC.Serial m Text, SC.Serial m t ) => SC.Serial m (Located t) -- | Default 'SC.CoSerial' instance via 'Generic'. -- -- @since 0.1.0 instance ( Monad m, SC.CoSerial m Text, SC.CoSerial m t ) => SC.CoSerial m (Located t) -------------------------------------------------------------------------------- -- | A type representing a set of source spans. -- -- @since 0.1.0 newtype Spans = MkSpans (HashSet Span) deriving ( Eq, Show, Semigroup, Monoid , Generic, Aeson.ToJSON, Aeson.FromJSON , Hashable, NFData ) -- | Construct a 'Spans' from a list of 'Span's. -- -- @since 0.1.0 {-# INLINE makeSpans #-} makeSpans :: [Span] -> Spans makeSpans = HS.fromList .> MkSpans -- | A lens into the @'HashSet' 'Span'@ underlying a value of type 'Spans'. -- -- @since 0.1.0 {-# INLINE spansSet #-} spansSet :: Lens.Iso' Spans (HashSet Span) spansSet = Lens.iso (\(MkSpans s) -> s) MkSpans -- | Default 'SC.Serial' instance via 'Generic'. -- -- @since 0.1.0 instance (Monad m, SC.Serial m (HashSet Span)) => SC.Serial m Spans -- | Default 'SC.CoSerial' instance via 'Generic'. -- -- @since 0.1.0 instance (Monad m, SC.CoSerial m (HashSet Span)) => SC.CoSerial m Spans -------------------------------------------------------------------------------- -- | Represents a span of source code. -- -- @since 0.1.0 data Span = MkSpan !(Maybe Misc.Path) !Offset !Offset deriving (Eq, Show, Generic) -- | Construct a 'Span' from a given start position to a given end position. -- -- @since 0.1.0 {-# INLINE makeSpan #-} makeSpan :: Maybe Misc.Path -- ^ The file in which this span resides, if any. -> Offset -- ^ The start offset. -> Offset -- ^ The end offset. -> Span makeSpan mpath start end = case compareOffset start end of GT -> makeSpan mpath end start _ -> MkSpan mpath start end -- | A lens into the (nullable) path associated with a 'Span'. -- -- @since 0.1.0 {-# INLINE spanPath #-} spanPath :: Lens.Lens' Span (Maybe Misc.Path) spanPath = let helper (MkSpan p s e) = (p, \x -> MkSpan x s e) in Lens.lens (helper .> fst) (helper .> snd) -- | A lens giving the start and end 'Offset's associated with a 'Span'. -- -- @since 0.1.0 {-# INLINE spanRange #-} spanRange :: Lens.Lens' Span (Offset, Offset) spanRange = let helper (MkSpan p s e) = ((s, e), \(s', e') -> MkSpan p s' e') in Lens.lens (helper .> fst) (helper .> snd) -- | A lens into the 'Offset' associated with the start of a 'Span'. -- -- @since 0.1.0 {-# INLINE spanStart #-} spanStart :: Lens.Lens' Span Offset spanStart = spanRange . Lens._1 -- | A lens into the 'Offset' associated with the end of a 'Span'. -- -- @since 0.1.0 {-# INLINE spanEnd #-} spanEnd :: Lens.Lens' Span Offset spanEnd = spanRange . Lens._2 -- | A getter for the 'Position' associated with the start of a 'Span'. -- -- @since 0.1.0 {-# INLINE spanStartPos #-} spanStartPos :: Lens.Getter Span Position spanStartPos = Lens.to (\(MkSpan p s _) -> makePosition p s) -- | A getter for the 'Position' associated with the end of a 'Span'. -- -- @since 0.1.0 {-# INLINE spanEndPos #-} spanEndPos :: Lens.Getter Span Position spanEndPos = Lens.to (\(MkSpan p _ e) -> makePosition p e) -- | Converts to @{file: …, start: …, end: …}@. -- -- @since 0.1.0 instance Aeson.ToJSON Span where toJSON (MkSpan file start end) = [ "file" .= maybe Aeson.Null Aeson.toJSON file , "start" .= offsetJ start , "end" .= offsetJ end ] |> Aeson.object where offsetJ :: (Line, Column) -> Aeson.Value offsetJ (line, col) = Aeson.object ["line" .= line, "col" .= col] -- | Inverse of the 'Aeson.ToJSON' instance. -- -- @since 0.1.0 instance Aeson.FromJSON Span where parseJSON = (Aeson.withObject "Span" $ \o -> do file <- (o .: "file") >>= pure start <- (o .: "start") >>= offsetP end <- (o .: "end") >>= offsetP pure (MkSpan file start end)) where offsetP :: Aeson.Value -> Aeson.Parser Offset offsetP = (Aeson.withObject "Offset" $ \o -> do line <- (o .: "line") >>= pure col <- (o .: "col") >>= pure pure (line, col)) -- | Default 'Hashable' instance via 'Generic'. -- -- @since 0.1.0 instance Hashable Span -- | Default 'NFData' instance via 'Generic'. -- -- @since 0.1.0 instance NFData Span -- | Default 'SC.Serial' instance via 'Generic'. -- -- @since 0.1.0 instance (Monad m, SC.Serial m Text) => SC.Serial m Span -- | Default 'SC.CoSerial' instance via 'Generic'. -- -- @since 0.1.0 instance (Monad m, SC.CoSerial m Text) => SC.CoSerial m Span -------------------------------------------------------------------------------- -- | This datatype represents the position of a cursor in a text file. -- -- @since 0.1.0 data Position = MkPosition { _positionFile :: !(Maybe Misc.Path) , _positionLine :: {-# UNPACK #-} !Line , _positionCol :: {-# UNPACK #-} !Column } deriving (Eq, Show, Generic) -- | Construct a 'Position' from a (nullable) path and a @(line, column)@ pair. -- -- @since 0.1.0 {-# INLINE makePosition #-} makePosition :: Maybe Misc.Path -> Offset -> Position makePosition file (line, column) = MkPosition file line column -- | The path of the file pointed to by this position, if any. -- -- @since 0.1.0 {-# INLINE positionFile #-} positionFile :: Lens.Lens' Position (Maybe Misc.Path) positionFile = Lens.lens _positionFile $ \(MkPosition {..}) x -> MkPosition { _positionFile = x, .. } -- | The offset in the file pointed to by this position. -- -- @since 0.1.0 {-# INLINE positionOffset #-} positionOffset :: Lens.Lens' Position Offset positionOffset = Lens.lens (_positionLine &&& _positionCol) $ \(MkPosition {..}) (line, col) -> MkPosition { _positionLine = line, _positionCol = col, .. } -- | The line number in the file pointed to by this position. -- -- @since 0.1.0 {-# INLINE positionLine #-} positionLine :: Lens.Lens' Position Line positionLine = positionOffset . Lens._1 -- | The column number in the line pointed to by this position. -- -- @since 0.1.0 {-# INLINE positionCol #-} positionCol :: Lens.Lens' Position Column positionCol = positionOffset . Lens._2 -- | If two 'Position's are comparable (i.e.: if they are in the same file), -- this function will return an 'Ordering' giving their relative positions. -- Otherwise, it will of course return 'Nothing'. -- -- @since 0.1.0 comparePosition :: Position -> Position -> Maybe Ordering comparePosition = go where go (MkPosition fileX lineX colX) (MkPosition fileY lineY colY) = compareTriple (fileX, (lineX, colX)) (fileY, (lineY, colY)) compareTriple :: (Maybe Misc.Path, Offset) -> (Maybe Misc.Path, Offset) -> Maybe Ordering compareTriple (mfileX, offX) (mfileY, offY) | (mfileX == mfileY) = Just (compareOffset offX offY) | otherwise = Nothing -- | Converts to @{file: …, line: …, col: …}@. -- -- @since 0.1.0 instance Aeson.ToJSON Position where toJSON (MkPosition {..}) = [ "file" .= _positionFile , "line" .= _positionLine , "col" .= _positionCol ] |> Aeson.object -- | Inverse of the 'Aeson.ToJSON' instance. -- -- @since 0.1.0 instance Aeson.FromJSON Position where parseJSON = (Aeson.withObject "Position" $ \o -> do _positionFile <- (o .: "file") >>= pure _positionLine <- (o .: "line") >>= pure _positionCol <- (o .: "col") >>= pure pure (MkPosition {..})) -- | Default 'Hashable' instance via 'Generic'. -- -- @since 0.1.0 instance Hashable Position -- | Default 'NFData' instance via 'Generic'. -- -- @since 0.1.0 instance NFData Position -- | Default 'SC.Serial' instance via 'Generic'. -- -- @since 0.1.0 instance (Monad m, SC.Serial m Text) => SC.Serial m Position -- | Default 'SC.CoSerial' instance via 'Generic'. -- -- @since 0.1.0 instance (Monad m, SC.CoSerial m Text) => SC.CoSerial m Position -------------------------------------------------------------------------------- -- | A line/column offset into a file. -- -- @since 0.1.0 type Offset = (Line, Column) -- | Compare two 'Offset's in lexicographic order (i.e.: the 'Column' is -- ignored unless they are on the same 'Line'). -- -- @since 0.1.0 compareOffset :: Offset -> Offset -> Ordering compareOffset (lineX, colX) (lineY, colY) | (lineX < lineY) = LT | (lineX > lineY) = GT | otherwise = compare colX colY -- | A lens into the 'Line' associated with an 'Offset'. -- -- For now, this is simply defined as @offsetLine = 'Lens._1'@, -- but if 'Offset' is later refactored to be an abstract data type, using -- this lens instead of 'Lens._1' will decrease the amount of code that -- breaks. -- -- @since 0.1.0 {-# INLINE offsetLine #-} offsetLine :: Lens.Lens' Offset Line offsetLine = Lens._1 -- | A lens into the 'Line' associated with an 'Offset'. -- -- Read the description of 'offsetLine' for an understanding of why this -- exists and why you should use it instead of 'Lens._2'. -- -- @since 0.1.0 {-# INLINE offsetColumn #-} offsetColumn :: Lens.Lens' Offset Column offsetColumn = Lens._2 -------------------------------------------------------------------------------- -- | A line number. -- -- @since 0.1.0 type Line = Int -- | A column number. -- -- @since 0.1.0 type Column = Int -------------------------------------------------------------------------------- data Chunk = ChunkText !Text | ChunkSpace {-# UNPACK #-} !Int | ChunkLine {-# UNPACK #-} !Int deriving (Eq, Show) -------------------------------------------------------------------------------- newtype Chunks = MkChunks { fromChunks :: [Chunk] } deriving (Eq, Show) {-# INLINE chunksNil #-} chunksNil :: Chunks chunksNil = MkChunks [] chunksCons :: Chunk -> Chunks -> Chunks chunksCons = \chunk (MkChunks list) -> MkChunks (go chunk list) where {-# INLINE go #-} go (ChunkSpace m) (ChunkSpace n : rest) = ChunkSpace (m + n) : rest go (ChunkLine m) (ChunkLine n : rest) = ChunkLine (m + n) : rest go (ChunkText a) (ChunkText b : rest) = ChunkText (a <> b) : rest go other list = other : list {-# INLINE chunksAddChar #-} chunksAddChar :: Char -> Chunks -> Chunks chunksAddChar '\n' = chunksCons (ChunkLine 1) chunksAddChar '\r' = id chunksAddChar c | isSpace c = chunksCons (ChunkSpace 1) chunksAddChar c = chunksCons (ChunkText (Text.singleton c)) -------------------------------------------------------------------------------- removeWhitespace :: (Maybe Misc.Path, Line, Column) -> Text -> [Located Text] removeWhitespace (file, initLine, initCol) = go .> Data.Maybe.catMaybes .> map makeLoc where go :: Text -> [Maybe (Line, Column, Text)] go text = ST.runST $ do ref <- ST.newSTRef (initLine, initCol) Text.foldr chunksAddChar chunksNil text |> fromChunks |> mapM (applyChunk ref) applyChunk :: STRef s (Line, Column) -> Chunk -> ST s (Maybe (Line, Column, Text)) applyChunk ref = \case ChunkLine n -> do ST.modifySTRef' ref ((+ n) *** const 0) pure Nothing ChunkSpace n -> do ST.modifySTRef' ref (second (+ n)) pure Nothing ChunkText t -> do (line, column) <- ST.readSTRef ref ST.modifySTRef' ref (second (+ Text.length t)) pure (Just (line, column, t)) {-# INLINE makeLoc #-} makeLoc :: (Line, Column, Text) -> Located Text makeLoc (line, col, text) = makeLocated (MkPosition file line col) text --------------------------------------------------------------------------------