-- | -- Module : Network.DNS.Pattern -- Description : Generic network related types. -- -- Provides utilities and parsers for a simple domain name pattern language. {-# LANGUAGE OverloadedStrings #-} module Network.DNS.Pattern ( -- * Domain names -- $domainNames parseAbsDomain , parseAbsDomainRelax , parseDomainLabel , absDomainP , absDomainRelaxP , Domain(..) , DomainLabel(..) , pprDomain , pprDomainLabel , foldCase , foldCaseLabel -- * Pattern language -- $patterns , parsePattern , patternWorksInside , matchesPattern , domainLabelP , patternP , DomainPattern(..) , LabelPattern(..) , encodedLength , pprPattern ) where import Control.Monad (when) import Data.Char (isAscii, ord) import Data.Coerce (coerce) import Data.Foldable (asum, foldl') import Data.Function (on) import Data.Word (Word8) import Text.Printf (printf) import Control.Applicative.Combinators import Data.Attoparsec.Text (()) import qualified Data.Attoparsec.Text as A import Data.ByteString.Internal (c2w, w2c) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TB import qualified Data.ByteString as BS -- -- * Domain names -- -- $domainNames -- -- There is no standardized presentation and parsing format for domain names. -- In this library we assume a domain name and pattern to be specified as a text with an ASCII dot @.@ acting as a separator and terminator. -- We do not admit arbitrary unicode codepoints, only ASCII is acceptable. Punycoding, if desired, must be taken care of the user. -- -- Escape sequences -- The domain name and pattern language here allows for the following escape sequences -- -- @ -- \\. gives a dot inside a label, rather than a label separator -- \\\\ gives a backslash inside a label -- \\012 gives an arbitrary octet inside a label as specified by the three octets -- @ -- -- For example: @foo\\.bar.quux.@ is a domain name comprised of two labels @foo.bar@ and @quux@ -- * Patterns -- -- $patterns -- -- Patterns can be simple absolute domain names, where labels can be replaced with either a single glob @*@ or a globstar @**@. -- A single glob will match any label in its place, where globstar will greedily match as many labels as possible. -- -- Admits the escape sequences from domain names as well as the following -- -- @ -- \\* gives an asterisk inside a label, rather than a glob/globstar. -- @ -- -- -- Note: Currently a globstar is only supported on the left-most label. -- -- Examples or valid patterns are: -- -- @ -- *.foo.bar. -- **.foo.bar. -- foo.*.bar. -- foo.bar.*. -- @ type Parser = A.Parser -- | A domain pattern. newtype DomainPattern = DomainPattern { getDomainPattern :: [LabelPattern] } deriving (Eq, Ord) -- | A domain parsed into labels. Each label is a 'BS.ByteString' rather than 'T.Text' or 'String' because a label can contain arbitrary bytes. -- However, the 'Ord' and 'Eq' instances do limited case-folding according to [RFC4343](https://datatracker.ietf.org/doc/html/rfc4343#section-3). newtype Domain = Domain { getDomain :: [DomainLabel] } deriving (Eq, Ord) -- | Newtype warpper for 'BS.ByteString' that implements case-insensitive 'Eq' and 'Ord' as per [RFC4343](https://datatracker.ietf.org/doc/html/rfc4343#section-3). newtype DomainLabel = DomainLabel { getDomainLabel :: BS.ByteString } instance Ord DomainLabel where compare = compare `on` (BS.map foldCase_ . getDomainLabel) instance Eq DomainLabel where DomainLabel l == DomainLabel r = BS.map foldCase_ l == BS.map foldCase_ r -- | Case-folding of a domain according to [RFC4343](https://datatracker.ietf.org/doc/html/rfc4343#section-3). foldCase :: Domain -> Domain foldCase (Domain ls) = Domain (foldCaseLabel <$> ls) -- | Case-folding of a domain label according to [RFC4343](https://datatracker.ietf.org/doc/html/rfc4343#section-3). foldCaseLabel :: DomainLabel -> DomainLabel foldCaseLabel (DomainLabel l) = DomainLabel (BS.map foldCase_ l) {-# INLINE foldCase_ #-} foldCase_ :: Word8 -> Word8 foldCase_ x = case x of 0x41 -> c2w 'a' 0x42 -> c2w 'b' 0x43 -> c2w 'c' 0x44 -> c2w 'd' 0x45 -> c2w 'e' 0x46 -> c2w 'f' 0x47 -> c2w 'g' 0x48 -> c2w 'h' 0x49 -> c2w 'i' 0x4a -> c2w 'j' 0x4b -> c2w 'k' 0x4c -> c2w 'l' 0x4d -> c2w 'm' 0x4e -> c2w 'n' 0x4f -> c2w 'o' 0x50 -> c2w 'p' 0x51 -> c2w 'q' 0x52 -> c2w 'r' 0x53 -> c2w 's' 0x54 -> c2w 't' 0x55 -> c2w 'u' 0x56 -> c2w 'v' 0x57 -> c2w 'w' 0x58 -> c2w 'x' 0x59 -> c2w 'y' 0x5a -> c2w 'z' _ -> x -- | Print an arbitrary domain into a presentation format. -- -- This function nearly roundtrips with 'parseAbsDomain' in the sense that octet escape sequences might change case or drop a leading zero. -- -- prop> parseAbsDomain . pretty ~~~ id pprDomain :: Domain -> T.Text pprDomain (Domain l) = TL.toStrict (TB.toLazyText build) where build :: TB.Builder build = foldl' (\buf x -> buf <> buildLabel x <> ".") mempty l -- | Print a singular domain label into a presentation format. pprDomainLabel :: DomainLabel -> T.Text pprDomainLabel = TL.toStrict . TB.toLazyText . buildLabel -- | Print domain into presentation format. -- -- This function nearly roundtrips with 'parsePattern' in the sense that octet escape sequences might change case or drop a leading zero. -- -- prop> parsePattern . pprPattern ~~~ id pprPattern :: DomainPattern -> T.Text pprPattern (DomainPattern l) = TL.toStrict (TB.toLazyText build) where build :: TB.Builder build = foldl' (\buf x -> buf <> buildLabelPattern x <> ".") mempty l buildLabel :: DomainLabel -> TB.Builder buildLabel (DomainLabel d) = BS.foldl' (\buf x -> buf <> go x) mempty d where go :: Word8 -> TB.Builder go 0x2e = "\\." go 0x5c = "\\\\" go c | c > 0x20 && c <= 0x7E = TB.singleton (w2c c) | otherwise = "\\" <> TB.fromString (printf "%03o" c) buildLabelPattern :: LabelPattern -> TB.Builder buildLabelPattern (DomLiteral d) = BS.foldl' (\buf x -> buf <> go x) mempty d where go :: Word8 -> TB.Builder go 0x2e = "\\." go 0x5c = "\\\\" go 0x2a = "\\*" go c | c > 0x20 && c <= 0x7E = TB.singleton (w2c c) | otherwise = "\\" <> TB.fromString (printf "%03o" c) buildLabelPattern DomGlob = "*" buildLabelPattern DomGlobStar = "**" -- | A pattern for a singular label. data LabelPattern = DomLiteral BS.ByteString -- ^ Represents an exact label that must be matched. | DomGlob -- ^ Represents a single asterisk glob matching any arbitrary domain at a given level. | DomGlobStar -- ^ Represents a double asterisk matching any arbitrary subdomain at a given level. deriving (Eq, Ord, Show) -- | Given a pattern and a DNS zone specified by a domain name, test whether or not the pattern -- is applicable beneath that zone. -- -- @ -- foo.*.bar. applicable inside zone quux.bar. -- foo.bar. applicable inside zone bar. -- bar. applicable inside zone bar. -- foo.bar. not applicable inside zone quux. -- @ patternWorksInside :: DomainPattern -> Domain -> Bool patternWorksInside (DomainPattern x) (Domain y) = go (reverse x) (reverse y) where go :: [LabelPattern] -> [DomainLabel] -> Bool go [DomGlobStar] _ = True go [] [] = True go [] _ls = False go _p [] = True go (p:ps) (l:ls) = labelMatchesPattern (getDomainLabel l) p && go ps ls -- | Test whether a given domain matches a 'DomainPattern' matchesPattern :: Domain -> DomainPattern -> Bool matchesPattern (Domain x) (DomainPattern y) = go (reverse x) (reverse y) where go :: [DomainLabel] -> [LabelPattern] -> Bool go [] [] = True go [] _ps = False go _ls [] = False go _ls [DomGlobStar] = True go (l:ls) (p:ps) = labelMatchesPattern (getDomainLabel l) p && go ls ps -- | Test whether a single label matches a label pattern labelMatchesPattern :: BS.ByteString -> LabelPattern -> Bool labelMatchesPattern _l DomGlob = True labelMatchesPattern l (DomLiteral p) = l == p labelMatchesPattern _l DomGlobStar = True -- | Parser for domain patterns. See 'parsePattern' for a convenince wrapper. patternP :: Parser DomainPattern patternP = asum [ do p <- litGlobStar <* A.char '.' ps <- litGlob `endBy` A.char '.' pure (DomainPattern (p:ps)) , DomainPattern [] <$ A.char '.' -- Literal pattern of the root domain "." ] where litGlobStar :: Parser LabelPattern litGlobStar = asum [ DomLiteral <$> patternLabelP , DomGlobStar <$ A.string "**" , DomGlob <$ A.char '*' ] litGlob :: Parser LabelPattern litGlob = asum [ DomLiteral <$> patternLabelP , DomGlob <$ A.char '*' ] -- | Parse a domain pattern. Convenience wrapper for 'patternP. parsePattern :: T.Text -> Either String DomainPattern parsePattern = A.parseOnly (patternP <* A.endOfInput) -- | Parse an absolute domain. Convenience wrapper for 'absDomainP'. parseAbsDomain :: T.Text -> Either String Domain parseAbsDomain = A.parseOnly (absDomainP <* A.endOfInput) -- | Parse a singular domain label. Convenience wrapper for 'domainLabelP'. parseDomainLabel :: T.Text -> Either String DomainLabel parseDomainLabel = A.parseOnly (domainLabelP <* A.endOfInput) -- | Version of parseAbsDomain that also considers a domain name without a trailing dot -- to be absolute. parseAbsDomainRelax :: T.Text -> Either String Domain parseAbsDomainRelax = A.parseOnly (absDomainRelaxP <* A.endOfInput) -- | Parser for absolute domains. See 'parseAbsDomainRelax' for a convenience warpper. -- This variant differs from 'absDomainP' in that it does not care whether the domain -- name is terminated with a dot. absDomainRelaxP :: Parser Domain absDomainRelaxP = do d <- go let l = encodedLength d when (l >= 255) (fail "domain name too long") pure d where go = Domain <$> domainLabelP `sepBy1` A.char '.' <* optional (A.char '.') -- | Calculate the wire-encoded length of a domain name. encodedLength :: Domain -> Int encodedLength (Domain labels) = sum (BS.length <$> l') + length l' where l' = coerce labels :: [BS.ByteString] -- | Parser for absolute domains. See 'parseAbsDomain' for a convenience wrapper. -- For a parser that also admits domain forms without a leading dot, see 'absDomainRelaxP'. absDomainP :: Parser Domain absDomainP = do d <- go let l = encodedLength d when (l >= 255) (fail "domain name too long") pure d where go = Domain <$> asum [ domainLabelP `endBy1` A.char '.' , [] <$ A.char '.' -- The root domain itself ] patternLabelP :: Parser BS.ByteString patternLabelP = do r <- BS.pack <$> (some labelChar) when (BS.length r >= 64) (fail "label must not be longer than 63 octets") pure r where labelChar :: Parser Word8 labelChar = do c <- A.satisfy (\x -> x /= '.' && x /= '*' && isAscii x ) "pattern label character" case c of '\\' -> escapable _ -> pure (c2w c) escapable :: Parser Word8 escapable = asum [ c2w <$> A.char '.' , c2w <$> A.char '\\' , c2w <$> A.char '*' , octal ] "escapable character" octal :: Parser Word8 octal = do o1 <- v <$> A.satisfy isOctal o2 <- v <$> A.satisfy isOctal o3 <- v <$> A.satisfy isOctal pure (fromIntegral (o1 * 64 + o2 * 8 + o3)) where v c = ord c - 48 isOctal :: Char -> Bool isOctal c = c >= '0' && c <= '7' -- | Parser for a singular domain label. See 'parseDomainLabel' for a convenince wrapper. Also see 'absDomainP'. domainLabelP :: Parser DomainLabel domainLabelP = DomainLabel . BS.pack <$> (some labelChar) where labelChar :: Parser Word8 labelChar = do c <- A.satisfy (\x -> x /= '.' && isAscii x ) "domain label character" case c of '\\' -> escapable _ -> pure (c2w c) escapable :: Parser Word8 escapable = asum [ c2w <$> A.char '.' , c2w <$> A.char '\\' , octal ] "escapable character"