{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE ViewPatterns #-}

-- | Guess which 'License' a piece of license text corresponds to.
--
-- Two strategies are tried in order: a prefix match against a small table
-- of well-known license titles, then a normalized Levenshtein comparison
-- against the texts in 'licenses'.
module Data.License.Infer
  ( License(..)
  , inferLicense
  ) where

import Control.Applicative
import Control.Monad
import Data.Foldable
import Data.Char
import Data.List
import Data.Ord (comparing)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Metrics

import Data.License.SpdxLicenses (licenses)
import Data.License.Type

-- | Infer a license from the given text.
--
-- The title-based match ('inferLicenseByName') is attempted first; only when
-- it yields 'Nothing' is the similarity-based match
-- ('inferLicenseByLevenshtein') consulted.
inferLicense :: String -> Maybe License
inferLicense xs = inferLicenseByName xs <|> inferLicenseByLevenshtein xs

-- | Match the normalized input against the normalized titles in
-- 'licenseNames', returning the first license whose title is a prefix of
-- the input.
inferLicenseByName :: String -> Maybe License
inferLicenseByName (normalize -> xs) = asum $ map (matchName xs) licenseNames

-- | Yield the license when its (already normalized) title is a prefix of the
-- (already normalized) input.
matchName :: String -> (License, String) -> Maybe License
matchName xs (license, name) = license <$ guard (name `isPrefixOf` xs)

-- | Licenses paired with their title line, passed through 'normalize' so
-- they can be compared against normalized input.
licenseNames :: [(License, String)]
licenseNames = map (fmap normalize)
  [ (GPLv2, "GNU GENERAL PUBLIC LICENSE Version 2, June 1991")
  , (GPLv3, "GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007")
  , (LGPLv2_1, "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999")
  , (LGPLv3, "GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007")
  , (AGPLv3, "GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007")
  , (MPL_2_0, "Mozilla Public License Version 2.0")
  , (Apache_2_0, "Apache License Version 2.0, January 2004")
  ]

-- | Drop every non-alphanumeric character and lower-case the rest, so that
-- whitespace, punctuation and letter case do not affect comparisons.
normalize :: String -> String
normalize = map toLower . filter isAlphaNum

-- | Pick the entry of 'licenses' most similar to the input, accepting it
-- only when its score exceeds 0.85.
--
-- Inputs longer than 2000 characters are rejected outright
-- (NOTE(review): presumably to bound the cost of the distance computation —
-- confirm). An empty candidate list yields 'Nothing' rather than hitting the
-- partial 'maximumBy'.
inferLicenseByLevenshtein :: String -> Maybe License
inferLicenseByLevenshtein (T.pack -> xs)
  | T.length xs > 2000 = Nothing
  | otherwise =
      case probabilities xs of
        -- 'maximumBy' throws on an empty list; stay total instead.
        [] -> Nothing
        candidates ->
          case maximumBy (comparing snd) candidates of
            (license, score) | score > 0.85 -> Just license
            _ -> Nothing

-- | Score every entry of 'licenses' against the input using
-- 'levenshteinNorm', converted to 'Double'.
probabilities :: Text -> [(License, Double)]
probabilities input = map (fmap probability) licenses
  where
    probability = realToFrac . levenshteinNorm input