--
-- Human exchangable identifiers and locators
--
-- Copyright © 2011-2018 Operational Dynamics Consulting, Pty Ltd
--
-- The code in this file, and the program it is a part of, is
-- made available to you by its authors as open source software:
-- you can redistribute it and/or modify it under the terms of
-- the BSD licence.
--
-- This code originally licenced GPLv2. Relicenced BSD3 on 2 Jan 2014.
--
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE TypeApplications #-}
module Data.Locator.English16
( Locator(..)
, English16(..)
, fromEnglish16
, toEnglish16
, toEnglish16a
, hashStringToEnglish16a
-- Deprecated
, fromLocator16
, toLocator16
, toLocator16a
, hashStringToLocator16a
) where
import Prelude hiding (toInteger)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.List (mapAccumL)
import Data.Set (Set)
import qualified Data.Set as Set
import Numeric (showIntAtBase)
import Data.Locator.Common
--
-- | A symbol set with sixteen uniquely pronounceable digits.
--
-- The fact there are sixteen symbols is more an indication of a certain degree
-- of bullheaded-ness on the part of the author, and less of any kind of actual
-- requirement. We might have a slighly better readback score if we dropped to
-- 15 or 14 unique characters. It does mean you can match up with hexidecimal,
-- which is not entirely without merit.
--
-- The grouping of letters and numbers was the hard part; having come up with
-- the set and deconflicted the choices, the ordering is then entirely
-- arbitrary. Since there are some numbers, might as well have them at the same
-- place they correspond to in base 10; the letters were then allocated in
-- alpha order in the remaining slots.
--
{-
-- 0 Conflicts with @\'O\'@ obviously, and @\'Q\'@ often enough
--
-- 2 @\'U\'@, @\'W\'@, and @\'2\'@. @\'W\'@ is disqualifed because of
-- the way Australians butcher double-this and triple-that. \"Double
-- @\'U\'@\" or \"@\'W\'@\"?
--
-- C @\'B\'@, @\'C\'@, @\'D\'@, @\'E\'@, @\'G\'@, @\'P\'@, @\'T\'@,
-- @\'V\'@, and @\'3\'@ plus @\'Z\'@ because Americans can't pronounce
-- Zed properly.
--
-- 4 @\'4\'@ and @\'5\'@ are often confused, and @\'5\'@, definitely
-- out due to its collision with @\'I\'@ when spoken and @\'S\'@ in
-- writing.
--
-- F @\'F\'@ and @\'S\'@ are notoriously confused, making the choice of
-- @\'F\'@ borderline, but @\'S\'@ is already disqualified for looking
-- like @\'5\'@.
--
-- K group of @\'A\'@, @\'J\'@, @\'K\'@.
--
-- L @\'L\'@ has good phonetics, and as long as it's upper case (which
-- the whole 'English16' symbol set is) there's no conflict with
-- @\'1\'@.
--
-- M choice from @\'M\'@ and @\'N\'@; the latter is a little too close
-- to @\'7\'@.
--
-- X choice from @\'X\'@ and @\'6\'@.
--
-- Y choice from @\'I\'@, @\'Y\'@, @\'5\'@. @\'I\'@ is out for the
-- usual reason of being similar to @\'1\'@.
-}
data English16
= Zero -- ^ @\'0\'@ /0th/
| One -- ^ @\'1\'@ /1st/
| Two -- ^ @\'2\'@ /2nd/
| Charlie -- ^ @\'C\'@ /3rd/
| Four -- ^ @\'4\'@ /4th/
| Foxtrot -- ^ @\'F\'@ /5th/
| Hotel -- ^ @\'H\'@ /6th/
| Seven -- ^ @\'7\'@ /7th/
| Eight -- ^ @\'8\'@ /8th/
| Nine -- ^ @\'9\'@ /9th/
| Kilo -- ^ @\'K\'@ /10th/
| Lima -- ^ @\'L\'@ /11th/
| Mike -- ^ @\'M\'@ /12th/
| Romeo -- ^ @\'R\'@ /13th/
| XRay -- ^ @\'X\'@ /14th/
| Yankee -- ^ @\'Y\'@ /15th/
deriving (Eq, Ord, Enum, Bounded)
instance Locator English16 where
locatorToDigit :: English16 -> Char
locatorToDigit x =
case x of
Zero -> '0'
One -> '1'
Two -> '2'
Charlie -> 'C'
Four -> '4'
Foxtrot -> 'F'
Hotel -> 'H'
Seven -> '7'
Eight -> '8'
Nine -> '9'
Kilo -> 'K'
Lima -> 'L'
Mike -> 'M'
Romeo -> 'R'
XRay -> 'X'
Yankee -> 'Y'
digitToLocator :: Char -> English16
digitToLocator c =
case c of
'0' -> Zero
'1' -> One
'2' -> Two
'C' -> Charlie
'4' -> Four
'F' -> Foxtrot
'H' -> Hotel
'7' -> Seven
'8' -> Eight
'9' -> Nine
'K' -> Kilo
'L' -> Lima
'M' -> Mike
'R' -> Romeo
'X' -> XRay
'Y' -> Yankee
_ -> error "Illegal digit"
instance Show English16 where
show x = [c]
where
c = locatorToDigit x
--
-- | Given a number, convert it to a string in the English16 base 16 symbol
-- alphabet. You can use this as a replacement for the standard \'0\'-\'9\'
-- \'A\'-\'F\' symbols traditionally used to express hexidemimal, though really
-- the fact that we came up with 16 total unique symbols was a nice
-- co-incidence, not a requirement.
--
toEnglish16 :: Int -> String
toEnglish16 x =
showIntAtBase 16 (represent Yankee) x ""
--
-- | Represent a number in English16a format. This uses the Locator16 symbol
-- set, and additionally specifies that no symbol can be repeated. The /a/ in
-- Locator16a represents that this transformation is done on the cheap; when
-- converting if we end up with \'9\' \'9\' we simply pick the subsequent digit
-- in the enum, in this case getting you \'9\' \'K\'.
--
-- Note that the transformation is /not/ reversible. A number like @4369@
-- (which is @0x1111@, incidentally) encodes as @12C4@. So do @4370@, @4371@,
-- and @4372@. The point is not uniqueness, but readibility in adverse
-- conditions. So while you can count locators, they don't map continuously to
-- base10 integers.
--
-- The first argument is the number of digits you'd like in the locator; if the
-- number passed in is less than 16^limit, then the result will be padded.
--
-- >>> toEnglish16a 6 4369
-- 12C40F
--
toEnglish16a :: Int -> Int -> String
toEnglish16a limit n
| limit > 16 = error "Can only request a maximum of 16 English16a characters, not " ++ (show limit)
| otherwise =
let
n' = abs n
ls = convert n' (replicate limit minBound) :: [English16]
(_,us) = mapAccumL uniq Set.empty ls
in
map locatorToDigit (take limit us)
where
convert :: Locator α => Int -> [α] -> [α]
convert 0 xs = xs
convert i xs =
let
(d,r) = divMod i 16
x = toEnum r
in
convert d (x:xs)
uniq :: Locator α => Set α -> α -> (Set α, α)
uniq s x =
if Set.member x s
then uniq s (subsequent x)
else (Set.insert x s, x)
subsequent :: Locator α => α -> α
subsequent x =
if x == maxBound
then minBound
else succ x
--
-- | Given a number encoded in Locator16, convert it back to an integer.
--
fromEnglish16 :: [Char] -> Int
fromEnglish16 ss =
foldl (multiply Yankee) 0 ss
--
-- | Take an arbitrary sequence of bytes, hash it with SHA1, then format as a
-- short @digits@-long Locator16 string.
--
-- >>> hashStringToLocator16a 6 "Hello World"
-- M48HR0
--
hashStringToEnglish16a :: Int -> ByteString -> ByteString
hashStringToEnglish16a limit s' =
let
s = S.unpack s'
n = digest s -- SHA1 hash
r = mod n upperBound -- trim to specified number of base 16 chars
x = toLocator16a limit r -- express in locator16
b' = S.pack x
in
b'
where
upperBound = 16 ^ limit
toLocator16 :: Int -> String
toLocator16 = toEnglish16
{-# DEPRECATED toLocator16 "Use toEnglish16 instead" #-}
toLocator16a :: Int -> Int -> String
toLocator16a = toEnglish16a
{-# DEPRECATED toLocator16a "Use toEnglish16a instead" #-}
fromLocator16 :: [Char] -> Int
fromLocator16 = fromEnglish16
{-# DEPRECATED fromLocator16 "Use fromEnglish16 instead" #-}
hashStringToLocator16a :: Int -> ByteString -> ByteString
hashStringToLocator16a = hashStringToEnglish16a
{-# DEPRECATED hashStringToLocator16a "Use hashStringToEnglish16a instead" #-}