-- | This module contains functions for reading the GSE one- and
-- two-electron integral data from a file, converting this data from
-- spatial to spin indices, and accessing the data.
--
-- The external interface consists of the type 'GSEData' and the
-- function 'load_gse_data'.
--
-- The Quipper distribution contains example data files
-- \"@h_1e_ascii@\" and \"@h_2e_ascii@\". These files contain enough
-- data for /M/ = 32 spin orbitals (corresponding to /M/\/2 = 16
-- spatial orbitals). Note that the example data was randomly
-- generated and is only a mock-up. In actual applications, physically
-- meaningful data should be substituted.
module Quipper.Algorithms.GSE.GSEData where
import Data.Array
import Data.Bits
import Data.Char
-- * Data abstraction
-- | A data structure describing the GSE Data - the number
-- of integrals and the functions to access the data by index.
data GSEData = GSEData {
-- | The number of spin orbitals /M/.
gse_data_M :: Int,
-- | 1-electron integrals /h/[sub p,q] in spin coordinates.
gse_data_h1 :: (Int,Int) -> Double,
-- | 2-electron integrals /h/[sub p,q,r,s] in spin coordinates.
-- Follows the physics convention for the ordering of indices.
gse_data_h2 :: (Int,Int,Int,Int) -> Double
}
instance Show (GSEData) where
show a = "GSEData { size = " ++ show (gse_data_M a) ++ " }"
-- ----------------------------------------------------------------------
-- * Reading GSE data from files
-- $ This section provides function for reading one- and two-electron
-- GSE data from files. The file formats are as follows. The file for
-- the one-electron data consists of lines of the form:
--
-- > ((i, j), h)
--
-- where /i/ and /j/ are integer indices in the range from /0/ to
-- /M/−1, and /h/ = /h/[sub i,j] is a real floating point number.
-- Please note that the file contains data for (/i/, /j/) and
-- (/j/, /i/), and that the indices /i/ and /j/ are in /spatial/
-- coordinates. The file data is sorted in order of increasing /i/,
-- then /j/.
--
-- The file for the two-electron data consists of lines of the form:
--
-- > ((i, j, k, l), h)
--
-- where /i/, /j/, /k/, and /l/ are integer indices in the range from
-- /0/ to /M/−1, and /h/ = /h/[sub i,k,l,j] is a real floating point
-- number. Please note that the indices /i/, /j/, /k/, and /l/ are in
-- /spatial/ coordinates, and the ordering of indices in the file
-- follows the /chemists'/ convention. Also, to save storage space,
-- the file only contains data for /i/ ≥ /j/, /k/ ≥ /l/, and either
-- /i/ > /k/, or /i/ = /k/ and /j/ ≥ /l/. The remaining data must be
-- inferred from symmetries. The file data is sorted in order of
-- increasing /i/, then /j/, then /k/, then /l/.
--
-- We also note that the data files, and the functions of this module
-- where noted, are the /only/ places where we use Chemists' notation
-- and spatial orbitals. The remainder of our implementation uses
-- physicists' notation and spin orbitals throughout.
-- | Read the 'GSEData' from two files. The first argument is /M/, the
-- number of spin orbitals. The second and third argument are the
-- filenames for the one-electron and two-electron data, respectively.
--
-- If the file contains data for more than /M/ spin orbitals, ignore
-- the excess data (this is useful for generating smaller problem
-- sizes for testing). In this case, only the necessary portion of the
-- file is read. If the file contains data for fewer than /M/ spin
-- orbitals, this is silently ignored, but will lead to an
-- \"undefined\" error later.
load_gse_data :: Int -> String -> String -> IO GSEData
load_gse_data size filename1 filename2 = do
content1 <- readFile filename1
content2 <- readFile filename2
let spatial_size = (size + 1) `div` 2
let spacial_data1 = parsefile1 spatial_size content1
let spacial_data2 = parsefile2 spatial_size content2
return (GSEData {
gse_data_M = size,
gse_data_h1 = spin1 $ access_1e spacial_data1,
gse_data_h2 = spin2 $ access_2e spacial_data2
})
-- ----------------------------------------------------------------------
-- * Low-level access functions
-- | Access 1-electron integral data. The indices are spatial, i.e.,
-- they run from 0 to /M/\/2 − 1.
access_1e :: Array (Int, Int) e -> (Int, Int) -> e
access_1e arr tuple = arr ! tuple
-- | Access 2-electron integral data. The input array is sparse (i.e.,
-- contains only one representative of each equivalence class), and
-- uses chemists' conventions. The output uses physicists'
-- conventions. The indices in both input and output are spatial,
-- i.e., they run from 0 to /M/\/2 − 1.
access_2e :: Array (Int, Int, Int, Int) e -> (Int, Int, Int, Int) -> e
access_2e arr (i,k,l,j) =
-- The indices are not in correct order on purpose. We
-- need to express the fact that h_prsq = h[pq|rs] = h[p,q,r,s] = h[i,j,k,l]
arr ! (swap_ijkl $ swap_kl $ swap_ij (i,j,k,l))
-- Note that because of symmetries, we have
-- h2(i,j,k,l) = h2(j,i,k,l)
-- h2(i,j,k,l) = h2(i,j,l,k)
-- h2(i,j,k,l) = h2(k,l,i,j)
-- For this reason, and to save space, the file only contains one
-- representative of each equivalence class.
where
swap_ij (i,j,k,l) = if (i < j) then (j,i,k,l) else (i,j,k,l)
swap_kl (i,j,k,l) = if (k < l) then (i,j,l,k) else (i,j,k,l)
swap_ijkl (i,j,k,l) = if ((i < k) || ((i == k) && (j < l))) then (k,l,i,j) else (i,j,k,l)
-- ----------------------------------------------------------------------
-- * Low-level parsing functions
-- | Decide whether a string is a comment. A comment is a line with
-- only whitespace characters, or where the first non-whitespace
-- character is \'\#\'.
is_comment :: String -> Bool
is_comment [] = True
is_comment ('#':t) = True
is_comment (h:t)
| isSpace h = is_comment t
| otherwise = False
-- | Extract an array from the one-electron file data. We do this
-- lazily, i.e., we stop reading as soon as enough data is found.
-- The resulting array uses spatial indices.
parsefile1 :: Int -> String -> Array (Int, Int) Double
parsefile1 size content =
array ((0,0), (n, n)) list3
where
n = size-1
list1 = [ read_line_h1 s | s <- lines content, not (is_comment s) ]
list2 = takeWhile (\((i,j),h) -> i<size) list1
list3 = filter in_range list2
in_range ((i,j),h) = i<size && j<size
read_line_h1 :: String -> ((Int,Int), Double)
read_line_h1 s = case reads s of
[(x, "")] -> x
_ -> error ("Illegal line: " ++ s ++ " -- expected format ((int, int), double)")
-- | Extract an array from the two-electron file data. We do this
-- lazily, i.e., we stop reading as soon as enough data is found. The
-- resulting array uses spatial indices in chemists' notation. Also,
-- the output array is sparse; it only contains as much data as the
-- file itself.
parsefile2 :: Int -> String -> Array (Int, Int, Int, Int) Double
parsefile2 size content =
array ((0,0,0,0), (n,n,n,n)) list3
where
n = size-1
list1 = [ read_line_h2 s | s <- lines content, not (is_comment s) ]
list2 = takeWhile (\((i,j,k,l),h) -> i<size) list1
list3 = filter in_range list2
in_range ((i,j,k,l),h) = i<size && j<size && k<size && l<size
read_line_h2 :: String -> ((Int,Int,Int,Int), Double)
read_line_h2 s = case reads s of
[(x, "")] -> x
_ -> error ("Illegal line: " ++ s ++ " -- expected format ((int, int, int, int), double)")
-- ----------------------------------------------------------------------
-- * Conversion of spin to spatial indices
-- | In the molecule we have twice as many orbitals (spin orbitals)
-- than data in the integral file (spatial orbitals). This function
-- converts /h[sub 1]/ from spatial-orbitals (/M/\/2 = 104) to spin
-- orbitals (/M/ = 208).
--
-- Spin orbitals are indexed by /p/=(/i/, σ/[sub i]/), where /i/ is a spatial
-- index and σ/[sub i]/ is a spin (up or down). For two spin indices
-- /p/=(/i/, σ/[sub i]/) and /q/=(/j/, σ/[sub j]/), the transition integral
-- h[sub pq] is given by the following formula:
--
-- \[image spin1.png]
--
-- The Hamiltonian vanishes for σ/[sub i]/ ≠ σ/[sub j]/ because we
-- assume that there is no spin orbital coupling.
--
-- Given /M/\/2 spatial orbitals, we re-map the spin orbitals to
-- integers from 0 to /M/−1 using the formula /p/ = 2/i/+σ/[sub i]/,
-- where σ[sub i] is 0 or 1.
--
-- The function 'spin1' inputs (/h[sub ij]/), the table of 1-electron
-- integrals for /M/\/2 spatial orbitals, and outputs the
-- corresponding table (/h[sub pq]/) for /M/ spin orbitals.
spin1 :: ((Int,Int) -> Double) -> ((Int,Int) -> Double)
spin1 h1 (p,q) =
if sigma_i == sigma_j
then h1 (i,j)
else 0.0
where
sigma_i = p .&. 1
i = p `div` 2
sigma_j = q .&. 1
j = q `div` 2
-- | Like 'spin1', but for 2-electron integrals. Here, the transition
-- integrals in spin coordinates are given by:
--
-- \[image spin2.png]
--
-- The Hamiltonian vanishes for σ/[sub i]/ ≠ σ/[sub l]/ or σ/[sub j]/
-- ≠ σ/[sub k]/ because we assume that there is no spin orbital
-- coupling.
--
-- The function 'spin2' inputs (/h[sub ijkl]/), the table of
-- 2-electron transition amplitudes for /M/\/2 spatial orbitals, and
-- outputs the corresponding table (/h[sub pqrs]/) for /M/ spin
-- orbitals. Index ordering follows the physicists' convention.
spin2 :: ((Int, Int, Int, Int) -> Double) -> ((Int, Int, Int, Int) -> Double)
spin2 h2 (p,q,r,s) =
if sigma_i == sigma_l && sigma_j == sigma_k
then h2 (i,j,k,l)
else 0.0
where
sigma_i = p .&. 1
i = p `div` 2
sigma_j = q .&. 1
j = q `div` 2
sigma_k = r .&. 1
k = r `div` 2
sigma_l = s .&. 1
l = s `div` 2
-- * Testing
-- | Print the /h/[sub 1] data for 1-electron integrals.
print_1e :: GSEData -> String
print_1e gse_data = unlines $ [ inner_print i j | i <- list, j <- list]
where list = [0..m-1]
inner_print i j = show (i,j) ++ " : " ++ show (h1 (i, j))
m = gse_data_M gse_data
h1 = gse_data_h1 gse_data
-- | Print the /h/[sub 2] data for 2-electron integrals.
print_2e :: GSEData -> String
print_2e gse_data = unlines $ [ inner_print i j k l | i <- list, j <- list, k <- list, l <- list]
where list = [0..m-1]
inner_print i j k l = show (i,j,k,l) ++" : " ++ show (h2 (i, j, k, l))
m = gse_data_M gse_data
h2 = gse_data_h2 gse_data
-- | A main function to test the GSEData module.
gse_data_test :: Int -> IO ()
gse_data_test n = do
gse_data <- load_gse_data n "h_1e_ascii" "h_2e_ascii"
putStr $ print_1e gse_data
putStr $ print_2e gse_data