#!/usr/bin/env python
import numpy as np
import basis_set_exchange as bse
import apdft
import os
import itertools as it
import pandas as pd
#: Conversion factor from Bohr in Angstrom.
angstrom = 1 / 0.52917721067
#: Conversion factor from electron charges and Angstrom to Debye
debye = 1 / 0.20819433
[docs]class Coulomb(object):
""" Collects functions for Coulomb interaction."""
[docs] @staticmethod
def nuclei_nuclei(coordinates, charges):
""" Calculates the nuclear-nuclear interaction energy from Coulomb interaction.
Sign convention assumes positive charges for nuclei.
Args:
coordinates: A (3,N) array of nuclear coordinates :math:`\\mathbf{r_i}`. [Angstrom]
charges: A N array of point charges :math:`q_i`. [e]
Returns:
Coulombic interaction energy. [Hartree]
"""
natoms = len(coordinates)
ret = 0.0
for i in range(natoms):
for j in range(i + 1, natoms):
d = np.linalg.norm((coordinates[i] - coordinates[j]) * angstrom)
ret += charges[i] * charges[j] / d
return ret
[docs] @staticmethod
def nuclear_potential(coordinates, charges, at):
natoms = len(coordinates)
ret = 0.0
for i in range(natoms):
if i == at:
continue
d = np.linalg.norm((coordinates[i] - coordinates[at]) * angstrom)
ret += charges[i] / d
return ret
[docs]class Dipoles(object):
""" Collects functions regarding the calculation of dipole moments. This code follows the physics convention of the sign: the dipole moment vector points from the negative charge center to the positive charge center."""
[docs] @staticmethod
def point_charges(reference_point, coordinates, charges):
""" Calculates the dipole moment of point charges.
Note that for sets of point charges of a net charge, the resulting dipole moment depends on the chosen reference point. A common choice in the molecular context is the center of mass.
Sign convention is such that nuclei should be given as positive charges.
.. math::
\\mathbf{p}(\\mathbf{r}) = \\sum_I q_i(\\mathbf{r_i}-\\mathbf{r})
Args:
reference_point: A 3 array of the reference point :math:`\\mathbf{r}`. [Angstrom]
coordinates: A (3,N) array of point charge coordinates :math:`\\mathbf{r_i}`. [Angstrom]
charges: A N array of point charges :math:`q_i`. [e]
Returns:
Dipole moment :math:`\\mathbf{p}`. [Debye]
"""
shift = coordinates - reference_point
return np.sum(shift.T * charges, axis=1) * debye
[docs] @staticmethod
def electron_density(reference_point, coordinates, electron_density):
""" Calculates the dipole moment of a charge distribution.
Note that for a charge density, the resulting dipole momennt depends on the chosen reference point. A common choice in the molecular context is the center of mass.
Electron density is a positive quantity.
.. math::
\\mathbf{p}(\\mathbf{r}) = \\int_\\Omega \\rho(\\mathbf{r_i})(\\mathbf{r_i}-\\mathbf{r})
Args:
reference_point: A 3 array of the reference point :math:`\\mathbf{r}`. [Angstrom]
coordinates: A (3,N) array of grid coordinates :math:`\\mathbf{r_i}`. [Angstrom]
electron_density: A N array of electron density values :math:`\\rho` at `coordinates`. [e/Angstrom^3]
Returns:
Dipole moment :math:`\\mathbf{p}`. [Debye]
"""
shift = coordinates - reference_point
return -np.sum(shift.T * electron_density, axis=1) * debye
[docs]def charge_to_label(Z):
""" Converts a nuclear charge to an element label.
Uncharged (ghost) sites are assigned a dash.
Args:
Z Nuclear charge. [e]
Returns:
Element label. [String]
"""
if Z == 0:
return "-"
return bse.lut.element_sym_from_Z(Z, normalize=True)
[docs]class APDFT(object):
""" Implementation of alchemical perturbation density functional theory.
Requires a working directory `basepath` which allows for storing the intermediate calculation results."""
def __init__(
self,
highest_order,
nuclear_numbers,
coordinates,
basepath,
calculator,
max_charge=0,
max_deltaz=3,
include_atoms=None,
targetlist=None,
):
if highest_order > 2:
raise NotImplementedError()
self._orders = list(range(0, highest_order + 1))
self._nuclear_numbers = np.array(nuclear_numbers)
self._coordinates = coordinates
self._delta = 0.05
self._basepath = basepath
self._calculator = calculator
self._max_charge = max_charge
self._max_deltaz = max_deltaz
self._targetlist = targetlist
if include_atoms is None:
self._include_atoms = list(range(len(self._nuclear_numbers)))
else:
included = []
for part in include_atoms:
if isinstance(part, int):
included.append(part)
else:
included += list(
np.where(
self._nuclear_numbers == bse.lut.element_Z_from_sym(part)
)[0]
)
self._include_atoms = sorted(list(set(included)))
def _calculate_delta_Z_vector(self, numatoms, order, sites, direction):
baseline = np.zeros(numatoms)
if order > 0:
sign = {"up": 1, "dn": -1}[direction] * self._delta
baseline[list(sites)] += sign
return baseline
[docs] def prepare(self, explicit_reference=False):
""" Builds a complete folder list of all relevant calculations."""
if os.path.isdir("QM"):
apdft.log.log(
"Input folder exists. Reusing existing data.", level="warning"
)
commands = []
for order in self._orders:
# only upper triangle with diagonal
for combination in it.combinations_with_replacement(
self._include_atoms, order
):
if len(combination) == 2 and combination[0] == combination[1]:
continue
if order > 0:
label = "-" + "-".join(map(str, combination))
directions = ["up", "dn"]
else:
directions = ["cc"]
label = "-all"
for direction in directions:
path = "QM/order-%d/site%s-%s" % (order, label, direction)
commands.append("( cd %s && bash run.sh )" % path)
if os.path.isdir(path):
continue
os.makedirs(path)
charges = self._nuclear_numbers + self._calculate_delta_Z_vector(
len(self._nuclear_numbers), order, combination, direction
)
inputfile = self._calculator.get_input(
self._coordinates,
self._nuclear_numbers,
charges,
None,
includeonly=self._include_atoms,
)
with open("%s/run.inp" % path, "w") as fh:
fh.write(inputfile)
with open("%s/run.sh" % path, "w") as fh:
fh.write(
self._calculator.get_runfile(
self._coordinates, self._nuclear_numbers, charges, None
)
)
if explicit_reference:
targets = self.enumerate_all_targets()
apdft.log.log(
"All targets listed for comparison run.",
level="info",
count=len(targets),
)
for target in targets:
path = "QM/comparison-%s" % ("-".join(map(str, target)))
os.makedirs(path, exist_ok=True)
inputfile = self._calculator.get_input(
self._coordinates, self._nuclear_numbers, target, None
)
with open("%s/run.inp" % path, "w") as fh:
fh.write(inputfile)
with open("%s/run.sh" % path, "w") as fh:
fh.write(
self._calculator.get_runfile(
self._coordinates, self._nuclear_numbers, target, None
)
)
commands.append("( cd %s && bash run.sh )" % path)
# write commands
with open("commands.sh", "w") as fh:
fh.write("\n".join(commands))
def _get_stencil_coefficients(self, deltaZ, shift):
""" Calculates the prefactors of the density terms outlined in the documentation of the implementation, e.g. alpha and beta.
In general, this collects all terms of the taylor expansion for one particular target and returns their coefficients.
For energies, the n-th order derivative of the density is divided by :math:`(n+1)!`, while the target density is obtained
from a regular Taylor expansion, i.e. the density derivative is divided by :math:`n!`. Therefore, a `shift` of 1 returns
energy coefficients and a `shift` of 0 returns density coefficients.
Args:
self: APDFT instance to obtain the stencil from.
deltaZ: Array of length N. Target system as described by the change in nuclear charges. [e]
shift: Integer. Shift of the factorial term in the energy expansion or density expansion.
"""
# build alphas
N = len(self._include_atoms)
nvals = {0: 1, 1: N * 2, 2: N * (N - 1)}
alphas = np.zeros((sum([nvals[_] for _ in self._orders]), len(self._orders)))
# test input
if N != len(deltaZ):
raise ValueError(
"Mismatch of array lengths: %d dZ values for %d nuclei."
% (len(deltaZ), N)
)
# order 0
if 0 in self._orders:
alphas[0, 0] = 1
# order 1
if 1 in self._orders:
prefactor = 1 / (2 * self._delta) / np.math.factorial(1 + shift)
for siteidx in range(N):
alphas[1 + siteidx * 2, 1] += prefactor * deltaZ[siteidx]
alphas[1 + siteidx * 2 + 1, 1] -= prefactor * deltaZ[siteidx]
# order 2
if 2 in self._orders:
pos = 1 + N * 2 - 2
for siteidx_i in range(N):
for siteidx_j in range(siteidx_i, N):
if siteidx_i != siteidx_j:
pos += 2
if deltaZ[siteidx_j] == 0 or deltaZ[siteidx_i] == 0:
continue
if self._include_atoms[siteidx_j] > self._include_atoms[siteidx_i]:
prefactor = (1 / (2 * self._delta ** 2)) / np.math.factorial(
2 + shift
)
prefactor *= deltaZ[siteidx_i] * deltaZ[siteidx_j]
alphas[pos, 2] += prefactor
alphas[pos + 1, 2] += prefactor
alphas[0, 2] += 2 * prefactor
alphas[1 + siteidx_i * 2, 2] -= prefactor
alphas[1 + siteidx_i * 2 + 1, 2] -= prefactor
alphas[1 + siteidx_j * 2, 2] -= prefactor
alphas[1 + siteidx_j * 2 + 1, 2] -= prefactor
if self._include_atoms[siteidx_j] == self._include_atoms[siteidx_i]:
prefactor = (1 / (self._delta ** 2)) / np.math.factorial(
2 + shift
)
prefactor *= deltaZ[siteidx_i] * deltaZ[siteidx_j]
alphas[0, 2] -= 2 * prefactor
alphas[1 + siteidx_i * 2, 2] += prefactor
alphas[1 + siteidx_j * 2 + 1, 2] += prefactor
return alphas
[docs] def get_epn_coefficients(self, deltaZ):
""" EPN coefficients are the weighting of the electronic EPN from each of the finite difference calculations.
The weights depend on the change in nuclear charge, i.e. implicitly on reference and target molecule as well as the finite difference stencil employed."""
return self._get_stencil_coefficients(deltaZ, 1)
def _print_energies(self, targets, energies, comparison_energies):
for position in range(len(targets)):
targetname = APDFT._get_target_name(targets[position])
kwargs = dict()
for order in self._orders:
kwargs["order%d" % order] = energies[position][order]
if comparison_energies is not None:
kwargs["reference"] = comparison_energies[position]
kwargs["error"] = energies[position][-1] - kwargs["reference"]
apdft.log.log(
"Energy calculated",
level="RESULT",
value=energies[position][-1],
kind="total_energy",
target=targets[position],
targetname=targetname,
**kwargs
)
def _print_dipoles(self, targets, dipoles, comparison_dipoles):
if comparison_dipoles is None:
for target, dipole in zip(targets, dipoles):
targetname = APDFT._get_target_name(target)
kwargs = dict()
for order in self._orders:
kwargs["order%d" % order] = list(dipole[:, order])
apdft.log.log(
"Dipole calculated",
level="RESULT",
kind="total_dipole",
value=list(dipole[:, -1]),
target=target,
targetname=targetname,
**kwargs
)
else:
for target, dipole, comparison in zip(targets, dipoles, comparison_dipoles):
targetname = APDFT._get_target_name(target)
apdft.log.log(
"Dipole calculated",
level="RESULT",
kind="total_dipole",
reference=list(comparison),
value=list(dipole),
target=target,
targetname=targetname,
)
@staticmethod
def _get_target_name(target):
return ",".join([apdft.physics.charge_to_label(_) for _ in target])
[docs] def get_folder_order(self):
""" Returns a static order of calculation folders to build the individual derivative entries.
To allow for a more efficient evaluation of APDFT, terms are collected and most of the evaluation
is done with the combined cofficients of those terms. This requires the terms to be handled in a certain
fixed order that is stable in the various parts of the code. Depending on the selected expansion order, this
function builds the list of folders to be included.
Returns: List of strings, the folder names."""
folders = []
# order 0
folders.append("%s/QM/order-0/site-all-cc/" % self._basepath)
# order 1
if 1 in self._orders:
for site in self._include_atoms:
folders.append("%s/QM/order-1/site-%d-up/" % (self._basepath, site))
folders.append("%s/QM/order-1/site-%d-dn/" % (self._basepath, site))
# order 2
if 2 in self._orders:
for site_i in self._include_atoms:
for site_j in self._include_atoms:
if site_j <= site_i:
continue
folders.append(
"%s/QM/order-2/site-%d-%d-up/"
% (self._basepath, site_i, site_j)
)
folders.append(
"%s/QM/order-2/site-%d-%d-dn/"
% (self._basepath, site_i, site_j)
)
return folders
[docs] def get_epn_matrix(self):
""" Collects :math:`\int_Omega rho_i(\mathbf{r}) /|\mathbf{r}-\mathbf{R}_I|`. """
N = len(self._include_atoms)
folders = self.get_folder_order()
coeff = np.zeros((len(folders), N))
def get_epn(folder, order, direction, combination):
res = 0.0
charges = self._nuclear_numbers + self._calculate_delta_Z_vector(
len(self._nuclear_numbers), order, combination, direction
)
try:
res = self._calculator.get_epn(
folder, self._coordinates, self._include_atoms, charges
)
except ValueError:
apdft.log.log(
"Calculation with incomplete results.",
level="error",
calulation=folder,
)
except FileNotFoundError:
apdft.log.log(
"Calculation is missing a result file.",
level="error",
calculation=folder,
)
return res
# order 0
pos = 0
# order 0
coeff[pos, :] = get_epn(folders[pos], 0, "up", 0)
pos += 1
# order 1
if 1 in self._orders:
for site in self._include_atoms:
coeff[pos, :] = get_epn(folders[pos], 1, "up", [site])
coeff[pos + 1, :] = get_epn(folders[pos + 1], 1, "dn", [site])
pos += 2
# order 2
if 2 in self._orders:
for site_i in self._include_atoms:
for site_j in self._include_atoms:
if site_j <= site_i:
continue
coeff[pos, :] = get_epn(folders[pos], 2, "up", [site_i, site_j])
coeff[pos + 1, :] = get_epn(
folders[pos + 1], 2, "dn", [site_i, site_j]
)
pos += 2
return coeff
[docs] def get_linear_density_coefficients(self, deltaZ):
""" Obtains the finite difference coefficients for a property linear in the density.
Args:
deltaZ: Array of integers of length N. Target system expressed in the change in nuclear charges from the reference system. [e]
Returns:
Vector of coefficients."""
return self._get_stencil_coefficients(deltaZ, 0)
[docs] def enumerate_all_targets(self):
""" Builds a list of all possible targets.
Note that the order is not guaranteed to be stable.
Args:
self: Class instance from which the total charge and number of sites is determined.
Returns:
A list of lists with the integer nuclear charges."""
# there might be a user-specified explicit list
if self._targetlist is not None:
return self._targetlist
# Generate targets
if self._max_deltaz is None:
around = None
limit = None
else:
around = np.array(self._nuclear_numbers)
limit = self._max_deltaz
res = []
nsites = len(self._nuclear_numbers)
nprotons = sum(self._nuclear_numbers)
for shift in range(-self._max_charge, self._max_charge + 1):
if nprotons + shift < 1:
continue
res += apdft.math.IntegerPartitions.partition(
nprotons + shift, nsites, around, limit
)
# filter for included atoms
ignore_atoms = list(
set(range(len(self._nuclear_numbers))) - set(self._include_atoms)
)
if len(self._include_atoms) != len(self._nuclear_numbers):
res = [
_
for _ in res
if [_[idx] for idx in ignore_atoms]
== [self._nuclear_numbers[idx] for idx in ignore_atoms]
]
return res
[docs] def estimate_cost_and_coverage(self):
""" Estimates number of single points (cost) and number of targets (coverage).
Args:
self: Class instance from which the total charge and number of sites is determined.
Returns:
Tuple of ints: number of single points, number of targets."""
N = len(self._include_atoms)
cost = sum({0: 1, 1: 2 * N, 2: N * (N - 1)}[_] for _ in self._orders)
coverage = len(self.enumerate_all_targets())
return cost, coverage
[docs] def get_energy_from_reference(self, nuclear_charges, is_reference_molecule=False):
""" Retreives the total energy from a QM reference.
Args:
nuclear_charges: Integer list of nuclear charges. [e]
Returns:
The total energy. [Hartree]"""
if is_reference_molecule:
return self._calculator.get_total_energy(
"%s/QM/order-0/site-all-cc" % self._basepath
)
else:
return self._calculator.get_total_energy(
"%s/QM/comparison-%s"
% (self._basepath, "-".join(map(str, nuclear_charges)))
)
[docs] def get_linear_density_matrix(self, propertyname):
""" Retrieves the value matrix for properties linear in density.
Valid properties are: ELECTRONIC_DIPOLE, IONIC_FORCE, ELECTRONIC_QUADRUPOLE.
Args:
self: APDFT instance.
propertyname: String. One of the choices above.
Returns:
(N, m) array for an m-dimensional property over N QM calculations or None if the property is not implemented with this QM code."""
functionname = "get_%s" % propertyname.lower()
try:
function = getattr(self._calculator, functionname)
except AttributeError:
return None
folders = self.get_folder_order()
results = []
for folder in folders:
try:
results.append(function(folder))
except ValueError:
apdft.log.log(
"Calculation with incomplete results.",
level="error",
calulation=folder,
)
# Only meaningful if all calculations are present.
if len(results) == len(folders):
return np.array(results)
[docs] def predict_all_targets(self):
# assert one order of targets
targets = self.enumerate_all_targets()
own_nuc_nuc = Coulomb.nuclei_nuclei(self._coordinates, self._nuclear_numbers)
energies = np.zeros((len(targets), len(self._orders)))
dipoles = np.zeros((len(targets), 3, len(self._orders)))
# get base information
refenergy = self.get_energy_from_reference(
self._nuclear_numbers, is_reference_molecule=True
)
epn_matrix = self.get_epn_matrix()
dipole_matrix = self.get_linear_density_matrix("ELECTRONIC_DIPOLE")
# get target predictions
for targetidx, target in enumerate(targets):
deltaZ = target - self._nuclear_numbers
deltaZ_included = deltaZ[self._include_atoms]
alphas = self.get_epn_coefficients(deltaZ_included)
# energies
deltaEnn = Coulomb.nuclei_nuclei(self._coordinates, target) - own_nuc_nuc
for order in sorted(self._orders):
contributions = -np.multiply(
np.outer(alphas[:, order], deltaZ_included), epn_matrix
).sum()
energies[targetidx, order] = contributions
if order > 0:
energies[targetidx, order] += energies[targetidx, order - 1]
energies[targetidx, :] += deltaEnn + refenergy
# dipoles
if dipole_matrix is not None:
betas = self.get_linear_density_coefficients(deltaZ_included)
nuc_dipole = Dipoles.point_charges(
self._coordinates.mean(axis=0), self._coordinates, target
)
for order in sorted(self._orders):
ed = np.multiply(dipole_matrix, betas[:, order, np.newaxis]).sum(
axis=0
)
dipoles[targetidx, :, order] = ed
dipoles[targetidx, :, order] += np.sum(
dipoles[targetidx, :, :order]
)
dipoles[targetidx] += nuc_dipole[:, np.newaxis]
# return results
return targets, energies, dipoles
[docs] def analyse(self, explicit_reference=False):
""" Performs actual analysis and integration. Prints results"""
try:
targets, energies, dipoles = self.predict_all_targets()
except (FileNotFoundError, AttributeError):
apdft.log.log(
"At least one of the QM calculations has not been performed yet. Please run all QM calculations first.",
level="warning",
)
return
if explicit_reference:
comparison_energies = np.zeros(len(targets))
comparison_dipoles = np.zeros((len(targets), 3))
for targetidx, target in enumerate(targets):
path = "QM/comparison-%s" % "-".join(map(str, target))
try:
comparison_energies[targetidx] = self._calculator.get_total_energy(
path
)
except FileNotFoundError:
apdft.log.log(
"Comparison calculation is missing. Predictions are unaffected. Will skip this comparison.",
level="warning",
calculation=path,
target=target,
)
comparison_energies[targetidx] = np.nan
comparison_dipoles[targetidx] = np.nan
continue
except ValueError:
apdft.log.log(
"Comparison calculation is damaged. Predictions are unaffected. Will skip this comparison.",
level="warning",
calculation=path,
target=target,
)
comparison_energies[targetidx] = np.nan
comparison_dipoles[targetidx] = np.nan
continue
nd = apdft.physics.Dipoles.point_charges(
[0, 0, 0], self._coordinates, target
)
# TODO: load dipole
# comparison_dipoles[targetidx] = ed + nd
else:
comparison_energies = None
comparison_dipoles = None
self._print_energies(targets, energies, comparison_energies)
self._print_dipoles(targets, dipoles, comparison_dipoles)
# persist results to disk
targetnames = [APDFT._get_target_name(_) for _ in targets]
result_energies = {"targets": targetnames, "total_energy": energies[:, -1]}
for order in self._orders:
result_energies["total_energy_order%d" % order] = energies[:, order]
result_dipoles = {
"targets": targetnames,
"dipole_moment_x": dipoles[:, 0, -1],
"dipole_moment_y": dipoles[:, 1, -1],
"dipole_moment_z": dipoles[:, 2, -1],
}
for order in self._orders:
for didx, dim in enumerate("xyz"):
result_dipoles["dipole_moment_%s_order%d" % (dim, order)] = dipoles[
:, didx, order
]
if explicit_reference:
result_energies["reference_energy"] = comparison_energies
result_dipoles["reference_dipole_x"] = comparison_dipoles[:, 0]
result_dipoles["reference_dipole_y"] = comparison_dipoles[:, 1]
result_dipoles["reference_dipole_z"] = comparison_dipoles[:, 2]
pd.DataFrame(result_energies).to_csv("energies.csv", index=False)
pd.DataFrame(result_dipoles).to_csv("dipoles.csv", index=False)
return targets, energies, comparison_energies