# Source code for chemparseplot.parse.orca.geomscan
# SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
#
# SPDX-License-Identifier: MIT
"""
Parsers for the output of ORCA geometry scans, i.e. runs of input files like this:
!OPT UHF def2-SVP
%geom Scan
B 0 1 = 7.5589039543, 0.2116708996, 33
end
end
*xyzfile 0 1 h2_base.xyz
"""
import re
import chemparseplot.parse.converter as conv
import chemparseplot.parse.patterns as pat
from chemparseplot.units import Q_
# [docs]
def extract_energy_data(data: str, energy_type: str) -> tuple[Q_, Q_]:
    """
    Extract the distance/energy table for one energy type from scan output.

    The text is searched for ' Calculated Surface' followed, on the same line,
    by ``energy_type``. The two-column numeric block captured by the
    ``twocolnum`` group of ``pat.TWO_COL_NUM`` after that header is then
    converted with ``conv.np_txt`` and split into its two columns.

    Energies are returned in Hartree and distances in Bohr, as these are the
    default units used in ORCA.

    Parameters
    ----------
    data : str
        The blob of text containing energy data.
    energy_type : str
        The type of energy to search for ('Actual' or 'SCF'). The value is
        matched literally; regex metacharacters in it carry no special
        meaning.

    Returns
    -------
    tuple[Q_, Q_]
        A tuple containing two `Quantity` objects from the `pint` library.
        The first element is an array of distances in Bohr, and the second
        element is an array of energies in Hartree. Both are empty when no
        matching surface block is found in ``data``.
    """
    # Regular expression to find the energy type and the two-column data
    # following it: https://regex101.com/r/RF6b4V/2
    # The label is escaped so that it is always treated as literal text rather
    # than as part of the pattern syntax.
    # fmt: off
    pattern = (
        r".*? Calculated Surface.*?"
        rf"{re.escape(energy_type)}.*?"
    ) + pat.TWO_COL_NUM
    # fmt: on
    matchr = re.search(pattern, data, re.MULTILINE)

    # No such surface in the text: hand back empty, correctly-united arrays so
    # callers can treat "missing" and "present" results uniformly.
    if matchr is None:
        return Q_([], "bohr"), Q_([], "hartree")

    # NOTE(review): presumably np_txt yields a 2-D (rows, 2) array; the column
    # slicing below relies on that -- confirm against conv.np_txt.
    xydat = conv.np_txt(matchr.group("twocolnum"))
    return Q_(xydat[:, 0], "bohr"), Q_(xydat[:, 1], "hartree")