Skip to content

Commit c7e6fba

Browse files
wojdyrclaude
andcommitted
drg: two-phase aromaticity matching AceDRG's strict/permissive split
AceDRG uses strict aromaticity (isAromatic, NoMetal mode 0 pi count) for COD table lookups, then permissive aromaticity (isAromaticP, also checking NoMetal mode 1 and All pi counts) for output codClass names via reDoAtomCodClassNames(). Implement the same two-phase approach: - Phase 1 (strict): only NoMetal mode 0 pi, used for cod_class_no_charge and all derived lookup fields (cod_main, cod_root, nb_symb, etc.) - Phase 2 (permissive): also checks NoMetal mode 1 and All pi counts, updates cod_class for output but preserves strict lookup fields Also fixes O hybridization to use non-metal connection count (t_len) instead of total neighbor count, and removes the CCP4 metal-donor bond override that was causing more harm than good. Reduces bond value diffs from 205 to 11 across ~1259 CCD test compounds. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d01e40b commit c7e6fba

File tree

2 files changed

+99
-70
lines changed

2 files changed

+99
-70
lines changed

include/gemmi/acedrg_tables.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ struct GEMMI_DLL AcedrgTables {
483483
std::string rep;
484484
std::string s_rep;
485485
bool is_aromatic = false;
486+
bool is_aromatic_permissive = false;
486487
};
487488
struct SortMap {
488489
std::string key;

src/acedrg_tables.cpp

Lines changed: 98 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,28 @@ std::vector<CodAtomInfo> AcedrgTables::classify_atoms(const ChemComp& cc) const
790790
atom.cod_class_no_charge = atom.cod_class;
791791
}
792792

793+
// Phase 2: Rebuild codClass with permissive aromaticity for output.
794+
// AceDRG's reDoAtomCodClassNames() sets isAromatic = isAromaticP after lookups.
795+
// cod_class_no_charge (used for table lookups) retains the strict version.
796+
bool has_permissive_diff = false;
797+
for (const auto& ring : rings)
798+
if (ring.is_aromatic_permissive != ring.is_aromatic)
799+
has_permissive_diff = true;
800+
if (has_permissive_diff) {
801+
for (auto& ring : rings)
802+
ring.is_aromatic = ring.is_aromatic_permissive;
803+
for (auto& atom : atoms)
804+
atom.ring_rep_s.clear();
805+
set_atoms_ring_rep_s(atoms, rings);
806+
for (size_t i = 0; i < atoms.size(); ++i)
807+
set_atom_cod_class_name_new2(atoms[i], atoms[i], 2, atoms, neighbors);
808+
for (size_t i = 0; i < atoms.size(); ++i)
809+
set_special_3nb_symb2(atoms[i], atoms, neighbors);
810+
// Don't call cod_class_to_atom2 here — it would overwrite cod_main, cod_root,
811+
// nb_symb, nb2_symb, nb3_symb with permissive-aromaticity values, but these
812+
// fields must retain strict-aromaticity values for COD table lookups.
813+
}
814+
793815
return atoms;
794816
}
795817

@@ -895,10 +917,10 @@ void AcedrgTables::set_ring_aromaticity_from_bonds(
895917
const std::vector<std::vector<BondInfo>>& adj,
896918
const std::vector<CodAtomInfo>& atoms,
897919
std::vector<RingInfo>& rings) const {
898-
// AceDRG ring aromaticity used for final COD class names:
899-
// - ring must be planar (bondingIdx==2, N allowed)
900-
// - reDoAtomCodClassNames() sets isAromatic = isAromaticP, so ringRepS is
901-
// based on aromaticP (checkAromaSys(..., mode=1) with NoMetal/All π counts).
920+
// AceDRG has two-phase aromaticity:
921+
// - Strict (mode 0): only NoMetal pi count → isAromatic (used for COD table lookup)
922+
// - Permissive (mode 1): NoMetal+All pi counts → isAromaticP (used for output CIF)
923+
// Ring must be planar (bondingIdx==2, N allowed).
902924
auto count_non_mc = [&](int idx) -> int {
903925
int non_mc = 0;
904926
for (const auto& nb : adj[idx]) {
@@ -1020,6 +1042,59 @@ void AcedrgTables::set_ring_aromaticity_from_bonds(
10201042
return aN;
10211043
};
10221044

1045+
for (size_t i = 0; i < rings.size(); ++i) {
1046+
RingInfo& ring = rings[i];
1047+
ring.is_aromatic = false;
1048+
1049+
if (!is_ring_planar(ring))
1050+
continue;
1051+
1052+
// AceDRG uses strict aromaticity (mode 0) for the COD table lookup.
1053+
// Mode 0 differs from mode 1 for charged N+ with 3 connections:
1054+
// mode 0 gives 2 pi electrons, mode 1 gives 1.
1055+
// AceDRG only checks the NoMetal pi count in strict mode.
1056+
double pi1 = 0.0;
1057+
for (int idx : ring.atoms)
1058+
pi1 += count_atom_pi_no_metal(idx, 0);
1059+
if (pi1 > 0.0 && std::fabs(std::fmod(pi1, 4.0) - 2.0) < 0.001)
1060+
ring.is_aromatic = true;
1061+
if (verbose >= 2) {
1062+
std::fprintf(stderr, " ring %zu (size=%zu): pi1=%.1f aromatic=%d atoms:",
1063+
i, ring.atoms.size(), pi1, ring.is_aromatic ? 1 : 0);
1064+
for (int idx : ring.atoms)
1065+
std::fprintf(stderr, " %s", atoms[idx].id.c_str());
1066+
std::fprintf(stderr, "\n");
1067+
}
1068+
}
1069+
1070+
// AceDRG pyrole rule: if there are exactly 4 five-member rings with 4C+1N
1071+
// and they are planar, mark them aromatic even if pi-count failed.
1072+
std::vector<size_t> pyrole_rings;
1073+
for (size_t i = 0; i < rings.size(); ++i) {
1074+
const RingInfo& ring = rings[i];
1075+
if (ring.atoms.size() != 5)
1076+
continue;
1077+
int num_c = 0;
1078+
int num_n = 0;
1079+
for (int idx : ring.atoms) {
1080+
if (atoms[idx].el == El::C)
1081+
++num_c;
1082+
else if (atoms[idx].el == El::N)
1083+
++num_n;
1084+
}
1085+
if (num_c == 4 && num_n == 1)
1086+
pyrole_rings.push_back(i);
1087+
}
1088+
if (pyrole_rings.size() == 4) {
1089+
for (size_t i : pyrole_rings) {
1090+
if (is_ring_planar(rings[i]))
1091+
rings[i].is_aromatic = true;
1092+
}
1093+
}
1094+
1095+
// Permissive aromaticity (AceDRG mode 1): used for output codClass names.
1096+
// Differs from strict for charged N+ (mode 1 gives 1 pi, mode 0 gives 2),
1097+
// and also checks the "all" pi count (which always gives 1 for N+).
10231098
auto count_atom_pi_all = [&](int idx) -> double {
10241099
const auto& atom = atoms[idx];
10251100
int non_mc = count_non_mc(idx);
@@ -1114,13 +1189,12 @@ void AcedrgTables::set_ring_aromaticity_from_bonds(
11141189
return aN;
11151190
};
11161191

1117-
for (size_t i = 0; i < rings.size(); ++i) {
1118-
RingInfo& ring = rings[i];
1119-
ring.is_aromatic = false;
1120-
1192+
for (auto& ring : rings) {
1193+
ring.is_aromatic_permissive = ring.is_aromatic;
1194+
if (ring.is_aromatic)
1195+
continue;
11211196
if (!is_ring_planar(ring))
11221197
continue;
1123-
11241198
double pi1 = 0.0;
11251199
double pi2 = 0.0;
11261200
for (int idx : ring.atoms) {
@@ -1129,32 +1203,7 @@ void AcedrgTables::set_ring_aromaticity_from_bonds(
11291203
}
11301204
if ((pi1 > 0.0 && std::fabs(std::fmod(pi1, 4.0) - 2.0) < 0.001) ||
11311205
(pi2 > 0.0 && std::fabs(std::fmod(pi2, 4.0) - 2.0) < 0.001))
1132-
ring.is_aromatic = true;
1133-
}
1134-
1135-
// AceDRG pyrole rule: if there are exactly 4 five-member rings with 4C+1N
1136-
// and they are planar, mark them aromatic even if pi-count failed.
1137-
std::vector<size_t> pyrole_rings;
1138-
for (size_t i = 0; i < rings.size(); ++i) {
1139-
const RingInfo& ring = rings[i];
1140-
if (ring.atoms.size() != 5)
1141-
continue;
1142-
int num_c = 0;
1143-
int num_n = 0;
1144-
for (int idx : ring.atoms) {
1145-
if (atoms[idx].el == El::C)
1146-
++num_c;
1147-
else if (atoms[idx].el == El::N)
1148-
++num_n;
1149-
}
1150-
if (num_c == 4 && num_n == 1)
1151-
pyrole_rings.push_back(i);
1152-
}
1153-
if (pyrole_rings.size() == 4) {
1154-
for (size_t i : pyrole_rings) {
1155-
if (is_ring_planar(rings[i]))
1156-
rings[i].is_aromatic = true;
1157-
}
1206+
ring.is_aromatic_permissive = true;
11581207
}
11591208
}
11601209

@@ -1751,10 +1800,21 @@ void AcedrgTables::set_atoms_bonding_and_chiral_center(
17511800
atom.bonding_idx = 1;
17521801
}
17531802
} else if (atom.el == El::O) {
1754-
if (neighbors[atom.index].size() == 2) {
1803+
// Use t_len (non-metal connection count) like AceDRG, which
1804+
// excludes metal bonds from connAtoms before hybridization assignment.
1805+
if (t_len == 2) {
17551806
atom.bonding_idx = 3;
1756-
} else if (neighbors[atom.index].size() == 1) {
1757-
if (neighbors[neighbors[atom.index][0]].size() != 1)
1807+
} else if (t_len == 1) {
1808+
// Find the first non-metal neighbor and check its non-metal connections
1809+
int first_nb = -1;
1810+
for (int nb : neighbors[atom.index])
1811+
if (!atoms[nb].is_metal) { first_nb = nb; break; }
1812+
int nb_non_metal = 0;
1813+
if (first_nb >= 0)
1814+
for (int nnb : neighbors[first_nb])
1815+
if (!atoms[nnb].is_metal)
1816+
nb_non_metal++;
1817+
if (nb_non_metal != 1)
17581818
atom.bonding_idx = 2;
17591819
else
17601820
atom.bonding_idx = 3;
@@ -2728,22 +2788,6 @@ void AcedrgTables::fill_restraints(ChemComp& cc) const {
27282788
}
27292789
return false;
27302790
};
2731-
// CCP4 override for heavy-atom bonds adjacent to metal-coordinated donors.
2732-
// Excludes X-H bonds and bonds involving P/As, whose bonds (e.g.
2733-
// phosphate P-O) are well characterized by the multilevel tables.
2734-
if (idx1 >= 0 && idx2 >= 0 &&
2735-
!atom_info[idx1].is_metal && !atom_info[idx2].is_metal &&
2736-
atom_info[idx1].el != El::H && atom_info[idx2].el != El::H &&
2737-
atom_info[idx1].el != El::P && atom_info[idx2].el != El::P &&
2738-
atom_info[idx1].el != El::As && atom_info[idx2].el != El::As) {
2739-
auto is_donor = [](const CodAtomInfo& a) {
2740-
return a.metal_connectivity > 0 &&
2741-
(a.el == El::N || a.el == El::O || a.el == El::S ||
2742-
a.el == El::P || a.el == El::As || a.el == El::Se);
2743-
};
2744-
if (is_donor(atom_info[idx1]) || is_donor(atom_info[idx2]))
2745-
apply_ccp4(true);
2746-
}
27472791
// CCP4 energetic library fallback
27482792
if (std::isnan(bond.value))
27492793
apply_ccp4(false);
@@ -3003,22 +3047,6 @@ void AcedrgTables::fill_restraints(ChemComp& cc,
30033047
}
30043048
return false;
30053049
};
3006-
// CCP4 override for heavy-atom bonds adjacent to metal-coordinated donors.
3007-
// Excludes X-H bonds and bonds involving P/As, whose bonds (e.g.
3008-
// phosphate P-O) are well characterized by the multilevel tables.
3009-
if (idx1 >= 0 && idx2 >= 0 &&
3010-
!atom_info[idx1].is_metal && !atom_info[idx2].is_metal &&
3011-
atom_info[idx1].el != El::H && atom_info[idx2].el != El::H &&
3012-
atom_info[idx1].el != El::P && atom_info[idx2].el != El::P &&
3013-
atom_info[idx1].el != El::As && atom_info[idx2].el != El::As) {
3014-
auto is_donor = [](const CodAtomInfo& a) {
3015-
return a.metal_connectivity > 0 &&
3016-
(a.el == El::N || a.el == El::O || a.el == El::S ||
3017-
a.el == El::P || a.el == El::As || a.el == El::Se);
3018-
};
3019-
if (is_donor(atom_info[idx1]) || is_donor(atom_info[idx2]))
3020-
apply_ccp4(true);
3021-
}
30223050
// CCP4 energetic library fallback
30233051
if (std::isnan(bond.value))
30243052
apply_ccp4(false);

0 commit comments

Comments
 (0)