Skip to content

Commit 19f192a

Browse files
committed
Move non-canonical atom name check to make_mmcif
Don't check for non-canonical atom names just yet in the dumper, but add an option to check them at make_mmcif time, defaulting to off.
1 parent d3d3e78 commit 19f192a

File tree

5 files changed

+196
-150
lines changed

5 files changed

+196
-150
lines changed

ihm/dumper.py

Lines changed: 0 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -21,87 +21,6 @@
2121
from . import restraint
2222
from . import geometry
2323

24-
# All canonical atom names for each standard residue type, as per CCD.
25-
# This is generated using the util/get_ccd_atoms.py script.
26-
KNOWN_ATOM_NAMES = {
27-
'A': {"C4'", "C2'", 'C2', "C1'", 'N7', 'H62', 'OP2', 'N3', 'C5', 'P',
28-
"H5''", 'H2', "C5'", 'H61', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8',
29-
'N9', 'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'N6',
30-
'HOP2', "O5'", "O2'", "HO3'", "H5'", "C3'", 'C6', "H4'"},
31-
'ALA': {'H2', 'HB1', 'HB3', 'HB2', 'N', 'HXT', 'O', 'CB', 'C', 'HA', 'CA',
32-
'H', 'OXT'},
33-
'ARG': {'HB2', 'CG', 'NE', 'H', 'H2', 'HH22', 'N', 'HG2', 'CA', 'NH2',
34-
'HH11', 'HG3', 'HH21', 'CZ', 'HB3', 'HXT', 'O', 'C', 'HD3', 'HH12',
35-
'CB', 'NH1', 'CD', 'HA', 'HD2', 'HE', 'OXT'},
36-
'ASN': {'H2', 'HB3', 'HD22', 'HB2', 'N', 'CG', 'O', 'CB', 'ND2', 'HXT',
37-
'C', 'HA', 'HD21', 'CA', 'OD1', 'H', 'OXT'},
38-
'ASP': {'H2', 'HB3', 'HB2', 'N', 'CG', 'O', 'CB', 'HXT', 'C', 'HA', 'OD2',
39-
'CA', 'OD1', 'HD2', 'H', 'OXT'},
40-
'C': {"C4'", "C2'", 'C2', 'O2', 'H42', 'H5', "C1'", 'OP2', 'N3', 'C5',
41-
'P', "H5''", 'H41', 'H6', "C5'", "H3'", 'C4', 'N1', 'N4', "H1'",
42-
'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'",
43-
"O2'", "HO3'", "H5'", "C3'", 'C6', "H4'"},
44-
'CYS': {'H2', 'HB3', 'HB2', 'N', 'SG', 'O', 'CB', 'HXT', 'C', 'HA', 'HG',
45-
'CA', 'H', 'OXT'},
46-
'DA': {"C4'", "C2'", 'C2', "C1'", 'N7', 'H62', 'OP2', 'N3', 'C5', 'P',
47-
"H5''", 'H2', "C5'", 'H61', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8',
48-
'N9', 'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'N6', 'HOP2',
49-
"O5'", "H2''", "HO3'", "H5'", "C3'", 'C6', "H4'"},
50-
'DC': {"C4'", "C2'", 'C2', 'O2', 'H42', 'H5', "C1'", 'OP2', 'N3', 'C5',
51-
'P', "H5''", 'H41', 'H6', "C5'", "H3'", 'C4', 'N1', 'N4', "H1'",
52-
'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
53-
"HO3'", "H5'", "C3'", 'C6', "H4'"},
54-
'DG': {"C4'", "C2'", 'C2', "C1'", 'N7', 'OP2', 'N3', 'C5', 'P', "H5''",
55-
"C5'", 'O6', 'H1', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8', 'N9',
56-
'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
57-
'H21', 'H22', "HO3'", "H5'", "C3'", 'N2', 'C6', "H4'"},
58-
'DT': {"C4'", "C2'", 'C2', 'O2', 'O4', "C1'", 'OP2', 'N3', 'C5', 'P',
59-
"H5''", 'H6', "C5'", "H3'", 'C4', 'N1', 'C7', "H1'", 'H73', 'HOP3',
60-
'H3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
61-
'H71', "HO3'", "H5'", "C3'", 'H72', 'C6', "H4'"},
62-
'G': {"C4'", "C2'", 'C2', "C1'", 'N7', 'OP2', 'N3', 'C5', 'P', "H5''",
63-
"C5'", 'O6', 'H1', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8', 'N9',
64-
'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'",
65-
"O2'", 'H21', 'H22', "HO3'", "H5'", "C3'", 'N2', 'C6', "H4'"},
66-
'GLN': {'HB2', 'CG', 'H', 'H2', 'N', 'HG2', 'HE22', 'CA', 'HG3', 'HE21',
67-
'HB3', 'HXT', 'O', 'NE2', 'C', 'OE1', 'CB', 'CD', 'HA', 'OXT'},
68-
'GLU': {'HB2', 'CG', 'H', 'H2', 'N', 'HG2', 'CA', 'HG3', 'HB3', 'HXT',
69-
'O', 'HE2', 'C', 'OE2', 'OE1', 'CB', 'CD', 'HA', 'OXT'},
70-
'GLY': {'HA3', 'HXT', 'CA', 'O', 'HA2', 'H', 'N', 'C', 'H2', 'OXT'},
71-
'HIS': {'HB2', 'CG', 'CE1', 'HE1', 'H', 'ND1', 'H2', 'N', 'CA', 'HD1',
72-
'HB3', 'HXT', 'O', 'HE2', 'NE2', 'C', 'CD2', 'CB', 'HA', 'HD2',
73-
'OXT'},
74-
'ILE': {'HD11', 'CG1', 'H', 'HD12', 'H2', 'N', 'CA', 'HD13', 'HG13',
75-
'HXT', 'O', 'HB', 'C', 'CD1', 'HG23', 'HG22', 'HG21', 'HG12',
76-
'CB', 'CG2', 'HA', 'OXT'},
77-
'LEU': {'HD11', 'HB2', 'HD22', 'CG', 'HD21', 'H', 'HD12', 'H2', 'N',
78-
'HD23', 'CA', 'HD13', 'HB3', 'HXT', 'O', 'C', 'CD2', 'CD1', 'CB',
79-
'HA', 'HG', 'OXT'},
80-
'LYS': {'HB2', 'CG', 'CE', 'H', 'H2', 'N', 'HG2', 'HE3', 'CA', 'HG3',
81-
'HB3', 'HXT', 'O', 'HE2', 'HZ1', 'HZ3', 'C', 'HD3', 'CB', 'CD',
82-
'HA', 'HZ2', 'HD2', 'NZ', 'OXT'},
83-
'MET': {'HB2', 'CG', 'HE1', 'CE', 'H', 'H2', 'N', 'HG2', 'HE3', 'CA',
84-
'HG3', 'SD', 'HB3', 'HXT', 'O', 'HE2', 'C', 'CB', 'HA', 'OXT'},
85-
'PHE': {'HB2', 'CG', 'CE1', 'HE1', 'H', 'H2', 'N', 'HZ', 'CA', 'HD1',
86-
'CZ', 'HB3', 'HXT', 'O', 'HE2', 'C', 'CD2', 'CD1', 'CB', 'CE2',
87-
'HA', 'HD2', 'OXT'},
88-
'PRO': {'HB3', 'HB2', 'N', 'CG', 'O', 'CB', 'HG2', 'HXT', 'CD', 'C', 'HA',
89-
'CA', 'HD2', 'H', 'HG3', 'HD3', 'OXT'},
90-
'SER': {'H2', 'HB3', 'HB2', 'N', 'HXT', 'O', 'CB', 'C', 'HA', 'HG', 'CA',
91-
'H', 'OG', 'OXT'},
92-
'THR': {'H2', 'HXT', 'N', 'HG23', 'O', 'CB', 'CG2', 'OG1', 'HB', 'C',
93-
'HA', 'CA', 'HG22', 'H', 'HG1', 'HG21', 'OXT'},
94-
'TRP': {'HB2', 'CG', 'CE3', 'CZ3', 'HE1', 'H', 'H2', 'N', 'HE3', 'CA',
95-
'CZ2', 'HD1', 'HB3', 'HXT', 'O', 'HZ3', 'C', 'CD2', 'CD1', 'NE1',
96-
'CB', 'HH2', 'CE2', 'HA', 'CH2', 'HZ2', 'OXT'},
97-
'U': {"C4'", "C2'", 'C2', 'O2', 'H5', 'O4', "C1'", 'OP2', 'N3', 'C5', 'P',
98-
"H5''", 'H6', "C5'", "H3'", 'C4', 'N1', "H1'", 'HOP3', 'H3', 'OP1',
99-
"O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'", "O2'", "HO3'",
100-
"H5'", "C3'", 'C6', "H4'"},
101-
'VAL': {'CG1', 'H', 'H2', 'N', 'CA', 'HG13', 'HXT', 'O', 'HB', 'C',
102-
'HG23', 'HG22', 'HG21', 'HG12', 'CB', 'CG2', 'HA', 'OXT', 'HG11'}
103-
}
104-
10524

10625
def _is_subrange(rng1, rng2):
10726
"""Return True iff rng1 is wholly inside rng2"""
@@ -1964,7 +1883,6 @@ def __get_assembly_checker(self):
19641883

19651884
def dump_atoms(self, system, writer, add_ihm=True):
19661885
seen_types = {}
1967-
seen_atom_names = collections.defaultdict(set)
19681886
ordinal = itertools.count(1)
19691887
it = ["group_PDB", "id", "type_symbol", "label_atom_id",
19701888
"label_alt_id", "label_comp_id", "label_seq_id", "auth_seq_id",
@@ -1986,7 +1904,6 @@ def dump_atoms(self, system, writer, add_ihm=True):
19861904
label_seq_id = None
19871905
comp = atom.asym_unit.sequence[seq_id - 1]
19881906
seen_types[atom.type_symbol] = None
1989-
seen_atom_names[comp.id].add(atom.atom_id)
19901907
auth_seq_id, ins = \
19911908
atom.asym_unit._get_auth_seq_id_ins_code(seq_id)
19921909
lp.write(id=next(ordinal),
@@ -2007,26 +1924,8 @@ def dump_atoms(self, system, writer, add_ihm=True):
20071924
pdbx_PDB_model_num=model._id,
20081925
ihm_model_id=model._id)
20091926
self._assembly_checker.add_model_asyms(model, seen_asym_ids)
2010-
if self._check:
2011-
self._check_atom_names(seen_atom_names)
20121927
return seen_types
20131928

2014-
def _check_atom_names(self, seen_atom_names):
2015-
"""Check that only standard atom names are used for known
2016-
residue types"""
2017-
def _get_non_canon():
2018-
for restyp, atoms in seen_atom_names.items():
2019-
if restyp in KNOWN_ATOM_NAMES:
2020-
non_canon_atoms = atoms - KNOWN_ATOM_NAMES[restyp]
2021-
if non_canon_atoms:
2022-
yield restyp, non_canon_atoms
2023-
non_canon = sorted(_get_non_canon(), key=operator.itemgetter(0))
2024-
if non_canon:
2025-
raise ValueError(
2026-
"Non-canonical atom names found in the following residues: "
2027-
+ "; ".join("%s: %r" % (restyp, sorted(atoms))
2028-
for (restyp, atoms) in non_canon))
2029-
20301929

20311930
class _ModelDumper(_ModelDumperBase):
20321931
def dump(self, system, writer):

ihm/util/make_mmcif.py

Lines changed: 130 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,93 @@
2929
import ihm.util
3030
import os
3131
import argparse
32-
33-
34-
def add_ihm_info(s, fix_histidines):
32+
import collections
33+
import operator
34+
35+
36+
# All canonical atom names for each standard residue type, as per the
# wwPDB Chemical Component Dictionary (CCD).
37+
# This is generated using the util/get_ccd_atoms.py script.
38+
KNOWN_ATOM_NAMES = {
39+
'A': {"C4'", "C2'", 'C2', "C1'", 'N7', 'H62', 'OP2', 'N3', 'C5', 'P',
40+
"H5''", 'H2', "C5'", 'H61', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8',
41+
'N9', 'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'N6',
42+
'HOP2', "O5'", "O2'", "HO3'", "H5'", "C3'", 'C6', "H4'"},
43+
'ALA': {'H2', 'HB1', 'HB3', 'HB2', 'N', 'HXT', 'O', 'CB', 'C', 'HA', 'CA',
44+
'H', 'OXT'},
45+
'ARG': {'HB2', 'CG', 'NE', 'H', 'H2', 'HH22', 'N', 'HG2', 'CA', 'NH2',
46+
'HH11', 'HG3', 'HH21', 'CZ', 'HB3', 'HXT', 'O', 'C', 'HD3', 'HH12',
47+
'CB', 'NH1', 'CD', 'HA', 'HD2', 'HE', 'OXT'},
48+
'ASN': {'H2', 'HB3', 'HD22', 'HB2', 'N', 'CG', 'O', 'CB', 'ND2', 'HXT',
49+
'C', 'HA', 'HD21', 'CA', 'OD1', 'H', 'OXT'},
50+
'ASP': {'H2', 'HB3', 'HB2', 'N', 'CG', 'O', 'CB', 'HXT', 'C', 'HA', 'OD2',
51+
'CA', 'OD1', 'HD2', 'H', 'OXT'},
52+
'C': {"C4'", "C2'", 'C2', 'O2', 'H42', 'H5', "C1'", 'OP2', 'N3', 'C5',
53+
'P', "H5''", 'H41', 'H6', "C5'", "H3'", 'C4', 'N1', 'N4', "H1'",
54+
'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'",
55+
"O2'", "HO3'", "H5'", "C3'", 'C6', "H4'"},
56+
'CYS': {'H2', 'HB3', 'HB2', 'N', 'SG', 'O', 'CB', 'HXT', 'C', 'HA', 'HG',
57+
'CA', 'H', 'OXT'},
58+
'DA': {"C4'", "C2'", 'C2', "C1'", 'N7', 'H62', 'OP2', 'N3', 'C5', 'P',
59+
"H5''", 'H2', "C5'", 'H61', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8',
60+
'N9', 'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'N6', 'HOP2',
61+
"O5'", "H2''", "HO3'", "H5'", "C3'", 'C6', "H4'"},
62+
'DC': {"C4'", "C2'", 'C2', 'O2', 'H42', 'H5', "C1'", 'OP2', 'N3', 'C5',
63+
'P', "H5''", 'H41', 'H6', "C5'", "H3'", 'C4', 'N1', 'N4', "H1'",
64+
'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
65+
"HO3'", "H5'", "C3'", 'C6', "H4'"},
66+
'DG': {"C4'", "C2'", 'C2', "C1'", 'N7', 'OP2', 'N3', 'C5', 'P', "H5''",
67+
"C5'", 'O6', 'H1', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8', 'N9',
68+
'HOP3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
69+
'H21', 'H22', "HO3'", "H5'", "C3'", 'N2', 'C6', "H4'"},
70+
'DT': {"C4'", "C2'", 'C2', 'O2', 'O4', "C1'", 'OP2', 'N3', 'C5', 'P',
71+
"H5''", 'H6', "C5'", "H3'", 'C4', 'N1', 'C7', "H1'", 'H73', 'HOP3',
72+
'H3', 'OP1', "O4'", "H2'", 'OP3', "O3'", 'HOP2', "O5'", "H2''",
73+
'H71', "HO3'", "H5'", "C3'", 'H72', 'C6', "H4'"},
74+
'G': {"C4'", "C2'", 'C2', "C1'", 'N7', 'OP2', 'N3', 'C5', 'P', "H5''",
75+
"C5'", 'O6', 'H1', "H3'", 'C4', 'N1', 'H8', "H1'", 'C8', 'N9',
76+
'HOP3', 'OP1', "O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'",
77+
"O2'", 'H21', 'H22', "HO3'", "H5'", "C3'", 'N2', 'C6', "H4'"},
78+
'GLN': {'HB2', 'CG', 'H', 'H2', 'N', 'HG2', 'HE22', 'CA', 'HG3', 'HE21',
79+
'HB3', 'HXT', 'O', 'NE2', 'C', 'OE1', 'CB', 'CD', 'HA', 'OXT'},
80+
'GLU': {'HB2', 'CG', 'H', 'H2', 'N', 'HG2', 'CA', 'HG3', 'HB3', 'HXT',
81+
'O', 'HE2', 'C', 'OE2', 'OE1', 'CB', 'CD', 'HA', 'OXT'},
82+
'GLY': {'HA3', 'HXT', 'CA', 'O', 'HA2', 'H', 'N', 'C', 'H2', 'OXT'},
83+
'HIS': {'HB2', 'CG', 'CE1', 'HE1', 'H', 'ND1', 'H2', 'N', 'CA', 'HD1',
84+
'HB3', 'HXT', 'O', 'HE2', 'NE2', 'C', 'CD2', 'CB', 'HA', 'HD2',
85+
'OXT'},
86+
'ILE': {'HD11', 'CG1', 'H', 'HD12', 'H2', 'N', 'CA', 'HD13', 'HG13',
87+
'HXT', 'O', 'HB', 'C', 'CD1', 'HG23', 'HG22', 'HG21', 'HG12',
88+
'CB', 'CG2', 'HA', 'OXT'},
89+
'LEU': {'HD11', 'HB2', 'HD22', 'CG', 'HD21', 'H', 'HD12', 'H2', 'N',
90+
'HD23', 'CA', 'HD13', 'HB3', 'HXT', 'O', 'C', 'CD2', 'CD1', 'CB',
91+
'HA', 'HG', 'OXT'},
92+
'LYS': {'HB2', 'CG', 'CE', 'H', 'H2', 'N', 'HG2', 'HE3', 'CA', 'HG3',
93+
'HB3', 'HXT', 'O', 'HE2', 'HZ1', 'HZ3', 'C', 'HD3', 'CB', 'CD',
94+
'HA', 'HZ2', 'HD2', 'NZ', 'OXT'},
95+
'MET': {'HB2', 'CG', 'HE1', 'CE', 'H', 'H2', 'N', 'HG2', 'HE3', 'CA',
96+
'HG3', 'SD', 'HB3', 'HXT', 'O', 'HE2', 'C', 'CB', 'HA', 'OXT'},
97+
'PHE': {'HB2', 'CG', 'CE1', 'HE1', 'H', 'H2', 'N', 'HZ', 'CA', 'HD1',
98+
'CZ', 'HB3', 'HXT', 'O', 'HE2', 'C', 'CD2', 'CD1', 'CB', 'CE2',
99+
'HA', 'HD2', 'OXT'},
100+
'PRO': {'HB3', 'HB2', 'N', 'CG', 'O', 'CB', 'HG2', 'HXT', 'CD', 'C', 'HA',
101+
'CA', 'HD2', 'H', 'HG3', 'HD3', 'OXT'},
102+
'SER': {'H2', 'HB3', 'HB2', 'N', 'HXT', 'O', 'CB', 'C', 'HA', 'HG', 'CA',
103+
'H', 'OG', 'OXT'},
104+
'THR': {'H2', 'HXT', 'N', 'HG23', 'O', 'CB', 'CG2', 'OG1', 'HB', 'C',
105+
'HA', 'CA', 'HG22', 'H', 'HG1', 'HG21', 'OXT'},
106+
'TRP': {'HB2', 'CG', 'CE3', 'CZ3', 'HE1', 'H', 'H2', 'N', 'HE3', 'CA',
107+
'CZ2', 'HD1', 'HB3', 'HXT', 'O', 'HZ3', 'C', 'CD2', 'CD1', 'NE1',
108+
'CB', 'HH2', 'CE2', 'HA', 'CH2', 'HZ2', 'OXT'},
109+
'U': {"C4'", "C2'", 'C2', 'O2', 'H5', 'O4', "C1'", 'OP2', 'N3', 'C5', 'P',
110+
"H5''", 'H6', "C5'", "H3'", 'C4', 'N1', "H1'", 'HOP3', 'H3', 'OP1',
111+
"O4'", "H2'", "HO2'", 'OP3', "O3'", 'HOP2', "O5'", "O2'", "HO3'",
112+
"H5'", "C3'", 'C6', "H4'"},
113+
'VAL': {'CG1', 'H', 'H2', 'N', 'CA', 'HG13', 'HXT', 'O', 'HB', 'C',
114+
'HG23', 'HG22', 'HG21', 'HG12', 'CB', 'CG2', 'HA', 'OXT', 'HG11'}
115+
}
116+
117+
118+
def add_ihm_info(s, fix_histidines, check_atom_names):
35119
# Non-standard histidine names (protonation states)
36120
histidines = frozenset(('HIP', 'HID', 'HIE'))
37121

@@ -63,6 +147,8 @@ def add_ihm_info(s, fix_histidines):
63147
_get_not_modeled_residues(model))
64148
if fix_histidines:
65149
_fix_histidine_het_atoms(model, histidines)
150+
if check_atom_names != 'no':
151+
_check_atom_names(model)
66152
if fix_histidines:
67153
_fix_histidine_chem_comps(s, histidines)
68154
return s
@@ -79,6 +165,34 @@ def _fix_histidine_het_atoms(model, histidines):
79165
atom.het = False
80166

81167

168+
def _get_non_canon(seen_atom_names):
    """Yield (residue type, atom names) pairs for non-canonical atoms.

       `seen_atom_names` maps each residue type to the set of atom names
       seen for it. Residue types that have no entry in KNOWN_ATOM_NAMES
       are skipped. A pair is generated only when at least one of the
       seen names is absent from the canonical set for that type; the
       second member of the pair is the set of such names."""
    for comp_id, seen in seen_atom_names.items():
        # todo: if comp_id is not known, query Ligand Expo and parse the
        # resulting mmCIF
        canonical = KNOWN_ATOM_NAMES.get(comp_id)
        if canonical is None:
            continue
        unexpected = seen - canonical
        if unexpected:
            yield comp_id, unexpected
177+
178+
179+
def _check_atom_names(model):
    """Raise ValueError if `model` uses non-canonical atom names.

       The atom names of every atom in the model are grouped by the
       chemical component (residue type) the atom belongs to, and each
       group is compared against the canonical names via
       _get_non_canon(). If any non-canonical names are found, a
       ValueError is raised that lists them, ordered by residue type."""
    names_by_comp = collections.defaultdict(set)
    for atom in model._atoms:
        # Atoms with no seq_id are looked up as the first residue of
        # their asym unit's sequence
        seq_id = atom.seq_id
        if seq_id is None:
            seq_id = 1
        comp = atom.asym_unit.sequence[seq_id - 1]
        names_by_comp[comp.id].add(atom.atom_id)

    offenders = list(_get_non_canon(names_by_comp))
    if not offenders:
        return
    # Report residue types in a deterministic (sorted) order
    offenders.sort(key=operator.itemgetter(0))
    details = ["%s: %r" % (comp_id, sorted(names))
               for comp_id, names in offenders]
    raise ValueError(
        "Non-canonical atom names found in the following residues: "
        + "; ".join(details))
194+
195+
82196
def _fix_histidine_chem_comps(s, histidines):
83197
"""Change any non-standard histidine chemical components to normal HIS"""
84198
his = ihm.LPeptideAlphabet()['H']
@@ -130,15 +244,15 @@ def _get_not_modeled_residues(model):
130244
yield ihm.model.NotModeledResidueRange(asym, r[0], r[1])
131245

132246

133-
def add_ihm_info_one_system(fname, fix_histidines):
247+
def add_ihm_info_one_system(fname, fix_histidines, check_atom_names):
134248
"""Read mmCIF file `fname`, which must contain a single System, and
135249
return it with any missing IHM data added."""
136250
with open(fname) as fh:
137251
systems = ihm.reader.read(fh)
138252
if len(systems) != 1:
139253
raise ValueError("mmCIF file %s must contain exactly 1 data block "
140254
"(%d found)" % (fname, len(systems)))
141-
return add_ihm_info(systems[0], fix_histidines)
255+
return add_ihm_info(systems[0], fix_histidines, check_atom_names)
142256

143257

144258
def combine(s, other_s):
@@ -256,6 +370,11 @@ def get_args():
256370
p.add_argument("--histidines", action='store_true', dest="fix_histidines",
257371
help="Convert any non-standard histidine names (HIP, HID, "
258372
"HIE, for different protonation states) to HIS")
373+
p.add_argument('--check_atom_names', choices=['no', 'standard'],
374+
dest="check_atom_names", default='no',
375+
help="If 'standard', check for non-canonical atom names "
376+
"in standard amino acid and nucleic acid chemical "
377+
"components")
259378
return p.parse_args()
260379

261380

@@ -267,9 +386,11 @@ def main():
267386
raise ValueError("Input and output are the same file")
268387

269388
if args.add:
270-
s = add_ihm_info_one_system(args.input, args.fix_histidines)
389+
s = add_ihm_info_one_system(args.input, args.fix_histidines,
390+
args.check_atom_names)
271391
for other in args.add:
272-
other_s = add_ihm_info_one_system(other, args.fix_histidines)
392+
other_s = add_ihm_info_one_system(other, args.fix_histidines,
393+
args.check_atom_names)
273394
combine(s, other_s)
274395
with open(args.output, 'w') as fhout:
275396
ihm.dumper.write(
@@ -279,7 +400,8 @@ def main():
279400
with open(args.input) as fh:
280401
with open(args.output, 'w') as fhout:
281402
ihm.dumper.write(
282-
fhout, [add_ihm_info(s, args.fix_histidines)
403+
fhout, [add_ihm_info(s, args.fix_histidines,
404+
args.check_atom_names)
283405
for s in ihm.reader.read(fh)],
284406
variant=ihm.dumper.IgnoreVariant(['_audit_conform']))
285407

test/input/non_canon_atom.cif

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
data_model
2+
#
3+
_exptl.method 'model, MODELLER Version 9.24 2020/08/21 11:54:31'
4+
#
5+
_modeller.version 9.24
6+
#
7+
loop_
8+
_struct_asym.id
9+
_struct_asym.entity_id
10+
_struct_asym.details
11+
A 1 ?
12+
#
13+
loop_
14+
_entity_poly_seq.entity_id
15+
_entity_poly_seq.num
16+
_entity_poly_seq.mon_id
17+
1 1 VAL
18+
1 2 GLY
19+
1 3 GLN
20+
1 4 MSE
21+
#
22+
loop_
23+
_atom_site.group_PDB
24+
_atom_site.type_symbol
25+
_atom_site.label_atom_id
26+
_atom_site.label_alt_id
27+
_atom_site.label_comp_id
28+
_atom_site.label_asym_id
29+
_atom_site.auth_asym_id
30+
_atom_site.label_seq_id
31+
_atom_site.auth_seq_id
32+
_atom_site.pdbx_PDB_ins_code
33+
_atom_site.Cartn_x
34+
_atom_site.Cartn_y
35+
_atom_site.Cartn_z
36+
_atom_site.occupancy
37+
_atom_site.B_iso_or_equiv
38+
_atom_site.label_entity_id
39+
_atom_site.id
40+
_atom_site.pdbx_PDB_model_num
41+
ATOM C bad1 . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 1 1
42+
ATOM C bad2 . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 2 1
43+
ATOM C CA . GLY A A 2 3 ? 111.506 26.368 -28.075 1.000 137.530 1 3 1
44+
ATOM C bad3 . GLN A A 3 4 ? 113.468 23.113 -28.639 1.000 128.420 1 4 1
45+
ATOM C ig1 . MSE A A 4 5 ? 113.808 21.534 -32.168 1.000 117.620 1 5 1

0 commit comments

Comments
 (0)