3434import collections
3535import operator
3636import warnings
37+ import inspect
3738
3839
3940# All canonical atom names for each standard residue type, as per CCD.
118119}
119120
120121
121- def add_ihm_info (s , fix_histidines , check_atom_names ):
122+ def add_ihm_info (s , fix_histidines , check_atom_names , fix_chem_comp ):
122123 # Non-standard histidine names (protonation states)
123124 histidines = frozenset (('HIP' , 'HID' , 'HIE' ))
124125
@@ -155,6 +156,8 @@ def add_ihm_info(s, fix_histidines, check_atom_names):
155156 if fix_histidines :
156157 _fix_histidine_chem_comps (s , histidines )
157158 _fix_empty_assemblies (s )
159+ if fix_chem_comp :
160+ _fix_incomplete_chem_comps (s )
158161 return s
159162
160163
@@ -180,20 +183,25 @@ def __call__(self, comp_id, atom_id):
180183 self .atoms [comp_id ].add (atom_id )
181184
182185
183- def _get_non_std_restyp (restyp ):
184- """Return CCD info for the given residue type """
def _read_ccd(restyp, category_handler):
    """Read the given residue type from CCD using the given CIF handler(s).

    The CCD entry for `restyp` is downloaded from files.rcsb.org and fed
    through an ihm.format.CifReader; results are delivered by the reader
    calling the handler objects, so nothing is returned here.

    :param str restyp: The component ID to look up (used verbatim in
           the URL).
    :param dict category_handler: Mapping from mmCIF category name
           (e.g. '_chem_comp_atom') to the handler object for that category.

    If the URL cannot be opened, a warning is issued rather than an
    exception being raised.
    """
    url_top = 'https://files.rcsb.org'
    url_pattern = url_top + '/pub/pdb/refdata/chem_comp/%s/%s/%s.cif'
    # The URL path contains a directory named for the last character of
    # the component ID, followed by the ID itself
    url = url_pattern % (restyp[-1], restyp, restyp)
    try:
        with urllib.request.urlopen(url) as fh:
            c = ihm.format.CifReader(
                fh, category_handler=category_handler)
            c.read_file()
    except urllib.error.URLError as exc:
        warnings.warn(
            "Component %s could not be found in CCD: %s" % (restyp, exc))
199+
200+
def _get_non_std_restyp(restyp):
    """Return CCD atom info for the given residue type.

    A fresh _ChemCompAtomHandler is populated from the '_chem_comp_atom'
    category of the CCD entry for `restyp`, and its `atoms` attribute is
    returned. If the CCD lookup fails, _read_ccd only warns, so whatever
    the handler held on construction is returned unchanged.
    """
    cca = _ChemCompAtomHandler()
    _read_ccd(restyp, {'_chem_comp_atom': cca})
    return cca.atoms
198206
199207
@@ -290,15 +298,48 @@ def _fix_empty_assemblies(s):
290298 s .complete_assembly .description = asmb .description
291299
292300
293- def add_ihm_info_one_system (fname , fix_histidines , check_atom_names ):
301+ class _ChemCompHandler :
302+ """Read the _chem_comp table from a CCD entry"""
303+ not_in_file = omitted = unknown = None
304+
305+ def __call__ (self , name , type , formula ):
306+ self .name , self .type , self .formula = name , type , formula
307+
308+
def _fix_incomplete_chem_comps(s):
    """Add any missing information to ChemComps using CCD.

    Each component in `s._orphan_chem_comps` that has type 'other', or
    has no name or no formula, is passed to _fix_chem_comp, which queries
    CCD over the network.
    """
    # Map lower-cased type string to ChemComp subclass. WaterChemComp is
    # deliberately left out so that its type string cannot map to water.
    typmap = {cls.type.lower(): cls
              for _, cls in inspect.getmembers(ihm, inspect.isclass)
              if issubclass(cls, ihm.ChemComp)
              and cls is not ihm.WaterChemComp}
    for cc in s._orphan_chem_comps:
        if cc.type == 'other' or cc.name is None or cc.formula is None:
            _fix_chem_comp(cc, typmap)
320+
321+
def _fix_chem_comp(cc, typmap):
    """Add missing information to a single ChemComp from CCD.

    :param cc: The component to update in place; its upper-cased `id` is
           used as the CCD component ID.
    :param dict typmap: Mapping from lower-cased CCD type string to the
           ChemComp subclass that should represent it.

    If CCD provides no name for the component (including when the lookup
    fails), `cc` is left unchanged.
    """
    h = _ChemCompHandler()
    _read_ccd(cc.id.upper(), {'_chem_comp': h})
    # The handler may never have been called (e.g. the download failed),
    # in which case it may not have a name attribute at all
    name = getattr(h, 'name', None)
    if name is None:
        return
    cc.name = name
    # Don't discard an existing formula if CCD doesn't provide one
    if h.formula is not None:
        cc.formula = h.formula
    # Missing data items are reported as None by the handler; only
    # reclassify the component when CCD actually gives a type
    if h.type is not None:
        cc.__class__ = typmap.get(h.type.lower(), ihm.ChemComp)
330+
331+
def add_ihm_info_one_system(fname, fix_histidines, check_atom_names,
                            fix_chem_comp):
    """Read mmCIF file `fname`, which must contain a single System, and
       return it with any missing IHM data added.

       The `fix_histidines`, `check_atom_names` and `fix_chem_comp`
       options are passed through unchanged to add_ihm_info.

       :raises ValueError: if the file does not contain exactly one
               data block.
    """
    with open(fname) as fh:
        systems = ihm.reader.read(fh)
    if len(systems) != 1:
        raise ValueError("mmCIF file %s must contain exactly 1 data block "
                         "(%d found)" % (fname, len(systems)))
    return add_ihm_info(systems[0], fix_histidines, check_atom_names,
                        fix_chem_comp)
302343
303344
304345def combine (s , other_s ):
@@ -422,6 +463,10 @@ def get_args():
422463 "in standard amino acid and nucleic acid chemical "
423464 "components; if 'all', also check non-standard "
424465 "residue types by querying CCD (needs network access)" )
466+ p .add_argument ("--fix_chem_comp" , action = 'store_true' ,
467+ dest = "fix_chem_comp" ,
468+ help = "Add any missing data to the chem_comp table by"
469+ "querying CCD (needs network access)" )
425470 return p .parse_args ()
426471
427472
@@ -434,10 +479,12 @@ def main():
434479
435480 if args .add :
436481 s = add_ihm_info_one_system (args .input , args .fix_histidines ,
437- args .check_atom_names )
482+ args .check_atom_names ,
483+ args .fix_chem_comp )
438484 for other in args .add :
439485 other_s = add_ihm_info_one_system (other , args .fix_histidines ,
440- args .check_atom_names )
486+ args .check_atom_names ,
487+ args .fix_chem_comp )
441488 combine (s , other_s )
442489 with open (args .output , 'w' ) as fhout :
443490 ihm .dumper .write (
@@ -448,7 +495,8 @@ def main():
448495 with open (args .output , 'w' ) as fhout :
449496 ihm .dumper .write (
450497 fhout , [add_ihm_info (s , args .fix_histidines ,
451- args .check_atom_names )
498+ args .check_atom_names ,
499+ args .fix_chem_comp )
452500 for s in ihm .reader .read (fh )],
453501 variant = ihm .dumper .IgnoreVariant (['_audit_conform' ]))
454502
0 commit comments