@@ -70,7 +70,7 @@ class DataModel:
7070 def __init__ (
7171 self ,
7272 xsd_file : str ,
73- short_name : str = None ,
73+ short_name : str = "DocumentRoot" ,
7474 long_name : str = None ,
7575 base_url : str = None ,
7676 model_config : dict = None ,
@@ -226,8 +226,7 @@ def _build_model(self):
226226 """
227227 # parse the XML schema recursively and hold a reference to the head table
228228 root_table = self ._parse_tree (
229- self .xml_schema [0 ] if len (self .xml_schema ) == 1 else self .xml_schema ,
230- is_root_table = True ,
229+ self .xml_schema [0 ] if len (self .xml_schema ) == 1 else self .xml_schema
231230 )
232231 self .root_table = root_table .type_name
233232 # compute a text representation of the original data model and store it
@@ -273,9 +272,7 @@ def _build_model(self):
273272 for tb in self .fk_ordered_tables :
274273 tb .build_sqlalchemy_tables ()
275274
276- def _parse_tree (
277- self , parent_node : xmlschema .XsdElement , is_root_table : bool = False
278- ):
275+ def _parse_tree (self , parent_node : xmlschema .XsdElement , nodes_path : list = None ):
279276 """Parse a node of an XML schema recursively and create a target data model without any simplification
280277
281278 We parse the XSD tree recursively to create for each node (basically a complex type in the XSD) an equivalent \
@@ -289,7 +286,7 @@ def _parse_tree(
289286
290287 Args:
291288 parent_node: the current XSD node being parsed
292- is_root_table: True if this is the root table
289+ nodes_path: the list of node types encountered from the root node down to the parent of this node (None for the root node)
293290 """
294291
295292 # find the current node type and name, and return the corresponding table if it already exists
@@ -301,12 +298,16 @@ def _parse_tree(
301298 if parent_type is None :
302299 parent_type = parent_node .local_name
303300
301+ nodes_path = (nodes_path if nodes_path else []) + [parent_type ]
302+
304303 # if this type has already been encountered, stop here and return existing table
305304 if parent_type in self .tables :
306305 parent_table = self .tables [parent_type ]
307306 return parent_table
308307
309- # elements names and types should be bijective. If an element name is used for different types,
308+ # For database tables we use element names rather than XSD types, under the assumption that they are often
309+ # more meaningful given that they are the ones which appear in XML documents. However, the same name can be used
310+ # for different XSD types, so if an element name is used for different types,
310311 # we add a suffix to the name to make it unique again (using a dict to keep the name/type association)
311312 parent_name = (
312313 parent_node .local_name
@@ -324,7 +325,7 @@ def _parse_tree(
324325 parent_table = self ._create_table_model (
325326 parent_name ,
326327 parent_type ,
327- is_root_table ,
328+ len ( nodes_path ) == 1 ,
328329 isinstance (parent_node , xmlschema .XMLSchema ),
329330 )
330331 self .tables [parent_type ] = parent_table
@@ -363,6 +364,13 @@ def recurse_parse_simple_type(elem_type):
363364 if elem_type .base_type
364365 else recurse_parse_simple_type (elem_type .member_types )
365366 )
367+ if elem_type .is_list ():
368+ return (
369+ "string" ,
370+ 0 ,
371+ None ,
372+ elem_type .allow_empty ,
373+ )
366374 if elem_type .is_restriction ():
367375 dt = elem_type .base_type .local_name
368376 mil = elem_type .min_length
@@ -384,7 +392,12 @@ def recurse_parse_simple_type(elem_type):
384392 else None
385393 )
386394 ae = ae and bt_ae if ae is not None and bt_ae is not None else None
387- if elem_type .enumeration is not None and dt in ["string" , "NMTOKEN" , "duration" , "token" ]:
395+ if elem_type .enumeration is not None and dt in [
396+ "string" ,
397+ "NMTOKEN" ,
398+ "duration" ,
399+ "token" ,
400+ ]:
388401 mil = min ([len (val ) for val in elem_type .enumeration ])
389402 mal = max ([len (val ) for val in elem_type .enumeration ])
390403 return dt , mil , mal , ae
@@ -410,25 +423,31 @@ def get_occurs(particle):
410423 ),
411424 ]
412425
413- # go through item attributes and add them as columns
426+ # go through item attributes and add them as columns, adding a suffix if an element with the same name exists
427+ children_names = None
414428 for attrib_name , attrib in parent_node .attributes .items ():
429+ if children_names is None :
430+ children_names = [child .local_name for child in parent_node ]
415431 (
416432 data_type ,
417433 min_length ,
418434 max_length ,
419435 allow_empty ,
420436 ) = recurse_parse_simple_type ([attrib .type ])
437+ suffix = attrib_name in children_names
421438 parent_table .add_column (
422- f"{ attrib_name } " ,
439+ f"{ attrib_name } { '_attr' if suffix else '' } " ,
423440 data_type ,
424441 [0 , 1 ],
425442 min_length ,
426443 max_length ,
427444 True ,
445+ suffix ,
428446 False ,
429447 allow_empty ,
430448 None ,
431449 )
450+
432451 nested_containers = []
433452 # go through the children to add either arguments or relations to the current element
434453 for child in parent_node :
@@ -454,6 +473,7 @@ def get_occurs(particle):
454473 if child .parent
455474 and child .parent .max_occurs != 1
456475 and child .parent .model != "choice"
476+ and child .max_occurs == 1
457477 else None
458478 ),
459479 )
@@ -482,32 +502,39 @@ def get_occurs(particle):
482502 max_length ,
483503 False ,
484504 False ,
505+ False ,
485506 allow_empty ,
486507 nested_containers [- 1 ][1 ],
487508 )
488509
489510 elif ct .is_complex ():
490- child_table = self ._parse_tree (child )
491- child_table .model_group = (
492- "choice"
493- if ct .model_group and ct .model_group .model == "choice"
494- else "sequence"
495- )
496- occurs = get_occurs (child )
497- if child .is_single ():
498- parent_table .add_relation_1 (
499- child .local_name ,
500- child_table ,
501- occurs ,
502- nested_containers [- 1 ][1 ],
511+ # ignoring recursive definitions by skipping these fields
512+ if child .type .local_name in nodes_path :
513+ logger .warning (
514+ f"type '{ child .type .local_name } ' contains a recursive definition"
503515 )
504516 else :
505- parent_table . add_relation_n (
506- child . local_name ,
507- child_table ,
508- occurs ,
509- nested_containers [ - 1 ][ 1 ],
517+ child_table = self . _parse_tree ( child , nodes_path )
518+ child_table . model_group = (
519+ "choice"
520+ if ct . model_group and ct . model_group . model == "choice"
521+ else "sequence"
510522 )
523+ occurs = get_occurs (child )
524+ if occurs [1 ] == 1 :
525+ parent_table .add_relation_1 (
526+ child .local_name ,
527+ child_table ,
528+ occurs ,
529+ nested_containers [- 1 ][1 ],
530+ )
531+ else :
532+ parent_table .add_relation_n (
533+ child .local_name ,
534+ child_table ,
535+ occurs ,
536+ nested_containers [- 1 ][1 ],
537+ )
511538 else :
512539 raise ValueError ("unknown case; please check" )
513540 else :
@@ -534,6 +561,7 @@ def get_occurs(particle):
534561 min_length ,
535562 max_length ,
536563 False ,
564+ False ,
537565 True ,
538566 allow_empty ,
539567 None ,
@@ -544,31 +572,19 @@ def get_occurs(particle):
544572 def _repr_tree (
545573 self ,
546574 parent_table : Union [DataModelTableReused , DataModelTableDuplicated ],
547- visited_nodes : Union [set , None ] = None ,
548575 ):
549576 """Build a text representation of the data model tree
550577
551578 Args:
552579 parent_table: the current data model table object
553580 """
554- if visited_nodes is None :
555- visited_nodes = set ()
556- else :
557- visited_nodes = {item for item in visited_nodes }
558- visited_nodes .add (parent_table .name )
559581 for field_type , name , field in parent_table .fields :
560582 if field_type == "col" :
561583 yield f"{ field .name } { field .occurs } : { field .data_type } "
562- elif field_type == "rel1" :
563- mg = " (choice)" if field .other_table .model_group == "choice" else ""
564- yield f"{ field .name } { field .occurs } { mg } :{ ' ...' if field_type in visited_nodes else '' } "
565- if field .other_table .name not in visited_nodes :
566- for line in self ._repr_tree (field .other_table , visited_nodes ):
567- yield f" { line } "
568- elif field_type == "reln" :
584+ else :
569585 mg = " (choice)" if field .other_table .model_group == "choice" else ""
570- yield f"{ field .name } { field .occurs } { mg } :{ ' ...' if field_type in visited_nodes else '' } "
571- for line in self ._repr_tree (field .other_table , visited_nodes ):
586+ yield f"{ field .name } { field .occurs } { mg } :"
587+ for line in self ._repr_tree (field .other_table ):
572588 yield f" { line } "
573589
574590 def get_entity_rel_diagram (self , text_context : bool = True ) -> str :
0 commit comments