3 from pathlib
import Path
4 import xml.etree.ElementTree
as ET
9 Parses and extracts docs from Doxygen-generated XML.
23 cpp_class: str) -> Path |
None:
24 """Finds the XML file path for a given class name using the index."""
25 xml_folder_path = Path(xml_folder)
26 xml_index_file = xml_folder_path /
"index.xml"
28 index_tree = self.
parse_xml(xml_index_file)
31 f
"Index file {xml_index_file} was empty or failed to parse.")
34 index_root = index_tree.getroot()
35 class_index = index_root.find(
36 f
".//compound[@kind='class'][name='{cpp_class}']")
37 if class_index
is None:
39 class_index = index_root.find(
40 f
".//compound[@kind='struct'][name='{cpp_class}']")
42 if class_index
is None:
44 f
"Class or Struct '{cpp_class}' not found in index file {xml_index_file}."
48 refid = class_index.attrib.get(
'refid')
51 f
"Class or Struct '{cpp_class}' found in index, but has no refid."
55 return xml_folder_path / f
"{refid}.xml"
59 Get the ElementTree of an XML file given the file name.
60 Uses a cache to avoid re-parsing.
61 If an error occurs, prints a warning and returns None.
63 xml_file_path = Path(xml_file)
64 file_key =
str(xml_file_path.resolve())
70 tree = ET.parse(xml_file_path)
73 except FileNotFoundError:
74 print(f
"Warning: XML file '{xml_file_path}' not found.")
78 print(f
"Warning: Failed to parse XML file '{xml_file_path}'.")
83 cpp_method: str, method_args_names:
'list[str]'):
85 Extract the docstrings for a C++ class's method from the Doxygen-generated XML.
86 If not found in the specified class, searches parent classes recursively.
89 xml_folder (str): The path to the folder that contains all of the Doxygen-generated XML.
90 cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
91 cpp_method (str): The name of the C++ method whose docstring is to be extracted.
92 method_args_names (list): A list of the names of the cpp_method's parameters.
95 f
"--- Attempting to extract docs for {cpp_class}.{cpp_method} ---")
99 if not xml_class_file:
101 f
"Could not find XML file for class {cpp_class}.")
105 class_tree = self.
parse_xml(xml_class_file)
108 f
"Class file {xml_class_file} was empty or failed to parse.")
111 class_root = class_tree.getroot()
115 class_root, cpp_method)
119 maybe_member_defs, method_args_names, cpp_class,
126 cpp_class, cpp_method, method_args_names, member_defs)
129 if documenting_index <
len(member_defs):
131 f
"Found direct documentation for {cpp_class}.{cpp_method}."
135 member_defs[documenting_index], ignored_params)
138 f
"Calculated documenting_index {documenting_index} is out of bounds for {cpp_class}.{cpp_method} (len={len(member_defs)})."
144 f
"No direct documentation found for {cpp_class}.{cpp_method} with matching args. Checking base classes..."
148 compound_def = class_root.find(
"compounddef")
149 if compound_def
is None:
151 f
"Could not find <compounddef> in {xml_class_file}")
154 base_refs = compound_def.findall(
"basecompoundref")
155 for base_ref
in base_refs:
156 base_refid = base_ref.attrib.get(
"refid")
162 parent_xml_file = Path(xml_folder) / f
"{base_refid}.xml"
163 parent_tree = self.
parse_xml(parent_xml_file)
166 f
"Could not parse parent XML file {parent_xml_file} for refid {base_refid}."
170 parent_root = parent_tree.getroot()
171 parent_compound_def = parent_root.find(
"compounddef")
172 if parent_compound_def
is None:
174 f
"Could not find <compounddef> in parent XML {parent_xml_file}."
178 parent_name_element = parent_compound_def.find(
"compoundname")
179 if parent_name_element
is None or not parent_name_element.text:
181 f
"Could not find <compoundname> in parent XML {parent_xml_file}."
185 parent_class_name = parent_name_element.text
187 f
"Recursively searching for {cpp_method} in base class: {parent_class_name} (refid: {base_refid})"
199 f
"Found documentation for {cpp_method} in base class {parent_class_name}."
205 return parent_docstring
208 f
"Method {cpp_method} not found or documented in base class {parent_class_name}."
213 f
"Method {cpp_method} with matching args not documented in {cpp_class} or any base classes."
218 cpp_method: str) -> list[ET.Element]:
219 """Finds member definitions for a method name within a given class XML root."""
222 member_defs = class_root.findall(
223 f
"compounddef/sectiondef[@kind='public-func']/memberdef[@kind='function'][name='{cpp_method}']"
227 f
"compounddef/sectiondef[@kind='public-static-func']/memberdef[@kind='function'][name='{cpp_method}']"
232 f
"compounddef/sectiondef[@kind='user-defined']/memberdef[@kind='function'][name='{cpp_method}']"
239 method_args_names: list[str], cpp_class: str,
242 Remove member definitions which do not match the supplied argument names list.
245 maybe_member_defs (list): The list of all member definitions in the class which share the same name.
246 method_args_names (list): The list of argument names in the definition of the function whose documentation is desired.
247 cpp_class (str): The name of the class being investigated (for verbose output).
248 cpp_method (str): The name of the method being investigated (for verbose output).
251 tuple[list, list]: (the filtered member definitions, parameters which should be ignored because they are optional)
258 for maybe_member_def
in maybe_member_defs:
261 args_string_elem = maybe_member_def.find(
'argsstring')
262 args_string_text = args_string_elem.text
if args_string_elem
is not None else "[no argsstring]"
264 f
" Investigating potential match for {cpp_class}.{cpp_method}: "
265 f
"argstring '{args_string_text}' (loc: {maybe_member_def.find('location').attrib.get('file', '?')}:{maybe_member_def.find('location').attrib.get('line', '?')})"
269 params = maybe_member_def.findall(
"param")
270 num_tot_params =
len(params)
273 num_req_params = num_tot_params - sum([
274 1
if param.find(
"defval")
is not None else 0
279 f
" XML Params: Total={num_tot_params}, Required={num_req_params}. Provided args count: {len(method_args_names)}"
286 if len(method_args_names) != num_req_params
and len(
287 method_args_names) != num_tot_params:
289 f
" Parameter count mismatch. Skipping.")
296 if len(method_args_names) > num_tot_params:
299 f
" Provided args count ({len(method_args_names)}) > XML total params ({num_tot_params}). Skipping."
302 for i, arg_name
in enumerate(method_args_names):
304 param_elem = params[i]
306 param_name_elem = param_elem.find(
"declname")
308 if param_name_elem
is None:
309 param_name_elem = param_elem.find(
"defname")
311 if param_name_elem
is None or param_name_elem.text
is None:
315 f
" Could not find XML name for parameter index {i}. Skipping."
320 xml_param_name = param_name_elem.text
322 if arg_name != xml_param_name:
324 f
" Parameter name mismatch at index {i}: Provided='{arg_name}', XML='{xml_param_name}'. Skipping."
334 member_defs.append(maybe_member_def)
336 " Confirmed as candidate function by arg names and count.")
341 if len(method_args_names
342 ) == num_req_params
and num_req_params != num_tot_params:
344 f
" Matched on required params ({num_req_params}). Identifying ignored optional params..."
346 for i
in range(num_req_params, num_tot_params):
347 ignored_name_elem = params[i].find(
"declname")
348 if ignored_name_elem
is None:
349 ignored_name_elem = params[i].find(
"defname")
350 if ignored_name_elem
is not None and ignored_name_elem.text:
351 current_ignored.append(ignored_name_elem.text)
353 f
" Ignoring optional param: {ignored_name_elem.text}"
358 ignored_params = current_ignored
361 f
" Matched on total params ({num_tot_params}) or req==tot. No ignored optional params for this match."
366 return member_defs, ignored_params
369 method_args_names: list,
372 Determine which member definition to retrieve documentation from, if there are multiple.
375 cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
376 cpp_method (str): The name of the C++ method whose docstring is to be extracted.
377 method_args_names (list): A list of the names of the cpp_method's parameters.
378 member_defs (list): All of the member definitions of cpp_class which match cpp_method in name
379 and whose arguments have the same names as method_args_names.
382 int: The index indicating which member definition to document.
384 documenting_index = 0
385 num_matches =
len(member_defs)
393 function_key = f
"{cpp_class}.{cpp_method}({','.join(method_args_names) if method_args_names else ''})"
397 times_documented = self.
_memory.
get(function_key, -1)
400 documenting_index = times_documented + 1
405 if documenting_index >= num_matches:
407 f
" Memory index {documenting_index} >= num_matches {num_matches} for {function_key}. Wrapping to 0."
409 documenting_index = 0
413 self.
_memory[function_key] = documenting_index
416 f
" Multiple matches ({num_matches}) found for {function_key}. Using index {documenting_index} based on memory."
418 return documenting_index
421 ignored_params: list):
422 """Gets the formatted docstring for the supplied XML element representing a member definition.
425 member_def (xml.etree.ElementTree.Element): The member definition to document.
426 ignored_params (list): The optional parameters which should be ignored *for this specific overload match*, if any.
429 str: The formatted docstring. Returns empty string if member_def lacks documentation tags.
435 location_elem = member_def.find(
'location')
436 loc_info = f
"(from {location_elem.attrib.get('file', '?')}:{location_elem.attrib.get('line', '?')})" if location_elem
is not None else ""
439 brief_description = member_def.find(
"./briefdescription")
440 detailed_description = member_def.find(
"./detaileddescription")
443 if brief_description
is not None:
444 brief_text =
"".join(t.strip()
445 for para
in brief_description.findall(
"para")
446 for t
in para.itertext()
447 if t.strip()).strip()
449 docstring += brief_text
454 if detailed_description
is not None:
455 detailed_content =
""
457 has_detailed_para =
False
458 for element
in list(detailed_description):
460 if element.tag ==
"para" and not element.findall(
461 ".//parameterlist")
and not element.findall(
462 ".//simplesect[@kind='return']"
464 para_text =
"".join(t
for t
in element.itertext()
465 if t.strip()).strip()
467 detailed_content += para_text +
" "
468 has_detailed_para =
True
470 if has_detailed_para:
473 detailed_content = detailed_content.strip()
474 docstring += detailed_content
477 f
" Detailed Paras: {detailed_content}")
480 parameter_list = detailed_description.find(
481 ".//parameterlist[@kind='param']")
483 if parameter_list
is not None:
485 f
" Processing parameters (ignoring: {ignored_params})..."
487 for i, parameter_item
in enumerate(
488 parameter_list.findall(
"./parameteritem")):
489 name_elem = parameter_item.find(
490 "./parameternamelist/parametername")
491 desc_elem = parameter_item.find(
492 "./parameterdescription/para")
493 name = name_elem.text.strip(
494 )
if name_elem
is not None and name_elem.text
else f
'[Param {i+1}]'
496 desc =
"".join(t
for t
in desc_elem.itertext()
if t.strip(
498 )
if desc_elem
is not None else 'No description provided'
500 if name
not in ignored_params:
501 param_docs += f
"{name}: {desc}\n"
505 f
" Ignoring documented param: {name}")
509 docstring +=
"\n\nArgs:\n"
511 docstring +=
"Args:\n"
513 docstring += param_docs.strip()
518 return_sect = detailed_description.find(
519 ".//simplesect[@kind='return']")
521 if return_sect
is not None:
523 return_para = return_sect.find(
525 if return_para
is not None:
526 return_text =
"".join(t
for t
in return_para.itertext()
527 if t.strip()).strip()
529 return_doc = f
"Returns: {return_text}"
535 docstring +=
"\n\n" + return_doc
538 docstring +=
"\n\n" + return_doc
540 docstring += return_doc
543 final_docstring = docstring.strip()
544 if not final_docstring
and not has_content:
546 " No documentation content found in brief/detailed descriptions."
550 return final_docstring
554 Print text if the parser is in verbose mode.
560 if __name__ ==
"__main__":
561 if len(sys.argv) < 4
or len(sys.argv) > 5:
563 "Usage: python xml_parser.py <doxygen_xml_folder> <cpp_class> <cpp_method> <method_args_names (comma-separated)>"
568 parser._verbose =
True
569 xml_folder = sys.argv[1]
570 cpp_class = sys.argv[2]
571 cpp_method = sys.argv[3]
573 if len(sys.argv) == 5
and sys.argv[4]:
574 method_args = sys.argv[4].
split(
",")
576 extracted_doc = parser.extract_docstring(xml_folder, cpp_class, cpp_method,
579 print(
"\n--- Extracted Docstring ---")
580 print(extracted_doc
if extracted_doc
else "[No documentation found]")
581 print(
"---------------------------")