3 from pathlib 
import Path
 
    4 import xml.etree.ElementTree 
as ET
 
    9     Parses and extracts docs from Doxygen-generated XML. 
   23                             cpp_class: str) -> Path | 
None:
 
   24         """Finds the XML file path for a given class name using the index.""" 
   25         xml_folder_path = Path(xml_folder)
 
   26         xml_index_file = xml_folder_path / 
"index.xml" 
   28         index_tree = self.
parse_xml(xml_index_file)
 
   31                 f
"Index file {xml_index_file} was empty or failed to parse.")
 
   34         index_root = index_tree.getroot()
 
   35         class_index = index_root.find(
 
   36             f
".//compound[@kind='class'][name='{cpp_class}']")
 
   37         if class_index 
is None:
 
   39             class_index = index_root.find(
 
   40                 f
".//compound[@kind='struct'][name='{cpp_class}']")
 
   42         if class_index 
is None:
 
   44                 f
"Class or Struct '{cpp_class}' not found in index file {xml_index_file}." 
   48         refid = class_index.attrib.get(
'refid')
 
   51                 f
"Class or Struct '{cpp_class}' found in index, but has no refid." 
   55         return xml_folder_path / f
"{refid}.xml" 
   59         Get the ElementTree of an XML file given the file name. 
   60         Uses a cache to avoid re-parsing. 
   61         If an error occurs, prints a warning and returns None. 
   63         xml_file_path = Path(xml_file)
 
   64         file_key = 
str(xml_file_path.resolve())
 
   70             tree = ET.parse(xml_file_path)
 
   73         except FileNotFoundError:
 
   74             print(f
"Warning: XML file '{xml_file_path}' not found.")
 
   78             print(f
"Warning: Failed to parse XML file '{xml_file_path}'.")
 
   83                           cpp_method: str, method_args_names: 
'list[str]'):
 
   85         Extract the docstrings for a C++ class's method from the Doxygen-generated XML. 
   86         If not found in the specified class, searches parent classes recursively. 
   89             xml_folder (str): The path to the folder that contains all of the Doxygen-generated XML. 
   90             cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted. 
   91             cpp_method (str): The name of the C++ method whose docstring is to be extracted. 
   92             method_args_names (list): A list of the names of the cpp_method's parameters. 
   95             f
"--- Attempting to extract docs for {cpp_class}.{cpp_method} ---")
 
   99         if not xml_class_file:
 
  101                 f
"Could not find XML file for class {cpp_class}.")
 
  105         class_tree = self.
parse_xml(xml_class_file)
 
  108                 f
"Class file {xml_class_file} was empty or failed to parse.")
 
  111         class_root = class_tree.getroot()
 
  115             class_root, cpp_method)
 
  119             maybe_member_defs, method_args_names, cpp_class,
 
  126                 cpp_class, cpp_method, method_args_names, member_defs)
 
  129             if documenting_index < 
len(member_defs):
 
  131                     f
"Found direct documentation for {cpp_class}.{cpp_method}." 
  135                     member_defs[documenting_index], ignored_params)
 
  138                     f
"Calculated documenting_index {documenting_index} is out of bounds for {cpp_class}.{cpp_method} (len={len(member_defs)})." 
  144             f
"No direct documentation found for {cpp_class}.{cpp_method} with matching args. Checking base classes..." 
  148         compound_def = class_root.find(
"compounddef")
 
  149         if compound_def 
is None:
 
  151                 f
"Could not find <compounddef> in {xml_class_file}")
 
  154         base_refs = compound_def.findall(
"basecompoundref")
 
  155         for base_ref 
in base_refs:
 
  156             base_refid = base_ref.attrib.get(
"refid")
 
  162             parent_xml_file = Path(xml_folder) / f
"{base_refid}.xml" 
  163             parent_tree = self.
parse_xml(parent_xml_file)
 
  166                     f
"Could not parse parent XML file {parent_xml_file} for refid {base_refid}." 
  170             parent_root = parent_tree.getroot()
 
  171             parent_compound_def = parent_root.find(
"compounddef")
 
  172             if parent_compound_def 
is None:
 
  174                     f
"Could not find <compounddef> in parent XML {parent_xml_file}." 
  178             parent_name_element = parent_compound_def.find(
"compoundname")
 
  179             if parent_name_element 
is None or not parent_name_element.text:
 
  181                     f
"Could not find <compoundname> in parent XML {parent_xml_file}." 
  185             parent_class_name = parent_name_element.text
 
  187                 f
"Recursively searching for {cpp_method} in base class: {parent_class_name} (refid: {base_refid})" 
  199                     f
"Found documentation for {cpp_method} in base class {parent_class_name}." 
  205                 return parent_docstring
 
  208                     f
"Method {cpp_method} not found or documented in base class {parent_class_name}." 
  213             f
"Method {cpp_method} with matching args not documented in {cpp_class} or any base classes." 
  218                                   cpp_method: str) -> list[ET.Element]:
 
  219         """Finds member definitions for a method name within a given class XML root.""" 
  222         member_defs = class_root.findall(
 
  223             f
"compounddef/sectiondef[@kind='public-func']/memberdef[@kind='function'][name='{cpp_method}']" 
  227                 f
"compounddef/sectiondef[@kind='public-static-func']/memberdef[@kind='function'][name='{cpp_method}']" 
  232                 f
"compounddef/sectiondef[@kind='user-defined']/memberdef[@kind='function'][name='{cpp_method}']" 
  239                            method_args_names: list[str], cpp_class: str,
 
  242         Remove member definitions which do not match the supplied argument names list. 
  245             maybe_member_defs (list): The list of all member definitions in the class which share the same name. 
  246             method_args_names (list): The list of argument names in the definition of the function whose documentation is desired. 
  247             cpp_class (str): The name of the class being investigated (for verbose output). 
  248             cpp_method (str): The name of the method being investigated (for verbose output). 
  251             tuple[list, list]: (the filtered member definitions, parameters which should be ignored because they are optional) 
  258         for maybe_member_def 
in maybe_member_defs:
 
  261             args_string_elem = maybe_member_def.find(
'argsstring')
 
  262             args_string_text = args_string_elem.text 
if args_string_elem 
is not None else "[no argsstring]" 
  264                 f
"  Investigating potential match for {cpp_class}.{cpp_method}: " 
  265                 f
"argstring '{args_string_text}' (loc: {maybe_member_def.find('location').attrib.get('file', '?')}:{maybe_member_def.find('location').attrib.get('line', '?')})" 
  269             params = maybe_member_def.findall(
"param")
 
  270             num_tot_params = 
len(params)
 
  273             num_req_params = num_tot_params - sum([
 
  274                 1 
if param.find(
"defval") 
is not None else 0
 
  279                 f
"    XML Params: Total={num_tot_params}, Required={num_req_params}. Provided args count: {len(method_args_names)}" 
  286             if len(method_args_names) != num_req_params 
and len(
 
  287                     method_args_names) != num_tot_params:
 
  289                     f
"    Parameter count mismatch. Skipping.")
 
  296             if len(method_args_names) > num_tot_params:
 
  299                     f
"    Provided args count ({len(method_args_names)}) > XML total params ({num_tot_params}). Skipping." 
  302                 for i, arg_name 
in enumerate(method_args_names):
 
  304                     param_elem = params[i]
 
  306                     param_name_elem = param_elem.find(
"declname")
 
  308                     if param_name_elem 
is None:
 
  309                         param_name_elem = param_elem.find(
"defname")
 
  311                     if param_name_elem 
is None or param_name_elem.text 
is None:
 
  315                             f
"    Could not find XML name for parameter index {i}. Skipping." 
  320                     xml_param_name = param_name_elem.text
 
  322                     if arg_name != xml_param_name:
 
  324                             f
"    Parameter name mismatch at index {i}: Provided='{arg_name}', XML='{xml_param_name}'. Skipping." 
  334             member_defs.append(maybe_member_def)
 
  336                 "    Confirmed as candidate function by arg names and count.")
 
  341             if len(method_args_names
 
  342                    ) == num_req_params 
and num_req_params != num_tot_params:
 
  344                     f
"    Matched on required params ({num_req_params}). Identifying ignored optional params..." 
  346                 for i 
in range(num_req_params, num_tot_params):
 
  347                     ignored_name_elem = params[i].find(
"declname")
 
  348                     if ignored_name_elem 
is None:
 
  349                         ignored_name_elem = params[i].find(
"defname")
 
  350                     if ignored_name_elem 
is not None and ignored_name_elem.text:
 
  351                         current_ignored.append(ignored_name_elem.text)
 
  353                             f
"      Ignoring optional param: {ignored_name_elem.text}" 
  358                 ignored_params = current_ignored  
 
  361                     f
"    Matched on total params ({num_tot_params}) or req==tot. No ignored optional params for this match." 
  366         return member_defs, ignored_params  
 
  369                                     method_args_names: list,
 
  372         Determine which member definition to retrieve documentation from, if there are multiple. 
  375             cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted. 
  376             cpp_method (str): The name of the C++ method whose docstring is to be extracted. 
  377             method_args_names (list): A list of the names of the cpp_method's parameters. 
  378             member_defs (list): All of the member definitions of cpp_class which match cpp_method in name 
  379                 and whose arguments have the same names as method_args_names. 
  382             int: The index indicating which member definition to document. 
  384         documenting_index = 0
 
  385         num_matches = 
len(member_defs)
 
  393         function_key = f
"{cpp_class}.{cpp_method}({','.join(method_args_names) if method_args_names else ''})" 
  397         times_documented = self.
_memory.
get(function_key, -1)
 
  400         documenting_index = times_documented + 1
 
  405         if documenting_index >= num_matches:
 
  407                 f
"    Memory index {documenting_index} >= num_matches {num_matches} for {function_key}. Wrapping to 0." 
  409             documenting_index = 0
 
  413             self.
_memory[function_key] = documenting_index
 
  416             f
"  Multiple matches ({num_matches}) found for {function_key}. Using index {documenting_index} based on memory." 
  418         return documenting_index
 
  421                                 ignored_params: list):
 
  422         """Gets the formatted docstring for the supplied XML element representing a member definition. 
  425             member_def (xml.etree.ElementTree.Element): The member definition to document. 
  426             ignored_params (list): The optional parameters which should be ignored *for this specific overload match*, if any. 
  429             str: The formatted docstring. Returns empty string if member_def lacks documentation tags. 
  435         location_elem = member_def.find(
'location')
 
  436         loc_info = f
"(from {location_elem.attrib.get('file', '?')}:{location_elem.attrib.get('line', '?')})" if location_elem 
is not None else "" 
  439         brief_description = member_def.find(
"./briefdescription")
 
  440         detailed_description = member_def.find(
"./detaileddescription")
 
  443         if brief_description 
is not None:
 
  444             brief_text = 
"".join(t.strip()
 
  445                                  for para 
in brief_description.findall(
"para")
 
  446                                  for t 
in para.itertext()
 
  447                                  if t.strip()).strip()
 
  449                 docstring += brief_text
 
  454         if detailed_description 
is not None:
 
  455             detailed_content = 
"" 
  457             has_detailed_para = 
False 
  458             for element 
in list(detailed_description):
 
  460                 if element.tag == 
"para" and not element.findall(
 
  461                         ".//parameterlist") 
and not element.findall(
 
  462                             ".//simplesect[@kind='return']" 
  464                     para_text = 
"".join(t 
for t 
in element.itertext()
 
  465                                         if t.strip()).strip()
 
  467                         detailed_content += para_text + 
" " 
  468                         has_detailed_para = 
True 
  470             if has_detailed_para:
 
  473                 detailed_content = detailed_content.strip()
 
  474                 docstring += detailed_content
 
  477                     f
"    Detailed Paras: {detailed_content}")
 
  480             parameter_list = detailed_description.find(
 
  481                 ".//parameterlist[@kind='param']")  
 
  483             if parameter_list 
is not None:
 
  485                     f
"    Processing parameters (ignoring: {ignored_params})..." 
  487                 for i, parameter_item 
in enumerate(
 
  488                         parameter_list.findall(
"./parameteritem")):
 
  489                     name_elem = parameter_item.find(
 
  490                         "./parameternamelist/parametername")
 
  491                     desc_elem = parameter_item.find(  
 
  492                         "./parameterdescription/para")
 
  493                     name = name_elem.text.strip(
 
  494                     ) 
if name_elem 
is not None and name_elem.text 
else f
'[Param {i+1}]' 
  496                     desc = 
"".join(t 
for t 
in desc_elem.itertext() 
if t.strip(
 
  498                     ) 
if desc_elem 
is not None else 'No description provided' 
  500                     if name 
not in ignored_params:
 
  501                         param_docs += f
"{name}: {desc}\n" 
  505                             f
"      Ignoring documented param: {name}")
 
  509                     docstring += 
"\n\nArgs:\n"   
  511                     docstring += 
"Args:\n" 
  513                 docstring += param_docs.strip()
 
  518             return_sect = detailed_description.find(
 
  519                 ".//simplesect[@kind='return']")
 
  521             if return_sect 
is not None:
 
  523                 return_para = return_sect.find(
 
  525                 if return_para 
is not None:
 
  526                     return_text = 
"".join(t 
for t 
in return_para.itertext()
 
  527                                           if t.strip()).strip()
 
  529                         return_doc = f
"Returns: {return_text}" 
  535                     docstring += 
"\n\n" + return_doc
 
  538                     docstring += 
"\n\n" + return_doc
 
  540                     docstring += return_doc
 
  543         final_docstring = docstring.strip()
 
  544         if not final_docstring 
and not has_content:
 
  546                 "    No documentation content found in brief/detailed descriptions." 
  550         return final_docstring
 
  554         Print text if the parser is in verbose mode. 
  560 if __name__ == 
"__main__":
 
  561     if len(sys.argv) < 4 
or len(sys.argv) > 5:
 
  563             "Usage: python xml_parser.py <doxygen_xml_folder> <cpp_class> <cpp_method> <method_args_names (comma-separated)>" 
  568     parser._verbose = 
True 
  569     xml_folder = sys.argv[1]
 
  570     cpp_class = sys.argv[2]
 
  571     cpp_method = sys.argv[3]
 
  573     if len(sys.argv) == 5 
and sys.argv[4]:
 
  574         method_args = sys.argv[4].
split(
",")
 
  576     extracted_doc = parser.extract_docstring(xml_folder, cpp_class, cpp_method,
 
  579     print(
"\n--- Extracted Docstring ---")
 
  580     print(extracted_doc 
if extracted_doc 
else "[No documentation found]")
 
  581     print(
"---------------------------")