xml_parser.py
Go to the documentation of this file.
1 import os
2 import sys
3 from pathlib import Path
4 import xml.etree.ElementTree as ET
5 
6 
8  """
9  Parses and extracts docs from Doxygen-generated XML.
10  """
11 
def __init__(self):
    """Create a parser with verbose output off and an empty overload memory."""
    # Diagnostic switch: set to True to get progress messages that help when
    # investigating functions that cause problems for extract_docstring (the
    # command-line entry point at the bottom of this file turns it on).
    self._verbose = False
    # Memory for overloaded functions with identical parameter name sets.
    self._memory = {}
19 
def parse_xml(self, xml_file: str):
    """
    Load an XML file and return its ElementTree.

    A missing file or malformed XML is not fatal: a warning is printed and
    None is returned instead.
    """
    tree = None
    try:
        tree = ET.parse(xml_file)
    except FileNotFoundError:
        print(f"Warning: XML file '{xml_file}' not found.")
    except ET.ParseError:
        print(f"Warning: Failed to parse XML file '{xml_file}'.")
    return tree
33 
def extract_docstring(self, xml_folder: str, cpp_class: str,
                      cpp_method: str, method_args_names: 'list[str]'):
    """
    Extract the docstrings for a C++ class's method from the Doxygen-generated XML.

    Args:
        xml_folder (str): The path to the folder that contains all of the Doxygen-generated XML.
        cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
        cpp_method (str): The name of the C++ method whose docstring is to be extracted.
        method_args_names (list): A list of the names of the cpp_method's parameters.

    Returns:
        str: The formatted docstring, or an empty string when no member
            definition matching the class, method and argument names is
            available.
    """
    self.print_if_verbose(f"Extracting docs for {cpp_class}.{cpp_method}")

    # Get all of the member definitions in cpp_class with name cpp_method
    maybe_member_defs = self.get_member_defs(xml_folder, cpp_class,
                                             cpp_method)

    # Filter member definitions which don't match the given argument names
    member_defs, ignored_params = self.filter_member_defs(
        maybe_member_defs, method_args_names)

    # Nothing matched (missing XML, unknown class/method, or different
    # argument names): there is nothing to index into, so report "no docs".
    if not member_defs:
        self.print_if_verbose(
            f"No matching member definition found for {cpp_class}.{cpp_method}."
        )
        return ""

    # Find which member to get docs from, if there are multiple that match in name and args
    documenting_index = self.determine_documenting_index(
        cpp_class, cpp_method, method_args_names, member_defs)

    # The index counts how often this signature has been requested; it can run
    # past the number of matching definitions if it is requested more often
    # than there are overloads.
    if documenting_index >= len(member_defs):
        self.print_if_verbose(
            f"All {len(member_defs)} matching definitions of {cpp_class}.{cpp_method} were already documented."
        )
        return ""

    # Extract the docs for the function that matches cpp_class.cpp_method(*method_args_names).
    return self.get_formatted_docstring(member_defs[documenting_index],
                                        ignored_params)
62 
def get_member_defs(self, xml_folder: str, cpp_class: str,
                    cpp_method: str):
    """Get all of the member definitions in cpp_class with name cpp_method.

    Args:
        xml_folder (str): The folder containing the Doxygen XML documentation.
        cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
        cpp_method (str): The name of the C++ method whose docstring is to be extracted.

    Returns:
        list: All of the member definitions in cpp_class with name cpp_method.
            Empty when the index file or class file cannot be parsed, or
            when the class is not listed in the index.
    """
    xml_folder_path = Path(xml_folder)

    # Create the path to the Doxygen XML index file.
    xml_index_file = xml_folder_path / "index.xml"

    # Parse the index file
    index_tree = self.parse_xml(xml_index_file)
    if index_tree is None:
        self.print_if_verbose(f"Index file {xml_index_file} was empty.")
        return []

    index_root = index_tree.getroot()

    # Find the compound with name == cpp_class
    class_index = index_root.find(f"./*[name='{cpp_class}']")

    if class_index is None:
        self.print_if_verbose(
            f"Could not extract docs for {cpp_class}.{cpp_method}; class not found in index file."
        )
        return []

    # The refid attribute names the per-class XML file (without extension).
    refid = class_index.get("refid")
    if refid is None:
        self.print_if_verbose(
            f"Could not extract docs for {cpp_class}.{cpp_method}; index entry has no refid."
        )
        return []

    # Create the path to the file with the documentation for cpp_class.
    # The file name is assembled first: `path / refid + '.xml'` would be
    # evaluated as `(path / refid) + '.xml'`, which Path does not support.
    xml_class_file = xml_folder_path / f"{refid}.xml"

    # Parse the class file
    class_tree = self.parse_xml(xml_class_file)
    if class_tree is None:
        self.print_if_verbose(f"Class file {xml_class_file} was empty.")
        return []

    # Find the member(s) in cpp_class with name == cpp_method
    return class_tree.getroot().findall(
        f"compounddef/sectiondef//*[name='{cpp_method}']")
113 
def filter_member_defs(self, maybe_member_defs: list,
                       method_args_names: list):
    """
    Remove member definitions which do not match the supplied argument names list.

    Args:
        maybe_member_defs (list): The list of all member definitions in the class which share the same name.
        method_args_names (list): The list of argument names in the definition of the function whose documentation is desired.
            Supplying the argument names allows for the filtering of overloaded functions with the same name but different arguments.

    Returns:
        tuple[list, list]: (the filtered member definitions, parameters which should be ignored because they are optional)
    """

    def param_name_of(param):
        # declname is the tag that usually contains the param name; defname is
        # used uncommonly. Returns None when neither tag is present.
        for tag in ("declname", "defname"):
            node = param.find(tag)
            if node is not None:
                return node.text
        return None

    member_defs = []

    # Optional parameters we should ignore if we encounter them in the docstring
    ignored_params = []

    num_args = len(method_args_names)

    # Filter out the members which don't match the method_args_names
    for maybe_member_def in maybe_member_defs:
        # The argsstring is only used for diagnostics, so a member without
        # one must not abort the filtering.
        argsstring = maybe_member_def.find("argsstring")
        argsstring_text = argsstring.text if argsstring is not None else None
        self.print_if_verbose(
            f"Investigating member_def with argstring {argsstring_text}")

        # Find the number of required parameters and the number of total parameters from the
        # Doxygen XML for this member_def. A param is optional when defval
        # (default value) is set.
        params = maybe_member_def.findall("param")
        num_tot_params = len(params)
        num_req_params = sum(1 for param in params
                             if param.find("defval") is None)

        # If the number of parameters in method_args_names matches neither number, eliminate this member_def
        # This is done because wrap generates a python wrapper function twice for every function with
        # optional parameters: one with none of the optional parameters, and one with all of the optional
        # parameters, required.
        if num_args not in (num_req_params, num_tot_params):
            self.print_if_verbose(
                f"Wrong number of parameters: got {num_args}, expected required {num_req_params} or total {num_tot_params}."
            )
            continue

        # If any parameter name is missing or differs from the expected
        # name, eliminate this member_def. num_args never exceeds
        # num_tot_params here, so zip covers every supplied argument name.
        names_match = all(
            param_name_of(param) == arg_name
            for arg_name, param in zip(method_args_names, params))
        if not names_match:
            self.print_if_verbose("Names didn't match.")
            continue

        # At this point, this member_def can be assumed to be the desired function (or is indistinguishable
        # from it based on all of the reliable information we have--if this is the case, we need to rely on
        # the _memory to give the correct docs for each.)
        member_defs.append(maybe_member_def)
        self.print_if_verbose("Confirmed as correct function.")

        # Remember which parameters to ignore, if any: everything declared
        # beyond the supplied argument names. Uses the same name lookup as
        # the matching above; unnamed parameters are skipped.
        for param in params[num_args:]:
            optional_name = param_name_of(param)
            if optional_name is not None:
                ignored_params.append(optional_name)

    return member_defs, ignored_params
192 
def determine_documenting_index(self, cpp_class: str, cpp_method: str,
                                method_args_names: list,
                                member_defs: list):
    """
    Pick which of several matching member definitions should be documented.

    Args:
        cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
        cpp_method (str): The name of the C++ method whose docstring is to be extracted.
        method_args_names (list): A list of the names of the cpp_method's parameters.
        member_defs (list): All of the member definitions of cpp_class which match cpp_method in name
            and whose arguments have the same names as method_args_names.

    Returns:
        int: The index indicating which member definition to document.
    """
    # With zero or one candidate there is nothing to disambiguate, and the
    # memory is left untouched.
    if len(member_defs) <= 1:
        return 0

    # Several overloads share both the name and the parameter names, e.g.
    # foo(int bar) and foo(string bar). Parameter types cannot be used to
    # tell them apart because they may differ between the GTSAM
    # implementation and the pybind11 generated wrapper (e.g.
    # OptionalJacobian in GTSAM becomes Eigen::Matrix in the pybind11 code).
    # Instead, count how many times this signature has been requested and
    # hand out the first definition not yet documented.
    joined_names = ','.join(method_args_names or [])
    function_key = f"{cpp_class}.{cpp_method}({joined_names})"

    # First request stores 0; each later request stores the previous value + 1.
    next_index = self._memory.get(function_key, -1) + 1
    self._memory[function_key] = next_index
    return next_index
228 
def get_formatted_docstring(self,
                            member_def: 'xml.etree.ElementTree.Element',
                            ignored_params: list):
    """Gets the formatted docstring for the supplied XML element representing a member definition.

    The result is the brief description, then on a new line the free-text
    detailed description, one "name: description" line per documented
    parameter, and finally a "Returns: ..." entry if a return section exists.

    Args:
        member_def (xml.etree.ElementTree.Element): The member definition to document.
        ignored_params (list): The optional parameters which should be ignored, if any.

    Returns:
        str: The formatted docstring.
    """

    def text_of(element):
        # Full text of an element including text inside nested markup,
        # stripped; "" when the element is missing.
        if element is None:
            return ""
        return "".join(element.itertext()).strip()

    docstring = ""

    brief_description = member_def.find(".//briefdescription")
    detailed_description = member_def.find(".//detaileddescription")

    # Add the brief description first, if it exists.
    if brief_description is not None:
        for para in brief_description.findall("para"):
            docstring += "".join(t for t in para.itertext() if t.strip())

    # Add the detailed description. This includes the parameter list and the return value.
    if detailed_description is not None:
        docstring += "\n"
        # Add non-parameter detailed description. Paragraphs that hold the
        # parameter list or the return section are skipped here because they
        # are formatted separately below; including them would repeat them.
        for element in detailed_description:
            if element.tag != "para":
                continue
            is_structured = any(
                child.tag == "parameterlist" or
                (child.tag == "simplesect" and child.get("kind") == "return")
                for child in element)
            if is_structured:
                continue
            docstring += "".join(
                t for t in element.itertext() if t.strip()) + " "

        # Add parameter docs
        parameter_list = detailed_description.find(".//parameterlist")
        if parameter_list is not None:
            for i, parameter_item in enumerate(
                    parameter_list.findall(".//parameteritem")):
                # Either tag may be missing (e.g. an undocumented parameter
                # has no <para>); fall back to placeholders instead of failing.
                name = text_of(parameter_item.find(".//parametername"))
                if name in ignored_params:
                    continue
                desc = text_of(
                    parameter_item.find(".//parameterdescription/para"))
                shown_name = name if name else f"[Parameter {i}]"
                shown_desc = desc if desc else "No description provided"
                docstring += f"{shown_name}: {shown_desc}\n"

        # Add return value docs. Select the section by kind so that other
        # sections (note, see, ...) appearing first do not hide it.
        return_text = text_of(
            detailed_description.find(".//simplesect[@kind='return']/para"))
        if return_text:
            docstring += f"Returns: {return_text}"

    return docstring.strip()
281 
def print_if_verbose(self, text: str):
    """
    Write text to standard output, but only while verbose mode is on.
    """
    if not self._verbose:
        return
    print(text)
288 
289 
if __name__ == "__main__":
    # Command-line debugging aid: extract and print the docs of one method
    # with verbose diagnostics enabled.
    if len(sys.argv) != 5:
        print(
            "Usage: python xml_parser.py <doxygen_xml_folder> <cpp_class> <cpp_method> <method_args_names (comma-separated)>"
        )
    else:
        parser = XMLDocParser()
        parser._verbose = True
        xml_folder = sys.argv[1]
        # "".split(",") yields [""], which would count as one argument named
        # "" and could never match a method without parameters. Drop empty
        # entries (and surrounding whitespace) so that passing "" means
        # "no arguments".
        method_args_names = [
            name.strip() for name in sys.argv[4].split(",") if name.strip()
        ]
        extracted_doc = parser.extract_docstring(xml_folder, sys.argv[2],
                                                 sys.argv[3],
                                                 method_args_names)

        print()
        print(extracted_doc.strip())
Eigen::internal::print
EIGEN_STRONG_INLINE Packet4f print(const Packet4f &a)
Definition: NEON/PacketMath.h:3115
gtwrap.xml_parser.xml_parser.XMLDocParser.__init__
def __init__(self)
Definition: xml_parser.py:12
gtwrap.xml_parser.xml_parser.XMLDocParser._memory
_memory
Definition: xml_parser.py:14
gtwrap.xml_parser.xml_parser.XMLDocParser.get_member_defs
def get_member_defs(self, str xml_folder, str cpp_class, str cpp_method)
Definition: xml_parser.py:63
gtwrap.xml_parser.xml_parser.XMLDocParser.filter_member_defs
def filter_member_defs(self, list maybe_member_defs, list method_args_names)
Definition: xml_parser.py:114
list
Definition: pytypes.h:2168
gtwrap.xml_parser.xml_parser.XMLDocParser.print_if_verbose
def print_if_verbose(self, str text)
Definition: xml_parser.py:282
gtwrap.xml_parser.xml_parser.XMLDocParser.determine_documenting_index
def determine_documenting_index(self, str cpp_class, str cpp_method, list method_args_names, list member_defs)
Definition: xml_parser.py:193
gtsam::range
Double_ range(const Point2_ &p, const Point2_ &q)
Definition: slam/expressions.h:30
gtwrap.xml_parser.xml_parser.XMLDocParser
Definition: xml_parser.py:7
gtwrap.xml_parser.xml_parser.XMLDocParser._verbose
_verbose
Definition: xml_parser.py:18
gtwrap.xml_parser.xml_parser.XMLDocParser.parse_xml
def parse_xml(self, str xml_file)
Definition: xml_parser.py:20
gtwrap.xml_parser.xml_parser.XMLDocParser.extract_docstring
def extract_docstring(self, str xml_folder, str cpp_class, str cpp_method, 'list[str]' method_args_names)
Definition: xml_parser.py:34
gtwrap.xml_parser.xml_parser.XMLDocParser.get_formatted_docstring
def get_formatted_docstring(self, 'xml.etree.ElementTree.Element' member_def, list ignored_params)
Definition: xml_parser.py:229
gtsam::split
void split(const G &g, const PredecessorMap< KEY > &tree, G &Ab1, G &Ab2)
Definition: graph-inl.h:245
len
size_t len(handle h)
Get the length of a Python object.
Definition: pytypes.h:2448


gtsam
Author(s):
autogenerated on Fri Mar 28 2025 03:09:16