Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 import fileinput
00032 import re
00033 import json
00034
# Collect the text lines of the tree section from the input (files named on
# the command line, or stdin when none are given -- see fileinput.input()).
lines = []
at_tree = False   # becomes True once the "J48 pruned tree" header was seen
line_skip = 0     # number of upcoming input lines to discard unread

try:
    for line_raw in fileinput.input():
        if line_skip > 0:
            line_skip -= 1
            continue

        line = line_raw.rstrip()

        if not at_tree:
            if line == "J48 pruned tree":
                at_tree = True
                # Drop the two lines following the header (presumably its
                # underline and a blank separator -- confirm against input).
                line_skip = 2
            continue

        # The first empty line after the tree started ends the section.
        if line == "":
            break

        lines.append(line)
finally:
    # The loop usually leaves via `break`, so fileinput would otherwise keep
    # the current input file open; release it explicitly.
    fileinput.close()
00054
# A line that ends a branch has the shape
# "<variable> <operator> <value>: <result> (<occurrences>)".
_LEAF_LINE_RE = re.compile(r"(?P<variable>[\w\-]+) (?P<operator>[<\=>\!]+) (?P<value>[0-9a-zA-Z\.\-_]+): (?P<result>[\S]+) \((?P<occurrences>[\S]+)\)")
# Any other line only carries the test itself: "<variable> <operator> <value>".
_TEST_LINE_RE = re.compile(r"(?P<variable>[\w\-]+) (?P<operator>[<\=>\!]+) (?P<value>[0-9a-zA-Z\.\-_]+)")

# One dict per tree line: the named regex groups plus the nesting "level".
data_lines = []

for line in lines:
    # Depth is the number of "|" markers; each marker occupies four
    # characters at the start of the line, which are cut off before matching.
    level = line.count("|")
    unlevelled_line = line[level * 4:]

    # Try the richer leaf form first; the plain test form would also match a
    # leaf line but would lose its result and occurrences.
    m = _LEAF_LINE_RE.match(unlevelled_line) or _TEST_LINE_RE.match(unlevelled_line)

    if m is None:
        raise ValueError("cannot parse tree line: %r" % (line,))

    # groupdict() returns a fresh dict, so it can be extended in place; this
    # replaces list concatenation of .items(), which fails on Python 3.
    data = m.groupdict()
    data["level"] = level
    data_lines.append(data)
00068
00069
def recTB(data_lines, level = 0):
    """Nest a flat list of depth-annotated entries into a tree.

    Args:
        data_lines: sequence of dicts, each with an integer "level" key.
        level: the depth whose entries become the direct results of this call.

    Returns:
        A tuple (children, index). `children` is a list of
        {"data": entry, "children": [...]} dicts for the entries at `level`,
        each holding its deeper descendants recursively. `index` is how many
        leading entries of `data_lines` were consumed; processing stops at
        the first entry whose level is lower than `level`.

    Raises:
        IndexError: if an entry deeper than `level` appears before any entry
            at `level` (there is no parent to attach it to).
    """
    return _collect_level(data_lines, 0, level)


def _collect_level(data_lines, start, level):
    """Collect nodes at `level` from data_lines[start:]; return (nodes, next position)."""
    children = []
    pos = start
    total = len(data_lines)

    while pos < total:
        entry = data_lines[pos]
        depth = entry["level"]

        if depth < level:
            # Entry belongs to an enclosing level; let the caller handle it.
            break

        if depth == level:
            children.append({"data": entry, "children": []})
            pos += 1
            continue

        # Deeper entry: gather the whole nested run in one recursive pass and
        # jump the cursor past it, so consumed entries are never revisited.
        nested, pos = _collect_level(data_lines, pos, level + 1)
        children[-1]["children"].extend(nested)

    return (children, pos)
00091
def is_number(s):
    """Tell whether float() can parse `s`.

    Returns True when float(s) succeeds and False when it raises ValueError;
    any other exception raised by float() propagates to the caller.
    """
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True
00098
def _coerce_value(raw):
    """Return `raw` as int, or as float when it contains a ".", else unchanged."""
    try:
        return int(raw)
    except ValueError:
        pass

    # Only dotted text is tried as a float, so words that float() happens to
    # accept ("nan", "inf", "1e5") stay strings instead of crashing int().
    if "." in raw:
        try:
            return float(raw)
        except ValueError:
            pass

    return raw


def formatDTree(branch_children):
    """Convert nested {"data", "children"} nodes into the output structure.

    Args:
        branch_children: list of dicts with a "data" dict (keys "variable",
            "operator", "value" and optionally "result") and a "children"
            list of the same shape.

    Returns:
        A list with one dict per node:
        {"relation": {<operator>: {"value": ..., "variable": ...}},
         "true": ...}. "value" is an int or float when the text is numeric,
        otherwise the original string. "true" is [{"result": <result>}] when
        the node's data has a "result" key, else the formatted children.
    """
    formatted = []

    for child in branch_children:
        data = child["data"]

        relation = {
            data["operator"]: {"value": _coerce_value(data["value"]),
                               "variable": data["variable"]}
        }

        if "result" in data:
            outcome = [{"result": data["result"]}]
        else:
            outcome = formatDTree(child["children"])

        formatted.append({"relation": relation, "true": outcome})

    return formatted
00125
# Build the complete structure before touching the output file, so a failure
# while nesting or formatting does not leave an empty o.json behind.
(items, index) = recTB(data_lines)
dtree = formatDTree(items)

# json.dump() emits text, so the file is opened in text mode; binary mode
# ("wb") makes the write fail on Python 3.
with open("o.json", "w") as f:
    json.dump(dtree, f)