wekaToDTree.py
Go to the documentation of this file.
00001 #!/usr/bin/python
00002 
00003 ## Copyright (c) 2014, Jan Winkler <winkler@cs.uni-bremen.de>
00004 ## All rights reserved.
00005 ##
00006 ## Redistribution and use in source and binary forms, with or without
00007 ## modification, are permitted provided that the following conditions are met:
00008 ##
00009 ## * Redistributions of source code must retain the above copyright
00010 ##   notice, this list of conditions and the following disclaimer.
00011 ## * Redistributions in binary form must reproduce the above copyright
00012 ##   notice, this list of conditions and the following disclaimer in the
00013 ##   documentation and/or other materials provided with the distribution.
00014 ## * Neither the name of the Institute for Artificial Intelligence/
00015 ##   Universitaet Bremen nor the names of its contributors may be used to 
00016 ##   endorse or promote products derived from this software without specific 
00017 ##   prior written permission.
00018 ##
00019 ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00020 ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00021 ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00022 ## ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00023 ## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00024 ## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00025 ## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00026 ## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00027 ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00028 ## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00029 ## POSSIBILITY OF SUCH DAMAGE.
00030 
00031 import fileinput
00032 import re
00033 import json
00034 
# Collect the textual tree from the input: scanning starts at the line that
# reads exactly "J48 pruned tree", the two lines after that header are
# dropped, and every following line is kept until the first empty one.
lines = []
at_tree = False
line_skip = 0

for line_raw in fileinput.input():
    # Lines scheduled for skipping are discarded without being inspected.
    if line_skip > 0:
        line_skip -= 1
        continue

    line = line_raw.rstrip()

    if at_tree:
        # The first empty line after the header ends the tree section.
        if not line:
            break

        lines.append(line)
    elif line == "J48 pruned tree":
        # Header found: start collecting once the next two lines have passed.
        at_tree = True
        line_skip = 2
00054 
# Patterns for one tree line after its level prefix has been removed.
# A leaf line carries "<variable> <operator> <value>: <result> (<occurrences>)",
# an inner line only the "<variable> <operator> <value>" condition.
# Compiled once (as raw strings) instead of being re-parsed for every line.
_leaf_line = re.compile(r"(?P<variable>[\w\-]+) (?P<operator>[<\=>\!]+) (?P<value>[0-9a-zA-Z\.\-_]+): (?P<result>[\S]+) \((?P<occurrences>[\S]+)\)")
_inner_line = re.compile(r"(?P<variable>[\w\-]+) (?P<operator>[<\=>\!]+) (?P<value>[0-9a-zA-Z\.\-_]+)")

data_lines = []

for line in lines:
    # The number of "|" characters gives the nesting level; four characters
    # of prefix are stripped per level.
    level = line.count("|")
    unlevelled_line = line[level * 4:]

    m = _leaf_line.match(unlevelled_line)

    if not m: # this is not a result line, re-evaluate as normal line
        m = _inner_line.match(unlevelled_line)

    if not m:
        # Fail with the offending line instead of an AttributeError on None.
        raise ValueError("Unable to parse decision tree line: %r" % line)

    # dict(mapping, **extra) works on Python 2 and 3 alike; concatenating
    # the results of .items() only works on Python 2.
    data = dict(m.groupdict(), level=level)
    data_lines.append(data)
00068 
00069 
def recTB(data_lines, level=0):
    """Group a flat, ordered list of levelled entries into a nested tree.

    Each entry of *data_lines* is a dict with a "level" key.  Entries on
    *level* become nodes of the form {"data": entry, "children": [...]};
    entries on a deeper level are attached (recursively) as children of the
    most recent node.  Processing stops at the first entry whose level is
    smaller than *level*.

    Returns a tuple (children, index): the list of nodes built for this
    level and the number of entries of *data_lines* that were consumed.

    Raises IndexError if a deeper entry appears before any entry on
    *level*, because there is no node to attach it to.
    """
    children = []
    index = 0
    total = len(data_lines)

    # Walk by position so the entries consumed by a recursive call are
    # stepped over directly instead of being revisited one by one.
    while index < total:
        entry = data_lines[index]

        if entry["level"] == level:
            # this one is on the current level - add it to the children
            children.append({"data": entry, "children": []})
            index += 1
        elif entry["level"] > level:
            # this is a child of the current level, recurse
            (nested, consumed) = recTB(data_lines[index:], level + 1)
            children[-1]["children"].extend(nested)
            index += consumed
        else:
            # this is the end of our level, return.
            break

    return (children, index)
00091 
def is_number(s):
    """Return True if float() accepts *s*, False otherwise.

    Inputs that float() rejects with a TypeError (for example None) are
    reported as non-numeric as well instead of letting the error escape.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False

    return True
00098 
def _coerce_value(text):
    """Return *text* as int or float when it is a finite number, else unchanged."""
    # Imported locally so this block needs no additional file-level import.
    import math

    try:
        return int(text)
    except ValueError:
        pass

    try:
        number = float(text)
    except ValueError:
        return text

    # float() also accepts words such as "inf" and "nan"; keep those as
    # strings rather than converting them to non-finite numbers.
    if math.isnan(number) or math.isinf(number):
        return text

    return number


def formatDTree(branch_children):
    """Convert a list of tree nodes into the nested structure that is dumped.

    Each node is a dict with a "data" dict (keys "variable", "operator",
    "value" and, for leaves, "result") and a "children" list of further
    nodes.  For every node the returned list holds a dict with

    * "relation": {operator: {"value": ..., "variable": ...}}, where the
      value is converted to int or float if it is numeric, and
    * "true": [{"result": ...}] for a node carrying a result, otherwise the
      recursively formatted children.
    """
    formatted = []

    for child in branch_children:
        data = child["data"]
        relation = {data["operator"]: {"value": _coerce_value(data["value"]),
                                       "variable": data["variable"]}}

        if "result" in data:
            outcome = [{"result": data["result"]}]
        else:
            outcome = formatDTree(child["children"])

        formatted.append({"relation": relation, "true": outcome})

    return formatted
00125 
# Build the complete tree before opening the output file, so that an error
# during conversion does not leave a truncated o.json behind.
(items, index) = recTB(data_lines)
dtree = formatDTree(items)

# json.dump() writes text.  A file opened in text mode accepts that on both
# Python 2 and Python 3, whereas "wb" makes the write fail on Python 3.
with open("o.json", "w") as f:
    json.dump(dtree, f)


beliefstate
Author(s): Jan Winkler
autogenerated on Sun Oct 5 2014 22:30:15