$search
00001 /* 00002 * Copyright (C) 2010 by Moritz Tenorth 00003 * 00004 * This program is free software; you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation; either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 package edu.tum.cs.ias.knowrob.comp_germandeli; 00019 00020 import java.io.File; 00021 import java.util.HashMap; 00022 import javax.xml.parsers.*; 00023 00024 import org.semanticweb.owlapi.apibinding.OWLManager; 00025 import org.semanticweb.owlapi.io.RDFXMLOntologyFormat; 00026 import org.semanticweb.owlapi.model.*; 00027 import org.semanticweb.owlapi.util.DefaultPrefixManager; 00028 import org.semanticweb.owlapi.vocab.OWL2Datatype; 00029 import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; 00030 00031 import org.w3c.dom.*; 00032 00033 00034 00035 00036 public class GermanDeliOWLImport { 00037 00038 00040 // Set IRIs for the ontologies used here 00041 // 00042 00043 // Base IRI for KnowRob ontology 00044 public final static String KNOWROB = "http://ias.cs.tum.edu/kb/knowrob.owl#"; 00045 00046 // Base IRI for OWL ontology 00047 public final static String OWL = "http://www.w3.org/2002/07/owl#"; 00048 00049 // Base IRI for RDFS 00050 public final static String RDFS = "http://www.w3.org/2000/01/rdf-schema#"; 00051 00052 // Base IRI for semantic map ontology 00053 public final static String GERMANDELI = "http://ias.cs.tum.edu/kb/germandeli.owl#"; 00054 00055 00057 // Define names of XML tags and attributes in the input file 00058 // 00059 public final static String XML_GROUPNAME = "group"; 00060 public final static String XML_PAGENAME = "page"; 00061 public final static String XML_PRODNAME = "product"; 00062 00063 public final static String XML_PROP_DESC = "description"; 00064 public final static String XML_PROP_ID = "id"; 00065 public final static String XML_PROP_LINK = "link"; 00066 public final static String XML_PROP_NAME = "name"; 00067 public final static String XML_PROP_LOC = "location"; 00068 public final static String XML_PROP_PERISH = "perishability"; 00069 public final static String XML_PROP_COUNTRY = "country_of_origin"; 00070 public final static String XML_PROP_CODE = "product_code"; 00071 public final static String XML_PROP_PRICE = "sale_price"; 00072 public final static String XML_PROP_WEIGHT = "weight"; 00073 public final static String XML_PROP_BRAND = "brand"; 00074 00075 00076 // Prefix manager 00077 public final static DefaultPrefixManager PREFIX_MANAGER = new DefaultPrefixManager(GERMANDELI); 00078 static { 00079 PREFIX_MANAGER.setPrefix("knowrob:", KNOWROB); 00080 PREFIX_MANAGER.setPrefix("germandeli:", GERMANDELI); 00081 PREFIX_MANAGER.setPrefix("owl:", OWL); 00082 PREFIX_MANAGER.setPrefix("rdfs:", RDFS); 00083 } 00084 00085 00086 HashMap<String, OWLDataProperty> data_properties; 00087 00088 OWLDataFactory factory; 00089 OWLOntologyManager manager; 00090 DefaultPrefixManager pm; 00091 00092 int inst_counter=0; // counter to create unique instance identifiers 00093 00094 00095 00096 00100 public GermanDeliOWLImport() { 00101 00102 data_properties = new HashMap<String, OWLDataProperty>(); 00103 } 00104 00105 00106 00112 public OWLOntology import_xml(String xmlfile) { 00113 00114 00115 OWLOntology ontology = null; 00116 00117 try{ 00118 00119 // read XML file 00120 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 00121 DocumentBuilder db = dbf.newDocumentBuilder(); 00122 00123 System.out.println("Input: " + xmlfile); 00124 00125 String[] path= xmlfile.split("/"); 00126 String filename = path[path.length-1]; 00127 String mapname = filename.split("\\.")[0]; 00128 00129 Document doc = db.parse(new File(xmlfile)); 00130 00131 00132 00133 00134 // Create ontology manager and data factory 00135 manager = OWLManager.createOWLOntologyManager(); 00136 factory = manager.getOWLDataFactory(); 00137 00138 // Get prefix manager using the base IRI of the JoystickDrive ontology as default namespace 00139 pm = PREFIX_MANAGER; 00140 00141 // Create empty OWL ontology 00142 ontology = manager.createOntology(IRI.create(GERMANDELI)); 00143 manager.setOntologyFormat(ontology, new RDFXMLOntologyFormat()); 00144 00145 // Import KnowRob ontology 00146 OWLImportsDeclaration oid = factory.getOWLImportsDeclaration(IRI.create(KNOWROB)); 00147 AddImport addImp = new AddImport(ontology,oid); 00148 manager.applyChange(addImp); 00149 00150 // Get classes from the KnowRob ontology 00151 OWLClass clsVelocityCommand = factory.getOWLClass("knowrob:Action", pm); 00152 00153 00154 // fill the internal list of object properties to be used 00155 create_data_properties(ontology); 00156 00157 00158 // Iterate over all classes in the XML file and create the respective OWL representation 00159 NodeList nl = doc.getElementsByTagName(XML_GROUPNAME); 00160 00161 // create parent class 00162 OWLClass germandeli_prod = create_owl_class("GermanDeliObject", factory.getOWLClass("knowrob:HumanScaleObject", pm), ontology); 00163 00164 for(int i = 0; i < nl.getLength(); i++) { 00165 Node cn = nl.item(i); 00166 00167 if(cn.getNodeName().equals(XML_GROUPNAME)) 00168 parse_group(cn, germandeli_prod, ontology); 00169 00170 else if (cn.getNodeName().equals(XML_PAGENAME)) 00171 parse_page(cn, germandeli_prod, ontology); 00172 00173 else if (cn.getNodeName().equals(XML_PRODNAME)) 00174 parse_product(cn, germandeli_prod, ontology); 00175 } 00176 00177 00178 00179 // save the ontology to a file 00180 //SimpleDateFormat sdf = new SimpleDateFormat("yy-MM-dd_HH-mm-ss-SSS"); 00181 //String outfile = "ias_semantic_map_"+sdf.format(new Date())+".owl"; 00182 //saveOntologyToFile(ontology, outfile); 00183 File file = new File("/work/germandeli/germandeli.owl"); 00184 manager.saveOntology(ontology, IRI.create(file.toURI())); 00185 00186 } catch (Exception e) { 00187 ontology = null; 00188 e.printStackTrace(); 00189 } 00190 00191 return ontology; 00192 } 00193 00194 00195 00196 00203 protected void parse_group(Node group, OWLClass parent, OWLOntology ontology) { 00204 00205 OWLClass current_group = parent; 00206 NamedNodeMap nnm = group.getAttributes(); 00207 00208 00209 // only create a separate group if it has a name and link set 00210 // otherwise just re-use the parent 00211 00212 if((nnm.getNamedItem(XML_PROP_LINK)!=null) && (nnm.getNamedItem(XML_PROP_NAME) != null)) { 00213 00214 HashMap<String, String> attr = new HashMap<String, String>(); 00215 attr.put(XML_PROP_LINK, "http://www.germandeli.com/"+nnm.getNamedItem(XML_PROP_LINK).getTextContent()); 00216 00217 current_group = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_NAME).getTextContent()), parent, ontology); 00218 add_property_restrictions(current_group, attr, ontology); 00219 } 00220 00221 // recursively parse child nodes 00222 for(int i=0;i<group.getChildNodes().getLength();i++) { 00223 Node cn = group.getChildNodes().item(i); 00224 00225 if(cn.getNodeName().equals(XML_GROUPNAME)) 00226 parse_group(cn, current_group, ontology); 00227 00228 else if (cn.getNodeName().equals(XML_PAGENAME)) 00229 parse_page(cn, current_group, ontology); 00230 } 00231 } 00232 00233 00240 protected void parse_page(Node page, OWLClass parent, OWLOntology ontology) { 00241 00242 NamedNodeMap nnm = page.getAttributes(); 00243 HashMap<String, String> attr = new HashMap<String, String>(); 00244 00245 OWLClass current_page = parent; 00246 if( (nnm.getNamedItem(XML_PROP_LINK)!=null) && (nnm.getNamedItem(XML_PROP_NAME)!=null)) { 00247 attr.put(XML_PROP_LINK, "http://www.germandeli.com/"+nnm.getNamedItem(XML_PROP_LINK).getTextContent()); 00248 00249 // create ID from the link without '.html' 00250 current_page = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_NAME).getTextContent()), parent, ontology); 00251 add_property_restrictions(current_page, attr, ontology); 00252 } 00253 00254 // recursively parse child nodes 00255 for(int i=0;i<page.getChildNodes().getLength();i++) { 00256 Node cn = page.getChildNodes().item(i); 00257 00258 if(cn.getNodeName().equals(XML_GROUPNAME)) 00259 parse_group(cn, current_page, ontology); 00260 00261 else if (cn.getNodeName().equals(XML_PAGENAME)) 00262 parse_page(cn, current_page, ontology); 00263 00264 else if (cn.getNodeName().equals(XML_PRODNAME)) 00265 parse_product(cn, current_page, ontology); 00266 } 00267 } 00268 00269 00276 protected void parse_product(Node product, OWLClass parent, OWLOntology ontology) { 00277 00278 NamedNodeMap nnm = product.getAttributes(); 00279 HashMap<String, String> attr = new HashMap<String, String>(); 00280 00281 00282 attr.put(XML_PROP_ID, nnm.getNamedItem(XML_PROP_ID).getTextContent()); 00283 attr.put(XML_PROP_CODE, nnm.getNamedItem(XML_PROP_CODE).getTextContent()); 00284 attr.put(XML_PROP_DESC, nnm.getNamedItem(XML_PROP_DESC).getTextContent()); 00285 attr.put(XML_PROP_LINK, nnm.getNamedItem(XML_PROP_LINK).getTextContent()); 00286 attr.put(XML_PROP_LOC, nnm.getNamedItem(XML_PROP_LOC).getTextContent()); 00287 attr.put(XML_PROP_PERISH, nnm.getNamedItem(XML_PROP_PERISH).getTextContent()); 00288 attr.put(XML_PROP_COUNTRY,nnm.getNamedItem(XML_PROP_COUNTRY).getTextContent()); 00289 attr.put(XML_PROP_PRICE, nnm.getNamedItem(XML_PROP_PRICE).getTextContent()); 00290 attr.put(XML_PROP_WEIGHT, nnm.getNamedItem(XML_PROP_WEIGHT).getTextContent()); 00291 attr.put(XML_PROP_BRAND, nnm.getNamedItem(XML_PROP_BRAND).getTextContent()); 00292 00293 OWLClass current_product = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_DESC).getTextContent()), parent, ontology); 00294 add_property_restrictions(current_product, attr, ontology); 00295 } 00296 00297 00298 00306 protected OWLClass create_owl_class(String name, OWLClass parent, OWLOntology ontology) { 00307 00308 OWLClass res = factory.getOWLClass(IRI.create(GERMANDELI + name)); 00309 OWLAxiom subClassAxiom = factory.getOWLSubClassOfAxiom(res, parent); 00310 manager.addAxiom(ontology, subClassAxiom); 00311 00312 return res; 00313 } 00314 00315 00316 00323 protected void add_property_restrictions(OWLClass cl, HashMap<String, String> attr, OWLOntology ontology) { 00324 00325 00326 // handle perishability separately: subClassOf Perishable 00327 if(attr.containsKey(XML_PROP_PERISH) && !attr.get(XML_PROP_PERISH).equals("")) { 00328 00329 if(attr.get(XML_PROP_PERISH).equals("Refrigerated")) { 00330 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "Perishable")); 00331 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish)); 00332 attr.remove(XML_PROP_PERISH); 00333 } else if(attr.get(XML_PROP_PERISH).equals("Frozen")) { 00334 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "Frozen")); 00335 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish)); 00336 attr.remove(XML_PROP_PERISH); 00337 } else if(attr.get(XML_PROP_PERISH).equals("Heat-Sensitive")) { 00338 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "HeatSensitive")); 00339 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish)); 00340 attr.remove(XML_PROP_PERISH); 00341 } else { 00342 attr.remove(XML_PROP_PERISH); 00343 } 00344 } 00345 00346 // set description as RDFS:LABEL 00347 if(attr.containsKey(XML_PROP_DESC)) { 00348 00349 OWLAnnotation commentAnno = factory.getOWLAnnotation( 00350 factory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()), 00351 factory.getOWLStringLiteral(prologify(attr.get(XML_PROP_DESC)), "en")); 00352 00353 manager.addAxiom(ontology, factory.getOWLAnnotationAssertionAxiom(cl.getIRI(), commentAnno)); 00354 attr.remove(XML_PROP_DESC); 00355 } 00356 00357 00358 // set link as RDFS:seeAlso 00359 if(attr.containsKey(XML_PROP_LINK)) { 00360 00361 OWLAnnotation commentAnno = factory.getOWLAnnotation( 00362 factory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_SEE_ALSO.getIRI()), 00363 factory.getOWLStringLiteral(attr.get(XML_PROP_LINK))); 00364 00365 manager.addAxiom(ontology, factory.getOWLAnnotationAssertionAxiom(cl.getIRI(), commentAnno)); 00366 attr.remove(XML_PROP_LINK); 00367 } 00368 00369 00370 00371 00372 // currently not using the loc key 00373 if(attr.containsKey(XML_PROP_LOC)) { 00374 attr.remove(XML_PROP_LOC); 00375 } 00376 00377 00378 // for the other attributes: add as hasValue restriction 00379 for(String key : attr.keySet()) { 00380 00381 // don't add properties for empty values 00382 if(attr.get(key).equals("")) 00383 continue; 00384 00385 OWLLiteral val = factory.getOWLTypedLiteral(attr.get(key)); 00386 OWLDataHasValue restr = factory.getOWLDataHasValue(this.data_properties.get(key), val); 00387 00388 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, restr)); 00389 00390 } 00391 00392 } 00393 00394 00395 00402 protected void create_data_properties(OWLOntology ontology) { 00403 00404 00405 OWLDataProperty data_prop; 00406 OWLDataPropertyRangeAxiom rangeAxiom; 00407 OWLDatatype integerDatatype = factory.getIntegerOWLDatatype(); 00408 OWLDatatype stringDatatype = factory.getOWLDatatype(OWL2Datatype.XSD_STRING.getIRI()); 00409 00410 data_properties.put(XML_PROP_DESC, factory.getOWLDataProperty("rdfs:label", pm)); 00411 data_properties.put(XML_PROP_PRICE, factory.getOWLDataProperty("knowrob:purchasePrice", pm)); 00412 data_properties.put(XML_PROP_WEIGHT, factory.getOWLDataProperty("knowrob:weight", pm)); 00413 00414 00415 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "productCode")); 00416 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, integerDatatype); 00417 manager.addAxiom(ontology, rangeAxiom); 00418 data_properties.put(XML_PROP_CODE, data_prop); 00419 00420 00421 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "productID")); 00422 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype); 00423 manager.addAxiom(ontology, rangeAxiom); 00424 data_properties.put(XML_PROP_ID, data_prop); 00425 00426 00427 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "linkToProductPage")); 00428 data_properties.put(XML_PROP_LINK, data_prop); 00429 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype); 00430 manager.addAxiom(ontology, rangeAxiom); 00431 00432 00433 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "brand")); 00434 data_properties.put(XML_PROP_BRAND, data_prop); 00435 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype); 00436 manager.addAxiom(ontology, rangeAxiom); 00437 00438 00439 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "countryOfOrigin")); 00440 data_properties.put(XML_PROP_COUNTRY, data_prop); 00441 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype); 00442 manager.addAxiom(ontology, rangeAxiom); 00443 00444 } 00445 00446 00447 00448 00449 protected String prologify(String str) { 00450 00451 // first replace well-known special characters 00452 str=str.replaceAll(""", ""); 00453 str=str.replaceAll("&", "_"); 00454 str=str.replaceAll(",", "_"); 00455 str=str.replaceAll(" ", "_"); 00456 str=str.replaceAll("-", "_"); 00457 str=str.replaceAll("/", "_"); 00458 00459 // replace common umlaut characters 00460 str=str.replaceAll("Ä", "Ae"); 00461 str=str.replaceAll("ä", "ae"); 00462 str=str.replaceAll("Ö", "Oe"); 00463 str=str.replaceAll("ö", "oe"); 00464 str=str.replaceAll("Ü", "Ue"); 00465 str=str.replaceAll("ü", "ue"); 00466 str=str.replaceAll("ß","ss"); 00467 00468 str=str.replaceAll("Ä", "Ae"); 00469 str=str.replaceAll("ä", "ae"); 00470 str=str.replaceAll("Ö", "Oe"); 00471 str=str.replaceAll("ö", "oe"); 00472 str=str.replaceAll("Ü", "Ue"); 00473 str=str.replaceAll("ü", "ue"); 00474 str=str.replaceAll("ß", "ss"); 00475 00476 // replace everything else that is neither char nor num nor '_' 00477 str=str.replaceAll("[^A-Za-z0-9_]",""); 00478 00479 // remove multiple underscores 00480 str=str.replaceAll("_+", "_"); 00481 str=str.replaceAll("^_", ""); 00482 00483 return str; 00484 } 00485 00486 00487 public static void main(String[] args) { 00488 00489 GermanDeliOWLImport gdi = new GermanDeliOWLImport(); 00490 System.out.println(gdi.import_xml("/work/germandeli/download/structure.xml").toString()); 00491 00492 00493 } 00494 00495 00496 } 00497