00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 package edu.tum.cs.ias.knowrob.comp_germandeli;
00019
00020 import java.io.File;
00021 import java.util.HashMap;
00022 import javax.xml.parsers.*;
00023
00024 import org.semanticweb.owlapi.apibinding.OWLManager;
00025 import org.semanticweb.owlapi.io.RDFXMLOntologyFormat;
00026 import org.semanticweb.owlapi.model.*;
00027 import org.semanticweb.owlapi.util.DefaultPrefixManager;
00028 import org.semanticweb.owlapi.vocab.OWL2Datatype;
00029 import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
00030
00031 import org.w3c.dom.*;
00032
00033
00034
00035
00036 public class GermanDeliOWLImport {
00037
00038
00040
00041
00042
00043
// ---------------------------------------------------------------------
// Namespace IRIs
// ---------------------------------------------------------------------

/** Namespace of the KnowRob base ontology; imported by the generated ontology. */
public static final String KNOWROB = "http://ias.cs.tum.edu/kb/knowrob.owl#";

/** Namespace of the OWL vocabulary. */
public static final String OWL = "http://www.w3.org/2002/07/owl#";

/** Namespace of the RDF Schema vocabulary. */
public static final String RDFS = "http://www.w3.org/2000/01/rdf-schema#";

/** Namespace of the generated ontology; also used as its ontology IRI. */
public static final String GERMANDELI = "http://ias.cs.tum.edu/kb/germandeli.owl#";


// ---------------------------------------------------------------------
// XML vocabulary of the input file
// ---------------------------------------------------------------------

/** Element names that the parse_* methods dispatch on. */
public static final String XML_GROUPNAME = "group";
public static final String XML_PAGENAME  = "page";
public static final String XML_PRODNAME  = "product";

/** Attribute names read from group, page and product elements. */
public static final String XML_PROP_DESC    = "description";
public static final String XML_PROP_ID      = "id";
public static final String XML_PROP_LINK    = "link";
public static final String XML_PROP_NAME    = "name";
public static final String XML_PROP_LOC     = "location";
public static final String XML_PROP_PERISH  = "perishability";
public static final String XML_PROP_COUNTRY = "country_of_origin";
public static final String XML_PROP_CODE    = "product_code";
public static final String XML_PROP_PRICE   = "sale_price";
public static final String XML_PROP_WEIGHT  = "weight";
public static final String XML_PROP_BRAND   = "brand";


/**
 * Shared prefix manager. Its default prefix is the GermanDeli namespace;
 * the static initializer below registers the named prefixes on top of it.
 */
public static final DefaultPrefixManager PREFIX_MANAGER = new DefaultPrefixManager(GERMANDELI);

static {
    PREFIX_MANAGER.setPrefix("knowrob:", KNOWROB);
    PREFIX_MANAGER.setPrefix("germandeli:", GERMANDELI);
    PREFIX_MANAGER.setPrefix("owl:", OWL);
    PREFIX_MANAGER.setPrefix("rdfs:", RDFS);
}


/** Lookup from XML attribute name to the OWL data property that represents it. */
HashMap<String, OWLDataProperty> data_properties;

/** OWL API handles; assigned at the start of import_xml. */
OWLDataFactory factory;
OWLOntologyManager manager;
DefaultPrefixManager pm;

/** Counter initialised to zero; not read or written by the methods of this class. */
int inst_counter = 0;
00093
00094
00095
00096
/**
 * Creates an importer whose attribute-to-data-property table starts out empty.
 * The OWL API handles (manager, factory, prefix manager) are not set here;
 * import_xml assigns them.
 */
public GermanDeliOWLImport() {
    this.data_properties = new HashMap<String, OWLDataProperty>();
}
00104
00105
00106
00112 public OWLOntology import_xml(String xmlfile) {
00113
00114
00115 OWLOntology ontology = null;
00116
00117 try{
00118
00119
00120 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
00121 DocumentBuilder db = dbf.newDocumentBuilder();
00122
00123 System.out.println("Input: " + xmlfile);
00124
00125 String[] path= xmlfile.split("/");
00126 String filename = path[path.length-1];
00127 String mapname = filename.split("\\.")[0];
00128
00129 Document doc = db.parse(new File(xmlfile));
00130
00131
00132
00133
00134
00135 manager = OWLManager.createOWLOntologyManager();
00136 factory = manager.getOWLDataFactory();
00137
00138
00139 pm = PREFIX_MANAGER;
00140
00141
00142 ontology = manager.createOntology(IRI.create(GERMANDELI));
00143 manager.setOntologyFormat(ontology, new RDFXMLOntologyFormat());
00144
00145
00146 OWLImportsDeclaration oid = factory.getOWLImportsDeclaration(IRI.create(KNOWROB));
00147 AddImport addImp = new AddImport(ontology,oid);
00148 manager.applyChange(addImp);
00149
00150
00151 OWLClass clsVelocityCommand = factory.getOWLClass("knowrob:Action", pm);
00152
00153
00154
00155 create_data_properties(ontology);
00156
00157
00158
00159 NodeList nl = doc.getElementsByTagName(XML_GROUPNAME);
00160
00161
00162 OWLClass germandeli_prod = create_owl_class("GermanDeliObject", factory.getOWLClass("knowrob:HumanScaleObject", pm), ontology);
00163
00164 for(int i = 0; i < nl.getLength(); i++) {
00165 Node cn = nl.item(i);
00166
00167 if(cn.getNodeName().equals(XML_GROUPNAME))
00168 parse_group(cn, germandeli_prod, ontology);
00169
00170 else if (cn.getNodeName().equals(XML_PAGENAME))
00171 parse_page(cn, germandeli_prod, ontology);
00172
00173 else if (cn.getNodeName().equals(XML_PRODNAME))
00174 parse_product(cn, germandeli_prod, ontology);
00175 }
00176
00177
00178
00179
00180
00181
00182
00183 File file = new File("/work/germandeli/germandeli.owl");
00184 manager.saveOntology(ontology, IRI.create(file.toURI()));
00185
00186 } catch (Exception e) {
00187 ontology = null;
00188 e.printStackTrace();
00189 }
00190
00191 return ontology;
00192 }
00193
00194
00195
00196
00203 protected void parse_group(Node group, OWLClass parent, OWLOntology ontology) {
00204
00205 OWLClass current_group = parent;
00206 NamedNodeMap nnm = group.getAttributes();
00207
00208
00209
00210
00211
00212 if((nnm.getNamedItem(XML_PROP_LINK)!=null) && (nnm.getNamedItem(XML_PROP_NAME) != null)) {
00213
00214 HashMap<String, String> attr = new HashMap<String, String>();
00215 attr.put(XML_PROP_LINK, "http://www.germandeli.com/"+nnm.getNamedItem(XML_PROP_LINK).getTextContent());
00216
00217 current_group = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_NAME).getTextContent()), parent, ontology);
00218 add_property_restrictions(current_group, attr, ontology);
00219 }
00220
00221
00222 for(int i=0;i<group.getChildNodes().getLength();i++) {
00223 Node cn = group.getChildNodes().item(i);
00224
00225 if(cn.getNodeName().equals(XML_GROUPNAME))
00226 parse_group(cn, current_group, ontology);
00227
00228 else if (cn.getNodeName().equals(XML_PAGENAME))
00229 parse_page(cn, current_group, ontology);
00230 }
00231 }
00232
00233
00240 protected void parse_page(Node page, OWLClass parent, OWLOntology ontology) {
00241
00242 NamedNodeMap nnm = page.getAttributes();
00243 HashMap<String, String> attr = new HashMap<String, String>();
00244
00245 OWLClass current_page = parent;
00246 if( (nnm.getNamedItem(XML_PROP_LINK)!=null) && (nnm.getNamedItem(XML_PROP_NAME)!=null)) {
00247 attr.put(XML_PROP_LINK, "http://www.germandeli.com/"+nnm.getNamedItem(XML_PROP_LINK).getTextContent());
00248
00249
00250 current_page = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_NAME).getTextContent()), parent, ontology);
00251 add_property_restrictions(current_page, attr, ontology);
00252 }
00253
00254
00255 for(int i=0;i<page.getChildNodes().getLength();i++) {
00256 Node cn = page.getChildNodes().item(i);
00257
00258 if(cn.getNodeName().equals(XML_GROUPNAME))
00259 parse_group(cn, current_page, ontology);
00260
00261 else if (cn.getNodeName().equals(XML_PAGENAME))
00262 parse_page(cn, current_page, ontology);
00263
00264 else if (cn.getNodeName().equals(XML_PRODNAME))
00265 parse_product(cn, current_page, ontology);
00266 }
00267 }
00268
00269
00276 protected void parse_product(Node product, OWLClass parent, OWLOntology ontology) {
00277
00278 NamedNodeMap nnm = product.getAttributes();
00279 HashMap<String, String> attr = new HashMap<String, String>();
00280
00281
00282 attr.put(XML_PROP_ID, nnm.getNamedItem(XML_PROP_ID).getTextContent());
00283 attr.put(XML_PROP_CODE, nnm.getNamedItem(XML_PROP_CODE).getTextContent());
00284 attr.put(XML_PROP_DESC, nnm.getNamedItem(XML_PROP_DESC).getTextContent());
00285 attr.put(XML_PROP_LINK, nnm.getNamedItem(XML_PROP_LINK).getTextContent());
00286 attr.put(XML_PROP_LOC, nnm.getNamedItem(XML_PROP_LOC).getTextContent());
00287 attr.put(XML_PROP_PERISH, nnm.getNamedItem(XML_PROP_PERISH).getTextContent());
00288 attr.put(XML_PROP_COUNTRY,nnm.getNamedItem(XML_PROP_COUNTRY).getTextContent());
00289 attr.put(XML_PROP_PRICE, nnm.getNamedItem(XML_PROP_PRICE).getTextContent());
00290 attr.put(XML_PROP_WEIGHT, nnm.getNamedItem(XML_PROP_WEIGHT).getTextContent());
00291 attr.put(XML_PROP_BRAND, nnm.getNamedItem(XML_PROP_BRAND).getTextContent());
00292
00293 OWLClass current_product = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_DESC).getTextContent()), parent, ontology);
00294 add_property_restrictions(current_product, attr, ontology);
00295 }
00296
00297
00298
00306 protected OWLClass create_owl_class(String name, OWLClass parent, OWLOntology ontology) {
00307
00308 OWLClass res = factory.getOWLClass(IRI.create(GERMANDELI + name));
00309 OWLAxiom subClassAxiom = factory.getOWLSubClassOfAxiom(res, parent);
00310 manager.addAxiom(ontology, subClassAxiom);
00311
00312 return res;
00313 }
00314
00315
00316
00323 protected void add_property_restrictions(OWLClass cl, HashMap<String, String> attr, OWLOntology ontology) {
00324
00325
00326
00327 if(attr.containsKey(XML_PROP_PERISH) && !attr.get(XML_PROP_PERISH).equals("")) {
00328
00329 if(attr.get(XML_PROP_PERISH).equals("Refrigerated")) {
00330 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "Perishable"));
00331 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish));
00332 attr.remove(XML_PROP_PERISH);
00333 } else if(attr.get(XML_PROP_PERISH).equals("Frozen")) {
00334 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "Frozen"));
00335 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish));
00336 attr.remove(XML_PROP_PERISH);
00337 } else if(attr.get(XML_PROP_PERISH).equals("Heat-Sensitive")) {
00338 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "HeatSensitive"));
00339 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish));
00340 attr.remove(XML_PROP_PERISH);
00341 } else {
00342 attr.remove(XML_PROP_PERISH);
00343 }
00344 }
00345
00346
00347 if(attr.containsKey(XML_PROP_DESC)) {
00348
00349 OWLAnnotation commentAnno = factory.getOWLAnnotation(
00350 factory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()),
00351 factory.getOWLStringLiteral(prologify(attr.get(XML_PROP_DESC)), "en"));
00352
00353 manager.addAxiom(ontology, factory.getOWLAnnotationAssertionAxiom(cl.getIRI(), commentAnno));
00354 attr.remove(XML_PROP_DESC);
00355 }
00356
00357
00358
00359 if(attr.containsKey(XML_PROP_LINK)) {
00360
00361 OWLAnnotation commentAnno = factory.getOWLAnnotation(
00362 factory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_SEE_ALSO.getIRI()),
00363 factory.getOWLStringLiteral(attr.get(XML_PROP_LINK)));
00364
00365 manager.addAxiom(ontology, factory.getOWLAnnotationAssertionAxiom(cl.getIRI(), commentAnno));
00366 attr.remove(XML_PROP_LINK);
00367 }
00368
00369
00370
00371
00372
00373 if(attr.containsKey(XML_PROP_LOC)) {
00374 attr.remove(XML_PROP_LOC);
00375 }
00376
00377
00378
00379 for(String key : attr.keySet()) {
00380
00381
00382 if(attr.get(key).equals(""))
00383 continue;
00384
00385 OWLLiteral val = factory.getOWLTypedLiteral(attr.get(key));
00386 OWLDataHasValue restr = factory.getOWLDataHasValue(this.data_properties.get(key), val);
00387
00388 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, restr));
00389
00390 }
00391
00392 }
00393
00394
00395
00402 protected void create_data_properties(OWLOntology ontology) {
00403
00404
00405 OWLDataProperty data_prop;
00406 OWLDataPropertyRangeAxiom rangeAxiom;
00407 OWLDatatype integerDatatype = factory.getIntegerOWLDatatype();
00408 OWLDatatype stringDatatype = factory.getOWLDatatype(OWL2Datatype.XSD_STRING.getIRI());
00409
00410 data_properties.put(XML_PROP_DESC, factory.getOWLDataProperty("rdfs:label", pm));
00411 data_properties.put(XML_PROP_PRICE, factory.getOWLDataProperty("knowrob:purchasePrice", pm));
00412 data_properties.put(XML_PROP_WEIGHT, factory.getOWLDataProperty("knowrob:weight", pm));
00413
00414
00415 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "productCode"));
00416 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, integerDatatype);
00417 manager.addAxiom(ontology, rangeAxiom);
00418 data_properties.put(XML_PROP_CODE, data_prop);
00419
00420
00421 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "productID"));
00422 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00423 manager.addAxiom(ontology, rangeAxiom);
00424 data_properties.put(XML_PROP_ID, data_prop);
00425
00426
00427 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "linkToProductPage"));
00428 data_properties.put(XML_PROP_LINK, data_prop);
00429 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00430 manager.addAxiom(ontology, rangeAxiom);
00431
00432
00433 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "brand"));
00434 data_properties.put(XML_PROP_BRAND, data_prop);
00435 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00436 manager.addAxiom(ontology, rangeAxiom);
00437
00438
00439 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "countryOfOrigin"));
00440 data_properties.put(XML_PROP_COUNTRY, data_prop);
00441 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00442 manager.addAxiom(ontology, rangeAxiom);
00443
00444 }
00445
00446
00447
00448
00449 protected String prologify(String str) {
00450
00451
00452 str=str.replaceAll(""", "");
00453 str=str.replaceAll("&", "_");
00454 str=str.replaceAll(",", "_");
00455 str=str.replaceAll(" ", "_");
00456 str=str.replaceAll("-", "_");
00457 str=str.replaceAll("/", "_");
00458
00459
00460 str=str.replaceAll("Ä", "Ae");
00461 str=str.replaceAll("ä", "ae");
00462 str=str.replaceAll("Ö", "Oe");
00463 str=str.replaceAll("ö", "oe");
00464 str=str.replaceAll("Ü", "Ue");
00465 str=str.replaceAll("ü", "ue");
00466 str=str.replaceAll("ß","ss");
00467
00468 str=str.replaceAll("Ä", "Ae");
00469 str=str.replaceAll("ä", "ae");
00470 str=str.replaceAll("Ö", "Oe");
00471 str=str.replaceAll("ö", "oe");
00472 str=str.replaceAll("Ü", "Ue");
00473 str=str.replaceAll("ü", "ue");
00474 str=str.replaceAll("ß", "ss");
00475
00476
00477 str=str.replaceAll("[^A-Za-z0-9_]","");
00478
00479
00480 str=str.replaceAll("_+", "_");
00481 str=str.replaceAll("^_", "");
00482
00483 return str;
00484 }
00485
00486
00487 public static void main(String[] args) {
00488
00489 GermanDeliOWLImport gdi = new GermanDeliOWLImport();
00490 System.out.println(gdi.import_xml("/work/germandeli/download/structure.xml").toString());
00491
00492
00493 }
00494
00495
00496 }
00497