GermanDeliOWLImport.java
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2010 by Moritz Tenorth
00003  * 
00004  * This program is free software; you can redistribute it and/or modify
00005  * it under the terms of the GNU General Public License as published by
00006  * the Free Software Foundation; either version 3 of the License, or
00007  * (at your option) any later version.
00008  * 
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  * 
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 package edu.tum.cs.ias.knowrob.comp_germandeli;
00019 
00020 import java.io.File;
00021 import java.util.HashMap;
00022 import javax.xml.parsers.*;
00023 
00024 import org.semanticweb.owlapi.apibinding.OWLManager;
00025 import org.semanticweb.owlapi.io.RDFXMLOntologyFormat;
00026 import org.semanticweb.owlapi.model.*;
00027 import org.semanticweb.owlapi.util.DefaultPrefixManager;
00028 import org.semanticweb.owlapi.vocab.OWL2Datatype;
00029 import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
00030 
00031 import org.w3c.dom.*;
00032 
00033 
00034 
00035 
00036 public class GermanDeliOWLImport {
00037 
00038 
00040         // Set IRIs for the ontologies used here
00041         //
00042         
00043         // Base IRI for KnowRob ontology
00044         public final static String KNOWROB = "http://ias.cs.tum.edu/kb/knowrob.owl#";
00045 
00046         // Base IRI for OWL ontology
00047         public final static String OWL = "http://www.w3.org/2002/07/owl#";
00048         
00049         // Base IRI for RDFS
00050         public final static String RDFS = "http://www.w3.org/2000/01/rdf-schema#";
00051         
00052         // Base IRI for semantic map ontology   
00053         public final static String GERMANDELI = "http://ias.cs.tum.edu/kb/germandeli.owl#";
00054         
00055 
00057         // Define names of XML tags and attributes in the input file
00058         //
00059         public final static String XML_GROUPNAME        = "group";
00060         public final static String XML_PAGENAME         = "page";
00061         public final static String XML_PRODNAME         = "product";
00062         
00063         public final static String XML_PROP_DESC        = "description";
00064         public final static String XML_PROP_ID          = "id";
00065         public final static String XML_PROP_LINK        = "link";
00066         public final static String XML_PROP_NAME        = "name";
00067         public final static String XML_PROP_LOC         = "location";
00068         public final static String XML_PROP_PERISH      = "perishability";
00069         public final static String XML_PROP_COUNTRY     = "country_of_origin";
00070         public final static String XML_PROP_CODE        = "product_code";
00071         public final static String XML_PROP_PRICE       = "sale_price";
00072         public final static String XML_PROP_WEIGHT      = "weight";
00073         public final static String XML_PROP_BRAND       = "brand";
00074         
00075         
00076         // Prefix manager
00077         public final static DefaultPrefixManager PREFIX_MANAGER = new DefaultPrefixManager(GERMANDELI);
00078         static {
00079                 PREFIX_MANAGER.setPrefix("knowrob:", KNOWROB);
00080                 PREFIX_MANAGER.setPrefix("germandeli:", GERMANDELI);
00081                 PREFIX_MANAGER.setPrefix("owl:", OWL);
00082                 PREFIX_MANAGER.setPrefix("rdfs:", RDFS);
00083         }
00084         
00085         
00086         HashMap<String, OWLDataProperty> data_properties; 
00087         
00088         OWLDataFactory factory;
00089         OWLOntologyManager manager;
00090         DefaultPrefixManager pm;
00091         
00092         int inst_counter=0;     // counter to create unique instance identifiers
00093         
00094         
00095         
00096         
00100         public GermanDeliOWLImport() {
00101         
00102                 data_properties = new HashMap<String, OWLDataProperty>();
00103         }
00104         
00105         
00106         
00112         public OWLOntology import_xml(String xmlfile) {
00113                 
00114 
00115                 OWLOntology ontology = null;
00116                 
00117                 try{
00118 
00119                         // read XML file
00120                         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
00121                         DocumentBuilder db = dbf.newDocumentBuilder();
00122                         
00123                         System.out.println("Input: " + xmlfile);
00124                         
00125                         String[] path= xmlfile.split("/");
00126                         String filename = path[path.length-1];
00127                         String mapname = filename.split("\\.")[0];
00128                           
00129                         Document doc = db.parse(new File(xmlfile));
00130 
00131                       
00132                         
00133 
00134                         // Create ontology manager and data factory
00135                         manager = OWLManager.createOWLOntologyManager();
00136                         factory = manager.getOWLDataFactory();
00137                         
00138                         // Get prefix manager using the base IRI of the JoystickDrive ontology as default namespace
00139                         pm = PREFIX_MANAGER;
00140                         
00141                         // Create empty OWL ontology
00142                         ontology = manager.createOntology(IRI.create(GERMANDELI));
00143                         manager.setOntologyFormat(ontology, new RDFXMLOntologyFormat());
00144                         
00145                         // Import KnowRob ontology
00146                         OWLImportsDeclaration oid = factory.getOWLImportsDeclaration(IRI.create(KNOWROB));
00147                         AddImport addImp = new AddImport(ontology,oid);
00148                         manager.applyChange(addImp);
00149                         
00150                         // Get classes from the KnowRob ontology
00151                         OWLClass clsVelocityCommand = factory.getOWLClass("knowrob:Action", pm);
00152                         
00153                         
00154                         // fill the internal list of object properties to be used 
00155                         create_data_properties(ontology);
00156                         
00157                         
00158                         // Iterate over all classes in the XML file and create the respective OWL representation
00159                         NodeList nl = doc.getElementsByTagName(XML_GROUPNAME);
00160                         
00161                         // create parent class
00162                         OWLClass germandeli_prod = create_owl_class("GermanDeliObject", factory.getOWLClass("knowrob:HumanScaleObject", pm), ontology);
00163                         
00164                         for(int i = 0; i < nl.getLength(); i++) {
00165                                 Node cn = nl.item(i);
00166                                 
00167                                 if(cn.getNodeName().equals(XML_GROUPNAME))
00168                                         parse_group(cn, germandeli_prod, ontology);
00169                                 
00170                                 else if (cn.getNodeName().equals(XML_PAGENAME))
00171                                         parse_page(cn, germandeli_prod, ontology);
00172                                 
00173                                 else if (cn.getNodeName().equals(XML_PRODNAME))
00174                                                 parse_product(cn, germandeli_prod, ontology);
00175                         }
00176                         
00177                         
00178                     
00179                         // save the ontology to a file
00180                         //SimpleDateFormat sdf = new SimpleDateFormat("yy-MM-dd_HH-mm-ss-SSS");
00181                         //String outfile = "ias_semantic_map_"+sdf.format(new Date())+".owl";
00182                         //saveOntologyToFile(ontology, outfile);
00183                         File file = new File("/work/germandeli/germandeli.owl"); 
00184                         manager.saveOntology(ontology, IRI.create(file.toURI())); 
00185 
00186                 } catch (Exception e) {
00187                         ontology = null;
00188                         e.printStackTrace();
00189                 }
00190 
00191                 return ontology;
00192         }
00193     
00194 
00195         
00196         
00203         protected void parse_group(Node group, OWLClass parent, OWLOntology ontology) {
00204 
00205         OWLClass current_group = parent;
00206                 NamedNodeMap nnm =  group.getAttributes();
00207 
00208                 
00209         // only create a separate group if it has a name and link set
00210         // otherwise just re-use the parent
00211         
00212                 if((nnm.getNamedItem(XML_PROP_LINK)!=null) && (nnm.getNamedItem(XML_PROP_NAME) != null)) {
00213                         
00214                 HashMap<String, String> attr = new HashMap<String, String>();
00215                 attr.put(XML_PROP_LINK,   "http://www.germandeli.com/"+nnm.getNamedItem(XML_PROP_LINK).getTextContent());
00216                         
00217                         current_group = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_NAME).getTextContent()), parent, ontology);                        
00218                         add_property_restrictions(current_group, attr, ontology);
00219                 }
00220                 
00221                 // recursively parse child nodes
00222                 for(int i=0;i<group.getChildNodes().getLength();i++) {
00223                         Node cn = group.getChildNodes().item(i);
00224                         
00225                         if(cn.getNodeName().equals(XML_GROUPNAME))
00226                                 parse_group(cn, current_group, ontology);
00227                         
00228                         else if (cn.getNodeName().equals(XML_PAGENAME))
00229                                 parse_page(cn, current_group, ontology);
00230                 }
00231         }
00232         
00233         
00240         protected void parse_page(Node page, OWLClass parent, OWLOntology ontology) {
00241                 
00242         NamedNodeMap nnm =  page.getAttributes();
00243         HashMap<String, String> attr = new HashMap<String, String>();
00244 
00245         OWLClass current_page = parent;
00246         if( (nnm.getNamedItem(XML_PROP_LINK)!=null) && (nnm.getNamedItem(XML_PROP_NAME)!=null)) {
00247                 attr.put(XML_PROP_LINK,   "http://www.germandeli.com/"+nnm.getNamedItem(XML_PROP_LINK).getTextContent());
00248         
00249                         // create ID from the link without '.html'
00250                         current_page = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_NAME).getTextContent()), parent, ontology);
00251                         add_property_restrictions(current_page, attr, ontology);
00252         }
00253         
00254                 // recursively parse child nodes
00255                 for(int i=0;i<page.getChildNodes().getLength();i++) {
00256                         Node cn = page.getChildNodes().item(i);
00257                         
00258                         if(cn.getNodeName().equals(XML_GROUPNAME))
00259                                 parse_group(cn, current_page, ontology);
00260 
00261                         else if (cn.getNodeName().equals(XML_PAGENAME))
00262                                 parse_page(cn, current_page, ontology);
00263 
00264                         else if (cn.getNodeName().equals(XML_PRODNAME))
00265                                 parse_product(cn, current_page, ontology);
00266                 }
00267         }
00268 
00269         
00276         protected void parse_product(Node product, OWLClass parent, OWLOntology ontology) {
00277                 
00278         NamedNodeMap nnm =  product.getAttributes();
00279         HashMap<String, String> attr = new HashMap<String, String>();
00280         
00281         
00282         attr.put(XML_PROP_ID,     nnm.getNamedItem(XML_PROP_ID).getTextContent());
00283         attr.put(XML_PROP_CODE,   nnm.getNamedItem(XML_PROP_CODE).getTextContent());
00284                 attr.put(XML_PROP_DESC,   nnm.getNamedItem(XML_PROP_DESC).getTextContent());
00285                 attr.put(XML_PROP_LINK,   nnm.getNamedItem(XML_PROP_LINK).getTextContent());
00286                 attr.put(XML_PROP_LOC,    nnm.getNamedItem(XML_PROP_LOC).getTextContent());
00287                 attr.put(XML_PROP_PERISH, nnm.getNamedItem(XML_PROP_PERISH).getTextContent());
00288                 attr.put(XML_PROP_COUNTRY,nnm.getNamedItem(XML_PROP_COUNTRY).getTextContent());
00289                 attr.put(XML_PROP_PRICE,  nnm.getNamedItem(XML_PROP_PRICE).getTextContent());
00290                 attr.put(XML_PROP_WEIGHT, nnm.getNamedItem(XML_PROP_WEIGHT).getTextContent());
00291                 attr.put(XML_PROP_BRAND,  nnm.getNamedItem(XML_PROP_BRAND).getTextContent());
00292                 
00293                 OWLClass current_product = create_owl_class(prologify(nnm.getNamedItem(XML_PROP_DESC).getTextContent()), parent, ontology);
00294                 add_property_restrictions(current_product, attr, ontology);        
00295         }
00296         
00297 
00298         
00306         protected OWLClass create_owl_class(String name, OWLClass parent, OWLOntology ontology) {
00307 
00308                 OWLClass res = factory.getOWLClass(IRI.create(GERMANDELI + name));
00309                 OWLAxiom subClassAxiom = factory.getOWLSubClassOfAxiom(res, parent);
00310                 manager.addAxiom(ontology, subClassAxiom); 
00311                  
00312                 return res;
00313         }
00314         
00315         
00316         
00323         protected void add_property_restrictions(OWLClass cl, HashMap<String, String> attr, OWLOntology ontology) {
00324                 
00325                 
00326                 // handle perishability separately: subClassOf Perishable
00327                 if(attr.containsKey(XML_PROP_PERISH) && !attr.get(XML_PROP_PERISH).equals("")) {
00328                         
00329                         if(attr.get(XML_PROP_PERISH).equals("Refrigerated")) {
00330                                 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "Perishable"));
00331                                 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish));
00332                                 attr.remove(XML_PROP_PERISH);   
00333                         } else if(attr.get(XML_PROP_PERISH).equals("Frozen")) {
00334                                 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "Frozen"));
00335                                 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish));
00336                                 attr.remove(XML_PROP_PERISH);   
00337                         } else if(attr.get(XML_PROP_PERISH).equals("Heat-Sensitive")) {
00338                                 OWLClass perish = factory.getOWLClass(IRI.create(KNOWROB + "HeatSensitive"));
00339                                 manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, perish));
00340                                 attr.remove(XML_PROP_PERISH);   
00341                         } else {
00342                                 attr.remove(XML_PROP_PERISH);
00343                         }
00344                 }
00345 
00346                 // set description as RDFS:LABEL
00347                 if(attr.containsKey(XML_PROP_DESC)) {
00348                         
00349                         OWLAnnotation commentAnno = factory.getOWLAnnotation(
00350                                         factory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()),
00351                                         factory.getOWLStringLiteral(prologify(attr.get(XML_PROP_DESC)), "en"));
00352                         
00353                         manager.addAxiom(ontology, factory.getOWLAnnotationAssertionAxiom(cl.getIRI(), commentAnno));
00354                         attr.remove(XML_PROP_DESC);
00355                 }
00356                 
00357 
00358                 // set link as RDFS:seeAlso
00359                 if(attr.containsKey(XML_PROP_LINK)) {
00360                         
00361                         OWLAnnotation commentAnno = factory.getOWLAnnotation(
00362                                         factory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_SEE_ALSO.getIRI()),
00363                                         factory.getOWLStringLiteral(attr.get(XML_PROP_LINK)));
00364                         
00365                         manager.addAxiom(ontology, factory.getOWLAnnotationAssertionAxiom(cl.getIRI(), commentAnno));
00366                         attr.remove(XML_PROP_LINK);
00367                 }
00368                 
00369                 
00370                 
00371                 
00372                 // currently not using the loc key
00373                 if(attr.containsKey(XML_PROP_LOC)) { 
00374                         attr.remove(XML_PROP_LOC);
00375                 }
00376                 
00377                 
00378                 // for the other attributes: add as hasValue restriction
00379                 for(String key : attr.keySet()) {
00380                         
00381                         // don't add properties for empty values
00382                         if(attr.get(key).equals(""))
00383                                 continue;
00384                         
00385                         OWLLiteral val = factory.getOWLTypedLiteral(attr.get(key));
00386                         OWLDataHasValue restr = factory.getOWLDataHasValue(this.data_properties.get(key), val);
00387 
00388                         manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, restr));
00389                         
00390                 }
00391                 
00392         }
00393         
00394 
00395 
00402         protected void create_data_properties(OWLOntology ontology) {
00403 
00404 
00405                 OWLDataProperty data_prop;
00406                 OWLDataPropertyRangeAxiom rangeAxiom;
00407                 OWLDatatype integerDatatype = factory.getIntegerOWLDatatype();  
00408                 OWLDatatype stringDatatype  = factory.getOWLDatatype(OWL2Datatype.XSD_STRING.getIRI());
00409                 
00410                 data_properties.put(XML_PROP_DESC,    factory.getOWLDataProperty("rdfs:label", pm));
00411                 data_properties.put(XML_PROP_PRICE,   factory.getOWLDataProperty("knowrob:purchasePrice", pm));
00412                 data_properties.put(XML_PROP_WEIGHT,  factory.getOWLDataProperty("knowrob:weight", pm));
00413                 
00414 
00415                 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "productCode"));
00416                 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, integerDatatype);
00417                 manager.addAxiom(ontology, rangeAxiom); 
00418                 data_properties.put(XML_PROP_CODE, data_prop);
00419                 
00420 
00421                 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "productID"));
00422                 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00423                 manager.addAxiom(ontology, rangeAxiom); 
00424                 data_properties.put(XML_PROP_ID, data_prop);
00425                 
00426                 
00427                 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "linkToProductPage"));
00428                 data_properties.put(XML_PROP_LINK, data_prop);
00429                 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00430                 manager.addAxiom(ontology, rangeAxiom);
00431                 
00432                 
00433                 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "brand"));
00434                 data_properties.put(XML_PROP_BRAND, data_prop);
00435                 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00436                 manager.addAxiom(ontology, rangeAxiom);
00437                 
00438                 
00439                 data_prop = factory.getOWLDataProperty(IRI.create(GERMANDELI + "countryOfOrigin"));
00440                 data_properties.put(XML_PROP_COUNTRY, data_prop);
00441                 rangeAxiom = factory.getOWLDataPropertyRangeAxiom(data_prop, stringDatatype);
00442                 manager.addAxiom(ontology, rangeAxiom);
00443                 
00444         }
00445         
00446         
00447         
00448 
00449         protected String prologify(String str) {
00450 
00451                 // first replace well-known special characters
00452                 str=str.replaceAll("&quot;", "");
00453                 str=str.replaceAll("&amp;",  "_");
00454                 str=str.replaceAll(",", "_");
00455                 str=str.replaceAll(" ", "_");
00456                 str=str.replaceAll("-", "_");
00457                 str=str.replaceAll("/", "_");
00458                 
00459                 // replace common umlaut characters
00460                 str=str.replaceAll("&Auml;", "Ae");
00461                 str=str.replaceAll("&auml;", "ae");
00462                 str=str.replaceAll("&Ouml;", "Oe");
00463                 str=str.replaceAll("&ouml;", "oe");
00464                 str=str.replaceAll("&Uuml;", "Ue");
00465                 str=str.replaceAll("&uuml;", "ue");
00466                 str=str.replaceAll("&szlig;","ss");
00467                 
00468                 str=str.replaceAll("Ä", "Ae");
00469                 str=str.replaceAll("ä", "ae");
00470                 str=str.replaceAll("Ö", "Oe");
00471                 str=str.replaceAll("ö", "oe");
00472                 str=str.replaceAll("Ü", "Ue");
00473                 str=str.replaceAll("ü", "ue");
00474                 str=str.replaceAll("ß", "ss");
00475                 
00476                 // replace everything else that is neither char nor num nor '_'
00477                 str=str.replaceAll("[^A-Za-z0-9_]","");
00478                 
00479                 // remove multiple underscores
00480                 str=str.replaceAll("_+", "_");
00481                 str=str.replaceAll("^_", "");
00482                 
00483                 return str;
00484         }
00485 
00486         
00487         public static void main(String[] args) {
00488                 
00489                 GermanDeliOWLImport gdi = new GermanDeliOWLImport();
00490                 System.out.println(gdi.import_xml("/work/germandeli/download/structure.xml").toString());
00491                 
00492                 
00493         }
00494 
00495         
00496 }
00497 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Friends


comp_germandeli
Author(s): Moritz Tenorth
autogenerated on Sun Oct 6 2013 05:07:57