WikihowWebsiteWrapper.java
Go to the documentation of this file.
00001 package instruction.wrapper;
00002 
00003 import java.io.File;
00004 import java.io.FileReader;
00005 import java.util.ArrayList;
00006 import java.util.List;
00007 import java.util.regex.Matcher;
00008 import java.util.regex.Pattern;
00009 
00010 import org.apache.http.client.HttpClient;
00011 import org.apache.http.client.methods.HttpGet;
00012 import org.apache.http.impl.client.DefaultHttpClient;
00013 import org.apache.http.util.EntityUtils;
00014 
00015 public class WikihowWebsiteWrapper implements IHowtoWebsiteWrapper {
00016         
00017         String title = "";
00018         List<String> instructions = new ArrayList<String>();
00019         String url = null;
00020 
00021         public String getHowtoTitle() {
00022                 return title;
00023         }
00024 
00025         public List<String> getInstructions() {
00026                 return instructions;
00027         }
00028 
00029 
00030         public void load( String query ) {
00031                 
00032                 String[] howto = search_wikihow( query );
00033                 
00034                 this.url = howto[0];
00035                 this.title = howto[1];
00036                 
00037                 instructions = read(url);
00038         }
00039 
00040         public String getUrl() {
00041                 return url;
00042         }
00043 
00044         
00045 
00046         public String[] search_wikihow( String query ) {
00047 
00048                 String url = "http://www.wikihow.com/Special:LSearch?search=" + query.replaceAll(" ", "\\+");
00049 
00050                 try {
00051 
00052                         HttpClient httpclient = new DefaultHttpClient();
00053                         HttpGet httpget = new HttpGet(url);
00054                         
00055                         String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00056                         String[] rows = page.split("\n");
00057                         
00058                         
00059                         String[] res = new String[2];
00060                         for ( int i = 0; i < rows.length; i++ ) {
00061                                 
00062                                 String p = "(<div class='searchresult_1'><a href=\"([a-zA-Z:./_\\-]*)\">([a-zA-Z <>/]*)</a>)";
00063                                 Matcher matcher = Pattern.compile(p).matcher(rows[i]);
00064                                 
00065                                 if(matcher.find()) {
00066                                         
00067                                         res[0] = matcher.group(2);
00068                                         res[1] = matcher.group(3).replaceAll("<[a-zA-z0-9/]*>", "");
00069 
00070                                         return res;
00071                                 }
00072                         }
00073                 }
00074                 catch ( Exception e ) {
00075                         e.printStackTrace();
00076                 }
00077                 return null;
00078         }
00079 
00080         
00081         public ArrayList<String> read(String url) {
00082 
00083                 ArrayList<String> res = new ArrayList<String>();
00084                 try {
00085                         HttpClient httpclient = new DefaultHttpClient();
00086                         HttpGet httpget = new HttpGet(url);
00087                         
00088                         String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00089                         String[] rows = page.split("\n");
00090                 
00091                         System.out.println("Steps:");
00092                         for ( int i = 0; i < rows.length; i++ ) {
00093 
00094                                 Matcher matcher = Pattern.compile("(<b class='whb'>([0-9A-Za-z \\._]*)</b>\\.)").matcher(rows[i]);
00095                                 
00096                                 if(matcher.find()) {
00097 
00098                                         res.add(matcher.group(2));
00099                                 }
00100                         }       
00101                 }
00102                 catch ( Exception e ) {
00103                         e.printStackTrace();
00104                 }
00105                 return res;
00106         }
00107         
00108 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Friends


comp_ehow
Author(s): Moritz Tenorth, Daniel Nyga
autogenerated on Tue Apr 16 2013 00:18:03