$search
package instruction.wrapper;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * {@link IHowtoWebsiteWrapper} implementation backed by wikihow.com.
 *
 * <p>{@link #load(String)} runs a search on the site, remembers the URL and
 * title of the first page row that matches the search-result markup, and then
 * scrapes that page for instruction steps. All extraction is done with regular
 * expressions applied line by line to the raw HTML.
 */
public class WikihowWebsiteWrapper implements IHowtoWebsiteWrapper {

    /** Search endpoint; the URL-encoded query is appended to it. */
    private static final String SEARCH_URL_PREFIX = "http://www.wikihow.com/Special:LSearch?search=";

    /** Matches a search hit; group 2 is the link target, group 3 the (possibly tagged) title. */
    private static final Pattern SEARCH_RESULT = Pattern.compile(
            "(<div class='searchresult_1'><a href=\"([a-zA-Z:./_\\-]*)\">([a-zA-Z <>/]*)</a>)");

    /** Matches an HTML tag inside a title so that it can be stripped. */
    private static final Pattern MARKUP_TAG = Pattern.compile("<[a-zA-Z0-9/]*>");

    /** Matches one instruction step; group 2 is the step text. */
    private static final Pattern STEP = Pattern.compile("(<b class='whb'>([0-9A-Za-z \\._]*)</b>\\.)");

    String title = "";
    List<String> instructions = new ArrayList<String>();
    String url = null;

    /**
     * @return the title stored by the last {@link #load(String)}; empty if nothing was found
     */
    public String getHowtoTitle() {
        return title;
    }

    /**
     * @return the steps stored by the last {@link #load(String)}; empty if nothing was found
     */
    public List<String> getInstructions() {
        return instructions;
    }

    /**
     * Searches for {@code query} and loads URL, title and steps of the result.
     *
     * <p>If the search yields nothing (no matching row, or the request failed),
     * the wrapper is reset to its empty state instead of failing with a
     * {@link NullPointerException}.
     *
     * @param query free-text search terms
     */
    public void load(String query) {
        String[] howto = search_wikihow(query);

        if (howto == null) {
            // search_wikihow signals "nothing found" with null; do not dereference it.
            this.url = null;
            this.title = "";
            this.instructions = new ArrayList<String>();
            return;
        }

        this.url = howto[0];
        this.title = howto[1];
        this.instructions = read(url);
    }

    /**
     * @return the URL stored by the last {@link #load(String)}, or {@code null} if nothing was found
     */
    public String getUrl() {
        return url;
    }

    /**
     * Runs a search and returns the first row that matches the search-result markup.
     *
     * @param query free-text search terms; URL-encoded as UTF-8 before being sent
     * @return a two-element array {@code {url, title}} with markup tags removed
     *         from the title, or {@code null} if no row matched, {@code query}
     *         was {@code null}, or the request failed (the stack trace is
     *         printed in that case)
     */
    public String[] search_wikihow(String query) {
        if (query == null) {
            return null;
        }

        try {
            // Full URL encoding (spaces still become '+') so that characters such
            // as '&', '#' or '+' in the query cannot corrupt the request.
            String page = fetchPage(SEARCH_URL_PREFIX + URLEncoder.encode(query, "UTF-8"));

            String[] rows = page.split("\n");
            for (int i = 0; i < rows.length; i++) {
                Matcher matcher = SEARCH_RESULT.matcher(rows[i]);
                if (matcher.find()) {
                    String[] res = new String[2];
                    res[0] = matcher.group(2);
                    res[1] = MARKUP_TAG.matcher(matcher.group(3)).replaceAll("");
                    return res;
                }
            }
        } catch (Exception e) {
            // Broad catch kept on purpose: callers rely on this method never throwing.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Downloads the page at {@code url} and extracts its instruction steps.
     *
     * <p>At most one step is taken from each line of the page. Prints
     * {@code "Steps:"} to standard output once the page has been fetched.
     *
     * @param url address of the page to scrape
     * @return the extracted step texts in page order; empty if none matched or
     *         the request failed (the stack trace is printed in that case)
     */
    public ArrayList<String> read(String url) {
        ArrayList<String> res = new ArrayList<String>();

        try {
            String page = fetchPage(url);
            String[] rows = page.split("\n");

            System.out.println("Steps:");
            for (int i = 0; i < rows.length; i++) {
                Matcher matcher = STEP.matcher(rows[i]);
                if (matcher.find()) {
                    res.add(matcher.group(2));
                }
            }
        } catch (Exception e) {
            // Broad catch kept on purpose: callers rely on this method never throwing.
            e.printStackTrace();
        }
        return res;
    }

    /**
     * Performs an HTTP GET and returns the response body as text.
     *
     * <p>The client is shut down in all cases so that its connections are
     * released.
     */
    private static String fetchPage(String url) throws IOException {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            return EntityUtils.toString(httpclient.execute(new HttpGet(url)).getEntity());
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

}