package instruction.wrapper;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

00015 public class WikihowWebsiteWrapper implements IHowtoWebsiteWrapper {
00016
00017 String title = "";
00018 List<String> instructions = new ArrayList<String>();
00019 String url = null;
00020
00021 public String getHowtoTitle() {
00022 return title;
00023 }
00024
00025 public List<String> getInstructions() {
00026 return instructions;
00027 }
00028
00029
00030 public void load( String query ) {
00031
00032 String[] howto = search_wikihow( query );
00033
00034 this.url = howto[0];
00035 this.title = howto[1];
00036
00037 instructions = read(url);
00038 }
00039
00040 public String getUrl() {
00041 return url;
00042 }
00043
00044
00045
00046 public String[] search_wikihow( String query ) {
00047
00048 String url = "http://www.wikihow.com/Special:LSearch?search=" + query.replaceAll(" ", "\\+");
00049
00050 try {
00051
00052 HttpClient httpclient = new DefaultHttpClient();
00053 HttpGet httpget = new HttpGet(url);
00054
00055 String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00056 String[] rows = page.split("\n");
00057
00058
00059 String[] res = new String[2];
00060 for ( int i = 0; i < rows.length; i++ ) {
00061
00062 String p = "(<div class='searchresult_1'><a href=\"([a-zA-Z:./_\\-]*)\">([a-zA-Z <>/]*)</a>)";
00063 Matcher matcher = Pattern.compile(p).matcher(rows[i]);
00064
00065 if(matcher.find()) {
00066
00067 res[0] = matcher.group(2);
00068 res[1] = matcher.group(3).replaceAll("<[a-zA-z0-9/]*>", "");
00069
00070 return res;
00071 }
00072 }
00073 }
00074 catch ( Exception e ) {
00075 e.printStackTrace();
00076 }
00077 return null;
00078 }
00079
00080
00081 public ArrayList<String> read(String url) {
00082
00083 ArrayList<String> res = new ArrayList<String>();
00084 try {
00085 HttpClient httpclient = new DefaultHttpClient();
00086 HttpGet httpget = new HttpGet(url);
00087
00088 String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00089 String[] rows = page.split("\n");
00090
00091 System.out.println("Steps:");
00092 for ( int i = 0; i < rows.length; i++ ) {
00093
00094 Matcher matcher = Pattern.compile("(<b class='whb'>([0-9A-Za-z \\._]*)</b>\\.)").matcher(rows[i]);
00095
00096 if(matcher.find()) {
00097
00098 res.add(matcher.group(2));
00099 }
00100 }
00101 }
00102 catch ( Exception e ) {
00103 e.printStackTrace();
00104 }
00105 return res;
00106 }
00107
00108 }