Go to the documentation of this file.00001 package instruction.gui.test;
00002
00003 import java.io.File;
00004 import java.io.FileReader;
00005 import java.util.ArrayList;
00006 import java.util.HashMap;
00007 import java.util.List;
00008 import java.util.Vector;
00009 import java.util.regex.Matcher;
00010 import java.util.regex.Pattern;
00011
00012 import org.apache.http.client.HttpClient;
00013 import org.apache.http.client.methods.HttpGet;
00014 import org.apache.http.impl.client.DefaultHttpClient;
00015 import org.apache.http.util.EntityUtils;
00016
00017
00018 public class HTMLStringParserTest {
00019
00020 String title = "";
00021 List<String> instructions = new ArrayList<String>();
00022 String url = null;
00023
00024
00025
00026 public String[] search_wikihow( String query ) {
00027
00028 String url = "http://www.wikihow.com/Special:LSearch?search=" + query.replaceAll(" ", "\\+");
00029
00030 try {
00031
00032 HttpClient httpclient = new DefaultHttpClient();
00033 HttpGet httpget = new HttpGet(url);
00034
00035 String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00036 String[] rows = page.split("\n");
00037
00038
00039 String[] res = new String[2];
00040 for ( int i = 0; i < rows.length; i++ ) {
00041
00042 String p = "(<div class='searchresult_1'><a href=\"([a-zA-Z:./_\\-]*)\">([a-zA-Z <>/]*)</a>)";
00043 Matcher matcher = Pattern.compile(p).matcher(rows[i]);
00044
00045 if(matcher.find()) {
00046
00047 res[0] = matcher.group(2);
00048 res[1] = matcher.group(3).replaceAll("<[a-zA-z0-9/]*>", "");
00049
00050 return res;
00051 }
00052 }
00053 }
00054 catch ( Exception e ) {
00055 e.printStackTrace();
00056 }
00057 return null;
00058 }
00059
00060
00061 public void read(String url) {
00062
00063
00064 try {
00065 HttpClient httpclient = new DefaultHttpClient();
00066 HttpGet httpget = new HttpGet(url);
00067
00068 String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00069 String[] rows = page.split("\n");
00070
00071 System.out.println("Steps:");
00072 for ( int i = 0; i < rows.length; i++ ) {
00073
00074 String p = "(<b class='whb'>([0-9A-Za-z \\._]*)</b>\\.)";
00075 Matcher matcher = Pattern.compile(p).matcher(rows[i]);
00076
00077 if(matcher.find()) {
00078
00079 String u = matcher.group(2);
00080 System.out.println("* " + u);
00081
00082 }
00083 }
00084
00085 }
00086 catch ( Exception e ) {
00087 e.printStackTrace();
00088 }
00089 }
00090
00091
00092 public static void main(String[] args ) {
00093
00094 HTMLStringParserTest t = new HTMLStringParserTest();
00095
00096 String[] result = t.search_wikihow("Make Pancakes Using Pancake Mix");
00097
00098 if(result!=null)
00099 t.read(result[1]);
00100 }
00101
00102 }