$search
package instruction.gui.test;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;


/**
 * Manual test harness that searches wikiHow for an article and prints the
 * bold step headlines found on the article page.
 *
 * Pages are fetched over HTTP and scanned line by line with regular
 * expressions; no HTML parser is involved.
 */
public class HTMLStringParserTest {

    /** Prefix of the wikiHow search URL; the encoded query is appended to it. */
    private static final String SEARCH_URL_PREFIX = "http://www.wikihow.com/Special:LSearch?search=";

    /**
     * Matches one search hit on a single line of the result page.
     * Group 2 is the link target, group 3 is the link text (which may still
     * contain simple tags).
     */
    private static final Pattern SEARCH_RESULT_PATTERN = Pattern.compile(
            "(<div class='searchresult_1'><a href=\"([a-zA-Z:./_\\-]*)\">([a-zA-Z <>/]*)</a>)");

    /** Matches a simple opening or closing tag, used to strip markup from the link text. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[a-zA-Z0-9/]*>");

    /** Matches a bold step headline on a single line of an article page; group 2 is its text. */
    private static final Pattern STEP_PATTERN = Pattern.compile(
            "(<b class='whb'>([0-9A-Za-z \\._]*)</b>\\.)");

    // NOTE(review): none of the three fields below is read or written by any
    // method of this class; they are kept because they are package-visible.
    String title = "";
    List<String> instructions = new ArrayList<String>();
    String url = null;


    /**
     * Runs a wikiHow search and returns the first hit found.
     *
     * @param query free-text search query; it is URL-encoded before being sent
     * @return a two-element array holding the link target at index 0 and the
     *         link text with tags removed at index 1, or {@code null} when no
     *         line of the result page matches or when the request fails
     *         (the exception is printed to standard error)
     */
    public String[] search_wikihow( String query ) {

        try {
            // URLEncoder turns spaces into '+' as before, and additionally
            // escapes characters that would otherwise corrupt the query string.
            String searchUrl = SEARCH_URL_PREFIX + URLEncoder.encode(query, "UTF-8");

            for ( String row : fetchRows(searchUrl) ) {
                Matcher matcher = SEARCH_RESULT_PATTERN.matcher(row);

                if ( matcher.find() ) {
                    String[] res = new String[2];
                    res[0] = matcher.group(2);
                    res[1] = TAG_PATTERN.matcher(matcher.group(3)).replaceAll("");
                    return res;
                }
            }
        }
        catch ( Exception e ) {
            // Best effort: report the problem and signal "no result" to the caller.
            e.printStackTrace();
        }
        return null;
    }


    /**
     * Fetches the page at the given URL and prints every step headline found
     * on it to standard output, one per line, under a "Steps:" heading.
     *
     * Failures are printed to standard error and otherwise ignored.
     *
     * @param url address of the article page to fetch
     */
    public void read(String url) {

        try {
            String[] rows = fetchRows(url);

            System.out.println("Steps:");
            for ( String row : rows ) {
                Matcher matcher = STEP_PATTERN.matcher(row);

                if ( matcher.find() ) {
                    System.out.println("* " + matcher.group(2));
                }
            }
        }
        catch ( Exception e ) {
            e.printStackTrace();
        }
    }


    /**
     * Downloads the page at the given URL and splits its body into lines.
     *
     * @param url address to request with HTTP GET
     * @return the response body split on newline characters
     * @throws IOException if the request or reading the response fails
     */
    private static String[] fetchRows(String url) throws IOException {

        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpGet httpget = new HttpGet(url);
            String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
            return page.split("\n");
        }
        finally {
            // Release the connections held by this one-shot client.
            httpclient.getConnectionManager().shutdown();
        }
    }


    /**
     * Searches for a fixed example query and prints the steps of the first hit.
     *
     * @param args ignored
     */
    public static void main(String[] args ) {

        HTMLStringParserTest t = new HTMLStringParserTest();

        String[] result = t.search_wikihow("Make Pancakes Using Pancake Mix");

        if ( result != null ) {
            // Index 0 holds the article URL; index 1 is only the title.
            t.read(result[0]);
        }
    }

}