HTMLStringParserTest.java
Go to the documentation of this file.
00001 package instruction.gui.test;
00002 
00003 import java.io.File;
00004 import java.io.FileReader;
00005 import java.util.ArrayList;
00006 import java.util.HashMap;
00007 import java.util.List;
00008 import java.util.Vector;
00009 import java.util.regex.Matcher;
00010 import java.util.regex.Pattern;
00011 
00012 import org.apache.http.client.HttpClient;
00013 import org.apache.http.client.methods.HttpGet;
00014 import org.apache.http.impl.client.DefaultHttpClient;
00015 import org.apache.http.util.EntityUtils;
00016 
00017 
00018 public class HTMLStringParserTest {
00019 
00020         String title = "";
00021         List<String> instructions = new ArrayList<String>();
00022         String url = null;
00023         
00024         
00025         
00026         public String[] search_wikihow( String query ) {
00027 
00028                 String url = "http://www.wikihow.com/Special:LSearch?search=" + query.replaceAll(" ", "\\+");
00029 
00030                 try {
00031 
00032                         HttpClient httpclient = new DefaultHttpClient();
00033                         HttpGet httpget = new HttpGet(url);
00034                         
00035                         String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00036                         String[] rows = page.split("\n");
00037                         
00038                         
00039                         String[] res = new String[2];
00040                         for ( int i = 0; i < rows.length; i++ ) {
00041                                 
00042                                 String p = "(<div class='searchresult_1'><a href=\"([a-zA-Z:./_\\-]*)\">([a-zA-Z <>/]*)</a>)";
00043                                 Matcher matcher = Pattern.compile(p).matcher(rows[i]);
00044                                 
00045                                 if(matcher.find()) {
00046                                         
00047                                         res[0] = matcher.group(2);
00048                                         res[1] = matcher.group(3).replaceAll("<[a-zA-z0-9/]*>", "");
00049 
00050                                         return res;
00051                                 }
00052                         }
00053                 }
00054                 catch ( Exception e ) {
00055                         e.printStackTrace();
00056                 }
00057                 return null;
00058         }
00059 
00060         
00061         public void read(String url) {
00062 
00063 
00064                 try {
00065                         HttpClient httpclient = new DefaultHttpClient();
00066                         HttpGet httpget = new HttpGet(url);
00067                         
00068                         String page = EntityUtils.toString(httpclient.execute(httpget).getEntity());
00069                         String[] rows = page.split("\n");
00070                 
00071                         System.out.println("Steps:");
00072                         for ( int i = 0; i < rows.length; i++ ) {
00073 
00074                                 String p = "(<b class='whb'>([0-9A-Za-z \\._]*)</b>\\.)";
00075                                 Matcher matcher = Pattern.compile(p).matcher(rows[i]);
00076                                 
00077                                 if(matcher.find()) {
00078                                         
00079                                         String u = matcher.group(2);
00080                                         System.out.println("* " + u);
00081 
00082                                 }
00083                         }
00084                         
00085                 }
00086                 catch ( Exception e ) {
00087                         e.printStackTrace();
00088                 }
00089         }
00090 
00091         
00092         public static void main(String[] args ) {
00093                 
00094                 HTMLStringParserTest t = new HTMLStringParserTest();
00095                 
00096                 String[] result = t.search_wikihow("Make Pancakes Using Pancake Mix");
00097                 
00098                 if(result!=null)
00099                         t.read(result[1]);
00100         }
00101         
00102 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Friends


comp_ehow
Author(s): Moritz Tenorth, Daniel Nyga
autogenerated on Tue Apr 16 2013 00:18:02