利用开源 JavaScript 引擎 Rhino 配合 jsoup 进行 Web 裁制,即使用 JavaScript 脚本来解析页面。
JsEngin.java
package cn.tailor.engin; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import javax.script.ScriptEngine; import javax.script.ScriptEngineManager; import org.jsoup.Jsoup; import org.mozilla.javascript.Context; import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.commonjs.module.provider.SoftCachingModuleScriptProvider; public class JsEngin { public static String exedom(){ ScriptEngineManager manager = new ScriptEngineManager(); ScriptEngine engine = manager.getEngineByName("js"); try { engine.put("oText", Fetch.getHtml("http://www.baidu.com")); FileReader reader = new FileReader("D://js/yourFile.js"); engine.eval(reader); reader.close(); String name = (String) engine.get("output"); return name; } catch (Exception e) { e.printStackTrace(); return null; } } public static String exe(){ Context localContext = Context.enter(); ScriptableObject localScriptableObject = localContext.initStandardObjects(); Object Jsoup = Context.javaToJS(Jsoup.class, localScriptableObject); ScriptableObject.putProperty(localScriptableObject, "jsoup", Jsoup); ScriptableObject.putProperty(localScriptableObject, "dom", Fetch.getHtml("http://www.baidu.com")); FileReader reader; try { reader = new FileReader("D://js/yourFile.js"); localContext.evaluateReader(localScriptableObject, reader, reader.toString(), 1, null); reader.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } String html=(String) localScriptableObject.get("output"); return html; } }
TailorService.java
package cn.tailor.service; import java.io.IOException; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import cn.tailor.engin.JsEngin; public class TailorService extends HttpServlet { /** * Constructor of the object. */ public TailorService() { super(); } /** * Destruction of the servlet. <br> */ public void destroy() { super.destroy(); // Just puts "destroy" string in log } /** * The doGet method of the servlet. <br> * * This method is called when a form has its tag value method equals to get. * * @param request the request send by the client to the server * @param response the response send by the server to the client * @throws ServletException if an error occurred * @throws IOException if an error occurred */ public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=utf-8"); PrintWriter out = response.getWriter(); String html=JsEngin.exe(); out.print(html); out.flush(); out.close(); } /** * The doPost method of the servlet. <br> * * This method is called when a form has its tag value method equals to post. 
* * @param request the request send by the client to the server * @param response the response send by the server to the client * @throws ServletException if an error occurred * @throws IOException if an error occurred */ public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html"); PrintWriter out = response.getWriter(); out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">"); out.println("<HTML>"); out.println(" <HEAD><TITLE>A Servlet</TITLE></HEAD>"); out.println(" <BODY>"); out.print(" This is "); out.print(this.getClass()); out.println(", using the POST method"); out.println(" </BODY>"); out.println("</HTML>"); out.flush(); out.close(); } /** * Initialization of the servlet. <br> * * @throws ServletException if an error occurs */ public void init() throws ServletException { } }
Fetch.java
package cn.tailor.engin;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Helper that downloads a page and parses it with jsoup.
 */
public class Fetch {

    /**
     * Downloads and parses the page at the given address, using a timeout
     * value of 6000 for the connection.
     *
     * @param url address of the page to load
     * @return the parsed document, or {@code null} when an {@link IOException}
     *         occurs (its stack trace is printed)
     */
    public static Document getHtml(String url) {
        Document page = null;
        try {
            page = Jsoup.connect(url).timeout(6000).get();
        } catch (IOException failure) {
            failure.printStackTrace();
        }
        return page;
    }
}
yourFile.js
// Script evaluated by the host with `dom` bound to the fetched jsoup document;
// the host reads the global `output` once evaluation finishes.
var titleElements = dom.getElementsByTag("title");
// A page without a <title> gives an empty list, and get(0) on it would throw,
// so fall back to an empty string in that case.
var output = titleElements.isEmpty() ? "" : titleElements.get(0).text();
已有 0人发表留言,猛击->> 这里<<-参与讨论
ITeye推荐