读书人

利用nekohtml解析得到网站的价格

发布时间: 2012-11-10 10:48:51 作者: rapoo

利用nekohtml解析得到网站的价格

import java.io.BufferedReader;import java.io.FileReader;import java.io.InputStreamReader;import java.net.URL;import org.cyberneko.html.parsers.DOMParser;import org.htmlparser.tags.Span;import org.w3c.dom.Document;import org.w3c.dom.Element;import org.w3c.dom.Node;import org.w3c.dom.NodeList;import org.xml.sax.InputSource;public class JavaTest {public static String TextExtractor(Node root){   //若是文本节点的话,直接返回   if (root.getNodeType() == Node.TEXT_NODE) {    return "";   }   if(root.getNodeType() == Node.ELEMENT_NODE) {    Element elmt = (Element) root;    //抛弃脚本    if (elmt.getTagName().equals("STYLE")      || elmt.getTagName().equals("SCRIPT"))     return "";    NodeList children = elmt.getChildNodes();    StringBuilder text = new StringBuilder();    if (elmt.getTagName().equals("SPAN")) {    if (elmt.hasAttribute("id")) {     // System.out.println(elmt.getAttribute("id"));      if (elmt.getAttribute("id").equals("countOfPrd")) {      return children.item(0).getNextSibling().getNextSibling().getNextSibling().getFirstChild().getNodeValue();  }  }}       for (int i = 0; i < children.getLength(); i++) {        text.append(TextExtractor(children.item(i)));    }    return text.toString();   }   //对其它类型的节点,返回空值   return "";}public static void main(String[] args) throws Exception{   //生成html parser   DOMParser parser = new DOMParser();   //设置网页的默认编码   parser.setProperty(     "http://cyberneko.org/html/properties/default-encoding",     "utf-8");   //input file   URL a = new URL("http://www.suning.cn/webapp/wcs/stores/servlet/prd_10052_10051_-7_9173_196583_.html");   BufferedReader in = new BufferedReader(new InputStreamReader(a.openStream()));   //BufferedReader in = new BufferedReader(new FileReader("input.htm"));   parser.parse(new InputSource(in));   Document doc = parser.getDocument();   //获得body节点,以此为根,计算其文本内容   Node body = doc.getElementsByTagName("BODY").item(0);   System.out.println(TextExtractor(body));}}?//该段代码是用来解析抓取各大网站的价格,来智能化定价。实现价格战的目的赢得用户,不懂或更多探究QQ526151410

读书人网 >CSS

热点推荐