读书人

去除字符串中的HTML元素

发布时间: 2012-09-25 09:55:59 作者: rapoo

去除字符串中的HTML元素

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Removes HTML elements from a string and decodes a small set of HTML
 * character entities.
 *
 * <p>The original listing declared this class in package
 * {@code com.gwideal.jxwfkjlweb.util}; add that package declaration back
 * when placing the class in that project.
 *
 * @author zhou_chaofei
 */
public class TxtWithoutHTMLElement {

    /**
     * Matches one tag: {@code <}, any run of characters other than angle
     * brackets, then {@code >}. Compiled once because it is used on every call.
     */
    private static final Pattern TAG = Pattern.compile("<[^<>]*>");

    /** Matches a bracket pair that holds nothing but whitespace, e.g. {@code <>} or {@code < >}. */
    private static final Pattern EMPTY_BRACKETS = Pattern.compile("<\\s*>");

    /**
     * Supported entities and their replacements, as {entity, replacement} pairs.
     * {@code &nbsp;} is dropped rather than converted to a space.
     */
    private static final String[][] ENTITIES = {
        {"&amp;", "&"},
        {"&lt;", "<"},
        {"&gt;", ">"},
        {"&quot;", "\""},
        {"&nbsp;", ""},
    };

    /**
     * Strips HTML tags from {@code element} and then decodes the entities
     * {@code &amp;}, {@code &lt;}, {@code &gt;}, {@code &quot;} and {@code &nbsp;}.
     *
     * <p>Bracket pairs containing only whitespace ({@code <>}, {@code < >}) are
     * not treated as tags and are kept. Entities are decoded after tag removal,
     * so escaped markup such as {@code &lt;b&gt;} comes out as the literal text
     * {@code <b>} instead of being stripped.
     *
     * @param element the text to clean; may be {@code null}
     * @return {@code element} itself when it is {@code null}, empty or
     *         whitespace-only; otherwise the text without tags and with the
     *         supported entities decoded
     */
    public static String getTxtWithoutHTMLElement(String element) {
        if (null == element || "".equals(element.trim())) {
            return element;
        }

        StringBuilder stripped = new StringBuilder(element.length());
        Matcher matcher = TAG.matcher(element);
        int copiedUpTo = 0;
        while (matcher.find()) {
            // Copy the plain text that sits between the previous tag and this one.
            stripped.append(element, copiedUpTo, matcher.start());
            String group = matcher.group();
            if (EMPTY_BRACKETS.matcher(group).matches()) {
                // Not a real tag: keep it verbatim.
                stripped.append(group);
            }
            copiedUpTo = matcher.end();
        }
        stripped.append(element, copiedUpTo, element.length());

        return decodeEntities(stripped.toString());
    }

    /**
     * Decodes the entities listed in {@link #ENTITIES} in a single left-to-right
     * pass. Replacement text is never rescanned, so {@code &amp;lt;} yields
     * {@code &lt;} (not {@code <}) and the loop always terminates.
     */
    private static String decodeEntities(String text) {
        StringBuilder decoded = new StringBuilder(text.length());
        int i = 0;
        while (i < text.length()) {
            String[] hit = null;
            if (text.charAt(i) == '&') {
                for (String[] entity : ENTITIES) {
                    if (text.startsWith(entity[0], i)) {
                        hit = entity;
                        break;
                    }
                }
            }
            if (hit == null) {
                decoded.append(text.charAt(i));
                i++;
            } else {
                decoded.append(hit[1]);
                i += hit[0].length();
            }
        }
        return decoded.toString();
    }

    /** Small demonstration of tag stripping and entity decoding. */
    public static void main(String[] args) {
        // Real markup: the tags are removed, leaving "test".
        System.out.println(getTxtWithoutHTMLElement("<a href='a/test'>test</a>"));
        // Entity-escaped markup: nothing is stripped, the entities are decoded.
        System.out.println(getTxtWithoutHTMLElement("&lt;a href='a/test'&gt;test&lt;/a&gt;"));
    }
}

?

读书人网 >CSS

热点推荐