解决XML文件中含非法Unicode字符(如0x2等控制字符)导致解析失败的问题
解析XML出现如下异常:
An invalid XML character (Unicode: 0x2) was found in the element content of the document. Nested exception: An invalid XML character (Unicode: 0x2) was found in the element content of the document.
下面写了个小程序,可以对出现该异常的单个XML文件或某个目录下的所有XML文件进行处理,将XML不允许的字符替换为空格:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

/**
 * Command-line tool that rewrites XML files so that they no longer contain
 * characters an XML 1.0 parser rejects (for example the control character
 * U+0002). Every rejected character is replaced by a single space; all other
 * content, including line terminators, is copied unchanged.
 *
 * <p>Files are always read and written as UTF-8, regardless of any encoding
 * named in the XML declaration.
 */
public class UnicodeClear {

    /** File name suffix (matched case-insensitively) selecting files in a directory scan. */
    private static final String XML_SUFFIX = ".xml";

    /** Number of characters copied per read/write round trip. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Tells whether a UTF-16 code unit may be kept in the output.
     *
     * <p>Kept: tab, line feed, carriage return, and everything from 0x20 up to
     * and including 0xFFFD. Surrogate code units (0xD800-0xDFFF) fall in that
     * range and are passed through so that supplementary characters encoded as
     * surrogate pairs survive; unpaired surrogates are not detected.
     *
     * @param ch the code unit to test
     * @return {@code true} if the code unit is kept, {@code false} if it must be replaced
     */
    static boolean isAllowedInXml(char ch) {
        if (ch < 0x20) {
            return ch == '\t' || ch == '\n' || ch == '\r';
        }
        return ch <= 0xFFFD;
    }

    /**
     * Returns a copy of {@code value} in which every character rejected by
     * {@link #isAllowedInXml(char)} is replaced by a space.
     *
     * @param value the text to clean; may be {@code null}
     * @return the cleaned text (same length as the input), or {@code null} if {@code value} is {@code null}
     */
    static String sanitize(String value) {
        if (value == null) {
            return null;
        }
        char[] chars = value.toCharArray();
        replaceInvalidChars(chars, chars.length);
        return new String(chars);
    }

    /** Overwrites, in place, each disallowed character among the first {@code length} entries with a space. */
    private static void replaceInvalidChars(char[] chars, int length) {
        for (int i = 0; i < length; i++) {
            if (!isAllowedInXml(chars[i])) {
                chars[i] = ' ';
            }
        }
    }

    /** Case-insensitive test for a name ending in {@code .xml}; names shorter than the suffix never match. */
    private static boolean hasXmlSuffix(String name) {
        int offset = name.length() - XML_SUFFIX.length();
        return name.regionMatches(true, offset, XML_SUFFIX, 0, XML_SUFFIX.length());
    }

    /**
     * Copies {@code source} to {@code target} as UTF-8 text, replacing disallowed characters.
     *
     * <p>The text is copied in fixed-size chunks rather than line by line so that the
     * original line terminators are preserved exactly. Both streams are closed even
     * when reading or writing fails.
     */
    private static void copySanitized(File source, File target) throws IOException {
        try (Reader in = new BufferedReader(
                     new InputStreamReader(new FileInputStream(source), StandardCharsets.UTF_8));
             Writer out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8))) {
            char[] buffer = new char[BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                replaceInvalidChars(buffer, count);
                out.write(buffer, 0, count);
            }
        }
    }

    /**
     * Cleans one file in place. Does nothing if the file does not exist.
     *
     * <p>The cleaned content is first written to a sibling file named
     * {@code <name>.tmp}, which then replaces the original. If anything fails,
     * the original file is left untouched and the temporary file is removed.
     *
     * @param file the file to clean
     * @throws IOException if the file cannot be read, written or replaced
     */
    private void processFile(File file) throws IOException {
        if (!file.exists()) {
            return;
        }
        System.out.println("开始处理文件:" + file.getName());
        File temp = new File(file.getParentFile(), file.getName() + ".tmp");
        boolean replaced = false;
        try {
            copySanitized(file, temp);
            // A single move that reports failure, instead of delete + rename whose
            // results were ignored and could lose the original file.
            Files.move(temp.toPath(), file.toPath(), StandardCopyOption.REPLACE_EXISTING);
            replaced = true;
        } finally {
            if (!replaced) {
                // Best-effort cleanup; the primary failure is what gets reported.
                temp.delete();
            }
        }
    }

    /**
     * Cleans the given file, or every regular file with an {@code .xml} suffix
     * (case-insensitive) directly inside the given directory. Sub-directories
     * are not descended into. Processing stops at the first file that fails.
     *
     * @param fname path of a file or directory; must not be {@code null}
     * @return {@code true} if processing completed or the path does not exist
     *         (a message is printed in that case); {@code false} if an I/O or
     *         security error occurred (the stack trace is printed to standard error)
     * @throws NullPointerException if {@code fname} is {@code null}
     */
    public boolean scanning(String fname) {
        File target = new File(fname);
        try {
            if (!target.exists()) {
                System.out.println("路径不存在:" + fname);
                return true;
            }
            if (target.isFile()) {
                processFile(target);
                return true;
            }
            File[] xmlFiles = target.listFiles(new FileFilter() {
                @Override
                public boolean accept(File pathname) {
                    // Skip directories that merely happen to be named *.xml.
                    return pathname.isFile() && hasXmlSuffix(pathname.getName());
                }
            });
            if (xmlFiles == null) {
                // listFiles returns null when the path is not a directory or cannot be read.
                throw new IOException("Cannot list directory: " + target);
            }
            for (File xmlFile : xmlFiles) {
                processFile(xmlFile);
            }
            return true;
        } catch (IOException | SecurityException e) {
            // This tool has no logging framework; report the failure on standard error.
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Entry point. Expects the path of the file or directory to clean as the
     * first argument; prints a usage hint and returns if it is missing.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            System.out.println("请输入要进行处理的文件路径...");
            return;
        }
        String fname = args[0];
        UnicodeClear ucClear = new UnicodeClear();
        ucClear.scanning(fname);
        System.out.println("结束");
    }
}