Java解析CSV文件
最近项目中,需要处理大量的CSV文件,需要将这些文件转换成txt格式,并以|进行分隔。最郁闷的是:这些文件数据格式好多都是不规范的。
由于项目使用的是JDK 1.4,String的新特性(如contains、replace)没法用,所以写了个StringUtil类。
今天又遇到新的问题,无语,重新修改了下程序
此代码可以处理字段中出现分隔符、双引号等情况。
/** * 对于文件中字段包含逗号的文件的特殊处理 (同时可以去除掉双引号)处理完以后会在相同的路径下输出相同文件名的TXT文件 * * @throws Exception */public static void specialChar(String filePath,int starRow) throws Exception {BufferedReader br = null;File f = new File(filePath);String fileName = f.getName();if (!fileName.substring(fileName.indexOf(".") + 1).equals("csv")) {throw new Exception(filePath + "不是一个CSV文件");}File file = new File(StringUtil.replace(f.getPath(), "csv", "txt"));FileWriter filewriter = null;try {br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "utf-8"));filewriter = new FileWriter(file, false);SimpleDateFormat sd = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");System.out.println(sd.format(new Date()));String tempString = null;int i = 0;while ((tempString = br.readLine()) != null) {if (i < starRow-1) {i++;continue;}if(tempString.trim().equals(""))break;if (StringUtil.contains(tempString, "\"")) {tempString = deepParser(tempString,filePath);} elsetempString = StringUtil.replace(tempString, ",", "|");//System.out.println(tempString);filewriter.write(stringTrim(tempString, "\\|") + "\r\n");i++;}System.out.println(sd.format(new Date()));} catch (Throwable e) {log.warn("解析文件:【" + filePath + "】出错", e);e.printStackTrace();} finally {try {br.close();filewriter.close();} catch (IOException e) {e.printStackTrace();}}}public static String deepParser(String str,String filePath) {System.out.println(str);String temp = str; str = str+",";StringBuffer sb = new StringBuffer();try {int from = 0;int end = str.length();int i = 0;while (StringUtil.contains((str = str.substring(from)), "\"")) {from = str.indexOf("\"");end = str.indexOf("\"", from + 1);sb.append(StringUtil.replace(str.substring(0, from), ",", "|"));sb.append(str.substring(from + 1, end));from = end + 1;i++;}sb.append(StringUtil.replace(str, ",", "|"));} catch (Throwable e) {log.warn("解析文件:【" + filePath + "】出错,一下数据有问题:"+temp, e);e.printStackTrace();} String s = sb.toString(); s = s.substring(0, s.lastIndexOf("|")); return 
s;}//去除字段2边空格,可以指定分隔符public static String stringTrim(String str, String regex) {str = str+" ";String[] strs = str.split(regex);StringBuffer sb = new StringBuffer();for (int i = 0; i < strs.length; i++) {sb.append(strs[i].trim() + "|");}return sb.toString().substring(0, sb.toString().lastIndexOf("|"));}