Java实现GBK编码文件繁简体转换
最近喜欢上了高品质音乐,但抓下来的 cue 文件很多是繁体中文,看着不太舒服。大陆的 Windows 默认使用 GBK 编码,于是准备写个程序把这些文件转成简体。整个过程分三步。
第一步,下载GBK中文字库。放到wps里,然后进行繁简体转换,得到繁简对应。放到txt文件中,使用ANSI编码。
第二步,对字库进行预处理。包括两部分:去重,排序。
去重指把繁简体相同的字符去掉。排序指把繁体字符按 char 值升序排列(简体字符随之保持一一对应的位置),因为下面的代码用二分查找来查表。
第三步,逐行读入待转换的文件,逐字查表,把繁体字替换成对应的简体字,结果写入文件名加 simple 前缀的新文件。代码:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;

/**
 * Replaces the traditional Chinese characters in a text file with their
 * simplified counterparts and writes the result to a sibling file whose name
 * is the input file name prefixed with {@code simple}.
 *
 * <p>The mapping is read from two files in the working directory,
 * {@code traditional.txt} and {@code simple.txt}. Only the first line of each
 * is used; the character at index {@code i} of the traditional line is
 * replaced by the character at index {@code i} of the simplified line.
 * Lookup is a binary search, so the traditional line must be sorted in
 * ascending {@code char} order.
 *
 * <p>All files are read and written as GBK.
 *
 * @author GT
 */
public class Main {

    /** Charset used for the mapping tables, the input file and the output file. */
    private static final String CHARSET_NAME = "GBK";

    /** Traditional characters, sorted ascending by {@code char} value; set by {@link #initial()}. */
    static String traditional = null;

    /** Simplified characters, index-aligned with {@link #traditional}; set by {@link #initial()}. */
    static String simple = null;

    /**
     * Converts the file named by the single command-line argument.
     *
     * @param args exactly one element: the path of the file to convert
     * @throws IOException if a mapping table, the input file or the output file
     *                     cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.err.println("not enough files");
            return;
        }

        initial();

        File input = new File(args[0]);
        // Prefix the file name, not the whole path, so "dir/a.cue" becomes
        // "dir/simplea.cue". For a bare file name the parent is null and the
        // result is the same as plain string concatenation.
        File output = new File(input.getParentFile(), "simple" + input.getName());

        // try-with-resources closes (and thereby flushes) both streams even
        // when reading or writing fails part-way through.
        try (BufferedReader br = openReader(input);
             BufferedWriter bw = openWriter(output)) {
            String line;
            while ((line = br.readLine()) != null) {
                simplify(line, bw);
                bw.newLine();
            }
        }
    }

    /**
     * Loads the two mapping tables into {@link #traditional} and {@link #simple}.
     *
     * @throws IOException if either table file cannot be read, is empty, or the
     *                     simplified table has fewer characters than the
     *                     traditional one
     */
    private static void initial() throws IOException {
        String traditionalTable = readFirstLine("traditional.txt");
        String simpleTable = readFirstLine("simple.txt");

        // Every index found in the traditional table is used to index the
        // simplified table, so the latter must be at least as long.
        if (simpleTable.length() < traditionalTable.length()) {
            throw new IOException("simple.txt has " + simpleTable.length()
                    + " characters but traditional.txt has " + traditionalTable.length());
        }

        traditional = traditionalTable;
        simple = simpleTable;
    }

    /**
     * Writes {@code line} to {@code bw} with every character that appears in
     * the traditional table replaced by its simplified counterpart. No line
     * terminator is written.
     *
     * @param line text to convert
     * @param bw   destination writer
     * @throws IOException if writing fails
     */
    private static void simplify(String line, BufferedWriter bw) throws IOException {
        for (int i = 0; i < line.length(); ++i) {
            char ch = line.charAt(i);
            int index = find(ch);
            bw.append(index != -1 ? simple.charAt(index) : ch);
        }
    }

    /**
     * Binary-searches {@link #traditional} for {@code ch}.
     *
     * @param ch character to look up
     * @return the index of {@code ch} in the traditional table, or {@code -1}
     *         if it is not present
     */
    private static int find(char ch) {
        int low = 0;
        // Last valid index. Starting at length() lets mid reach length() for
        // characters greater than every entry, and charAt then throws.
        int high = traditional.length() - 1;

        while (low <= high) {
            int mid = (low + high) >>> 1;
            char candidate = traditional.charAt(mid);
            if (candidate == ch) {
                return mid;
            }
            if (candidate < ch) {
                low = mid + 1;
            } else {
                high = mid - 1;
            }
        }
        return -1;
    }

    /** Returns the first line of the named file, failing if the file is empty. */
    private static String readFirstLine(String fileName) throws IOException {
        try (BufferedReader br = openReader(new File(fileName))) {
            String line = br.readLine();
            if (line == null) {
                throw new IOException(fileName + " is empty");
            }
            return line;
        }
    }

    /** Opens {@code file} for buffered reading in the GBK charset. */
    private static BufferedReader openReader(File file) throws IOException {
        // Resolve the charset first so a failed lookup cannot leak an open stream.
        Charset charset = Charset.forName(CHARSET_NAME);
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), charset));
    }

    /** Opens {@code file} for buffered writing in the GBK charset, truncating it. */
    private static BufferedWriter openWriter(File file) throws IOException {
        Charset charset = Charset.forName(CHARSET_NAME);
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), charset));
    }
}