读书人

Java I/O:判断读入文件的字符编码

发布时间: 2012-09-17 12:06:51 作者: rapoo

Java I/O:判断读入文件的字符编码

package encoding;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Guesses the character encoding of a text file from its raw bytes.
 *
 * <p>The guess is a heuristic with exactly four possible answers:
 * {@code "UTF-16LE"}, {@code "UTF-16BE"}, {@code "UTF-8"} or the fallback
 * {@code "GBK"}. A byte-order mark at the start of the file is decisive; when
 * there is none, the content is scanned for the first well-formed three-byte
 * UTF-8 sequence.
 */
public class Encoding {

    /** Charset reported when nothing in the file points to a Unicode encoding. */
    private static final String DEFAULT_CHARSET = "GBK";

    /** File inspected by {@link #main} when no path is given on the command line. */
    private static final String DEFAULT_PATH = "C://login.txt";

    /** Number of leading bytes needed to recognise the longest supported BOM (UTF-8). */
    private static final int BOM_LENGTH = 3;

    /**
     * Prints the detected charset of a file to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to inspect.
     *             When absent, {@code C://login.txt} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        System.out.println(getCharset(new File(path)));
    }

    /**
     * Detects the charset of {@code file}.
     *
     * <p>Detection order:
     * <ol>
     *   <li>BOM {@code FF FE} gives {@code "UTF-16LE"}; {@code FE FF} gives
     *       {@code "UTF-16BE"}; {@code EF BB BF} gives {@code "UTF-8"}.</li>
     *   <li>Otherwise the bytes are scanned from the start; the first
     *       well-formed three-byte UTF-8 sequence gives {@code "UTF-8"}.</li>
     *   <li>Anything else (empty file, pure ASCII, a malformed sequence, a lead
     *       byte {@code >= 0xF0}, or only two-byte sequences) gives
     *       {@code "GBK"}.</li>
     * </ol>
     *
     * <p>This method never throws: a {@code null} argument or an unreadable
     * file is reported on standard error and yields {@code "GBK"}.
     *
     * @param file the file to inspect
     * @return one of {@code "UTF-16LE"}, {@code "UTF-16BE"}, {@code "UTF-8"},
     *         {@code "GBK"}
     */
    public static String getCharset(File file) {
        if (file == null) {
            System.err.println("Encoding.getCharset: file is null, assuming " + DEFAULT_CHARSET);
            return DEFAULT_CHARSET;
        }
        // try-with-resources closes the stream on every path, including the
        // early returns and exceptions.
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            // The read limit must cover the bytes consumed before reset().
            in.mark(BOM_LENGTH);
            byte[] head = new byte[BOM_LENGTH];
            int headLength = readHead(in, head);
            if (headLength == 0) {
                return DEFAULT_CHARSET; // empty file
            }

            String fromBom = charsetFromBom(head, headLength);
            if (fromBom != null) {
                return fromBom;
            }

            // No BOM: rewind so the content scan also sees the first bytes.
            in.reset();
            return containsUtf8ThreeByteSequence(in) ? "UTF-8" : DEFAULT_CHARSET;
        } catch (IOException | SecurityException e) {
            // Best effort by design: callers get the default instead of an exception.
            System.err.println("Encoding.getCharset: cannot read " + file + ": " + e);
            return DEFAULT_CHARSET;
        }
    }

    /** Fills {@code head} from the stream and returns how many bytes were available. */
    private static int readHead(InputStream in, byte[] head) throws IOException {
        int filled = 0;
        while (filled < head.length) {
            int n = in.read(head, filled, head.length - filled);
            if (n == -1) {
                break;
            }
            filled += n;
        }
        return filled;
    }

    /** Maps a leading byte-order mark to its charset name, or returns null when there is none. */
    private static String charsetFromBom(byte[] head, int length) {
        if (length >= 2 && head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            return "UTF-16LE";
        }
        if (length >= 2 && head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            return "UTF-16BE";
        }
        if (length >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB
                && head[2] == (byte) 0xBF) {
            return "UTF-8";
        }
        return null;
    }

    /**
     * Scans the stream and reports whether the first conclusive byte pattern is
     * a well-formed three-byte UTF-8 sequence.
     */
    private static boolean containsUtf8ThreeByteSequence(InputStream in) throws IOException {
        int b;
        while ((b = in.read()) != -1) {
            if (b < 0x80) {
                continue; // ASCII carries no information, keep looking
            }
            if (b <= 0xBF) {
                return false; // continuation byte without a lead byte
            }
            if (b <= 0xDF) {
                // Two-byte lead: a valid pair is not treated as proof of UTF-8,
                // so scanning continues; an invalid pair ends the scan.
                if (!isContinuation(in.read())) {
                    return false;
                }
                continue;
            }
            if (b <= 0xEF) {
                // Three-byte lead followed by two continuation bytes is decisive.
                return isContinuation(in.read()) && isContinuation(in.read());
            }
            return false; // lead byte >= 0xF0 is treated as non-UTF-8
        }
        return false;
    }

    /** True when {@code b} is a UTF-8 continuation byte (10xxxxxx); false for EOF (-1). */
    private static boolean isContinuation(int b) {
        return 0x80 <= b && b <= 0xBF;
    }
}

读书人网 >编程

热点推荐