100G的大文件中找出100个最大的数
题目:有一个100G大小的文件,里面存的全是数字,并且每个数字之间用逗号隔开。现在要在这一大堆数字中找出最大的100个数。
程序:
?
/**
 * Finds the 100 largest distinct numbers in a (potentially huge) file of
 * comma-separated numeric values.
 *
 * <p>The file is streamed in fixed-size chunks and only the current best
 * candidates are retained, so memory use does not grow with the file size.
 * Candidates live in a {@link TreeSet}: it keeps them sorted, which makes the
 * smallest retained value (the one to evict) available via {@code first()}.
 * Because a set is used, equal values are stored only once.
 */
public class Pick100 {

    /** Maximum number of values retained. */
    private static final int LIMIT = 100;

    /** Number of bytes requested from the stream per read. */
    private static final int BUFFER_SIZE = 1024;

    /** File read by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_FILE = "numbers.txt";

    /** The largest distinct values seen so far, in ascending order. */
    private final TreeSet<Double> treeSet;

    /** Creates a picker with no retained values. */
    public Pick100() {
        treeSet = new TreeSet<Double>();
    }

    /**
     * Streams the given file and keeps the largest values found in it.
     *
     * <p>Values are separated by commas; surrounding whitespace is ignored and
     * empty tokens (for example after a trailing comma) are skipped. I/O
     * failures, including a missing file, are reported on standard error and
     * the values collected so far are kept.
     *
     * @param fileName path of the file to scan
     * @throws NumberFormatException if a non-blank token is not a valid number
     */
    public void doPick(String fileName) {
        File file = new File(fileName);
        InputStream is = null;
        byte[] buffer = new byte[BUFFER_SIZE];
        // Accumulates the characters of the number currently being read. It
        // lives outside the read loop so that a number split across two reads
        // is reassembled instead of being truncated or dropped.
        StringBuilder token = new StringBuilder();
        try {
            is = new BufferedInputStream(new FileInputStream(file));
            int count;
            while ((count = is.read(buffer)) != -1) {
                // Only the first 'count' bytes are fresh; the rest of the
                // buffer still holds data from an earlier read.
                for (int i = 0; i < count; i++) {
                    byte current = buffer[i];
                    if (current == ',') {
                        offer(token);
                    } else {
                        // Numeric text is plain ASCII, so a byte maps
                        // directly to a char.
                        token.append((char) (current & 0xFF));
                    }
                }
            }
            // The final number has no trailing comma; flush it explicitly.
            offer(token);
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            e.printStackTrace();
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Parses the accumulated token, clears the accumulator, and keeps the
     * value if it belongs among the largest seen so far.
     *
     * @param token characters of one comma-delimited value; emptied on return
     */
    private void offer(StringBuilder token) {
        String text = token.toString().trim();
        token.setLength(0);
        if (text.length() == 0) {
            return;
        }
        Double value = Double.valueOf(text);
        if (treeSet.size() < LIMIT) {
            treeSet.add(value);
        } else if (value.compareTo(treeSet.first()) > 0 && treeSet.add(value)) {
            // A new, larger value was inserted: evict the current minimum so
            // the set never holds more than LIMIT entries.
            treeSet.remove(treeSet.first());
        }
    }

    /**
     * Prints the retained values to standard output, one per line, in
     * ascending order.
     */
    public void print() {
        Iterator<Double> it = treeSet.iterator();
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }

    /**
     * Scans a file and prints its largest values.
     *
     * @param args optional; {@code args[0]} is the file to scan, otherwise
     *             {@code numbers.txt} in the working directory is used
     */
    public static void main(String[] args) {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        Pick100 pick = new Pick100();
        pick.doPick(fileName);
        pick.print();
    }
}
?