《编程珠玑》第一章Java代码实现
我看算法的时候只看伪代码的话,感觉太抽象,总记不住,我希望能看到真正实现的代码,这样心里会感觉踏实一点。
因此,总希望能把书上的算法给具体实现了,这样一来能记住算法,加深印象,而来二来也能提高自己的编程水平。
《编程珠玑》的代码已经有C/C++的实现版本了,我在附件也贴出来了,但是,作为一个C++语言的入门者,因为还涉及到不知道如何链接C++的函数库等种种问题,一直都没法编译运行,非常郁闷。于是,产生了一个想法,用我比较擅长的Java语言把书上的算法实现。以后分几章贴出我的代码,希望牛人点评指正。
第一章《开篇》, 很经典,特别是位图法经典的经典。
它就提出了这样一个问题,如何对一个磁盘文件进行排序。这个文件之多包含10000000(7个0)条记录,每条记录是一个不超过7位的整数。
该问题还有一个限制的条件,只有1MB的可用主存,磁盘空间充足。在这里我采用java的虚拟机变量来模拟,这里采用-Xms2M(初始的内存数) -Xmx2M(最大的内存数)来限制。
再解这个问题之前,还需要解决一个问题,那就需要一个输入文件,包含了10000000条记录,而且最好是每条记录都不同。
解决这个问题的办法再编程珠玑第12章给出来了。以下贴出代码。
/** * 随机产生1-n的m个数 * * */public class RandomGenerate { public static void main(String[] args) { if (args.length == 0) { System.out.println("useage: java com.ch12.RandomGenerate 70000000 10000000 "); //f1(20, 100); //f0(m, n); return; } int m = Integer.parseInt(args[0]); int n = Integer.parseInt(args[1]); System.out.println("m:" + m + ",n:" + n); long l1 = System.currentTimeMillis(); try { PrintWriter pw = new PrintWriter("d:/input.txt"); f1(m, n, pw); pw.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } long l2 = System.currentTimeMillis(); System.out.println("time:" + (l2 - l1)); } static int randInt(int i, int j, Random rand) { if (i < j) return i + rand.nextInt(j - i + 1); else if (i > j) return j + rand.nextInt(i - j + 1); return i; } static void f0(int m, int n) { int select = m; int remaining = n; Random rand = new Random(System.currentTimeMillis()); for (int i = 0; i < n; i++) { if (rand.nextInt(remaining) < select) { System.out.println(m - select + 1 + ":" + i); select--; }remaining--; } } static void f1(int m, int n) { int[] array = new int[n]; Random rand = new Random(System.currentTimeMillis()); for (int i = 0; i < n; i++) array[i] = i + 1; for (int i = 0; i < m; i++) { int j = randInt(i, n - 1, rand); int temp = array[j]; array[j] = array[i]; array[i] = temp; } for (int i = 0; i < m; i++) { System.out.println(i + 1 + ":" + array[i]); } } static void f1(int m, int n, PrintWriter pw) { int[] array = new int[n]; Random rand = new Random(System.currentTimeMillis()); for (int i = 0; i < n; i++) array[i] = i + 1; for (int i = 0; i < m; i++) { int j = randInt(i, n - 1, rand); int temp = array[j]; array[j] = array[i]; array[i] = temp; } for (int i = 0; i < m; i++) { pw.println(array[i]); } pw.flush(); } } 方法1 位图法 主要是使用Java中的BitSet类来实现,我看了BitSet的源码,它内部也是用数组来实现的。
/** * 采用位图法 * */public class BitSort1 { /** * @param args */ public static void main(String[] args) { if (args.length < 1) { System.out.println("usage: java -Xms2M -Xmx2M com.ch1.BitSort1 d:\\input.txt d:\\output.txt"); return; } try { BitSort1 client = new BitSort1(); BufferedReader br = new BufferedReader(new FileReader(args[0])); PrintWriter pw = new PrintWriter(args[1]); long l1 = System.currentTimeMillis(); client.input(br); client.output(pw);long l2 = System.currentTimeMillis(); System.out.println("time:" + (l2 - l1)); } catch (FileNotFoundException e) { e.printStackTrace(); } } BitSet bs = new BitSet(10 ^ 7); void input(BufferedReader br) { String s = ""; try { while ((s = br.readLine()) != null) { int in = Integer.parseInt(s); bs.set(in); } } catch (IOException e) { e.printStackTrace(); } } void output(PrintWriter pw) { int size = bs.size(); for (int i = 0; i < size; i++) { if (bs.get(i)) { pw.println(i); } } pw.flush(); } } 方法2 硬盘排序法 首先将数据分成n份,如1-249999是第一份,250000到499999是第二份,以此类推,然后对每一份文件用系统排序,然后把它输出。
/** * 采用硬盘排序 * */public class BitSort2 { /** * @param args */ public static void main(String[] args) { if (args.length < 1) { System.out.println("usage: java -Xms2M -Xmx2M com.ch1.BitSort3 d:\\input.txt d:\\output.txt"); return; } BitSort2 client = new BitSort2(); long l1 = System.currentTimeMillis(); client.fun(args[0], args[1]); long l2 = System.currentTimeMillis(); System.out.println("time:" + (l2 - l1)); } public final static int SIZE = 10000000; //总大小 public final static int TIMES = 40; //次数 public final static int UNIT = SIZE / TIMES; void fun(String inputName, String outputName) { try { PrintWriter pw = new PrintWriter(outputName); for (int index = 0; index < TIMES; index++) { BufferedReader br = new BufferedReader(new FileReader(inputName)); int low = UNIT * index, high = low + UNIT; int[] arr = new int[UNIT]; int counter = 0; String s = ""; while ((s = br.readLine()) != null) { int in = Integer.parseInt(s); if (in > low && in <= high) { arr[counter++] = in; } } arr = Arrays.copyOf(arr, counter); Arrays.sort(arr); int size = arr.length; for (int i = 0; i < size; i++) { pw.println(arr[i]); } pw.flush(); br.close(); } pw.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } 方法3 基于磁盘的多路归并排序 首先把大文件平均分成n份,然后每一份都排好序,然后建一个n个数的堆,每次从堆中取出最小的数,取完之后从取走的那个数的那个文件取下一个数,如果这个文件没有数了,则取它的下一个文件的数,直到所有的文件都取完时,再把堆中的数都输出。
/** * 采用外部排序 基于磁盘的多路归并排序 */public class BitSort4 { /** * @param args */ public static void main(String[] args) { if (args.length < 1) { System.out.println("usage: java -Xms2M -Xmx2M com.ch1.BitSort4 d:\\input.txt d:\\output.txt"); return; } BitSort4 client = new BitSort4(); long l1 = System.currentTimeMillis(); client.fun(args[0], args[1]); long l2 = System.currentTimeMillis(); System.out.println("time:" + (l2 - l1)); } public final static int SIZE = 10000000; //总大小 public final static int TIMES = 80; //次数 public final static int UNIT = SIZE / TIMES; int[] heap = new int[TIMES + 1]; //堆 int[] indexs = new int[TIMES + 1]; //索引 boolean[] isNulls = new boolean[TIMES]; //int[] nums = new int[TIMES]; void fixUp(int k) { int j = 0; while ((j = k >> 1) > 0) { if (heap[k] >= heap[j]) break; int temp = heap[j]; heap[j] = heap[k]; heap[k] = temp; temp = indexs[j]; indexs[j] = indexs[k]; indexs[k] = temp; k = j; } } void fixDown(int k) { int size = TIMES; int j = 0; while ((j = k << 1) <= size && j > 0) { if (j + 1 <= size && heap[j + 1] <= heap[j]) j++; if (heap[k] <= heap[j]) break; int temp = heap[j]; heap[j] = heap[k]; heap[k] = temp; temp = indexs[j]; indexs[j] = indexs[k]; indexs[k] = temp; k = j; } } void fixDown(int k, int size) { int j = 0; while ((j = k << 1) <= size && j > 0) { if (j + 1 <= size && heap[j + 1] <= heap[j]) j++; if (heap[k] <= heap[j]) break; int temp = heap[j]; heap[j] = heap[k]; heap[k] = temp; temp = indexs[j]; indexs[j] = indexs[k]; indexs[k] = temp; k = j; } } void add(int index, int val) { indexs[1] = index; heap[1] = val; fixDown(1); } int getMin() { return heap[1]; } int getWhich() { return indexs[1]; } void fun(String inputName, String outputName) { try { PrintWriter[] pws = new PrintWriter[TIMES]; for (int i = 0; i < TIMES; i++) { pws[i] = new PrintWriter("temp\\temp" + i + ".txt"); } BufferedReader br = new BufferedReader(new FileReader(inputName)); String s = ""; int index = 0; while ((s = br.readLine()) != null) { int in = Integer.parseInt(s); PrintWriter pwt = pws[index % TIMES]; pwt.println(in); pwt.flush(); index++; } br.close(); for (int i = 0; i < TIMES; i++) { pws[i].close(); } for (int i = 0; i < TIMES; i++) { BufferedReader brt = new BufferedReader(new FileReader("temp\\temp" + i + ".txt")); index = 0; int[] arr = new int[UNIT]; while ((s = brt.readLine()) != null) { int in = Integer.parseInt(s); arr[index++] = in; } arr = Arrays.copyOf(arr, index); Arrays.sort(arr); PrintWriter pwt = new PrintWriter("temp\\temptemp" + i + ".txt"); int size = arr.length; for (int j = 0; j < size; j++) { pwt.println(arr[j]); } brt.close(); pwt.close(); new File("temp\\temp" + i + ".txt").delete(); new File("temp\\temptemp" + i + ".txt").renameTo(new File("temp\\temp" + i + ".txt")); } //System.out.println("--------------------------"); PrintWriter pw = new PrintWriter(outputName); //建堆BufferedReader[] brs = new BufferedReader[TIMES]; for (int i = 0; i < TIMES; i++) { brs[i] = new BufferedReader(new FileReader("temp\\temp" + i + ".txt")); int in = Integer.parseInt(brs[i].readLine()); heap[i + 1] = in; indexs[i + 1] = i; } for (int i = 2; i <= TIMES; i++) { fixUp(i); } String st = ""; boolean flag = false; while (true) { int in = getMin(); pw.println(in); int which = getWhich(); int orgin = which; st = null; if (!isNulls[which]) { st = brs[which].readLine(); //nums[which]++; if (st == null) { //System.out.println(which + ":" + nums[which]); isNulls[which] = true; } } while (isNulls[which]) { which++; which = which % TIMES; if (which == orgin) { //System.out.println("orgin:" + orgin); flag = true; break; } if (!isNulls[which]) { st = brs[which].readLine(); //nums[which]++; if (st == null) { //System.out.println(which + ":" + nums[which]); isNulls[which] = true; } } } if (flag) break; int val = Integer.parseInt(st); add(which, val); } //将堆中剩余的元素按顺序输出int size = TIMES; heap[1] = heap[size]; size--;fixDown(1, size); //System.out.println(Arrays.toString(heap)); while (size > 0) { int in = getMin(); pw.println(in); heap[1] = heap[size]; size--;fixDown(1, size); } pw.flush(); pw.close(); //System.out.println(Arrays.toString(nums)); //System.out.println(Arrays.toString(isNulls)); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }