读书人

java 正则表达式的应用及代码重构(优雅代码实现)

发布时间: 2012-12-23 11:28:15 作者: rapoo

java 正则表达式的使用及代码重构(优雅代码实现)案例

java 正则表达式的使用及代码重构(优雅代码实现)案例

实现功能:将字符串 "the first item is a hamer,with a cost of $132.99." 分解成单个的单词、数字和标点。

/**
 * The following program tokenizes a string into its
 * textual components: words, numbers,
 * or punctuation. Although it is a simple example, it illustrates
 * the basic techniques used to tokenize any type of input.
 */

?

?

import java.util.regex.Matcher;

import java.util.regex.Pattern;

?

/**
 * Splits a string into its textual components: words, numbers and
 * punctuation marks.
 *
 * <p>Every pattern is anchored with {@code \G}, so a match may only begin
 * where the previous match ended. The tokenizer therefore walks the input
 * strictly from left to right, switching patterns on a single
 * {@link Matcher} via {@link Matcher#usePattern(Pattern)}.
 *
 * @author chenhao
 */
public final class SimpleTextTokenizer {

    /** End of input: {@code \z} immediately after the previous match. */
    private static final Pattern END = Pattern.compile("\\G\\z");

    /** A run of word characters ({@code \w}). */
    private static final Pattern WORD = Pattern.compile("\\G\\w+");

    /** A single punctuation character. */
    private static final Pattern PUNCT = Pattern.compile("\\G\\p{Punct}");

    /** A run of whitespace, so that consecutive blanks are skipped together. */
    private static final Pattern SPACE = Pattern.compile("\\G\\s+");

    /**
     * One or more digits, optionally followed by a decimal point and further
     * digits. Note that a trailing point without digits ("5.") is part of
     * the match.
     */
    private static final Pattern NUMBER = Pattern.compile("\\G\\d+\\.?\\d*");

    /**
     * Patterns for real tokens, in the order they are tried. NUMBER comes
     * before WORD because {@code \w} also matches digits and would otherwise
     * swallow the integer part of a number.
     */
    private static final Pattern[] TOKEN_PATTERNS = {NUMBER, WORD, PUNCT};

    /** Utility class; not meant to be instantiated. */
    private SimpleTextTokenizer() { }

    /**
     * Returns the next token retrieved from the given matcher.
     *
     * @param mat the matcher over the text being tokenized; its pattern is
     *            replaced by this method
     * @return the next word, number or punctuation token; the empty string
     *         when the end of the input has been reached; or {@code null}
     *         when the text at the current position matches none of the
     *         patterns
     */
    static String getTextToken(final Matcher mat) {
        // Skip leading whitespace; the result is irrelevant because there
        // may simply be none at the current position.
        mat.usePattern(SPACE);
        mat.find();

        // The first pattern that matches at the current position wins.
        for (Pattern candidate : TOKEN_PATTERNS) {
            mat.usePattern(candidate);
            if (mat.find()) {
                return mat.group();
            }
        }

        // No token matched: either the input is exhausted ...
        mat.usePattern(END);
        if (mat.find()) {
            return "";
        }

        // ... or the next character is not recognized.
        return null;
    }

    /**
     * Tokenizes a fixed sample sentence and prints each token on its own
     * line.
     *
     * @param args ignored
     */
    public static void main(final String[] args) {
        // The initial pattern is irrelevant; getTextToken swaps it as needed.
        Matcher mat = END.matcher("the first item is a hamer,"
                + "with a cost of $132.99");

        String token;
        do {
            token = getTextToken(mat);
            if (token == null) {
                System.out.println("invalid token");
                break;
            }
            if (token.length() != 0) {
                System.out.println("Token " + token);
            } else {
                System.out.println("End of String");
            }
        } while (token.length() != 0);
    }
}

?

?

代码要写得优雅,但是上面这段代码很明显是不优雅的:

1. 注释太多;

2. 方法不可重用;

3. 有丑陋的条件判断语句;

4. 可扩展性不强。

?

改进方法:

1. 利用好的命名来代替注释;

2. 使用单一功能的方法;

3. 利用多态及好的设计模式来消除条件判断。

综上,可以写出很优雅的代码,可读性、可扩展性和可维护性都会大大提高。

?

下面是代码实现

SimpleTextTokenizer.java

?

import java.util.LinkedList;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

?

/**
 * The following program tokenizes a string into its
 * textual components: words, numbers,
 * or punctuation. Although it is a simple example, it illustrates
 * the basic techniques used to tokenize any type of input.
 */

?

?

/**

?* @author chenhao

?*

?*/

public final class SimpleTextTokenizer {

?

?? ?/**

?? ? *"\G"表示前一匹配的结尾 , "/z"表示字符串的结尾(包括)行终止符 end 用来匹配字符串的结尾.

?? ? */

?? ?private static Pattern end = Pattern.compile("\\G\\z");

?

?? ?/**

?? ? * "\g\w+" 表示从前一批匹配的结尾开始匹配一个或多个可以成为单词的的. 一部分的字符. 如A - Z a-z 0 - 9 及下划线

?? ? * (单词字符)

?? ? */

?? ?private static Pattern word = Pattern.compile("\\G\\w+");

?

?? ?/**

?? ? * . "/p{Punct}" 包含所有的标点符号

?? ? */

?? ?private static Pattern punct = Pattern.compile("\\G\\p{Punct}");

?? ?/**

?? ? *"\s" 匹配空格.

?? ? */

?? ?private static Pattern space = Pattern.compile("\\G\\s");

?

?? ?/**

?? ? * . 匹配数字

?? ? */

?? ?private static Pattern number = Pattern.compile("\\G\\d+\\.?\\d*");

?

?? ?private SimpleTextTokenizer() { }

?? ?class InvalidToken implements TokenPrinter {

?? ? ? ?/*

?? ? ? ? * (non-Javadoc)

?? ? ? ? * @see TokenPrinter#print()

?? ? ? ? */

?? ? ? ?public int print() {

?? ? ? ? ? ?System.out.println("invalid token");

?? ? ? ? ? ?return 0;

?? ? ? ?}

?? ?}

?

?? ?class EndToken implements TokenPrinter {

?

?? ? ? ?/* (non-Javadoc)

?? ? ? ? * @see TokenPrinter#print()

?? ? ? ? */

?? ? ? ?public int print() {

?? ? ? ? ? ?System.out.println("End of String");

?? ? ? ? ? ?return 0;

?? ? ? ?}

?

?? ?}

?

?? ?class NormalToken implements TokenPrinter {

?? ? ? ?private String token;

?

?? ? ? ?public NormalToken(final String token) {

?? ? ? ? ? ?this.token = token;

?? ? ? ?}

?

?? ? ? ?/*

?? ? ? ? * (non-Javadoc)

?? ? ? ? * @see TokenPrinter#print()

?? ? ? ? */

?? ? ? ?public int print() {

?? ? ? ? ? ?System.out.println("Token " + token);

?? ? ? ? ? ?return 1;

?? ? ? ?}

?

?? ?}

?

?? ?/**

?? ? * the method returns the next token retrieved from the Matcher passed to.

?? ? * @param mat

?? ? * ? ? ? ? ? ?the Matcher of the text

?? ? * @return the next token of the text

?? ? */

?? ?static TokenPrinter getNextTokenMtchPattern(final Matcher mat) {

?? ? ? ?skipLeadingSpaces(mat);

?? ? ? ?return nextTokenMtchPattern(mat);

?? ?}

?

?? ?/**

?? ? * @param mat

?? ? * ? ? ? ? ? ?the matcher of a string

?? ? * @return the next token of the string

?? ? */

?? ?private static TokenPrinter nextTokenMtchPattern(final Matcher mat) {

?? ? ? ?// Next, obtain the next token in the string

?? ? ? ?// by attempting to match each pattern.

?? ? ? ?// The token found by the first matching pattern

?? ? ? ?// is returned. The order in which the patterns

?? ? ? ?// are tried matters. Checking for a word

?? ? ? ?// before checking for a number can change the results.

?? ? ? ?// First check for a number

?? ? ? ?LinkedList<Pattern> patternList = createPatternList();

?? ? ? ?for (Pattern pat : patternList) {

?? ? ? ? ? ?mat.usePattern(pat);

?? ? ? ? ? ?if (mat.find()) {

?? ? ? ? ? ? ? ?return ?createTokenPrinter(mat.group());

?? ? ? ? ? ?}

?? ? ? ?}

?? ? ? ?return createTokenPrinter(null); // the patternList is null;

?? ?}

?

?? ?/**

?? ? * @return a linkedList of Pattern;

?? ? */

?? ?private static LinkedList<Pattern> createPatternList() {

?? ? ? ?LinkedList<Pattern> patternList = new LinkedList<Pattern>();

?

?? ? ? ?patternList.add(number);

?? ? ? ?patternList.add(word);

?? ? ? ?patternList.add(punct);

?? ? ? ?patternList.add(end);

?? ? ? ?return patternList;

?? ?}

?

?? ?/**

?? ? * create a TokenPrinter use the token match the Pattern.

?? ? * @param token the token match the Pattern

?? ? * @return tokenPrinter

?? ? */

?? ?private static TokenPrinter createTokenPrinter(final String token) {

?? ? ? ?SimpleTextTokenizer textTokenizer = new SimpleTextTokenizer();

?? ? ? ?if (token == null) {

?? ? ? ? ? ?return textTokenizer.new InvalidToken();

?? ? ? ?}

?? ? ? ?if (token.length() == 0) {

?? ? ? ? ? ?return textTokenizer.new EndToken();

?? ? ? ?} else {

?? ? ? ? ? ?return textTokenizer.new NormalToken(token);

?? ? ? ?}

?

?? ?}

?? ?/**

?? ? * @param mat

?? ? * ? ? ? ? ? ?the matcher of a string. we won't let the mat point to anther

?? ? * ? ? ? ? ? ?object so it is final

?? ? */

?? ?private ?static void displayTokensInString(final Matcher mat) {

?? ? ? ?TokenPrinter tokenPtinter;

?? ? ? ?do {

?? ? ? ? ? ?tokenPtinter = getNextTokenMtchPattern(mat);

?? ? ? ?} while (tokenPtinter.print() != 0);

?? ?}

?? ?/**

?? ? * @param mat

?? ? * ? ? ? ? ? ?the matcher

?? ? */

?? ?private static void skipLeadingSpaces(final Matcher mat) {

?? ? ? ?mat.usePattern(space);

?? ? ? ?mat.find();

?? ?}

?

?? ?/**

?? ? * @param args

?? ? * ?does nothing in this programs

?? ? */

?? ?public static void main(final String[] args) {

?? ? ? ?// Create a matcher

?? ? ? ?Matcher mat = end.matcher("the first item is a hamer,"

?? ? ? ? ? ? ? ?+ "with a cost of $132.99.");

?? ? ? ?displayTokensInString(mat);

?? ?}

}

?

TokenPrinter.java

/**
 * Something that can print a single token.
 *
 * @author chenhao
 */
interface TokenPrinter {

    /**
     * Prints the token itself.
     *
     * @return a status code for the caller. The printers defined in
     *         {@code SimpleTextTokenizer} return 1 for an ordinary token and
     *         0 for the end of the input or an invalid token, and its
     *         display loop stops as soon as 0 is returned.
     */
    int print();
}

?

读书人网 >编程

热点推荐