实现一算法:两个字符串 连续字符相同
有两个字符串,a和b, 实现一个算法,找出a,b中 连续相同的字符最多的那个字符串
如:String a = "abcdefghij"; String b="234efnmnghilsbwahah"; 那么应该是:ghi
先说下算法如何实现,效率比较高一些,最好能有实现的代码,但最重要的是算法实现原理 谢谢大家~!~
[解决办法]
- Java code
package com.train.first;import java.lang.reflect.Method;import java.util.ArrayList;import java.util.List;import java.util.regex.Matcher;import java.util.regex.Pattern;public class Test{ public static void main(String[] args) throws Exception { String str1 = "abcdefghij"; String str2 = "234efabcdenmnghilsbwahah"; String pstr = str1.length() < str2.length() ? str1 : str2; String mstr = str1.length() < str2.length() ? str2 : str1; int i = 0; List<String> regex = getRegex(pstr, i); out: while (regex != null) { for (int k = 0; k < regex.size(); k++) { Pattern p = Pattern.compile(regex.get(k)); Matcher m = p.matcher(mstr); if (m.find()) { System.out.println(regex.get(k)); break out; } } regex = getRegex(pstr, ++i); } } private static List<String> getRegex(String str, int i) { if (i >= str.length()) { return null; } List<String> result = new ArrayList<String>(); result.add(str.substring(i)); result.add(str.substring(0, str.length() - i)); for (int k = i - 1; k > 0; k--) { result.add(str.substring(k, str.length() - (i - k))); } return result; }}
[解决办法]
用KMP实现了下,对于楼主的输入比用正则能快到一个数量级。
如果字符最多的字符串有多个的话,只找第一个。
不过没怎么测试,先睡觉了。
- Java code
public static void find(String string1, String string2) { String string; String pattern; if (string1.length() > string2.length()) { string = string1; pattern = string2; } else { string = string2; pattern = string1; } String find = null; int maxLength = -1; for (int i = 0; i < pattern.length(); i++) { int[] result = KMPMatch(string, pattern.substring(i)); if (result[0] != -1 && result[1] > maxLength) { find = string.substring(result[0], result[0] + result[1]); maxLength = result[1]; } } System.out.println(find); } public static int[] KMPMatch(String string, String pattern) { if (string == null || pattern == null || pattern.length() == 0) return new int[]{-1, -1}; int maxIndex = -1; int maxLength = -1; int n = string.length(); int m = pattern.length(); int[] prefixs = new int[m]; computePrefix(pattern, prefixs); for (int i = 0, q = 0; i < n; i++) { while(q > 0 && pattern.charAt(q) != string.charAt(i)) q = prefixs[q-1]; if (pattern.charAt(q) == string.charAt(i)) { q++; if (q > maxLength) { maxLength = q; maxIndex = i - q +1; } } if (q == m) { maxLength = q; maxIndex = i - q + 1; break; } } return new int[]{maxIndex, maxLength}; } public static void computePrefix(String pattern, int[] prefixs) { prefixs[0] = 0; for (int i = 1, k = 0, length = pattern.length(); i < length; i++) { while(k > 0 && pattern.charAt(k) != pattern.charAt(i)) { k = prefixs[k]; } if (pattern.charAt(k) == pattern.charAt(i)) k++; prefixs[i] = k; } }
[解决办法]
- Java code
public class Test { public static void main(String[] args) { String a = "abcdefghij"; String b = "234efnmnghilsbwahah"; System.out.println(getSameString(a, b)); } private static String getSameString(String s1, String s2) { int startPos = 0; //始终指向重复字符串的起始位置 int endPos = 0; //要截取字符串的末尾 String str1 = null; //始终存储当前重复字符最多的那个字符串 String str2 = null; //存储了当前重复的字符串 int index = 0; //遍历字符串的指针// int j = 0; //记录每次出现重复字符的起始位置 for(int i=0; i<s1.length(); i++) { char c = s1.charAt(i); while((index=s2.indexOf(c,index)) != -1) { startPos = index; endPos = startPos + 1; int m = i + 1; //m代表了前一个重复字符串的后一个位置 while(index < s2.length()-1 && m < s1.length() && s1.charAt(m) == s2.charAt(index+1)) { endPos ++; //如果有字符相等,则endPos后移一位 m++; //相应的字符串s1也后移下 index++; //游标后移 } index++; str2 = s2.substring(startPos, endPos); if(str1 == null || str1.length() < str2.length()) str1 = str2; } } return str1; }}