You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

259 lines
8.2 KiB

2 years ago
package class05;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
public class Code04_DeleteMinCost {
// 题目:
// 给定两个字符串s1和s2问s2最少删除多少字符可以成为s1的子串
// 比如 s1 = "abcde"s2 = "axbc"
// 返回 1
// 解法一
// 求出str2所有的子序列然后按照长度排序长度大的排在前面。
// 然后考察哪个子序列字符串和s1的某个子串相等(KMP),答案就出来了。
// 分析:
// 因为题目原本的样本数据中有特别说明s2的长度很小。所以这么做也没有太大问题也几乎不会超时。
// 但是如果某一次考试给定的s2长度远大于s1这么做就不合适了。
public static int minCost1(String s1, String s2) {
List<String> s2Subs = new ArrayList<>();
process(s2.toCharArray(), 0, "", s2Subs);
s2Subs.sort(new LenComp());
for (String str : s2Subs) {
if (s1.indexOf(str) != -1) { // indexOf底层和KMP算法代价几乎一样也可以用KMP代替
return s2.length() - str.length();
}
}
return s2.length();
}
public static void process(char[] str2, int index, String path, List<String> list) {
if (index == str2.length) {
list.add(path);
return;
}
process(str2, index + 1, path, list);
process(str2, index + 1, path + str2[index], list);
}
// x字符串只通过删除的方式变到y字符串
// 返回至少要删几个字符
// 如果变不成返回Integer.Max
public static int onlyDelete(char[] x, char[] y) {
if (x.length < y.length) {
return Integer.MAX_VALUE;
}
int N = x.length;
int M = y.length;
int[][] dp = new int[N + 1][M + 1];
for (int i = 0; i <= N; i++) {
for (int j = 0; j <= M; j++) {
dp[i][j] = Integer.MAX_VALUE;
}
}
dp[0][0] = 0;
// dp[i][j]表示前缀长度
for (int i = 1; i <= N; i++) {
dp[i][0] = i;
}
for (int xlen = 1; xlen <= N; xlen++) {
for (int ylen = 1; ylen <= Math.min(M, xlen); ylen++) {
if (dp[xlen - 1][ylen] != Integer.MAX_VALUE) {
dp[xlen][ylen] = dp[xlen - 1][ylen] + 1;
}
if (x[xlen - 1] == y[ylen - 1] && dp[xlen - 1][ylen - 1] != Integer.MAX_VALUE) {
dp[xlen][ylen] = Math.min(dp[xlen][ylen], dp[xlen - 1][ylen - 1]);
}
}
}
return dp[N][M];
}
public static class LenComp implements Comparator<String> {
@Override
public int compare(String o1, String o2) {
return o2.length() - o1.length();
}
}
// 解法二
// 生成所有s1的子串
// 然后考察每个子串和s2的编辑距离(假设编辑距离只有删除动作且删除一个字符的代价为1)
// 如果s1的长度较小s2长度较大这个方法比较合适
public static int minCost2(String s1, String s2) {
if (s1.length() == 0 || s2.length() == 0) {
return s2.length();
}
int ans = Integer.MAX_VALUE;
char[] str2 = s2.toCharArray();
for (int start = 0; start < s1.length(); start++) {
for (int end = start + 1; end <= s1.length(); end++) {
// str1[start....end]
// substring -> [ 0,1 )
ans = Math.min(ans, distance(str2, s1.substring(start, end).toCharArray()));
}
}
return ans == Integer.MAX_VALUE ? s2.length() : ans;
}
// 求str2到s1sub的编辑距离
// 假设编辑距离只有删除动作且删除一个字符的代价为1
public static int distance(char[] str2, char[] s1sub) {
int row = str2.length;
int col = s1sub.length;
int[][] dp = new int[row][col];
// dp[i][j]的含义:
// str2[0..i]仅通过删除行为变成s1sub[0..j]的最小代价
// 可能性一:
// str2[0..i]变的过程中,不保留最后一个字符(str2[i])
// 那么就是通过str2[0..i-1]变成s1sub[0..j]之后再最后删掉str2[i]即可 -> dp[i][j] = dp[i-1][j] + 1
// 可能性二:
// str2[0..i]变的过程中,想保留最后一个字符(str2[i])然后变成s1sub[0..j]
// 这要求str2[i] == s1sub[j]才有这种可能, 然后str2[0..i-1]变成s1sub[0..j-1]即可
// 也就是str2[i] == s1sub[j] 的条件下dp[i][j] = dp[i-1][j-1]
dp[0][0] = str2[0] == s1sub[0] ? 0 : Integer.MAX_VALUE;
for (int j = 1; j < col; j++) {
dp[0][j] = Integer.MAX_VALUE;
}
for (int i = 1; i < row; i++) {
dp[i][0] = (dp[i - 1][0] != Integer.MAX_VALUE || str2[i] == s1sub[0]) ? i : Integer.MAX_VALUE;
}
for (int i = 1; i < row; i++) {
for (int j = 1; j < col; j++) {
dp[i][j] = Integer.MAX_VALUE;
if (dp[i - 1][j] != Integer.MAX_VALUE) {
dp[i][j] = dp[i - 1][j] + 1;
}
if (str2[i] == s1sub[j] && dp[i - 1][j - 1] != Integer.MAX_VALUE) {
dp[i][j] = Math.min(dp[i][j], dp[i - 1][j - 1]);
}
}
}
return dp[row - 1][col - 1];
}
// 解法二的优化
public static int minCost3(String s1, String s2) {
if (s1.length() == 0 || s2.length() == 0) {
return s2.length();
}
char[] str2 = s2.toCharArray();
char[] str1 = s1.toCharArray();
int M = str2.length;
int N = str1.length;
int[][] dp = new int[M][N];
int ans = M;
for (int start = 0; start < N; start++) { // 开始的列数
dp[0][start] = str2[0] == str1[start] ? 0 : M;
for (int row = 1; row < M; row++) {
dp[row][start] = (str2[row] == str1[start] || dp[row - 1][start] != M) ? row : M;
}
ans = Math.min(ans, dp[M - 1][start]);
// 以上已经把start列填好
// 以下要把dp[...][start+1....N-1]的信息填好
// start...end end - start +2
for (int end = start + 1; end < N && end - start < M; end++) {
// 0... first-1 行 不用管
int first = end - start;
dp[first][end] = (str2[first] == str1[end] && dp[first - 1][end - 1] == 0) ? 0 : M;
for (int row = first + 1; row < M; row++) {
dp[row][end] = M;
if (dp[row - 1][end] != M) {
dp[row][end] = dp[row - 1][end] + 1;
}
if (dp[row - 1][end - 1] != M && str2[row] == str1[end]) {
dp[row][end] = Math.min(dp[row][end], dp[row - 1][end - 1]);
}
}
ans = Math.min(ans, dp[M - 1][end]);
}
}
return ans;
}
// 来自学生的做法时间复杂度O(N * M平方)
// 复杂度和方法三一样,但是思路截然不同
public static int minCost4(String s1, String s2) {
char[] str1 = s1.toCharArray();
char[] str2 = s2.toCharArray();
HashMap<Character, ArrayList<Integer>> map1 = new HashMap<>();
for (int i = 0; i < str1.length; i++) {
ArrayList<Integer> list = map1.getOrDefault(str1[i], new ArrayList<Integer>());
list.add(i);
map1.put(str1[i], list);
}
int ans = 0;
// 假设删除后的str2必以i位置开头
// 那么查找i位置在str1上一共有几个并对str1上的每个位置开始遍历
// 再次遍历str2一次看存在对应str1中i后续连续子串可容纳的最长长度
for (int i = 0; i < str2.length; i++) {
if (map1.containsKey(str2[i])) {
ArrayList<Integer> keyList = map1.get(str2[i]);
for (int j = 0; j < keyList.size(); j++) {
int cur1 = keyList.get(j) + 1;
int cur2 = i + 1;
int count = 1;
for (int k = cur2; k < str2.length && cur1 < str1.length; k++) {
if (str2[k] == str1[cur1]) {
cur1++;
count++;
}
}
ans = Math.max(ans, count);
}
}
}
return s2.length() - ans;
}
public static String generateRandomString(int l, int v) {
int len = (int) (Math.random() * l);
char[] str = new char[len];
for (int i = 0; i < len; i++) {
str[i] = (char) ('a' + (int) (Math.random() * v));
}
return String.valueOf(str);
}
public static void main(String[] args) {
char[] x = { 'a', 'b', 'c', 'd' };
char[] y = { 'a', 'd' };
System.out.println(onlyDelete(x, y));
int str1Len = 20;
int str2Len = 10;
int v = 5;
int testTime = 10000;
boolean pass = true;
System.out.println("test begin");
for (int i = 0; i < testTime; i++) {
String str1 = generateRandomString(str1Len, v);
String str2 = generateRandomString(str2Len, v);
int ans1 = minCost1(str1, str2);
int ans2 = minCost2(str1, str2);
int ans3 = minCost3(str1, str2);
int ans4 = minCost4(str1, str2);
if (ans1 != ans2 || ans3 != ans4 || ans1 != ans3) {
pass = false;
System.out.println(str1);
System.out.println(str2);
System.out.println(ans1);
System.out.println(ans2);
System.out.println(ans3);
System.out.println(ans4);
break;
}
}
System.out.println("test pass : " + pass);
}
}