题目
Leetcode - 10. 正则表达式匹配
解题思路
- 预处理正则表达式,合并同类项,比如: "a*b*c*.*" -> ".*"
- 根据预处理后的正则表达式,构建NFA
- 通过NFA状态转移条件一个一个匹配字符串字符
- 不匹配的状态要回退
- 匹配完字符串了,且该状态是可结束的,就true,否则false
注意: c*b*a* 这种正则,构造NFA时,只标记a对应的状态可结束是不够的,其实c, b对应的状态也可结束
注意: a*b*c 这种正则,构造NFA时,a和b对应的状态都可以是c的父节点(因为b*可以匹配零次)
/**
 * LeetCode 10 - regular expression matching with '.' (any single character)
 * and '*' (zero or more of the preceding element).
 *
 * <p>The pattern is first simplified by {@link #pretreatment(String)}, then
 * turned into an NFA by {@link #constructNFA(String)}. The input is matched
 * by walking the NFA one character at a time, backtracking over alternatives.
 *
 * <p>The pattern is assumed to be well formed: every '*' must have a preceding
 * element (a leading '*' makes {@code p.charAt(i - 1)} throw).
 */
class Solution {

    /**
     * Reports whether the whole of {@code s} matches the pattern {@code p}.
     *
     * @param s input text; may be empty
     * @param p pattern made of literals, '.' and '*'; may be empty
     * @return {@code true} if {@code p} matches all of {@code s}
     */
    public boolean isMatch(String s, String p) {
        if (p.equals(".*")) {
            return true;
        }
        // An empty pattern can only match the empty string.
        if (p.isEmpty()) {
            return s.isEmpty();
        }
        // pretreatment pattern
        p = pretreatment(p);
        if (p.equals(".*")) {
            return true;
        }
        // construct NFA
        State root = constructNFA(p);
        // Empty input consumes no transition: it matches iff the start state is
        // accepting (constructNFA marks it so when every element is starred).
        // Guarding here also avoids s.charAt(0) throwing on "".
        if (s.isEmpty()) {
            return root.isEnd;
        }
        // match s: try every state reachable on the first character
        for (State state : matchStates(root, s.charAt(0))) {
            if (doMatch(s, 0, state)) {
                return true; // any one successful path is enough
            }
        }
        return false;
    }

    /** A single NFA state. */
    private static class State {
        // This state's predecessors. A state that contains ITSELF here is a
        // starred (x*) state, i.e. one that may be skipped entirely.
        final Set<State> parents = new HashSet<>();
        // <condition, states>: transition table from a pattern character to next states.
        final Map<Character, Set<State>> nextStates = new HashMap<>();
        // Whether the match may end in this state.
        boolean isEnd = false;
    }

    /**
     * Simplifies the pattern by merging adjacent starred elements:
     * <pre>
     *   a* b* ... c*.*  ->  .*
     *   a* a* ... a*a*  ->  a*
     *   .* a* ... .*b*  ->  .*
     * </pre>
     *
     * @param p the raw pattern
     * @return the simplified pattern
     */
    private String pretreatment(String p) {
        char[] preP = new char[p.length()];
        int prePIndex = -1;
        for (int i = 0; i < p.length(); i++) {
            char ch = p.charAt(i);
            preP[++prePIndex] = ch;
            if (ch != '*') {
                continue;
            }
            // ".*": swallow every starred element that directly follows it
            if (p.charAt(i - 1) == '.') {
                while (i + 2 < p.length() && p.charAt(i + 2) == '*') {
                    i += 2;
                }
                continue;
            }
            // "a*": look ahead through the run of starred elements for a ".*"
            int j = i;
            boolean foundDotStar = false;
            while (j + 2 < p.length() && p.charAt(j + 2) == '*') {
                if (p.charAt(j + 1) == '.') {
                    foundDotStar = true;
                    break;
                }
                j += 2;
            }
            if (foundDotStar) {
                // a* b* ... c*.* -> .* : overwrite the element just copied with '.'
                preP[prePIndex - 1] = '.';
                i = j + 2;
                // and swallow whatever starred elements follow the ".*"
                while (i + 2 < p.length() && p.charAt(i + 2) == '*') {
                    i += 2;
                }
                continue;
            }
            // a* a* ... a*a* -> a* : skip repeats of the same starred character
            while (i + 2 < p.length() && p.charAt(i + 2) == '*') {
                if (p.charAt(i + 1) != preP[prePIndex - 1]) {
                    break;
                }
                i += 2;
            }
        }
        return new String(preP, 0, prePIndex + 1);
    }

    /**
     * Builds the NFA for an already simplified pattern.
     *
     * @param p the simplified pattern
     * @return the start state of the NFA
     */
    private State constructNFA(String p) {
        State start = new State();
        State current = start;
        int last = p.length() - 1;
        for (int i = 0; i < p.length(); i++) {
            char ch = p.charAt(i);
            if (ch != '*') {
                State next = new State();
                current.nextStates.computeIfAbsent(ch, k -> new HashSet<>()).add(next);
                // The previous element was starred and may match nothing, so the
                // states before it must be able to reach `next` directly as well.
                if (i >= 1 && p.charAt(i - 1) == '*') {
                    repeatedParentAddState(current, ch, next);
                }
                next.parents.add(current);
                current = next;
                if (i == last) {
                    current.isEnd = true;
                }
                continue;
            }
            // x* -> get x
            char repeated = p.charAt(i - 1);
            // x+ : a self loop lets the state consume one or more x
            current.nextStates.computeIfAbsent(repeated, k -> new HashSet<>()).add(current);
            // x{0} : the self-parent marks the state as skippable
            current.parents.add(current);
            if (i == last) {
                current.isEnd = true;
                // a trailing x* may match nothing, so its predecessors can end too
                repeatedParentMarkEnd(current);
            }
        }
        return start;
    }

    /**
     * Continues matching after {@code s.charAt(cur)} has been consumed and the
     * NFA is in {@code state}.
     *
     * @param s     the input text
     * @param cur   index of the character consumed last
     * @param state the state reached by consuming that character
     * @return {@code true} if the rest of {@code s} can be consumed ending in
     *         a state whose {@code isEnd} flag is set
     */
    public boolean doMatch(String s, int cur, State state) {
        if (cur + 1 == s.length()) {
            return state.isEnd;
        }
        for (State next : matchStates(state, s.charAt(cur + 1))) {
            if (doMatch(s, cur + 1, next)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the states reachable from {@code root} on character {@code ch},
     * through the literal transition and through the '.' wildcard transition.
     *
     * <p>The result is always a fresh set, so the transition table of
     * {@code root} is never modified by a lookup.
     *
     * @param root the state to leave
     * @param ch   the input character
     * @return the reachable states; empty if there are none
     */
    public Set<State> matchStates(State root, char ch) {
        Set<State> reachable = new HashSet<>();
        // states reached by the literal condition
        Set<State> byLiteral = root.nextStates.get(ch);
        if (byLiteral != null) {
            reachable.addAll(byLiteral);
        }
        // states reached by '.', which matches any character
        Set<State> byWildcard = root.nextStates.get('.');
        if (byWildcard != null) {
            reachable.addAll(byWildcard);
        }
        return reachable;
    }

    /**
     * If {@code root} is a starred (skippable) state, marks each of its
     * predecessors as accepting, continuing backwards through further
     * starred predecessors.
     */
    private void repeatedParentMarkEnd(State root) {
        if (!root.parents.contains(root)) {
            return; // not a starred state: it cannot be skipped
        }
        for (State parent : root.parents) {
            if (parent == root) {
                continue;
            }
            parent.isEnd = true;
            repeatedParentMarkEnd(parent);
        }
    }

    /**
     * If {@code root} is a starred (skippable) state, adds the transition
     * {@code ch -> state} to each of its predecessors, continuing backwards
     * through further starred predecessors.
     */
    private void repeatedParentAddState(State root, char ch, State state) {
        if (!root.parents.contains(root)) {
            return; // not a starred state: it cannot be skipped
        }
        for (State rootParent : root.parents) {
            Set<State> targets = rootParent.nextStates.computeIfAbsent(ch, k -> new HashSet<>());
            // add() returns false when the transition already exists; that
            // predecessor has been handled, so do not walk through it again.
            if (!targets.add(state)) {
                continue;
            }
            repeatedParentAddState(rootParent, ch, state);
        }
    }
}
优化
无