all articles

leetcode war 10 - Regular Expression Matching

2016-11-22 @sunderls

leetCode js

https://leetcode.com/problems/regular-expression-matching/

'.' Matches any single character.

'*' Matches zero or more of the preceding element.

The matching should cover the entire input string (not partial).

The function prototype should be:

bool isMatch(const char *s, const char *p)

Some examples:

isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "a*") → true
isMatch("aa", ".*") → true
isMatch("ab", ".*") → true
isMatch("aab", "c*a*b") → true

analysis

well this is about regular expression, seems difficult. The first picture that came to my head is :

  • create 2 cursors for the string & regexp, math them one by one
  • if come across Character Class(.) or Quantifier(*), do some matching & recursively check tht rest

for "aab" & "c*a*b" as an example

isMatch("aab", "c*a*b")

i = 0, j = 0;
since r[0] === 'c' && r[1] === '*'
    isMatch("aab", "a*b")

isMath("aab", "a*b")
i = 0, j = 0;
since r[0] === 'a' && r[1] === '*'
    isMath("aab", "b") or isMath("ab", "b") or isMath("b", "b")
    success

code basic

var check = function(s, p, i, j, lengthS, lengthP){
    // if regexp is done, check whether input string is done
    if (j === lengthP){
        return i === lengthS;
    }

    // default to false
    var result = false;

    // if regexp has quantifier
    if (j + 1 < lengthP && p[j + 1] === '*'){
        // loop to check all the possibilities if first chara matches
        if (s[i] === p[j] || p[j] === '.'){
            for(var k = i; k < lengthS + 1; k++){
                if (result = check(s, p, k, j + 2, lengthS, lengthP) ){
                    break;
                }
                if (s[k] !== p[j] && p[j] !== '.'){
                    break;
                }
            }
        } else {
            result = check(s, p, i, j + 2, lengthS, lengthP);
        }

    // if no quantifier, check next
    } else {
        if (s[i] === p[j] || p[j] === '.'){
            result = check(s, p, i + 1, j + 1, lengthS, lengthP);
        }
    }

    return result;
}

var isMatch = function(s, p) {
    return check(s, p, 0, 0, s.length, p.length);
};

after some modification, above solution passed, with 285ms, ranking last 40% in js submissions. Oh. how can we do better?

improvement 1 - Memoization

for better performance, there should be some check we can omit, let's first print out some logs

var check = function(s, p, i, j, lengthS, lengthP){
    console.log(i, j, s.slice(0,i) + '[' + s.slice(i) + ']', p.slice(0,j) + '[' + p.slice(j) + ']');
    ...

then try on isMatch("abaaaaaa","a.*.*da"), and we can get:

0 0 '[abaaaaaa]' '[a.*.*da]'
1 1 'a[baaaaaa]' 'a[.*.*da]'
1 3 'a[baaaaaa]' 'a.*[.*da]'
1 5 'a[baaaaaa]' 'a.*.*[da]'
2 5 'ab[aaaaaa]' 'a.*.*[da]'
3 5 'aba[aaaaa]' 'a.*.*[da]'
4 5 'abaa[aaaa]' 'a.*.*[da]'
5 5 'abaaa[aaa]' 'a.*.*[da]'
6 5 'abaaaa[aa]' 'a.*.*[da]'
7 5 'abaaaaa[a]' 'a.*.*[da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'
2 3 'ab[aaaaaa]' 'a.*[.*da]'
2 5 'ab[aaaaaa]' 'a.*.*[da]'
3 5 'aba[aaaaa]' 'a.*.*[da]'
4 5 'abaa[aaaa]' 'a.*.*[da]'
5 5 'abaaa[aaa]' 'a.*.*[da]'
6 5 'abaaaa[aa]' 'a.*.*[da]'
7 5 'abaaaaa[a]' 'a.*.*[da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'
3 3 'aba[aaaaa]' 'a.*[.*da]'
3 5 'aba[aaaaa]' 'a.*.*[da]'
4 5 'abaa[aaaa]' 'a.*.*[da]'
5 5 'abaaa[aaa]' 'a.*.*[da]'
6 5 'abaaaa[aa]' 'a.*.*[da]'
7 5 'abaaaaa[a]' 'a.*.*[da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'
4 3 'abaa[aaaa]' 'a.*[.*da]'
4 5 'abaa[aaaa]' 'a.*.*[da]'
5 5 'abaaa[aaa]' 'a.*.*[da]'
6 5 'abaaaa[aa]' 'a.*.*[da]'
7 5 'abaaaaa[a]' 'a.*.*[da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'
5 3 'abaaa[aaa]' 'a.*[.*da]'
5 5 'abaaa[aaa]' 'a.*.*[da]'
6 5 'abaaaa[aa]' 'a.*.*[da]'
7 5 'abaaaaa[a]' 'a.*.*[da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'
6 3 'abaaaa[aa]' 'a.*[.*da]'
6 5 'abaaaa[aa]' 'a.*.*[da]'
7 5 'abaaaaa[a]' 'a.*.*[da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'
7 3 'abaaaaa[a]' 'a.*[.*da]'
7 5 'abaaaaa[a]' 'a.*.*[da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'
8 3 'abaaaaaa[]' 'a.*[.*da]'
8 5 'abaaaaaa[]' 'a.*.*[da]'

oh, 5 5 6 5 .etc had been checked multiple times. The reason is that there are 2 quantifiers in regexp...

to solve this , we can use memoization, solve the key pair & result

/**
 * @param {string} s
 * @param {string} p
 * @param {i} start of s
 * @param {j} start of p
 * @param {lengthS} length of s
 * @param {lengthP} length of p
 * @param {array} results array for memoization
 * @return {boolean}
 */

var check = function(s, p, i, j, lengthS, lengthP, results){
    if (typeof results[i][j] !== 'undefined'){
        return results[i][j];
    }
    // if regexp is done, check whether input string is done
    if (j === lengthP){
        results[i][j] = i === lengthS;
        return results[i][j];
    } else {
        // default to false
        var result = false;

        // if regexp has quantifier
        if (j + 1 < lengthP && p[j + 1] === '*'){
            // loop to check all the possibilities if first chara matches
            if (p[j] === '.' && i < lengthS){
                for(var k = i; k < lengthS + 1; k++){
                    if (result = check(s, p, k, j + 2, lengthS, lengthP, results) ){
                        break;
                    }
                }
            } else if (s[i] === p[j]){
                for(var k = i; k < lengthS + 1; k++){
                    if (result = check(s, p, k, j + 2, lengthS, lengthP, results) ){
                        break;
                    }
                    if (s[k] !== s[i]){
                        break;
                    }
                }

            } else {
                result = check(s, p, i, j + 2, lengthS, lengthP, results);
            }

        // if no quantifier, check next
        } else {
            if (i < lengthS && (s[i] === p[j] || p[j] === '.')){
                result = check(s, p, i + 1, j + 1, lengthS, lengthP, results);
            }
        }

        results[i][j] = result;
        return results[i][j];
    }

}

var isMatch = function(s, p) {
    var lengthS = s.length;
    var lengthP = p.length;
    var results = [];
    for (var i = 0; i < lengthS + 1; i++){
        results.push(new Array(lengthP + 1));
    }
    return check(s, p, 0, 0, lengthS, lengthP, results);
};

This time run at 122 ms, ranking top 10% of all js submisions. Much Better