/** * * Pattern.js * @version: 0.1.1 * * Pattern Matching Algorithms implemented in JavaScript * https://github.com/foo123/PatternMatchingAlgorithms * **/!function( root, name, factory ) { "use strict"; // // export the module, umd-style (no other dependencies) var isCommonJS = ("object" === typeof(module)) && module.exports, isAMD = ("function" === typeof(define)) && define.amd, m; // CommonJS, node, etc.. if ( isCommonJS ) module.exports = (module.$deps = module.$deps || {})[ name ] = module.$deps[ name ] || (factory.call( root, {NODE:module} ) || 1); // AMD, requireJS, etc.. else if ( isAMD && ("function" === typeof(require)) && ("function" === typeof(require.specified)) && require.specified(name) ) define( name, ['require', 'exports', 'module'], function( require, exports, module ){ return factory.call( root, {AMD:module} ); } ); // browser, web worker, etc.. + AMD, other loaders else if ( !(name in root) ) (root[ name ] = (m=factory.call( root, {} ) || 1)) && isAMD && define( name, [], function( ){ return m; } ); }( /* current root */ this, /* module name */ "Pattern", /* module factory */ function( exports ) { /* main code starts here */ /** * * Pattern.js * @version: 0.1.1 * * Pattern Matching Algorithms implemented in JavaScript * https://github.com/foo123/PatternMatchingAlgorithms * **/ "use strict"; // http://en.wikipedia.org/wiki/String_searching_algorithm var Pattern = exports['Pattern'] = { VERSION: "0.1.1" }, undef=undefined; /* space, digits, capital latin letters, lower latin letters */ Pattern.ALPHABET = " 0123456789ABCDEFQHIJKLMNOPQRSTUVWXYZabcdefqhijklmnopqrstuvwxyz"; Pattern.utils = { array_fill: function(len, val) { var a = new Array(len), i; if ( 'function' === typeof(val) ) { for (i=0; i= ch ) return 0; // digit if ( 48 <= ch && 57 >= ch ) return ch - 47; // -48+1 // capital latin letter if ( 65 <= ch && 90 >= ch ) return ch - 54; // -65+11 // lower latin letter return ch - 60; // -97+11+26 }, alphabet_map: function( ) { return { " ": 0 ,"0": 0 ,"1": 0 ,"2": 0 ,"3": 0 ,"4": 0 ,"5": 0 ,"6": 0 ,"7": 0 ,"8": 0 ,"9": 0 ,"A": 0 ,"B": 0 ,"C": 0 ,"D": 0 ,"E": 0 ,"F": 0 ,"G": 0 ,"H": 0 ,"I": 0 ,"J": 0 ,"K": 0 ,"L": 0 ,"M": 0 ,"N": 0 ,"O": 0 ,"P": 0 ,"Q": 0 ,"R": 0 ,"S": 0 ,"T": 0 ,"U": 0 ,"V": 0 ,"W": 0 ,"X": 0 ,"Y": 0 ,"Z": 0 ,"a": 0 ,"b": 0 ,"c": 0 ,"d": 0 ,"e": 0 ,"f": 0 ,"g": 0 ,"h": 0 ,"i": 0 ,"j": 0 ,"k": 0 ,"l": 0 ,"m": 0 ,"n": 0 ,"o": 0 ,"p": 0 ,"q": 0 ,"r": 0 ,"s": 0 ,"t": 0 ,"u": 0 ,"v": 0 ,"w": 0 ,"x": 0 ,"y": 0 ,"z": 0 }; }, reverse: function( s ) { return s.split( '' ).reverse( ).join( '' ); } }; Pattern.Matcher = function( pattern ) { this.pattern( pattern || null ); }; Pattern.Matcher.prototype = { constructor: Pattern.Matcher, reference: null, description: '', _pattern: null, dispose: function( ) { this._pattern = null; return this; }, pattern: function( pattern ) { this._pattern = pattern || null; return this; }, match: function( s, offset ) { return -1; } }; !function(Pattern, undef){ "use strict"; // // Builtin Matcher // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf // Pattern.BuiltinMatcher = function( pattern ) { Pattern.Matcher.call(this, pattern); }; Pattern.BuiltinMatcher.prototype = Object.create( Pattern.Matcher.prototype ); Pattern.BuiltinMatcher.prototype.reference = 'https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf'; Pattern.BuiltinMatcher.prototype.description = "This is JavaScript's builtin string search algorithm (String.prototype.indexOf), usually a variation of Knuth-Morris-Pratt or Boyer-Moore algorithms."; Pattern.BuiltinMatcher.prototype.match = function( s, offset ) { var p = this._pattern, m = p.length, n = s.length; if ( arguments.length < 2 ) offset = 0; if ( offset < 0 ) offset += n; if ( n > 0 && m > 0 && n >= offset+m ) { return s.indexOf( p, offset ); } return -1; }; }(Pattern);!function(Pattern, undef){ "use strict"; // // "Naive" String Matcher // http://en.wikipedia.org/wiki/String_searching_algorithm // Pattern.NaiveMatcher = function( pattern ) { Pattern.Matcher.call(this, pattern); }; Pattern.NaiveMatcher.prototype = Object.create( Pattern.Matcher.prototype ); Pattern.NaiveMatcher.prototype.reference = 'http://en.wikipedia.org/wiki/String_searching_algorithm'; Pattern.NaiveMatcher.prototype.description = "This is a "naive" string search algorithm, in that it tests each succesive position of the input text to see if the pattern matches and does not use information about the pattern (or the text) in order to speed up the search."; Pattern.NaiveMatcher.prototype.match = function( s, offset ) { var p = this._pattern, n = s.length, m = p.length, i; if ( arguments.length < 2 ) offset = 0; if ( offset < 0 ) offset += n; if ( n > 0 && m > 0 && n >= offset+m ) { n = n-m+1; for (i=offset; i 0 && m > 0 && n >= offset+m ) { h = pow(D, m-1) % Q; pq = 0; sq = 0; // pre-processing for (i=0; i "naive" matching if ( pq === sq ) { if ( s.slice(i, i+m) === p ) return i; } // update text hash for next char using Horner algorithm if ( i < n ) { sq = ( D*(sq - h*alphabet_index(s.charAt(i))) + alphabet_index(s.charAt(i+m)) ) % Q; if ( sq < 0 ) sq += Q; } } } return -1; }; }(Pattern);!function(Pattern, undef){ "use strict"; // // Finite State Automaton (FSA) Matcher // http://en.wikipedia.org/wiki/Finite-state_machine // http://en.wikipedia.org/wiki/String_searching_algorithm // var min = Math.min, array_fill = Pattern.utils.array_fill, alphabet_map = Pattern.utils.alphabet_map, ALPHABET = Pattern.ALPHABET, ALPHABET_LEN = ALPHABET.length ; function isSuffix( s1, s2 ) { return s1 === s2.slice( -s1.length ); } function computeTransitionMatrix( p ) { var m = p.length, q, a, aa, k, delta; delta = array_fill( m, alphabet_map ); for (q=0; q 0 && !isSuffix(p.slice(0, k), p.slice(0, q)+aa) ) k--; delta[ q ][ aa ] = k; } } return delta; } Pattern.FSAMatcher = function( pattern ) { Pattern.Matcher.call(this, pattern); }; Pattern.FSAMatcher.prototype = Object.create( Pattern.Matcher.prototype ); Pattern.FSAMatcher.prototype.reference = 'http://en.wikipedia.org/wiki/Finite-state_machine'; Pattern.FSAMatcher.prototype.description = "The Finite State Automaton matcher (or FSA matcher) searches a text for a pattern, by creating a deterministic finite automaton (DFA) which is then used to parse the text. The FSA method is also used for matching a regular expression pattern." Pattern.FSAMatcher.prototype._delta = null; Pattern.FSAMatcher.prototype.dispose = function( ) { this._pattern = null; this._delta = null; return this; }; Pattern.FSAMatcher.prototype.pattern = function( pattern ) { this._pattern = pattern || null; if ( this._pattern ) { this._delta = computeTransitionMatrix( this._pattern ); } else { this._delta = null; } return this; }; Pattern.FSAMatcher.prototype.match = function( s, offset ) { var p = this._pattern, delta = this._delta, n = s.length, m = p.length, q, i; if ( arguments.length < 2 ) offset = 0; if ( offset < 0 ) offset += n; if ( n > 0 && m > 0 && n >= offset+m ) { q = 0; for (i=offset; i 0 && p.charAt( k+1 ) !== p.charAt( q ) ) k = prefix[ k ]; if ( p.charAt( k+1 ) === p.charAt( q ) ) k++; prefix[ q ] = k; } return prefix; } Pattern.KnuthMorrisPrattMatcher = function( pattern ) { Pattern.Matcher.call(this, pattern); }; Pattern.KnuthMorrisPrattMatcher.prototype = Object.create( Pattern.Matcher.prototype ); Pattern.KnuthMorrisPrattMatcher.prototype.reference = 'http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm'; Pattern.KnuthMorrisPrattMatcher.prototype.description = "The Knuth–Morris–Pratt algorithm (or KMP algorithm) searches for occurrences of a \"word\" W within a main \"text string\" S by employing the observation that when a mismatch occurs, the word itself embodies sufficient information to determine where the next match could begin, thus bypassing re-examination of previously matched characters." Pattern.KnuthMorrisPrattMatcher.prototype._prefix = null; Pattern.KnuthMorrisPrattMatcher.prototype.dispose = function( ) { this._pattern = null; this._prefix = null; return this; }; Pattern.KnuthMorrisPrattMatcher.prototype.pattern = function( pattern ) { this._pattern = pattern || null; if ( this._pattern ) { this._prefix = computePrefix( this._pattern ); } else { this._prefix = null; } return this; }; Pattern.KnuthMorrisPrattMatcher.prototype.match = function( s, offset ) { var p = this._pattern, prefix = this._prefix, n = s.length, m = p.length, i, q ; if ( arguments.length < 2 ) offset = 0; if ( offset < 0 ) offset += n; if ( n > 0 && m > 0 && n >= offset+m ) { q = 0; // number of characters matched for (i=offset; i 0 && p.charAt( q ) !== s.charAt( i ) ) { q = prefix[ q ]+1; } if ( p.charAt( q ) === s.charAt( i ) ) q++; // next character matches if ( m === q ) return i-m+1; } } return -1; }; }(Pattern);!function(Pattern, undef){ "use strict"; // // Boyer-Moore Matcher // http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm // var max = Math.max, array_fill = Pattern.utils.array_fill, reverse = Pattern.utils.reverse, alphabet_index = Pattern.utils.alphabet_index, ALPHABET = Pattern.ALPHABET, ALPHABET_LEN = ALPHABET.length ; function match_length( s, idx1, idx2 ) { // Returns the length of the match of the substrings of S beginning at idx1 and idx2. var sl = s.length, match_count; if ( idx1 === idx2 ) { return sl - idx1; } match_count = 0; while ( idx1 < sl && idx2 < sl && s.charAt( idx1 ) === s.charAt( idx2 ) ) { match_count++; idx1++; idx2++; } return match_count; } function fundamental_preprocess( s ) { // Returns Z, the Fundamental Preprocessing of S. Z[i] is the length of the substring // beginning at i which is also a prefix of S. This pre-processing is done in O(n) time, // where n is the length of S. var sl = s.length, i, z, l, r, a, b, k; if ( 0 === sl ) // Handles case of empty string { return [ ]; } if ( 1 === sl ) // Handles case of single-character string { return [ 1 ]; } z = array_fill( sl, 0 ); z[ 0 ] = sl; z[ 1 ] = match_length(s, 0, 1); for (i=2; i<1+z[1]; i++) // Optimization from exercise 1-5 { z[ i ] = z[ 1 ]-i+1; } // Defines lower and upper limits of z-box l = 0; r = 0; for (i=2+z[1]; i 0 ) { l = i; r = i+z[ i ]-1; } } } return z; } function bad_character_table( s ) { // Generates R for S, which is an array indexed by the position of some character c in the // English alphabet. At that index in R is an array of length |S|+1, specifying for each // index i in S (plus the index after S) the next location of character c encountered when // traversing S from right to left starting at i. This is used for a constant-time lookup // for the bad character rule in the Boyer-Moore string search algorithm, although it has // a much larger size than non-constant-time solutions. var sl = s.length, c, i, j, R, alpha; if (0 === sl) { return array_fill(ALPHABET_LEN, function( ){ return [ ]; }); } R = array_fill(ALPHABET_LEN, function( ){ return [ -1 ]; }); alpha = array_fill(ALPHABET_LEN, function( ){ return -1 ; }); for (i=0; i 0 && m > 0 && n >= offset+m ) { //matches = [] k = offset + m - 1; // Represents alignment of end of P relative to T previous_k = offset -1; // Represents alignment in previous phase (Galil's rule) while ( k < n ) { i = m - 1; // Character to compare in P h = k; // Character to compare in T while (i >= 0 && h > previous_k && p.charAt(i) === s.charAt(h)) // Matches starting from end of P { i--; h--; } if (-1 === i || h === previous_k) // Match has been found (Galil's rule) { return k - m + 1; //matches.append(k - len(P) + 1) //k += len(P)-F[1] if len(P) > 1 else 1 } else // No match, shift by max of bad character and good suffix rules { char_shift = i - R[ alphabet_index( s.charAt( h ) ) ][ i ]; if (i+1 === m) // Mismatch happened on first attempt { suffix_shift = 1; } else if (-1 === L[i+1]) // Matched suffix does not appear anywhere in P { suffix_shift = m - F[i+1]; } else // Matched suffix appears in P { suffix_shift = m - L[i+1]; } shift = max( char_shift, suffix_shift ); if ( shift >= i+1 ) previous_k = k; // Galil's rule k += shift; } } } return -1; }; }(Pattern); /* main code ends here */ /* export the module */ return exports["Pattern"]; });