helper_diff.js (9359B)
1 /** 2 * This diff utility is taken from: 3 * https://github.com/Slava/diff.js 4 * 5 * The MIT License (MIT) 6 * 7 * Copyright (c) 2014 Slava 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining a copy 10 * of this software and associated documentation files (the "Software"), to deal 11 * in the Software without restriction, including without limitation the rights 12 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 * copies of the Software, and to permit persons to whom the Software is 14 * furnished to do so, subject to the following conditions: 15 * 16 * The above copyright notice and this permission notice shall be included in 17 * all copies or substantial portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 * THE SOFTWARE. 26 */ 27 28 /** 29 * USAGE: 30 * diff(text1, text2); 31 */ 32 33 /** 34 * Longest Common Subsequence 35 * 36 * @param A - sequence of atoms - Array 37 * @param B - sequence of atoms - Array 38 * @param equals - optional comparator of atoms - returns true or false, 39 * if not specified, triple equals operator is used 40 * @returns Array - sequence of atoms, one of LCSs, edit script from A to B 41 */ 42 var LCS = function (A, B, /* optional */ equals) { 43 // We just compare atoms with default equals operator by default 44 if (equals === undefined) 45 equals = function (a, b) { return a === b; }; 46 47 // NOTE: all intervals from now on are both sides inclusive 48 // Get the points in Edit Graph, one of the LCS paths goes through. 49 // The points are located on the same diagonal and represent the middle 50 // snake ([D/2] out of D+1) in the optimal edit path in edit graph. 51 // @param startA, endA - substring of A we are working on 52 // @param startB, endB - substring of B we are working on 53 // @returns Array - [ 54 // [x, y], - beginning of the middle snake 55 // [u, v], - end of the middle snake 56 // D, - optimal edit distance 57 // LCS ] - length of LCS 58 var findMidSnake = function (startA, endA, startB, endB) { 59 var N = endA - startA + 1; 60 var M = endB - startB + 1; 61 var Max = N + M; 62 var Delta = N - M; 63 var halfMaxCeil = (Max + 1) / 2 | 0; 64 65 var foundOverlap = false; 66 var overlap = null; 67 68 // Maps -Max .. 0 .. +Max, diagonal index to endpoints for furthest reaching 69 // D-path on current iteration. 70 var V = {}; 71 // Same but for reversed paths. 72 var U = {}; 73 74 // Special case for the base case, D = 0, k = 0, x = y = 0 75 V[1] = 0; 76 // Special case for the base case reversed, D = 0, k = 0, x = N, y = M 77 U[Delta - 1] = N; 78 79 // Iterate over each possible length of edit script 80 for (var D = 0; D <= halfMaxCeil; D++) { 81 // Iterate over each diagonal 82 for (var k = -D; k <= D && !overlap; k += 2) { 83 // Positions in sequences A and B of furthest going D-path on diagonal k. 84 var x, y; 85 86 // Choose from each diagonal we extend 87 if (k === -D || (k !== D && V[k - 1] < V[k + 1])) 88 // Extending path one point down, that's why x doesn't change, y 89 // increases implicitly 90 x = V[k + 1]; 91 else 92 // Extending path one point to the right, x increases 93 x = V[k - 1] + 1; 94 95 // We can calculate the y out of x and diagonal index. 96 y = x - k; 97 98 if (isNaN(y) || x > N || y > M) 99 continue; 100 101 var xx = x; 102 // Try to extend the D-path with diagonal paths. Possible only if atoms 103 // A_x match B_y 104 while (x < N && y < M // if there are atoms to compare 105 && equals(A[startA + x], B[startB + y])) { 106 x++; y++; 107 } 108 109 // We can safely update diagonal k, since on every iteration we consider 110 // only even or only odd diagonals and the result of one depends only on 111 // diagonals of different iteration. 112 V[k] = x; 113 114 // Check feasibility, Delta is checked for being odd. 115 if ((Delta & 1) === 1 && inRange(k, Delta - (D - 1), Delta + (D - 1))) 116 // Forward D-path can overlap with reversed D-1-path 117 if (V[k] >= U[k]) 118 // Found an overlap, the middle snake, convert X-components to dots 119 overlap = [xx, x].map(toPoint, k); // XXX ES5 120 } 121 122 if (overlap) 123 var SES = D * 2 - 1; 124 125 // Iterate over each diagonal for reversed case 126 for (var k = -D; k <= D && !overlap; k += 2) { 127 // The real diagonal we are looking for is k + Delta 128 var K = k + Delta; 129 var x, y; 130 if (k === D || (k !== -D && U[K - 1] < U[K + 1])) 131 x = U[K - 1]; 132 else 133 x = U[K + 1] - 1; 134 135 y = x - K; 136 if (isNaN(y) || x < 0 || y < 0) 137 continue; 138 var xx = x; 139 while (x > 0 && y > 0 && equals(A[startA + x - 1], B[startB + y - 1])) { 140 x--; y--; 141 } 142 U[K] = x; 143 144 if (Delta % 2 === 0 && inRange(K, -D, D)) 145 if (U[K] <= V[K]) 146 overlap = [x, xx].map(toPoint, K); // XXX ES5 147 } 148 149 if (overlap) { 150 SES = SES || D * 2; 151 // Remember we had offset of each sequence? 152 for (var i = 0; i < 2; i++) for (var j = 0; j < 2; j++) 153 overlap[i][j] += [startA, startB][j] - i; 154 return overlap.concat([ SES, (Max - SES) / 2 ]); 155 } 156 } 157 }; 158 159 var lcsAtoms = []; 160 var lcs = function (startA, endA, startB, endB) { 161 var N = endA - startA + 1; 162 var M = endB - startB + 1; 163 164 if (N > 0 && M > 0) { 165 var middleSnake = findMidSnake(startA, endA, startB, endB); 166 // A[x;u] == B[y,v] and is part of LCS 167 var x = middleSnake[0][0], y = middleSnake[0][1]; 168 var u = middleSnake[1][0], v = middleSnake[1][1]; 169 var D = middleSnake[2]; 170 171 if (D > 1) { 172 lcs(startA, x - 1, startB, y - 1); 173 if (x <= u) { 174 [].push.apply(lcsAtoms, A.slice(x, u + 1)); 175 } 176 lcs(u + 1, endA, v + 1, endB); 177 } else if (M > N) 178 [].push.apply(lcsAtoms, A.slice(startA, endA + 1)); 179 else 180 [].push.apply(lcsAtoms, B.slice(startB, endB + 1)); 181 } 182 }; 183 184 lcs(0, A.length - 1, 0, B.length - 1); 185 return lcsAtoms; 186 }; 187 188 // Helpers 189 var inRange = function (x, l, r) { 190 return (l <= x && x <= r) || (r <= x && x <= l); 191 }; 192 193 // Takes X-component as argument, diagonal as context, 194 // returns array-pair of form x, y 195 var toPoint = function (x) { 196 return [x, x - this]; // XXX context is not the best way to pass diagonal 197 }; 198 199 // Wrappers 200 LCS.StringLCS = function (A, B) { 201 return LCS(A.split(''), B.split('')).join(''); 202 }; 203 204 /** 205 * Diff sequence 206 * 207 * @param A - sequence of atoms - Array 208 * @param B - sequence of atoms - Array 209 * @param equals - optional comparator of atoms - returns true or false, 210 * if not specified, triple equals operator is used 211 * @returns Array - sequence of objects in a form of: 212 * - operation: one of "none", "add", "delete" 213 * - atom: the atom found in either A or B 214 * Applying operations from diff sequence you should be able to transform A to B 215 */ 216 function diff(A, B, equals) { 217 // We just compare atoms with default equals operator by default 218 if (equals === undefined) 219 equals = function (a, b) { return a === b; }; 220 221 var diff = []; 222 var i = 0, j = 0; 223 var N = A.length, M = B.length, K = 0; 224 225 while (i < N && j < M && equals(A[i], B[j])) 226 i++, j++; 227 228 while (i < N && j < M && equals(A[N-1], B[M-1])) 229 N--, M--, K++; 230 231 [].push.apply(diff, A.slice(0, i).map(function (atom) { 232 return { operation: "none", atom: atom }; })); 233 234 var lcs = LCS(A.slice(i, N), B.slice(j, M), equals); 235 236 for (var k = 0; k < lcs.length; k++) { 237 var atom = lcs[k]; 238 var ni = customIndexOf.call(A, atom, i, equals); 239 var nj = customIndexOf.call(B, atom, j, equals); 240 241 // XXX ES5 map 242 // Delete unmatched atoms from A 243 [].push.apply(diff, A.slice(i, ni).map(function (atom) { 244 return { operation: "delete", atom: atom }; 245 })); 246 247 // Add unmatched atoms from B 248 [].push.apply(diff, B.slice(j, nj).map(function (atom) { 249 return { operation: "add", atom: atom }; 250 })); 251 252 // Add the atom found in both sequences 253 diff.push({ operation: "none", atom: atom }); 254 255 i = ni + 1; 256 j = nj + 1; 257 } 258 259 // Don't forget about the rest 260 261 [].push.apply(diff, A.slice(i, N).map(function (atom) { 262 return { operation: "delete", atom: atom }; 263 })); 264 265 [].push.apply(diff, B.slice(j, M).map(function (atom) { 266 return { operation: "add", atom: atom }; 267 })); 268 269 [].push.apply(diff, A.slice(N, N + K).map(function (atom) { 270 return { operation: "none", atom: atom }; })); 271 272 return diff; 273 }; 274 275 // Accepts custom comparator 276 var customIndexOf = function(item, start, equals){ 277 var arr = this; 278 for (var i = start; i < arr.length; i++) 279 if (equals(item, arr[i])) 280 return i; 281 return -1; 282 }; 283 284 function textDiff(text1, text2) { 285 return diff(text1.split("\n"), text2.split("\n")); 286 }