RecipeExecutor.mjs (32853B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * RecipeExecutor is the core feature engineering pipeline for the in-browser
 * personalization work. These pipelines are called "recipes". A recipe is an
 * array of objects that define a "step" in the recipe. A step is simply an
 * object with a field "function" that specifies what is being done in the step
 * along with other fields that are semantically defined for that step.
 *
 * There are two types of recipes: "builder" recipes and "combiner" recipes.
 * Builder recipes mutate an object until it matches some set of criteria.
 * Combiner recipes take two objects, (a "left" and a "right"), and specify the
 * steps to merge the right object into the left object.
 *
 * A short nonsense example recipe is:
 * [ {"function": "get_url_domain", "path_length": 1, "field": "url", "dest": "url_domain"},
 *   {"function": "nb_tag", "fields": ["title", "description"]},
 *   {"function": "conditionally_nmf_tag", "fields": ["title", "description"]} ]
 *
 * Recipes are sandboxed by the fact that the step functions must be explicitly
 * allowed. Functions allowed for builder recipes are specified in the
 * RecipeExecutor.ITEM_BUILDER_REGISTRY, while combiner functions are allowed
 * in RecipeExecutor.ITEM_COMBINER_REGISTRY. Only names that are own keys of
 * those registries are honoured; names inherited from Object.prototype (such
 * as "toString" or "__proto__") are rejected like any other unknown step.
 *
 * Unless stated otherwise, every step returns the (mutated) item on success
 * and null to abort the pipeline.
 */
export class RecipeExecutor {
  /**
   * @param {Array} nbTaggers   Iterable of naive bayes taggers; each must
   *                            expose tagTokens(tokens).
   * @param {object} nmfTaggers Map of parent tag name to an NMF tagger
   *                            exposing tagTokens(tokens).
   * @param {Function} tokenize Function turning a string into an array of
   *                            tokens.
   */
  constructor(nbTaggers, nmfTaggers, tokenize) {
    this.ITEM_BUILDER_REGISTRY = {
      nb_tag: this.naiveBayesTag,
      conditionally_nmf_tag: this.conditionallyNmfTag,
      accept_item_by_field_value: this.acceptItemByFieldValue,
      tokenize_url: this.tokenizeUrl,
      get_url_domain: this.getUrlDomain,
      tokenize_field: this.tokenizeField,
      copy_value: this.copyValue,
      keep_top_k: this.keepTopK,
      scalar_multiply: this.scalarMultiply,
      elementwise_multiply: this.elementwiseMultiply,
      vector_multiply: this.vectorMultiply,
      scalar_add: this.scalarAdd,
      vector_add: this.vectorAdd,
      make_boolean: this.makeBoolean,
      allow_fields: this.allowFields,
      filter_by_value: this.filterByValue,
      l2_normalize: this.l2Normalize,
      prob_normalize: this.probNormalize,
      set_default: this.setDefault,
      lookup_value: this.lookupValue,
      copy_to_map: this.copyToMap,
      scalar_multiply_tag: this.scalarMultiplyTag,
      apply_softmax_tags: this.applySoftmaxTags,
    };
    this.ITEM_COMBINER_REGISTRY = {
      combiner_add: this.combinerAdd,
      combiner_max: this.combinerMax,
      combiner_collect_values: this.combinerCollectValues,
    };
    this.nbTaggers = nbTaggers;
    this.nmfTaggers = nmfTaggers;
    this.tokenize = tokenize;
  }

  /**
   * Determines the type of a field. Returns one of:
   *  "null"    for null
   *  "array"   for arrays
   *  "map"     for any other object (strings to anything)
   *  otherwise the result of `typeof` ("string", "number", "undefined", ...)
   */
  _typeOf(data) {
    let t = typeof data;
    if (t === "object") {
      if (data === null) {
        return "null";
      }
      if (Array.isArray(data)) {
        return "array";
      }
      return "map";
    }
    return t;
  }

  /**
   * Returns a scalar, either because it was a constant, or by
   * looking it up from the item. Allows for a default value if the lookup
   * fails.
   *
   * @param {object} item    Item to read from when k is a field name
   * @param {number|string} k A number (returned as is) or a field name
   * @param {*} dfault       Returned when k is neither a number nor the
   *                         name of a numeric field of item
   */
  _lookupScalar(item, k, dfault) {
    if (this._typeOf(k) === "number") {
      return k;
    } else if (
      this._typeOf(k) === "string" &&
      k in item &&
      this._typeOf(item[k]) === "number"
    ) {
      return item[k];
    }
    return dfault;
  }

  /**
   * Picks the fallback scalar for steps that document an optional default.
   * The historical config key is "dfault"; the documented spelling "default"
   * is accepted as an alias. When neither is supplied the fallback is 0, so
   * an unresolved scalar can no longer turn values into NaN.
   */
  _defaultScalar(config) {
    if (config.dfault !== undefined) {
      return config.dfault;
    }
    if (config.default !== undefined) {
      return config.default;
    }
    return 0;
  }

  /**
   * Parses a URL string. Returns the URL object, or null when the value
   * cannot be parsed (new URL() throws in that case).
   */
  _parseUrl(value) {
    try {
      return new URL(value);
    } catch (e) {
      return null;
    }
  }

  /**
   * Replaces "+" with a space and percent-decodes the text. If the text
   * contains a malformed escape sequence (decodeURIComponent throws a
   * URIError) the text is returned undecoded instead of aborting.
   */
  _decodeUrlText(text) {
    let spaced = text.replace(/\+/g, " ");
    try {
      return decodeURIComponent(spaced);
    } catch (e) {
      if (!(e instanceof URIError)) {
        throw e;
      }
      // e.g. a literal "%" that is not followed by two hex digits
      return spaced;
    }
  }

  /**
   * Looks up a step function by name. Only own, callable entries of the
   * registry count as allowed; anything else yields undefined.
   */
  _resolveStep(registry, name) {
    if (!Object.prototype.hasOwnProperty.call(registry, name)) {
      return undefined;
    }
    let op = registry[name];
    return typeof op === "function" ? op : undefined;
  }

  /**
   * Simply appends all the strings from a set of fields together, separated
   * by a single space. If the field is a list, then the cells of the list
   * are appended. Any other value is converted with String(). Fields that
   * are missing from the item are skipped.
   */
  _assembleText(item, fields) {
    let textArr = [];
    for (let field of fields) {
      if (field in item) {
        let type = this._typeOf(item[field]);
        if (type === "string") {
          textArr.push(item[field]);
        } else if (type === "array") {
          for (let ele of item[field]) {
            textArr.push(String(ele));
          }
        } else {
          textArr.push(String(item[field]));
        }
      }
    }
    return textArr.join(" ");
  }

  /**
   * Runs the naive bayes text taggers over a set of text fields. Stores the
   * results in new fields:
   *  nb_tags:          a map of text strings to probabilities
   *  nb_tags_extended: a map of text strings to the full tagger result
   *  nb_tokens:        the tokenized text that was tagged
   *
   * Only results with a non-null label that are marked confident are kept.
   *
   * Config:
   *  fields:           an array containing a list of fields to concatenate and tag
   */
  naiveBayesTag(item, config) {
    let text = this._assembleText(item, config.fields);
    let tokens = this.tokenize(text);
    let tags = {};
    let extended_tags = {};

    for (let nbTagger of this.nbTaggers) {
      let result = nbTagger.tagTokens(tokens);
      if (result.label !== null && result.confident) {
        extended_tags[result.label] = result;
        tags[result.label] = Math.exp(result.logProb);
      }
    }
    item.nb_tags = tags;
    item.nb_tags_extended = extended_tags;
    item.nb_tokens = tokens;
    return item;
  }

  /**
   * Selectively runs NMF text taggers depending on which tags were found
   * by the naive bayes taggers. Writes the results into new fields:
   *  nmf_tags_parent_weights:  map of parent tags to probabilities of those parent tags
   *  nmf_tags:                 map of strings to maps of strings to probabilities
   *  nmf_tags_parent:          map of child tags to parent tags
   *
   * Returns null when the item has no nb_tags or nb_tokens field.
   *
   * Config:
   *  Not configurable
   */
  conditionallyNmfTag(item) {
    let nestedNmfTags = {};
    let parentTags = {};
    let parentWeights = {};

    if (!("nb_tags" in item) || !("nb_tokens" in item)) {
      return null;
    }

    Object.keys(item.nb_tags).forEach(parentTag => {
      let nmfTagger = this.nmfTaggers[parentTag];
      if (nmfTagger !== undefined) {
        nestedNmfTags[parentTag] = {};
        parentWeights[parentTag] = item.nb_tags[parentTag];
        let nmfTags = nmfTagger.tagTokens(item.nb_tokens);
        Object.keys(nmfTags).forEach(nmfTag => {
          nestedNmfTags[parentTag][nmfTag] = nmfTags[nmfTag];
          parentTags[nmfTag] = parentTag;
        });
      }
    });

    item.nmf_tags = nestedNmfTags;
    item.nmf_tags_parent = parentTags;
    item.nmf_tags_parent_weights = parentWeights;

    return item;
  }

  /**
   * Checks a field's value against another value (either from another field
   * or a constant). If the test passes, then the item is emitted, otherwise
   * the pipeline is aborted.
   *
   * Config:
   *  field      Field to read the value to test. Left side of operator.
   *  op         one of ==, !=, <, <=, >, >=
   *  rhsValue   Constant value to compare against. Right side of operator.
   *  rhsField   Field to read value to compare against. Right side of operator.
   *
   * NOTE: rhsValue takes precedence over rhsField. == and != use loose
   * (coercing) comparison on purpose.
   */
  acceptItemByFieldValue(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let rhs = null;
    if ("rhsValue" in config) {
      rhs = config.rhsValue;
    } else if ("rhsField" in config && config.rhsField in item) {
      rhs = item[config.rhsField];
    }
    if (rhs === null) {
      return null;
    }

    if (
      // eslint-disable-next-line eqeqeq
      (config.op === "==" && item[config.field] == rhs) ||
      // eslint-disable-next-line eqeqeq
      (config.op === "!=" && item[config.field] != rhs) ||
      (config.op === "<" && item[config.field] < rhs) ||
      (config.op === "<=" && item[config.field] <= rhs) ||
      (config.op === ">" && item[config.field] > rhs) ||
      (config.op === ">=" && item[config.field] >= rhs)
    ) {
      return item;
    }

    return null;
  }

  /**
   * Splits a URL into text-like tokens: the hostname tokens first, then the
   * path tokens, then for each query parameter its key tokens followed by
   * its value tokens.
   *
   * Returns null when the field is missing or does not hold a parseable URL.
   *
   * Config:
   *  field   Field containing a URL
   *  dest    Field to write the tokens to as an array of strings
   *
   * NOTE: Any initial 'www' on the hostname is removed.
   */
  tokenizeUrl(item, config) {
    if (!(config.field in item)) {
      return null;
    }

    let url = this._parseUrl(item[config.field]);
    if (url === null) {
      return null;
    }
    let domain = url.hostname;
    if (domain.startsWith("www.")) {
      domain = domain.substring(4);
    }
    let toks = this.tokenize(domain);
    let pathToks = this.tokenize(this._decodeUrlText(url.pathname));
    for (let tok of pathToks) {
      toks.push(tok);
    }
    for (let pair of url.searchParams.entries()) {
      // searchParams already percent-decodes its entries, so the extra
      // decode below must tolerate a literal "%" in a key or value.
      let k = this.tokenize(this._decodeUrlText(pair[0]));
      for (let tok of k) {
        toks.push(tok);
      }
      if (pair[1] !== null && pair[1] !== "") {
        let v = this.tokenize(this._decodeUrlText(pair[1]));
        for (let tok of v) {
          toks.push(tok);
        }
      }
    }
    item[config.dest] = toks;

    return item;
  }

  /**
   * Gets the lower-cased hostname (minus any initial "www.") along with the
   * leftmost directories on the path.
   *
   * Returns null when the field is missing or does not hold a parseable URL.
   *
   * Config:
   *  field          Field containing the URL
   *  dest           Field to write the resulting string to
   *  path_length    OPTIONAL (DEFAULT: 0) Number of leftmost subdirectories to include
   */
  getUrlDomain(item, config) {
    if (!(config.field in item)) {
      return null;
    }

    let url = this._parseUrl(item[config.field]);
    if (url === null) {
      return null;
    }
    let domain = url.hostname.toLocaleLowerCase();
    if (domain.startsWith("www.")) {
      domain = domain.substring(4);
    }
    item[config.dest] = domain;
    let pathLength = 0;
    if ("path_length" in config) {
      pathLength = config.path_length;
    }
    if (pathLength > 0) {
      // The pathname starts with "/", so the first split cell is empty;
      // hence pathLength + 1 cells are kept.
      item[config.dest] += url.pathname
        .toLocaleLowerCase()
        .split("/")
        .slice(0, pathLength + 1)
        .join("/");
    }

    return item;
  }

  /**
   * Splits a field into tokens.
   * Config:
   *  field         Field containing a string to tokenize
   *  dest          Field to write the array of strings to
   */
  tokenizeField(item, config) {
    if (!(config.field in item)) {
      return null;
    }

    item[config.dest] = this.tokenize(item[config.field]);

    return item;
  }

  /**
   * Deep copy from one field to another (via a JSON round trip).
   * Config:
   *  src           Field to read from
   *  dest          Field to write to
   */
  copyValue(item, config) {
    if (!(config.src in item)) {
      return null;
    }

    item[config.dest] = JSON.parse(JSON.stringify(item[config.src]));

    return item;
  }

  /**
   * Converts a field containing a map of strings to a map of strings
   * to numbers, to a map of strings to numbers containing at most k elements.
   * This operation is performed by first, promoting all the subkeys up one
   * level, and then taking the top (or bottom) k values. Entries of the
   * outer map that are not maps themselves are kept under their own key.
   *
   * Config:
   *  field         Points to a map of strings to a map of strings to numbers
   *  k             Maximum number of items to keep (number or field name;
   *                falls back to 1048576 when it cannot be resolved)
   *  descending    OPTIONAL (DEFAULT: True) Sorts score in descending order
   *                  (i.e. keeps maximum)
   */
  keepTopK(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let k = this._lookupScalar(item, config.k, 1048576);
    let descending = !("descending" in config) || config.descending !== false;

    // we can't sort by the values in the map, so we have to convert this
    // to an array, and then sort.
    let sortable = [];
    Object.keys(item[config.field]).forEach(outerKey => {
      let innerType = this._typeOf(item[config.field][outerKey]);
      if (innerType === "map") {
        Object.keys(item[config.field][outerKey]).forEach(innerKey => {
          sortable.push({
            key: innerKey,
            value: item[config.field][outerKey][innerKey],
          });
        });
      } else {
        sortable.push({ key: outerKey, value: item[config.field][outerKey] });
      }
    });

    sortable.sort((a, b) => {
      if (descending) {
        return b.value - a.value;
      }
      return a.value - b.value;
    });

    // now take the top k
    let newMap = {};
    let i = 0;
    for (let pair of sortable) {
      if (i >= k) {
        break;
      }
      newMap[pair.key] = pair.value;
      i++;
    }
    item[config.field] = newMap;

    return item;
  }

  /**
   * Scalar multiplies a vector by some constant
   *
   * Config:
   *  field         Points to:
   *                   a map of strings to numbers
   *                   an array of numbers
   *                   a number
   *  k             Either a number, or a string. If it's a number then this
   *                  is the scalar value to multiply by. If it's a string,
   *                  the value in the pointed to field is used.
   *  dfault        OPTIONAL (DEFAULT: 0), If k is a string, and no numeric
   *                  value is found, then use this value. The spelling
   *                  "default" is accepted as an alias.
   */
  scalarMultiply(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let k = this._lookupScalar(item, config.k, this._defaultScalar(config));

    let fieldType = this._typeOf(item[config.field]);
    if (fieldType === "number") {
      item[config.field] *= k;
    } else if (fieldType === "array") {
      for (let i = 0; i < item[config.field].length; i++) {
        item[config.field][i] *= k;
      }
    } else if (fieldType === "map") {
      Object.keys(item[config.field]).forEach(key => {
        item[config.field][key] *= k;
      });
    } else {
      return null;
    }

    return item;
  }

  /**
   * Elementwise multiplies either two maps, two arrays or two numbers
   * together, storing the result in left. If left and right are not of the
   * same type, or are arrays of different lengths, the step fails.
   *
   * Maps are special case. For maps the left must be a nested map such as:
   * { k1: { k11: 1, k12: 2}, k2: { k21: 3, k22: 4 } } and right needs to be
   * simple map such as: { k1: 5, k2: 6} .  The operation is then to multiply
   * every value of every right key, to every value every subkey where the
   * parent keys match. Using the previous examples, the result would be:
   * { k1: { k11: 5, k12: 10 }, k2: { k21: 18, k22: 24 } } .
   * Parent keys of left that are missing from right are multiplied by 0.
   *
   * Config:
   *  left
   *  right
   */
  elementwiseMultiply(item, config) {
    if (!(config.left in item) || !(config.right in item)) {
      return null;
    }
    let leftType = this._typeOf(item[config.left]);
    if (leftType !== this._typeOf(item[config.right])) {
      return null;
    }
    if (leftType === "array") {
      if (item[config.left].length !== item[config.right].length) {
        return null;
      }
      for (let i = 0; i < item[config.left].length; i++) {
        item[config.left][i] *= item[config.right][i];
      }
    } else if (leftType === "map") {
      Object.keys(item[config.left]).forEach(outerKey => {
        let r = 0.0;
        if (outerKey in item[config.right]) {
          r = item[config.right][outerKey];
        }
        Object.keys(item[config.left][outerKey]).forEach(innerKey => {
          item[config.left][outerKey][innerKey] *= r;
        });
      });
    } else if (leftType === "number") {
      item[config.left] *= item[config.right];
    } else {
      return null;
    }

    return item;
  }

  /**
   * Vector multiplies (i.e. dot products) two vectors and stores the result in
   * third field. Both vectors must either be maps, or arrays of numbers with
   * the same length. For maps, only keys present on both sides contribute.
   *
   * Config:
   *  left       A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  right      A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  dest       The field to store the dot product.
   */
  vectorMultiply(item, config) {
    if (!(config.left in item) || !(config.right in item)) {
      return null;
    }

    let leftType = this._typeOf(item[config.left]);
    if (leftType !== this._typeOf(item[config.right])) {
      return null;
    }

    let destVal = 0.0;
    if (leftType === "array") {
      if (item[config.left].length !== item[config.right].length) {
        return null;
      }
      for (let i = 0; i < item[config.left].length; i++) {
        destVal += item[config.left][i] * item[config.right][i];
      }
    } else if (leftType === "map") {
      Object.keys(item[config.left]).forEach(key => {
        if (key in item[config.right]) {
          destVal += item[config.left][key] * item[config.right][key];
        }
      });
    } else {
      return null;
    }

    item[config.dest] = destVal;
    return item;
  }

  /**
   * Adds a constant value to all elements in the field. Mathematically,
   * this is the same as taking a 1-vector, scalar multiplying it by k,
   * and then vector adding it to a field.
   *
   * Config:
   *  field     A field pointing to either a map of strings to numbers,
   *                an array of numbers, or a number
   *  k         Either a number, or a string. If it's a number then this
   *                is the scalar value to add. If it's a string,
   *                the value in the pointed to field is used.
   *  dfault    OPTIONAL (DEFAULT: 0), If k is a string, and no numeric
   *                value is found, then use this value. The spelling
   *                "default" is accepted as an alias.
   */
  scalarAdd(item, config) {
    let k = this._lookupScalar(item, config.k, this._defaultScalar(config));
    if (!(config.field in item)) {
      return null;
    }

    let fieldType = this._typeOf(item[config.field]);
    if (fieldType === "array") {
      for (let i = 0; i < item[config.field].length; i++) {
        item[config.field][i] += k;
      }
    } else if (fieldType === "map") {
      Object.keys(item[config.field]).forEach(key => {
        item[config.field][key] += k;
      });
    } else if (fieldType === "number") {
      item[config.field] += k;
    } else {
      return null;
    }

    return item;
  }

  /**
   * Adds two vectors together and stores the result in left. When left is
   * missing from the item, right is deep-copied into left instead.
   *
   * Config:
   *  left      A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  right     A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   */
  vectorAdd(item, config) {
    if (!(config.left in item)) {
      return this.copyValue(item, { src: config.right, dest: config.left });
    }
    if (!(config.right in item)) {
      return null;
    }

    let leftType = this._typeOf(item[config.left]);
    if (leftType !== this._typeOf(item[config.right])) {
      return null;
    }
    if (leftType === "array") {
      if (item[config.left].length !== item[config.right].length) {
        return null;
      }
      for (let i = 0; i < item[config.left].length; i++) {
        item[config.left][i] += item[config.right][i];
      }
      return item;
    } else if (leftType === "map") {
      Object.keys(item[config.right]).forEach(key => {
        let v = 0;
        if (key in item[config.left]) {
          v = item[config.left][key];
        }
        item[config.left][key] = v + item[config.right][key];
      });
      return item;
    }

    return null;
  }

  /**
   * Converts a vector from real values to boolean integers. (i.e. either 1/0
   * or 1/-1).
   *
   * Config:
   *  field            Field containing either a map of strings to numbers,
   *                      an array of numbers, or a number to convert.
   *  threshold        OPTIONAL (DEFAULT: 0) Values above this will be replaced
   *                      with 1.0. All other values will be converted to 0.
   *  keep_negative    OPTIONAL (DEFAULT: False) If true, values not above the
   *                      threshold will be converted to -1 instead of 0.
   */
  makeBoolean(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let threshold = this._lookupScalar(item, config.threshold, 0.0);
    let type = this._typeOf(item[config.field]);
    if (type === "array") {
      for (let i = 0; i < item[config.field].length; i++) {
        if (item[config.field][i] > threshold) {
          item[config.field][i] = 1.0;
        } else if (config.keep_negative) {
          item[config.field][i] = -1.0;
        } else {
          item[config.field][i] = 0.0;
        }
      }
    } else if (type === "map") {
      Object.keys(item[config.field]).forEach(key => {
        let value = item[config.field][key];
        if (value > threshold) {
          item[config.field][key] = 1.0;
        } else if (config.keep_negative) {
          item[config.field][key] = -1.0;
        } else {
          item[config.field][key] = 0.0;
        }
      });
    } else if (type === "number") {
      let value = item[config.field];
      if (value > threshold) {
        item[config.field] = 1.0;
      } else if (config.keep_negative) {
        item[config.field] = -1.0;
      } else {
        item[config.field] = 0.0;
      }
    } else {
      return null;
    }

    return item;
  }

  /**
   * Removes all keys from the item except for the ones specified. Returns a
   * new object; the original item is not modified.
   *
   * Config:
   *  fields    An array of strings indicating the fields to keep
   */
  allowFields(item, config) {
    let newItem = {};
    for (let ele of config.fields) {
      if (ele in item) {
        newItem[ele] = item[ele];
      }
    }
    return newItem;
  }

  /**
   * Removes all keys whose value does not exceed some threshold.
   *
   * Config:
   *  field         Points to a map of strings to numbers
   *  threshold     OPTIONAL (DEFAULT: 0) Values must exceed this value,
   *                  otherwise they are removed.
   */
  filterByValue(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let threshold = this._lookupScalar(item, config.threshold, 0.0);
    let filtered = {};
    Object.keys(item[config.field]).forEach(key => {
      let value = item[config.field][key];
      if (value > threshold) {
        filtered[key] = value;
      }
    });
    item[config.field] = filtered;

    return item;
  }

  /**
   * Rewrites a field so that its values are now L2 normed. A vector whose
   * norm is 0 is left unchanged.
   *
   * Config:
   *  field         Points to a map of strings to numbers, or an array of numbers
   */
  l2Normalize(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let data = item[config.field];
    let type = this._typeOf(data);
    if (type === "array") {
      let norm = 0.0;
      for (let datum of data) {
        norm += datum * datum;
      }
      norm = Math.sqrt(norm);
      if (norm !== 0) {
        for (let i = 0; i < data.length; i++) {
          data[i] /= norm;
        }
      }
    } else if (type === "map") {
      let norm = 0.0;
      Object.keys(data).forEach(key => {
        norm += data[key] * data[key];
      });
      norm = Math.sqrt(norm);
      if (norm !== 0) {
        Object.keys(data).forEach(key => {
          data[key] /= norm;
        });
      }
    } else {
      return null;
    }

    item[config.field] = data;

    return item;
  }

  /**
   * Rewrites a field so that all of its values sum to 1.0. A vector whose
   * values sum to 0 is left unchanged.
   *
   * Config:
   *  field         Points to a map of strings to numbers, or an array of numbers
   */
  probNormalize(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let data = item[config.field];
    let type = this._typeOf(data);
    if (type === "array") {
      let norm = 0.0;
      for (let datum of data) {
        norm += datum;
      }
      if (norm !== 0) {
        for (let i = 0; i < data.length; i++) {
          data[i] /= norm;
        }
      }
    } else if (type === "map") {
      let norm = 0.0;
      Object.keys(item[config.field]).forEach(key => {
        norm += item[config.field][key];
      });
      if (norm !== 0) {
        Object.keys(item[config.field]).forEach(key => {
          item[config.field][key] /= norm;
        });
      }
    } else {
      return null;
    }

    return item;
  }

  /**
   * Stores a value, if it is not already present
   *
   * Config:
   *  field     field to write to if it is missing
   *  value     value to store in that field. If it is a string naming a
   *              numeric field of the item, that field's value is stored.
   */
  setDefault(item, config) {
    let val = this._lookupScalar(item, config.value, config.value);
    if (!(config.field in item)) {
      item[config.field] = val;
    }

    return item;
  }

  /**
   * Selectively promotes a value from an inner map up to the outer map.
   * The item is returned unchanged when haystack or needle is missing.
   *
   * Config:
   *  haystack  Points to a map of strings to values
   *  needle    Key inside the map we should promote up
   *  dest      Where we should write the value of haystack[needle]
   */
  lookupValue(item, config) {
    if (config.haystack in item && config.needle in item[config.haystack]) {
      item[config.dest] = item[config.haystack][config.needle];
    }

    return item;
  }

  /**
   * Demotes a field into a map. The destination map is created when it is
   * missing; the item is returned unchanged when src is missing.
   *
   * Config:
   *  src        Field to copy
   *  dest_map   Points to a map
   *  dest_key   Key inside dest_map to copy src to
   */
  copyToMap(item, config) {
    if (config.src in item) {
      if (!(config.dest_map in item)) {
        item[config.dest_map] = {};
      }
      item[config.dest_map][config.dest_key] = item[config.src];
    }

    return item;
  }

  /**
   * Multiplies every value of a nested tag map by a scalar.
   *
   * Config:
   *  field           Points to a map of strings to maps of strings to numbers
   *  k               Scalar to multiply the values by (DEFAULT: 1)
   *  log_scale       Boolean, if true, then the values will be transformed
   *                  by a logarithm prior to multiplications
   */
  scalarMultiplyTag(item, config) {
    // keeps Math.log away from log(0) for zero-valued entries
    let EPSILON = 0.000001;
    if (!(config.field in item)) {
      return null;
    }
    let k = this._lookupScalar(item, config.k, 1);
    let type = this._typeOf(item[config.field]);
    if (type === "map") {
      Object.keys(item[config.field]).forEach(parentKey => {
        Object.keys(item[config.field][parentKey]).forEach(key => {
          let v = item[config.field][parentKey][key];
          if (config.log_scale) {
            v = Math.log(v + EPSILON);
          }
          item[config.field][parentKey][key] = v * k;
        });
      });
    } else {
      return null;
    }

    return item;
  }

  /**
   * Independently applies softmax across all subtags. Fails (returns null,
   * leaving the field untouched) unless the field is a map whose values are
   * all maps of strings to numbers.
   *
   * Config:
   *  field       Points to a map of strings with values being another map of strings
   */
  applySoftmaxTags(item, config) {
    let type = this._typeOf(item[config.field]);
    if (type !== "map") {
      return null;
    }

    let abort = false;
    let softmaxSum = {};
    Object.keys(item[config.field]).forEach(tag => {
      if (this._typeOf(item[config.field][tag]) !== "map") {
        abort = true;
        return;
      }
      if (abort) {
        return;
      }
      softmaxSum[tag] = 0;
      Object.keys(item[config.field][tag]).forEach(subtag => {
        if (this._typeOf(item[config.field][tag][subtag]) !== "number") {
          abort = true;
          return;
        }
        let score = item[config.field][tag][subtag];
        softmaxSum[tag] += Math.exp(score);
      });
    });
    if (abort) {
      return null;
    }

    Object.keys(item[config.field]).forEach(tag => {
      Object.keys(item[config.field][tag]).forEach(subtag => {
        item[config.field][tag][subtag] =
          Math.exp(item[config.field][tag][subtag]) / softmaxSum[tag];
      });
    });

    return item;
  }

  /**
   * Vector adds a field of right into the same field of left and stores the
   * result in left. A missing left field starts out empty (or 0). For
   * arrays, cells of right beyond the length of left are appended.
   *
   * Config:
   *  field     The field to vector add
   */
  combinerAdd(left, right, config) {
    if (!(config.field in right)) {
      return left;
    }
    let type = this._typeOf(right[config.field]);
    if (!(config.field in left)) {
      if (type === "map") {
        left[config.field] = {};
      } else if (type === "array") {
        left[config.field] = [];
      } else if (type === "number") {
        left[config.field] = 0;
      } else {
        return null;
      }
    }
    if (type !== this._typeOf(left[config.field])) {
      return null;
    }
    if (type === "map") {
      Object.keys(right[config.field]).forEach(key => {
        if (!(key in left[config.field])) {
          left[config.field][key] = 0;
        }
        left[config.field][key] += right[config.field][key];
      });
    } else if (type === "array") {
      for (let i = 0; i < right[config.field].length; i++) {
        if (i < left[config.field].length) {
          left[config.field][i] += right[config.field][i];
        } else {
          left[config.field].push(right[config.field][i]);
        }
      }
    } else if (type === "number") {
      left[config.field] += right[config.field];
    } else {
      return null;
    }

    return left;
  }

  /**
   * Stores the (elementwise) maximum of the field in left and right in left.
   * A missing left field starts out empty (or 0). For arrays, cells of right
   * beyond the length of left are appended.
   *
   * Config:
   *  field     The field to take the maximum of
   */
  combinerMax(left, right, config) {
    if (!(config.field in right)) {
      return left;
    }
    let type = this._typeOf(right[config.field]);
    if (!(config.field in left)) {
      if (type === "map") {
        left[config.field] = {};
      } else if (type === "array") {
        left[config.field] = [];
      } else if (type === "number") {
        left[config.field] = 0;
      } else {
        return null;
      }
    }
    if (type !== this._typeOf(left[config.field])) {
      return null;
    }
    if (type === "map") {
      Object.keys(right[config.field]).forEach(key => {
        if (
          !(key in left[config.field]) ||
          right[config.field][key] > left[config.field][key]
        ) {
          left[config.field][key] = right[config.field][key];
        }
      });
    } else if (type === "array") {
      for (let i = 0; i < right[config.field].length; i++) {
        if (i < left[config.field].length) {
          if (left[config.field][i] < right[config.field][i]) {
            left[config.field][i] = right[config.field][i];
          }
        } else {
          left[config.field].push(right[config.field][i]);
        }
      }
    } else if (type === "number") {
      if (left[config.field] < right[config.field]) {
        left[config.field] = right[config.field];
      }
    } else {
      return null;
    }

    return left;
  }

  /**
   * Associates a value in right with another value in right. This association
   * is then stored in a map in left.
   *
   *     For example: If a sequence of rights is:
   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 41 }
   *     { 'tags': {}, 'url_domain': 'mbusa.com/mercedes',       'time': 21 }
   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 34 }
   *
   *     Then assuming a 'sum' operation, left can build a map that would look like:
   *     {
   *         'maseratiusa.com/maserati': 75,
   *         'mbusa.com/mercedes': 21,
   *     }
   *
   * Returns null for an unknown operation. When right lacks the key or value
   * field, left is returned with the (possibly newly created) map untouched.
   *
   * Fields:
   *  left_field              field in the left to store / update the map
   *  right_key_field         Field in the right to use as a key
   *  right_value_field       Field in the right to use as a value
   *  operation               One of "sum", "max", "overwrite", "count"
   */
  combinerCollectValues(left, right, config) {
    let op;
    if (config.operation === "sum") {
      op = (a, b) => a + b;
    } else if (config.operation === "max") {
      op = (a, b) => (a > b ? a : b);
    } else if (config.operation === "overwrite") {
      op = (a, b) => b;
    } else if (config.operation === "count") {
      op = a => a + 1;
    } else {
      return null;
    }
    if (!(config.left_field in left)) {
      left[config.left_field] = {};
    }
    if (
      !(config.right_key_field in right) ||
      !(config.right_value_field in right)
    ) {
      return left;
    }

    let key = right[config.right_key_field];
    let rightValue = right[config.right_value_field];
    let leftValue = 0.0;
    if (key in left[config.left_field]) {
      leftValue = left[config.left_field][key];
    }

    left[config.left_field][key] = op(leftValue, rightValue);

    return left;
  }

  /**
   * Executes a builder recipe. Returns an object on success, or null on
   * failure (a step aborted, or a step names a function that is not in
   * ITEM_BUILDER_REGISTRY). A missing recipe returns the item unchanged.
   */
  executeRecipe(item, recipe) {
    let newItem = item;
    if (recipe) {
      for (let step of recipe) {
        let op = this._resolveStep(this.ITEM_BUILDER_REGISTRY, step.function);
        if (op === undefined) {
          return null;
        }
        newItem = op.call(this, newItem, step);
        if (newItem === null) {
          break;
        }
      }
    }
    return newItem;
  }

  /**
   * Executes a combiner recipe, merging item2 into item1. Returns an object
   * on success, or null on failure (a step aborted, or a step names a
   * function that is not in ITEM_COMBINER_REGISTRY). A missing recipe
   * returns item1 unchanged.
   */
  executeCombinerRecipe(item1, item2, recipe) {
    let newItem1 = item1;
    if (recipe) {
      for (let step of recipe) {
        let op = this._resolveStep(this.ITEM_COMBINER_REGISTRY, step.function);
        if (op === undefined) {
          return null;
        }
        newItem1 = op.call(this, newItem1, item2, step);
        if (newItem1 === null) {
          break;
        }
      }
    }

    return newItem1;
  }
}