Files
mongo/jstests/with_mongot/e2e_lib/search_e2e_utils.js
Zac 591928c619 SERVER-108478 JS formatted by prettier and remove clang-format (#39656)
GitOrigin-RevId: 6c8f6aded47f260aa4f7c231b17dae3302cb1e04
2025-08-21 17:27:09 +00:00

498 lines
21 KiB
JavaScript

/**
* Contains common test utilities for e2e search tests involving mongot.
*/
import {stringifyArray} from "jstests/aggregation/extras/utils.js";
import {createSearchIndex, dropSearchIndex} from "jstests/libs/search.js";
import {getMovieData, getMovieDataWithEnrichedTitle} from "jstests/with_mongot/e2e_lib/data/movies.js";
import {getRentalData} from "jstests/with_mongot/e2e_lib/data/rentals.js";
import {assertViewAppliedCorrectly, assertViewNotApplied} from "jstests/with_mongot/e2e_lib/explain_utils.js";
/**
 * Default fraction of the array length by which a document may drift from its expected position.
 */
export const defaultTolerancePercentage = 0.3;

/**
 * Strategies that decide how "close enough"-ness is evaluated by assertDocArrExpectedFuzzy().
 *
 * 'EnforceTolerancePerDoc': a drift allowance / tolerance in terms of number of positions is
 *     computed based on the tolerancePercentage and array length. Then, each document in the
 *     actual array is checked that it is within this allowance. If any document in the actual
 *     array is out of its allotted tolerance, the entire assertion fails.
 * 'ShareToleranceAcrossDocs': Similar to 'EnforceTolerancePerDoc' except that each document's
 *     drift / tolerance allowance can be shared with other documents by placing all the
 *     allowances in a global pool that all documents deduct from. This is useful in cases where
 *     the actual array is otherwise in a good order except for an outlier. This way the tolerance
 *     can be kept low, but an outlier can still be accepted, without opening up the tolerance to
 *     something needlessly large for all docs.
 */
export const FuzzingStrategy = Object.freeze({
    EnforceTolerancePerDoc: 0,
    ShareToleranceAcrossDocs: 1,
});

/**
 * Fuzzing strategy used when the caller does not request one.
 */
export const defaultFuzzingStrategy = FuzzingStrategy.EnforceTolerancePerDoc;

/**
 * This function is used in place of direct assertions between expected and actual document array
 * results of search queries, because the search scores (and therefore orderings) of documents can
 * differ for the same collection and query across different cluster configurations. The reason
 * search scores can differ is that the search score for a document is influenced by which other
 * documents are on the same shard rather than across all shards.
 *
 * This function allows the same test running across different cluster configurations to pass if
 * the documents are in a "close enough" ordering to what is expected.
 *
 * @param {Object[]} expectedDocArr The expected array of documents in the expected order. Each
 *     document must have a "_id" key uniquely identifying it. No duplicate keys allowed.
 * @param {Object[]} actualDocArr The actual array of documents that are being tested that they are
 *     in a "close enough" order. Each document must also have an "_id" key, and each id key in the
 *     expected array must appear in this actual array.
 * @param {boolean} showVerboseResults When true, assertion messages print the full documents of
 *     the arrays; when false (the default) only the "_id" values are printed.
 * @param {float} tolerancePercentage A floating point number between 0 and 1 (inclusive) that
 *     indicates by what percentage of the array length each document has an allowance to drift
 *     from its expected position by. i.e. Array length of 9 with a tolerance percentage of 0.3
 *     means each document has an allowance of (0.3 * 9) = 3 positions. Note that the actual
 *     enforcement of fuzzing depends on the fuzzing strategy. For example, one strategy enforces
 *     that each document is within its individual allowance, whereas another allows documents to
 *     share their allowances in a global pool. Any tolerance greater than 0 will result in an
 *     allowance of at least 1. Other fractional numbers are rounded up or down to the nearest
 *     whole number.
 *     WARNING: large tolerancePercentages (generally above 0.5) are discouraged as a random
 *     ordering of docs has a reasonable chance of passing the fuzzing.
 * @param {FuzzingStrategy} fuzzingStrategy One of the 'FuzzingStrategy' values; selects how
 *     fuzziness or "close enough"-ness is decided.
 */
export function assertDocArrExpectedFuzzy(
    expectedDocArr,
    actualDocArr,
    showVerboseResults = false,
    tolerancePercentage = defaultTolerancePercentage,
    fuzzingStrategy = defaultFuzzingStrategy,
) {
    // Helper functions that stringify input arrays for developer observability in assertion logs.
    // When 'showFullArray' is false only the '_id' of each document is printed.
    const stringifyDocs = (docArr, label, showFullArray) =>
        stringifyArray(showFullArray ? docArr : docArr.map((obj) => obj._id), label);
    const stringifyExpectedArray = (showFullArray) => stringifyDocs(expectedDocArr, "expected", showFullArray);
    const stringifyActualArray = (showFullArray) => stringifyDocs(actualDocArr, "actual", showFullArray);
    const stringifyArrays = (showFullArray) =>
        stringifyExpectedArray(showFullArray) + stringifyActualArray(showFullArray);

    // Validate user inputs.
    assert(Array.isArray(expectedDocArr), "'expectedDocArr' must be of type array.");
    assert(Array.isArray(actualDocArr), "'actualDocArr' must be of type array.");
    assert(
        tolerancePercentage >= 0 && tolerancePercentage <= 1,
        "'tolerancePercentage' must be between 0 and 1 (inclusive), but instead is: '" + tolerancePercentage + "'.",
    );
    assert(
        fuzzingStrategy === FuzzingStrategy.EnforceTolerancePerDoc ||
            fuzzingStrategy === FuzzingStrategy.ShareToleranceAcrossDocs,
        "invalid FuzzingStrategy requested.",
    );

    // Results can never be as expected if array lengths don't match.
    assert.eq(
        expectedDocArr.length,
        actualDocArr.length,
        "expected and actual array lengths are not equal. Expected array len = '" +
            expectedDocArr.length +
            "' and actual array len = '" +
            actualDocArr.length +
            "'.\n" +
            stringifyArrays(showVerboseResults),
    );

    // Construct a map about the known information of each doc in the expected array,
    // searchable by id.
    // This map is then used to enforce each document in the expected array is found in the actual
    // array, and that there are no duplicates in the expected or actual array.
    //   expectedPos:  expected position for this id.
    //   seenInActual: whether this id has been seen in the actual array (to enforce no duplicates).
    //   actualPos:    index in the actual array this document was seen at (-1 until seen).
    const expectedDocMap = new Map();
    for (let i = 0; i < expectedDocArr.length; i++) {
        const id = expectedDocArr[i]["_id"];
        assert.neq(
            id,
            undefined,
            "'_id' field of document at index '" +
                i +
                "' in expected array is undefined.\n" +
                "document with undefined key: " +
                tojson(expectedDocArr[i]) +
                "\n" +
                stringifyExpectedArray(showVerboseResults),
        );

        // Ensure this key has never been seen (no duplicates allowed in expected array).
        // The conditional is placed so that the map entry can be accessed in the logging
        // message when the assertion is triggered.
        const previousEntry = expectedDocMap.get(id);
        if (previousEntry !== undefined) {
            const dupPos = previousEntry.expectedPos;
            assert.eq(
                previousEntry,
                undefined,
                "duplicate '_id' key of '" +
                    id +
                    "' found in expected array at indicies '" +
                    dupPos +
                    "' and '" +
                    i +
                    "'.\n" +
                    "duplicated document at index '" +
                    dupPos +
                    "': " +
                    tojson(expectedDocArr[dupPos]) +
                    "\n" +
                    "duplicated document at index '" +
                    i +
                    "': " +
                    tojson(expectedDocArr[i]) +
                    "\n" +
                    stringifyExpectedArray(showVerboseResults),
            );
        }
        expectedDocMap.set(id, {expectedPos: i, seenInActual: false, actualPos: -1});
    }

    // Compute the discrete positional tolerance amount allotted per document.
    // Depends on the tolerancePercentage and length of the input arrays.
    // If tolerance percentage is not 0, positional tolerance per doc should be at least 1.
    // Otherwise, round the resulting decimal to the nearest whole number.
    const positionalTolerancePerDoc =
        tolerancePercentage !== 0 ? Math.max(Math.round(expectedDocArr.length * tolerancePercentage), 1) : 0;

    // Helper function when the FuzzingStrategy is 'EnforceTolerancePerDoc'.
    // Returns a boolean for if a doc is within its positional tolerance.
    const withinTolerance = (expectedPos, actualPos) => {
        const lowerLimit = Math.max(0, expectedPos - positionalTolerancePerDoc);
        const upperLimit = Math.min(expectedDocArr.length - 1, expectedPos + positionalTolerancePerDoc);
        return actualPos >= lowerLimit && actualPos <= upperLimit;
    };

    // State when the FuzzingStrategy is 'ShareToleranceAcrossDocs'.
    // Global counter for all the positional drifts all documents have jointly accumulated.
    let totalPositionalDrift = 0;
    // Total drift tolerance / cap the entire array must stay under (or at).
    const positionalDriftTolerance = positionalTolerancePerDoc * expectedDocArr.length;

    // For each actual document check that this document should exist, is equal to its expected
    // counterpart, and is within the allotted positional tolerance (depending on fuzzing strategy).
    for (let i = 0; i < actualDocArr.length; i++) {
        const actualId = actualDocArr[i]["_id"];
        assert.neq(
            actualId,
            undefined,
            "'_id' field of document at index '" +
                i +
                "' in actual array is not defined.\n" +
                "document with undefined key: " +
                tojson(actualDocArr[i]) +
                "\n" +
                stringifyActualArray(showVerboseResults),
        );

        const expectedDocEntry = expectedDocMap.get(actualId);
        assert.neq(
            expectedDocEntry,
            undefined,
            "actual array document with '_id' of '" +
                actualId +
                "' at index '" +
                i +
                "' is not found in expected array.\n" +
                "document with not found id in expected array: " +
                tojson(actualDocArr[i]) +
                "\n" +
                stringifyArrays(showVerboseResults),
        );
        assert(
            !expectedDocEntry.seenInActual,
            "duplicate '_id' key of '" +
                actualId +
                "' found in actual array at indices '" +
                expectedDocEntry.actualPos +
                "' and " +
                "'" +
                i +
                "'.\n" +
                "duplicate document at index '" +
                expectedDocEntry.actualPos +
                "': " +
                tojson(actualDocArr[expectedDocEntry.actualPos]) +
                "\n" +
                "duplicate document at index '" +
                i +
                "': " +
                tojson(actualDocArr[i]) +
                "\n" +
                stringifyActualArray(showVerboseResults),
        );

        // Mark this entry as seen for future duplication checks. The entry is updated in place so
        // that 'expectedPos' stays on the stored record alongside the new bookkeeping fields.
        expectedDocEntry.seenInActual = true;
        expectedDocEntry.actualPos = i;

        // Ensure that the entire actual document matches the expected document.
        assert.docEq(
            expectedDocArr[expectedDocEntry.expectedPos],
            actualDocArr[i],
            "document with '_id' of '" +
                actualId +
                "' does not match the fields of its expected document counterpart.\n" +
                "expected array doc at index '" +
                expectedDocEntry.expectedPos +
                "': " +
                tojson(expectedDocArr[expectedDocEntry.expectedPos]) +
                "\n" +
                "actual array doc at index '" +
                i +
                "': " +
                tojson(actualDocArr[i]) +
                "\n",
        );

        // Tolerance check depends on fuzzing strategy.
        if (fuzzingStrategy === FuzzingStrategy.EnforceTolerancePerDoc) {
            // This document must individually be within its positional tolerance.
            assert(
                withinTolerance(expectedDocEntry.expectedPos, i),
                "actual array document with '_id' of '" +
                    actualId +
                    "' at index '" +
                    i +
                    "' is not within the tolerance of its associated expected document " +
                    "at expected array index '" +
                    expectedDocEntry.expectedPos +
                    "'. The tolerance amount is '" +
                    positionalTolerancePerDoc +
                    "' position(s).\n" +
                    stringifyArrays(showVerboseResults),
            );
        } else if (fuzzingStrategy === FuzzingStrategy.ShareToleranceAcrossDocs) {
            // Add the incremental positional drift this document contributes to the global total.
            // The total is asserted once all docs have been processed so that the total gap
            // between needed and actual drift can be reported upon assertion.
            totalPositionalDrift += Math.abs(expectedDocEntry.expectedPos - i);
        }
    }

    if (fuzzingStrategy === FuzzingStrategy.ShareToleranceAcrossDocs) {
        // Total positional drift aggregated across all documents has been computed.
        assert.lte(
            totalPositionalDrift,
            positionalDriftTolerance,
            "total positional drift across all docs is above the alloted tolerance. " +
                "Total positional drift is '" +
                totalPositionalDrift +
                "', but the alloted tolerance is '" +
                positionalDriftTolerance +
                "'.\n" +
                stringifyArrays(showVerboseResults),
        );
    }
    // All assertions passed.
    // Expected and actual arrays have the same documents in a "close enough" ordering.
}
/**
 * Blocks the calling thread until the document with the given _id shows up exactly once in the
 * results of the given query. The caller is expected to have already inserted the document into
 * the collection. Use this after changing data covered by a $search or $vectorSearch index, since
 * those indexes are eventually consistent.
 *
 * The document must be observed _via_ the specific $search or $vectorSearch query of interest,
 * because visibility is wanted in that query's own index, which is replicated on its own schedule.
 *
 * @param {*} docId The target "_id" value of the document that should become visible.
 * @param {Collection} coll The collection that holds the document. It is expected to already
 *     contain the document, which may not yet be replicated to a search index.
 * @param {Object[]} queryPipeline A pipeline with a $search or $vectorSearch stage that will later
 *     be used to examine the document. A {$match: {_id: docId}} stage is appended to it for each
 *     poll; the pipeline passed in is not modified.
 */
export function waitUntilDocIsVisibleByQuery({docId, coll, queryPipeline}) {
    // Polled by assert.soon() until it returns true.
    const docIsVisible = () => {
        const pipelineFilteredById = queryPipeline.concat([{$match: {_id: docId}}]);
        const matchCount = coll.aggregate(pipelineFilteredById).itcount();
        return matchCount === 1;
    };
    assert.soon(docIsVisible);
}
/**
 * Identifiers for the datasets used by the e2e search tests. Each entry carries a numeric 'id'
 * and, where applicable, the name of the view ('viewName') and/or search index ('indexName')
 * associated with it.
 */
export const datasets = {
    MOVIES: {
        id: 1,
        indexName: "moviesIndex",
    },
    RENTALS: {
        id: 2,
    },
    MOVIES_WITH_ENRICHED_TITLE: {
        id: 3,
        viewName: "moviesWithEnrichedTitle",
        indexName: "moviesWithEnrichedTitleIndex",
    },
    ACTION_MOVIES: {
        id: 4,
        viewName: "actionMovies",
        indexName: "actionMoviesIndex",
    },
    // Nested view.
    ACTION_MOVIES_WITH_ENRICHED_TITLE: {
        id: 5,
        viewName: "actionMoviesWithEnrichedTitle",
        indexName: "actionMoviesWithEnrichedTitleIndex",
    },
};
/**
 * Builds an array of expected documents by looking each id up in the data of a dataset.
 *
 * @param idArray is an array of _ids; each one is used as the lookup key into the dataset's data.
 * @param dataset is an element of the 'datasets' enum used to determine which dataset the results
 *     belong to. Only MOVIES, RENTALS and MOVIES_WITH_ENRICHED_TITLE have data wired up here; for
 *     any other value every returned entry is undefined.
 *
 * @returns An array of documents from the dataset, in the same order as 'idArray'.
 */
export function buildExpectedResults(idArray, dataset) {
    let sourceDocs;
    switch (dataset) {
        case datasets.MOVIES:
            sourceDocs = getMovieData();
            break;
        case datasets.RENTALS:
            sourceDocs = getRentalData();
            break;
        case datasets.MOVIES_WITH_ENRICHED_TITLE:
            sourceDocs = getMovieDataWithEnrichedTitle();
            break;
        default:
            // No data is associated with this dataset, so every lookup yields undefined.
            sourceDocs = [];
            break;
    }
    return [...idArray].map((id) => sourceDocs[id]);
}
/**
 * Creates one or more search indexes with the specified storedSource option attached and returns a
 * cleanup function to delete the search indexes.
 *
 * The supplied index definitions are deep-copied (via a JSON round trip) before being modified, so
 * the caller's objects are left untouched. If the copy has no 'definition' field an empty one is
 * added, and if that has no 'mappings' field it defaults to {dynamic: true}.
 *
 * If creating one of the indexes throws, the indexes that were already created by this call are
 * dropped before the error is rethrown, since the caller never receives the cleanup function in
 * that case.
 *
 * @param {Object|Array} config - Either a single {coll, definition} object or an array of such
 *     objects. 'coll' is the collection passed to createSearchIndex()/dropSearchIndex() and
 *     'definition' is the index definition (its 'name' is used to drop the index).
 * @param {boolean} isStoredSource - Whether storedSource should be enabled on the search indexes.
 * @returns {Function} A unified cleanup function for all created indexes. It attempts to drop
 *     every index even if one drop fails, then rethrows the first failure.
 */
export function createSearchIndexesWithCleanup(config, isStoredSource = true) {
    // Normalize input to array format.
    const configs = Array.isArray(config) ? config : [config];
    const cleanupFunctions = [];

    // Runs every registered cleanup. A failing drop does not prevent the remaining indexes from
    // being dropped; the first error encountered is rethrown once all drops were attempted.
    const dropCreatedIndexes = () => {
        let hasFailure = false;
        let firstFailure;
        for (const cleanupFn of cleanupFunctions) {
            try {
                cleanupFn();
            } catch (dropError) {
                if (!hasFailure) {
                    hasFailure = true;
                    firstFailure = dropError;
                }
            }
        }
        if (hasFailure) {
            throw firstFailure;
        }
    };

    try {
        for (const {coll, definition} of configs) {
            // Deep copy to avoid modifying the original.
            const indexDef = JSON.parse(JSON.stringify(definition));

            // Ensure required structure exists.
            if (!indexDef.definition) {
                indexDef.definition = {};
            }
            if (!indexDef.definition.mappings) {
                indexDef.definition.mappings = {dynamic: true};
            }

            // Set storedSource value.
            indexDef.definition.storedSource = isStoredSource;

            // Create the index, and only register its cleanup once creation succeeded.
            createSearchIndex(coll, indexDef);
            cleanupFunctions.push(() => {
                dropSearchIndex(coll, {name: indexDef.name});
            });
        }
    } catch (creationError) {
        // Roll back whatever was created so that a partial failure does not leak search indexes.
        // A rollback failure is deliberately not allowed to mask the creation error, which is the
        // root cause the caller needs to see.
        try {
            dropCreatedIndexes();
        } catch (rollbackError) {
            // Intentionally ignored; see the comment above.
        }
        throw creationError;
    }

    // Return a unified cleanup function.
    return dropCreatedIndexes;
}
/**
 * Creates search indexes, runs a test function against them, and always drops the indexes again,
 * even if the test function throws. This utility encapsulates the common try/finally pattern used
 * in search-on-view tests.
 *
 * The indexes are created once through createSearchIndexesWithCleanup(indexConfig). The test
 * function is then invoked with 'true' (storedSource run) followed by 'false' (non-storedSource
 * run); the first invocation is skipped when 'runWithStoredSource' is falsy.
 *
 * @param {Object|Array} indexConfig - Index configuration to pass to
 *     createSearchIndexesWithCleanup.
 * @param {Function} testFn - The test function to execute with the created indexes. This function
 *     must take in one parameter which specifies whether the tests are storedSource or not.
 * @param {boolean} runWithStoredSource - Whether to also invoke 'testFn' with 'true' before the
 *     invocation with 'false'. Defaults to true.
 */
export function createSearchIndexesAndExecuteTests(indexConfig, testFn, runWithStoredSource = true) {
    // Create the indexes up front and keep the function that drops them.
    const dropIndexes = createSearchIndexesWithCleanup(indexConfig);
    const storedSourceModes = runWithStoredSource ? [true, false] : [false];
    try {
        for (const isStoredSource of storedSourceModes) {
            testFn(isStoredSource);
        }
    } finally {
        dropIndexes();
    }
}
/**
 * Runs explain on a search pipeline and validates the output according to the storedSource setting
 * and an optional caller-supplied validation function.
 *
 * When a view pipeline is given, the explain output is checked with assertViewNotApplied() if
 * 'isStoredSource' is truthy and with assertViewAppliedCorrectly() otherwise.
 *
 * @param {Object} coll - The collection to query.
 * @param {Array} userPipeline - User pipeline to run on the collection.
 * @param {boolean} isStoredSource - Whether storedSource is enabled.
 * @param {Array} viewPipeline - Optional view pipeline's definition for validation. View checks
 *     are skipped when this is falsy.
 * @param {Function} explainValidationFn - Optional additional function to validate explain output;
 *     it is called with the explain result.
 */
export function validateSearchExplain(
    coll,
    userPipeline,
    isStoredSource,
    viewPipeline = null,
    explainValidationFn = null,
) {
    const explainOutput = assert.commandWorked(coll.explain().aggregate(userPipeline));

    // If coll is a view, pick the view assertion that matches the storedSource value specified.
    if (viewPipeline) {
        const assertViewHandling = isStoredSource ? assertViewNotApplied : assertViewAppliedCorrectly;
        assertViewHandling(explainOutput, userPipeline, viewPipeline);
    }

    // Hand the explain output to the extra validator, if one was provided.
    if (explainValidationFn) {
        explainValidationFn(explainOutput);
    }
}
/**
 * Checks whether a search index with the given name exists on the collection. If exactly one such
 * index is listed but it is not yet queryable, this blocks (via assert.soon) until it is.
 *
 * @param {*} coll - The collection to check for an existing index.
 * @param {*} indexName - The name of the index to check for.
 * @returns True if the index exists and is queryable, false if $listSearchIndexes does not return
 *     exactly one entry for that name.
 */
export function checkForExistingIndex(coll, indexName) {
    const listMatchingIndexes = () => coll.aggregate([{$listSearchIndexes: {name: indexName}}]).toArray();

    const firstListing = listMatchingIndexes();
    if (firstListing.length !== 1) {
        return false;
    }

    if (firstListing[0].queryable !== true) {
        // Wait for the index to be queryable.
        assert.soon(() => {
            const latestListing = listMatchingIndexes();
            assert.eq(latestListing.length, 1, latestListing);
            return latestListing[0].queryable;
        });
    }
    return true;
}