Files
mongo/jstests/libs/load_ce_test_data.js
Timour Katchaounov 3ade0b9fe3 SERVER-72236 Generate random integer data for CE
Generate random data with integers. The approach is as follows:
- There is one collection for each different cardinality. All collections contain the same fields.
- Each field contains the data generated from a certain data distribution. The data could be anything - same type, mixed types, same mathematical distribution (e.g. normal), or a mixed distribution.
- The committed configuration file and the corresponding data file are reduced to only two small collections, so that Evergreen tests can run fast and the size of the git repository stays small. For actual experiments one needs to add more data sizes and re-generate the data locally.
- All data is saved in a single JavaScript file: jstests/query_golden/libs/data/ce_accuracy_test.data, with a corresponding schema file jstests/query_golden/libs/data/ce_accuracy_test.schema.
- The data file is a JavaScript file that can be loaded directly inside a JS test. Loading this file creates a global variable dataSet. The reason is that this is the only way to load external JSON data without installing external tools in Evergreen.
2023-01-10 12:51:54 +00:00

72 lines
2.6 KiB
JavaScript

load("jstests/libs/ce_stats_utils.js");
/**
 * Runs the 'analyze' command on every field in 'fields' to create its statistics and, for each
 * field whose 'indexed' flag is set, also builds a single-field index on it.
 *
 * @param db database handle on which the 'analyze' command is issued
 * @param coll collection whose name is passed to 'analyze' and on which indexes are created
 * @param fields iterable of field descriptors; 'fieldName' and 'indexed' are read from each one
 */
function analyzeAndIndexEnabledFields(db, coll, fields) {
    for (const {fieldName, indexed} of fields) {
        const analyzeCmd = {analyze: coll.getName(), key: fieldName};
        assert.commandWorked(db.runCommand(analyzeCmd));

        // Only fields flagged as indexed get an index; all fields are analyzed.
        if (!indexed) {
            continue;
        }
        const indexSpec = {[fieldName]: 1};
        assert.commandWorked(coll.createIndex(indexSpec));
    }
}
/**
 * Import every collection described by 'dbMetadata' into database 'dbName' by running the
 * external 'mongoimport' program once per collection, then analyze and index its fields.
 *
 * The database handle is obtained from the shell's global 'db' via getSiblingDB(), while
 * mongoimport is pointed at the fixed host 'localhost:20000'.
 *
 * @param dbName name of the database to import into
 * @param dataDir prefix of the data file paths; it is concatenated directly with
 *     '<collectionName>.dat', so it has to end with a path separator
 * @param dbMetadata iterable of collection descriptors; 'collectionName' and 'fields' are read
 *     from each one
 */
function importDataset(dbName, dataDir, dbMetadata) {
    const testDB = db.getSiblingDB(dbName);
    print("Running mongoimport\n");
    for (const collMetadata of dbMetadata) {
        const collName = collMetadata.collectionName;
        const coll = testDB[collName];
        print(`Importing ${collName}\n`);
        // No '--collection' is given: per the mongoimport documentation the collection name is
        // then taken from the input file name without its extension, i.e. 'collName'.
        // '--drop' replaces any existing collection of that name.
        const importRc = runProgram('mongoimport',
                                    '--db',
                                    dbName,
                                    '--verbose',
                                    '--host',
                                    'localhost:20000',
                                    '--file',
                                    `${dataDir}${collName}.dat`,
                                    '--drop');
        // A non-zero exit code means the import failed; name the collection in the failure.
        assert.eq(importRc, 0, `mongoimport failed for collection ${collName}`);
        // Create statistics and single-field indexes.
        analyzeAndIndexEnabledFields(testDB, coll, collMetadata.fields);
        // TODO: Create compound indexes (collMetadata.compound_indexes). I doubt we will need it
        // for CE testing.
    }
    print("Done mongoimport\n");
}
/**
 * Load a JSON dataset stored as an array of pairs of collection name and data, then analyze and
 * index the collections described by 'dbMetadata'.
 * For instance:
 * [{collName: "physical_scan_5000", collData: [{_id: 3, field1: "some_string"}, ...]} ...]
 *
 * Before loading, the server parameter 'internalQueryFrameworkControl' is set to "tryBonsai".
 * Each collection named in 'dataSet' is dropped and re-populated with an unordered insertMany().
 *
 * @param db database handle that receives the collections
 * @param dataSet iterable of {collName, collData} elements
 * @param dbMetadata iterable of collection descriptors; 'collectionName' and 'fields' are read
 *     from each one
 */
function loadJSONDataset(db, dataSet, dbMetadata) {
    assert.commandWorked(
        db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));

    // Loop variables are declared with 'const' so that nothing leaks into (or overwrites) the
    // global scope of the test that loaded this library.
    for (const dataElem of dataSet) {
        print(`\nInserting collection: ${dataElem.collName}`);
        const coll = db[dataElem.collName];
        coll.drop();
        assert.commandWorked(coll.insertMany(dataElem.collData, {ordered: false}));
    }

    // TODO: check that each dataSet field is present in collMetadata.
    // Create statistics and single-field indexes.
    for (const collMetadata of dbMetadata) {
        print(`\nIndexing collection: ${collMetadata.collectionName}`);
        const coll = db[collMetadata.collectionName];
        analyzeAndIndexEnabledFields(db, coll, collMetadata.fields);
    }
}