Files
mongo/jstests/libs/load_ce_test_data.js
Timour Katchaounov 7979eacf75 SERVER-73979 Partition large random data files into chunks
Save randomly generated data into files split into chunks
with a limited number of documents per chunk. This is done
to avoid the 2GB limit on JS files imposed by Node.js.
2023-02-17 08:19:00 +00:00

78 lines
3.0 KiB
JavaScript

load("jstests/libs/ce_stats_utils.js");
/**
 * Gathers statistics for each entry of 'fields' by running the 'analyze' command on its
 * 'fieldName', and builds an ascending single-field index for every entry whose 'indexed'
 * flag is truthy. Both commands are asserted to succeed.
 */
function analyzeAndIndexEnabledFields(db, coll, fields) {
    for (const fieldSpec of fields) {
        const analyzeCmd = {analyze: coll.getName(), key: fieldSpec.fieldName};
        assert.commandWorked(db.runCommand(analyzeCmd));

        // Only fields flagged as indexed get an index; the rest are analyzed only.
        if (!fieldSpec.indexed) {
            continue;
        }
        const indexKey = {[fieldSpec.fieldName]: 1};
        assert.commandWorked(coll.createIndex(indexKey));
    }
}
/**
 * Import the dataset described by 'dbMetadata' into database 'dbName' with mongoimport, then
 * analyze and index the fields of each imported collection.
 *
 * The target database is obtained from the shell's global 'db' via getSiblingDB().
 *
 * @param {string} dbName - name of the database to import into.
 * @param {string} dataDir - directory prefix of the data files. It is concatenated directly with
 *     the file name, so it must end with a path separator.
 * @param {Array} dbMetadata - one entry per collection, each with a 'collectionName' and a
 *     'fields' array that is passed on to analyzeAndIndexEnabledFields().
 * @param {string} [host='localhost:20000'] - 'host:port' of the server mongoimport connects to.
 */
function importDataset(dbName, dataDir, dbMetadata, host = 'localhost:20000') {
    const testDB = db.getSiblingDB(dbName);
    print("Running mongoimport\n");
    for (const collMetadata of dbMetadata) {
        const collName = collMetadata.collectionName;
        const coll = testDB[collName];
        print(`Importing ${collName}\n`);
        // No '--collection' is given: mongoimport takes the collection name from the file name
        // without its extension, which is why the data file is named '<collName>.dat'.
        const importRc = runProgram('mongoimport',
                                    '--db',
                                    dbName,
                                    '--verbose',
                                    '--host',
                                    host,
                                    '--file',
                                    `${dataDir}${collName}.dat`,
                                    '--drop');
        assert.eq(importRc, 0, `mongoimport failed for collection ${collName}`);
        // Create single-field indexes and analyze each field.
        analyzeAndIndexEnabledFields(testDB, coll, collMetadata.fields);
    }
    print("Done mongoimport\n");
}
/**
 * Load a JSON dataset stored as an array of names of data files, where each file contains
 * a variable that holds an object with the properties {collName, collData}.
 * For instance:
 *     ce_data_20_1 = {collName: "ce_data_20",
 *                     collData: [{"_id": 0, "uniform_int_0-1000-1": 899, ...}, ...]}
 *
 * Every collection named in 'dbMetadata' is dropped first; then each chunk file is loaded and
 * its documents are inserted into the collection the chunk names.
 *
 * @param {Object} db - database to load into; also used to set 'internalQueryFrameworkControl'
 *     to "tryBonsai" through an admin command.
 * @param {Array<string>} dataSet - chunk names. Each name is both the file name under 'dataDir'
 *     and the name of the variable that file defines. Names are evaluated as code, so they must
 *     come from a trusted source.
 * @param {string} dataDir - directory prefix, concatenated directly with each chunk name.
 * @param {Array} dbMetadata - one entry per collection, each with a 'collectionName'.
 */
function loadJSONDataset(db, dataSet, dataDir, dbMetadata) {
    assert.commandWorked(
        db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));

    for (const collMetadata of dbMetadata) {
        db[collMetadata.collectionName].drop();
    }

    for (const chunkName of dataSet) {
        const chunkFilePath = `${dataDir}${chunkName}`;
        print(`Loading chunk file: ${chunkFilePath}\n`);
        load(chunkFilePath);
        // At this point there is a variable named as the value of chunkName. It is looked up
        // with eval() because only its name is known here.
        const chunk = eval(chunkName);
        const coll = db[chunk.collName];
        assert.commandWorked(coll.insertMany(chunk.collData, {ordered: false}));
        // Free the chunk memory after insertion into the DB by clearing the loaded variable;
        // the local 'chunk' reference goes out of scope at the end of this iteration.
        eval(`${chunkName} = null`);
    }

    // TODO: Analyzing and indexing is better done by the CE-testing script because it knows
    // better what fields to analyze. If it is ever needed here, the sketch is:
    //     for (const collMetadata of dbMetadata) {
    //         print(`\nIndexing collection: ${collMetadata.collectionName}`);
    //         coll = db[collMetadata.collectionName];
    //         analyzeAndIndexEnabledFields(db, coll, collMetadata.fields);
    //     }
}