# Copyright (C) 2022-present MongoDB, Inc. # # This program is free software: you can redistribute it and/or modify # it under the terms of the Server Side Public License, version 1, # as published by MongoDB, Inc. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # Server Side Public License for more details. # # You should have received a copy of the Server Side Public License # along with this program. If not, see # . # # As a special exception, the copyright holders give permission to link the # code of portions of this program with the OpenSSL library under certain # conditions as described in each individual source file and distribute # linked combinations including the program with the OpenSSL library. You # must comply with the Server Side Public License in all respects for # all of the code used other than as permitted herein. If you modify file(s) # with this exception, you may extend this exception to your version of the # file(s), but you are not obligated to do so. If you do not wish to do so, # delete this exception statement from your version. If you delete this # exception statement from all source files in the program, then also delete # it in the license file. # """Data generation entry point.""" import asyncio import dataclasses import json import os import subprocess from pathlib import Path from bson.json_util import dumps from config import CollectionTemplate, FieldTemplate, DataType from data_generator import CollectionInfo, DataGenerator from database_instance import DatabaseInstance import parameters_extractor #from ce_generate_data_settings import database_config, data_generator_config from ce_data_settings import database_config, data_generator_config __all__ = [] class CollectionTemplateEncoder(json.JSONEncoder): def default(self, o): if isinstance(o, CollectionTemplate): collections = [] for card in o.cardinalities: name = f'{o.name}_{card}' collections.append( dict(collectionName=name, fields=o.fields, compound_indexes=o.compound_indexes, cardinality=card)) return collections elif isinstance(o, FieldTemplate): return dict(fieldName=o.name, data_type=o.data_type, indexed=o.indexed) elif isinstance(o, DataType): return o.name.lower() # Let the base class default method raise the TypeError return super(CollectionTemplateEncoder, self).default(o) class OidEncoder(json.JSONEncoder): cur_oid = -1 def default(self, o): # TODO: doesn't work, what is the type of ObjectIds? #if isinstance(o, OectId): if hasattr(o, '__str__'): # This will handle ObjectIds #return str(o) this is the real OID of the document # Replace the OID with a consequtive int number as needed by the query generator OidEncoder.cur_oid += 1 return OidEncoder.cur_oid return super(OidEncoder, self).default(o) async def dump_collection_to_json(db, dump_path, database_name, collections): with open(Path(dump_path) / f'{database_name}.data', "w") as data_file: data_file.write('// This is a generated file.\n') data_file.write('const dataSet = [\n') coll_pos = 1 for coll_name in collections: collection = db[coll_name] doc_count = await collection.count_documents({}) doc_pos = 1 data_file.write(f'{{collName: "{coll_name}", collData: [\n') async for doc in collection.find({}): #data_file.write(dumps(doc)) data_file.write(json.dumps(doc, cls=OidEncoder)) if doc_pos < doc_count: data_file.write(',') data_file.write("\n") doc_pos += 1 data_file.write(']}') if coll_pos < len(collections): data_file.write(",") data_file.write("]\n") async def main(): """Entry point function.""" script_directory = os.path.abspath(os.path.dirname(__file__)) os.chdir(script_directory) # 1. Database Instance provides connectivity to a MongoDB instance, it loads data optionally # from the dump on creating and stores data optionally to the dump on closing. with DatabaseInstance(database_config) as database_instance: # 2. Generate random data and populate collections with it. old_db_collections = await database_instance.database.list_collection_names() for coll_name in old_db_collections: collection = database_instance.database[coll_name] collection.drop() generator = DataGenerator(database_instance, data_generator_config) await generator.populate_collections() # 3. Export all collections in the database into json files. db_collections = await database_instance.database.list_collection_names() #for coll_name in db_collections: # subprocess.run([ # 'mongoexport', f'--db={database_config.database_name}', f'--collection={coll_name}', # f'--out={coll_name}.dat' # ], cwd=database_config.dump_path, check=True) await dump_collection_to_json(database_instance.database, database_config.dump_path, database_config.database_name, db_collections) # 4. Export the collection templates used to create the test collections into JSON file with open(Path(database_config.dump_path) / f'{database_config.database_name}.schema', "w") as metadata_file: collections = [] for coll_template in data_generator_config.collection_templates: for card in coll_template.cardinalities: name = f'{coll_template.name}_{card}' collections.append( dict(collectionName=name, fields=coll_template.fields, compound_indexes=coll_template.compound_indexes, cardinality=card)) json_metadata = json.dumps(collections, indent=4, cls=CollectionTemplateEncoder) metadata_file.write("// This is a generated file.\nconst dbMetadata = ") metadata_file.write(json_metadata) metadata_file.write(";") print("DONE!") if __name__ == '__main__': loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) asyncio.run(main())