// import.cpp /** * Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #include "stdafx.h" #include "client/dbclient.h" #include "db/json.h" #include "tool.h" #include #include #include using namespace mongo; namespace po = boost::program_options; class Import : public Tool { enum Type { JSON , CSV , TSV }; Type _type; const char * _sep; bool _ignoreBlanks; bool _appendNumber( BSONObjBuilder& b , const string& fieldName , const string& data ){ if ( data.size() == 0 ) return false; unsigned int pos=0; if ( data[0] == '-' ) pos++; bool hasDec = false; for ( ; pos() , "type of file to import. default: json (json,csv,tsv)") ("file",po::value() , "file to import from; if not specified stdin is used" ) ("drop", "drop collection first " ) ; addPositionArg( "file" , 1 ); _type = JSON; _ignoreBlanks = false; } int run(){ string filename = getParam( "file" ); long long fileSize = -1; istream * in = &cin; ifstream file( filename.c_str() , ios_base::in | ios_base::binary); if ( filename.size() > 0 && filename != "-" ){ if ( ! exists( filename ) ){ cerr << "file doesn't exist: " << filename << endl; return -1; } in = &file; fileSize = file_size( filename ); } string ns; try { ns = getNS(); } catch (...) { printHelp(cerr); return -1; } log(1) << "ns: " << ns << endl; auth(); if ( hasParam( "drop" ) ){ cout << "dropping: " << ns << endl; conn().dropCollection( ns.c_str() ); } if ( hasParam( "ignoreBlanks" ) ){ _ignoreBlanks = true; } if ( hasParam( "type" ) ){ string type = getParam( "type" ); if ( type == "json" ) _type = JSON; else if ( type == "csv" ){ _type = CSV; _sep = ","; } else if ( type == "tsv" ){ _type = TSV; _sep = "\t"; } else { cerr << "don't know what type [" << type << "] is" << endl; return -1; } } if ( _type == CSV || _type == TSV ){ needFields(); } int errors = 0; int num = 0; time_t start = time(0); log(1) << "filesize: " << fileSize << endl; ProgressMeter pm( fileSize ); const int BUF_SIZE = 1024 * 1024 * 4; char line[ (1024 * 1024 * 4) + 128]; while ( *in ){ in->getline( line , BUF_SIZE ); log(1) << "got line:" << line << endl; char * buf = line; while( isspace( buf[0] ) ) buf++; int len = strlen( buf ); if ( ! len ) continue; if ( in->rdstate() == ios_base::eofbit ) break; assert( in->rdstate() == 0 ); try { BSONObj o = parseLine( buf ); conn().insert( ns.c_str() , o ); } catch ( std::exception& e ){ cout << "exception:" << e.what() << endl; cout << buf << endl; errors++; } num++; if ( pm.hit( len + 1 ) ){ cout << "\t\t\t" << num << "\t" << ( num / ( time(0) - start ) ) << "/second" << endl; } } cout << "imported " << num << " objects" << endl; if ( errors == 0 ) return 0; cerr << "encountered " << errors << " error" << ( errors == 1 ? "" : "s" ) << endl; return -1; } }; int main( int argc , char ** argv ) { Import import; return import.main( argc , argv ); }