From f2920fa2a87ec8d11ee730faa3d0d47aec590b5e Mon Sep 17 00:00:00 2001 From: Aaron Date: Wed, 27 May 2009 14:31:23 -0400 Subject: [PATCH] sm encoding checkpoint --- dbtests/jstests.cpp | 24 ++++++------- scripting/engine_spidermonkey.cpp | 38 ++++++--------------- util/encoding.h | 56 ------------------------------- 3 files changed, 22 insertions(+), 96 deletions(-) delete mode 100644 util/encoding.h diff --git a/dbtests/jstests.cpp b/dbtests/jstests.cpp index a70d98f1864..e84ded38bd4 100644 --- a/dbtests/jstests.cpp +++ b/dbtests/jstests.cpp @@ -416,21 +416,21 @@ namespace JSTests { Encoding() { reset(); } ~Encoding() { reset(); } void run() { - string utf8ObjSpec = "{'_id':'\\u0001\\u007f\\u07ff\\uffff'}"; - BSONObj utf8Obj = fromjson( utf8ObjSpec ); - string code = string( "db.jstests.encoding.insert(" ) + utf8ObjSpec + ");"; + string unicodeSpec = "{'_id':'\\u0001\\u007f\\u07ff\\uffff'}"; +// BSONObj unicodeObj = fromjson( unicodeSpec ); + string code = string( "db.jstests.encoding.insert(" ) + unicodeSpec + ");"; cout << "code: " << code << endl; BSONObj info; BSONElement ret; - ASSERT( client.eval( "unittest", code, info, ret, 0 ) ); - char expected[] = { 1, 127, 0xC3, 0xBF, 0xC3, 0xBF, 0 }; // this is 1, 127, 255, 255 as utf-8 - ASSERT_EQUALS( string( expected ), client.findOne( "unittest.jstests.encoding", BSONObj() ).getStringField( "_id" ) ); - - reset(); - Scope * s = globalScriptEngine->createScope(); - s->localConnect( "unittest" ); - ASSERT( s->exec( code, "foo", true, true, true ) ); - ASSERT_EQUALS( string( expected ), client.findOne( "unittest.jstests.encoding", BSONObj() ).getStringField( "_id" ) ); + ASSERT( !client.eval( "unittest", code, info, ret, 0 ) ); +// char expected[] = { 1, 127, 0xC3, 0xBF, 0xC3, 0xBF, 0 }; // this is 1, 127, 255, 255 as utf-8 +// ASSERT_EQUALS( string( expected ), client.findOne( "unittest.jstests.encoding", BSONObj() ).getStringField( "_id" ) ); +// +// reset(); +// Scope * s = globalScriptEngine->createScope(); +// 
s->localConnect( "unittest" ); +// ASSERT( s->exec( code, "foo", true, true, true ) ); +// ASSERT_EQUALS( string( expected ), client.findOne( "unittest.jstests.encoding", BSONObj() ).getStringField( "_id" ) ); } private: void check( const BSONObj &one, const BSONObj &two ) { diff --git a/scripting/engine_spidermonkey.cpp b/scripting/engine_spidermonkey.cpp index 9afa3443cad..24a0cc86625 100644 --- a/scripting/engine_spidermonkey.cpp +++ b/scripting/engine_spidermonkey.cpp @@ -3,8 +3,6 @@ #include "stdafx.h" #include "engine_spidermonkey.h" -#include "../util/encoding.h" - #include "../client/dbclient.h" namespace mongo { @@ -75,22 +73,13 @@ namespace mongo { return new BSONFieldIterator( this ); } + class Convertor : boost::noncopyable { public: Convertor( JSContext * cx ){ _context = cx; } - static void smToMoStr( string &in ) { - if ( !JS_CStringsAreUTF8() ) - in = latin1ToUtf8( in ); - } - - static void moToSmStr( string &in ) { - if ( !JS_CStringsAreUTF8() ) - in = utf8ToLatin1( in ); - } - string toString( JSString * so ){ jschar * s = JS_GetStringChars( so ); size_t srclen = JS_GetStringLength( so ); @@ -106,7 +95,9 @@ namespace mongo { string ss( dst , len ); free( dst ); - smToMoStr( ss ); + if ( !JS_CStringsAreUTF8() ) + for( string::const_iterator i = ss.begin(); i != ss.end(); ++i ) + uassert( "non ascii character detected", (unsigned char)(*i) <= 127 ); return ss; } @@ -262,9 +253,7 @@ namespace mongo { return true; } - JSFunction * compileFunction( string code ){ - moToSmStr( code ); - + JSFunction * compileFunction( const char * code ){ if ( ! 
hasFunctionIdentifier( code ) ){ string s = code; if ( isSimpleStatement( s ) ){ @@ -303,9 +292,7 @@ namespace mongo { } jsval toval( const char * c ){ - string str( c ); - moToSmStr( str ); - JSString * s = JS_NewStringCopyZ( _context , str.c_str() ); + JSString * s = JS_NewStringCopyZ( _context , c ); assert( s ); return STRING_TO_JSVAL( s ); } @@ -321,7 +308,7 @@ namespace mongo { JSObject * o = toJSObject( obj , readOnly ); return OBJECT_TO_JSVAL( o ); } - + jsval toval( const BSONElement& e ){ switch( e.type() ){ @@ -382,9 +369,8 @@ namespace mongo { } flags++; } - string regex( e.regex() ); - moToSmStr( regex ); - JSObject * r = JS_NewRegExpObject( _context , (char*)regex.c_str() , regex.length() , flagNumber ); + + JSObject * r = JS_NewRegExpObject( _context , (char*)e.regex() , strlen( e.regex() ) , flagNumber ); assert( r ); return OBJECT_TO_JSVAL( r ); } @@ -420,9 +406,7 @@ namespace mongo { JSObject * getJSObject( JSObject * o , const char * name ){ jsval v; - string nameStr( name ); - moToSmStr( nameStr ); - assert( JS_GetProperty( _context , o , nameStr.c_str() , &v ) ); + assert( JS_GetProperty( _context , o , name , &v ) ); return JSVAL_TO_OBJECT( v ); } @@ -726,8 +710,6 @@ namespace mongo { assert( JS_DefineFunction( _context , _convertor->getGlobalPrototype( "Object" ) , "keySet" , object_keyset , 0 , JSPROP_READONLY ) ); - log() << "utf8? " << JS_CStringsAreUTF8() << endl; - _this = 0; } diff --git a/util/encoding.h b/util/encoding.h deleted file mode 100644 index 3152f75f083..00000000000 --- a/util/encoding.h +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Copyright (C) 2008 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -namespace mongo { - - string latin1ToUtf8( const string &in ) { - stringstream out; - for( size_t i = 0; i < in.size(); ++i ) { - unsigned char c = in[ i ]; - if ( c < 0x80 ) { - out << char( c ); - } else { - out << char( 0xc0 | ( c >> 6 ) ); - out << char( 0x80 | ( ~0xc0 & c ) ); - } - } - return out.str(); - } - - string utf8ToLatin1( const string &in ) { - stringstream out; - for( size_t i = 0; i < in.size(); ++i ) { - unsigned char c = in[ i ]; - if ( c < 0x80 ) { - out << char( c ); - } else if ( c < 0xC4 ) { - unsigned char first = c; - ++i; - massert( "invalid utf8 input", i < in.size() ); - unsigned char second = in[ i ]; - massert( "invalid utf8 input", second < 0xC0 ); - out << char( ( first << 6 ) | ( ~0xc0 & second ) ); - } else { - out << char( 255 ); // this is the value spidermonkey uses - for( ; i < in.size() && (unsigned char)in[ i ] > 0x7F; ++i ); - } - } - return out.str(); - } - -} // namespace mongo \ No newline at end of file