Compare commits

...

1 Commits

Author SHA1 Message Date
Jackson Xie
706f319fd9 SERVER-57643: Create Basic Decoder and Encoder Functions 2021-06-16 14:14:38 +00:00
4 changed files with 276 additions and 0 deletions

View File

@@ -14,15 +14,27 @@ env.Library(
],
)
# Library containing the Simple8b encode/decode routines (simple8b.cpp).
env.Library(
    target='simple8b',
    source=['simple8b.cpp'],
    LIBDEPS=['$BUILD_DIR/mongo/base'],
)
# Unit-test binary for the BSON utility sources listed below; links against the
# bson_extract and simple8b libraries.
env.CppUnitTest(
    target='bson_util_test',
    source=[
        'bson_check_test.cpp',
        'bson_extract_test.cpp',
        'builder_test.cpp',
        'simple8b_test.cpp',
    ],
    LIBDEPS=[
        '$BUILD_DIR/mongo/base',
        'bson_extract',
        'simple8b',
    ],
)

View File

@@ -0,0 +1,95 @@
/**
* Copyright (C) 2021-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/bson/util/simple8b.h"
namespace mongo {
namespace {
// Largest and smallest selector values accepted by the encoder.
constexpr uint8_t _maxSelector = 15;
constexpr uint8_t _minSelector = 2;

// The selector lives in the top _selectorSize bits of a word; the low _dataSize bits hold the
// packed integers.
constexpr uint64_t _selectorMask = 0xF000000000000000;
constexpr uint8_t _selectorSize = 4;
constexpr uint8_t _dataSize = 60;

// Indexed by selector: mask covering a single packed integer, i.e. (2^bitsPerInteger) - 1.
constexpr uint64_t _selectorForMask[16] = {0x0,
                                           0x0,
                                           0x1,
                                           0x3,
                                           0x7,
                                           0xF,
                                           0x1F,
                                           0x3F,
                                           0x7F,
                                           0xFF,
                                           0x3FF,
                                           0xFFF,
                                           0x7FFF,
                                           0xFFFFF,
                                           0x3FFFFFFF,
                                           0x0FFFFFFFFFFFFFFF};

// Indexed by selector: number of bits used by each integer in the Simple8b block.
constexpr uint8_t _selectorForBitsPerInteger[16] = {
    0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 60};

// Indexed by selector: number of integers stored in the Simple8b block.
constexpr uint8_t _selectorForIntegersCoded[16] = {
    240, 120, 60, 30, 20, 15, 12, 10, 8, 7, 6, 5, 4, 3, 2, 1};
}  // namespace
uint64_t Simple8b::encodeSimple8b(uint8_t selector, const std::vector<uint64_t>& values) {
if (selector > _maxSelector || selector < _minSelector)
return errCode;
uint8_t bitsPerInteger = _selectorForBitsPerInteger[selector];
uint8_t integersCoded = _selectorForIntegersCoded[selector];
uint64_t encodedWord = (uint64_t) selector << _dataSize;
for (uint8_t i = 0; i < integersCoded; ++i) {
uint8_t shiftSize = _dataSize - bitsPerInteger * (i + 1);
encodedWord += values[i] << shiftSize;
}
return encodedWord;
}
/**
 * Unpacks the integers stored in 'simple8bWord'.
 *
 * The selector is read from the top 4 bits. Slots are read from the most significant data bits
 * downwards, using the same offsets as encodeSimple8b, so for selectors whose slots do not fill
 * all 60 data bits the unused bits are the lowest ones. The returned vector lists the integers
 * in the order they were encoded.
 *
 * Returns an empty vector when the selector is below _minSelector.
 */
std::vector<uint64_t> Simple8b::decodeSimple8b(const uint64_t simple8bWord) {
    std::vector<uint64_t> values;
    const uint8_t selector = (simple8bWord & _selectorMask) >> _dataSize;
    if (selector < _minSelector)
        return values;

    const uint8_t bitsPerInteger = _selectorForBitsPerInteger[selector];
    const uint8_t integersCoded = _selectorForIntegersCoded[selector];
    const uint64_t valueMask = _selectorForMask[selector];

    values.reserve(integersCoded);
    for (uint8_t i = 0; i < integersCoded; ++i) {
        // Mirror the encoder: slot i starts bitsPerInteger * (i + 1) bits below the top of the
        // data area. Measuring from bit 0 instead would misread selectors with unused low bits.
        const uint8_t shiftSize = static_cast<uint8_t>(_dataSize - bitsPerInteger * (i + 1));
        values.push_back((simple8bWord >> shiftSize) & valueMask);
    }
    return values;
}
} // namespace mongo

View File

@@ -0,0 +1,64 @@
/**
* Copyright (C) 2021-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include <cmath>
#include <iostream>
#include <vector>
namespace mongo {
/**
 * As of now, Simple8b is a static class that can encode a selector value and a series of
 * integers into a single 64 bit Simple8b word, and decode such a word back into integers.
 */
class Simple8b {
public:
    // A valid Simple8b word should never have a selector value of 1, so a word carrying that
    // selector is used as the error code.
    // Declared constexpr (implicitly inline since C++17) so that it can be odr-used, e.g. bound
    // to a const reference, without requiring a separate out-of-class definition.
    static constexpr uint64_t errCode = 0x1000000000000000;

    /**
     * encodeSimple8b takes a selector and a vector of integers and compresses them into a
     * 64 bit word.
     * If there are wasted bits, they will be at the very right (least significant) side.
     * For now, all integers in the vector are assumed to be greater than or equal to zero, and
     * the selector and all values are assumed to fit into the 64 bit word.
     * Returns the encoded Simple8b word if the selector is valid and errCode otherwise.
     */
    static uint64_t encodeSimple8b(uint8_t selector, const std::vector<uint64_t>& values);

    /**
     * decodeSimple8b decodes a Simple8b word into a vector of integers.
     * The returned vector is empty only when the selector stored in the word is invalid.
     */
    static std::vector<uint64_t> decodeSimple8b(uint64_t simple8bWord);
};
} // namespace mongo

View File

@@ -0,0 +1,105 @@
/**
* Copyright (C) 2021-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include <vector>
#include "mongo/bson/util/simple8b.h"
#include "mongo/unittest/unittest.h"
using namespace mongo;
void areVectorsEqual(std::vector<uint64_t> actualVector, std::vector<uint64_t> expectedVector) {
ASSERT_EQ(actualVector.size(), expectedVector.size());
for (unsigned i = 0; i < actualVector.size(); ++i) {
ASSERT_EQ(actualVector[i], expectedVector[i]);
}
}
// Selector 15 stores one 60 bit integer: the word is the selector nibble followed by the value.
TEST(Simple8b, EncodeOneValueTest) {
    const std::vector<uint64_t> input{1};
    const uint8_t selector = 15;
    const uint64_t encoded = Simple8b::encodeSimple8b(selector, input);
    ASSERT_EQUALS(encoded, 0xF000000000000001);
}
// A word with selector 15 and data bits equal to 1 decodes to the single value 1.
TEST(Simple8b, DecodeOneValueTest) {
    const std::vector<uint64_t> expected{1};
    const uint64_t encoded = 0xF000000000000001;
    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(encoded);
    areVectorsEqual(decoded, expected);
}
// Selector 13 stores three 20 bit integers, first value in the most significant slot.
TEST(Simple8b, EncodeMultipleValuesTest) {
    const std::vector<uint64_t> input{1, 2, 3};
    const uint8_t selector = 13;
    const uint64_t encoded = Simple8b::encodeSimple8b(selector, input);
    ASSERT_EQUALS(encoded, 0xD000010000200003);
}
// A selector 13 word holding three 20 bit slots decodes back to {1, 2, 3} in order.
TEST(Simple8b, DecodeMultipleValuesTest) {
    const std::vector<uint64_t> expected{1, 2, 3};
    const uint64_t encoded = 0xD000010000200003;
    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(encoded);
    areVectorsEqual(decoded, expected);
}
// Selector 2 stores sixty 1 bit integers; sixty ones set every data bit of the word.
TEST(Simple8b, EncodeMaxValuesTest) {
    const std::vector<uint64_t> input(60, 1);
    const uint8_t selector = 2;
    const uint64_t encoded = Simple8b::encodeSimple8b(selector, input);
    ASSERT_EQUALS(encoded, 0x2FFFFFFFFFFFFFFF);
}
// A selector 2 word with every data bit set decodes to sixty values of 1.
TEST(Simple8b, DecodeMaxValuesTest) {
    const std::vector<uint64_t> expected(60, 1);
    const uint64_t encoded = 0x2FFFFFFFFFFFFFFF;
    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(encoded);
    areVectorsEqual(decoded, expected);
}
// Selector 1 sits just below the smallest selector the encoder accepts, so encoding must be
// rejected with the error code. (The test is named for selector one; it previously passed 0.)
TEST(Simple8b, InvalidEncodeOneSelectorTest) {
    uint8_t selector = 1;
    std::vector<uint64_t> values = {};
    uint64_t simple8bWord = Simple8b::encodeSimple8b(selector, values);
    ASSERT_EQUALS(simple8bWord, 0x1000000000000000);
}
// Selector 16 does not fit in the 4 bit selector field, so the encoder must return the error
// code.
TEST(Simple8b, InvalidEncodeSixteenSelectorTest) {
    const std::vector<uint64_t> noValues{};
    const uint8_t selector = 16;
    const uint64_t encoded = Simple8b::encodeSimple8b(selector, noValues);
    ASSERT_EQUALS(encoded, 0x1000000000000000);
}
// A word whose selector nibble is 1 is invalid, so decoding it yields no values.
TEST(Simple8b, InvalidDecodeOneSelectorTest) {
    const uint64_t invalidWord = 0x1000000000000000;
    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(invalidWord);
    ASSERT_EQUALS(decoded.size(), 0);
}