Files
mongo/lang/python/wiredtiger/packing.py

209 lines
6.9 KiB
Python

#!/usr/bin/env python
#
# Public Domain 2014-2015 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# WiredTiger variable-length packing and unpacking functions
"""Packing and unpacking functions
The format string uses the following conversions:
Format  Python  Notes
  x     N/A     pad byte, no associated value
  b     int     signed byte
  B     int     unsigned byte
  h     int     signed 16-bit
  H     int     unsigned 16-bit
  i     int     signed 32-bit
  I     int     unsigned 32-bit
  l     int     signed 32-bit
  L     int     unsigned 32-bit
  q     int     signed 64-bit
  Q     int     unsigned 64-bit
  r     int     record number
  s     str     fixed-length string
  S     str     NUL-terminated string
  t     int     fixed-length bit field
  u     str     raw byte array
"""
from intpacking import pack_int, unpack_int
def __get_type(fmt):
    """Separate an optional leading type character from a format string.

    Returns a ``(type, remainder)`` pair:

    * an empty (falsy) format yields ``(None, fmt)``;
    * a format starting with one of ``. @ < >`` yields that character and
      the rest of the format;
    * any other format yields ``('.', fmt)`` with the format untouched.
    """
    if not fmt:
        return None, fmt
    lead = fmt[0]
    # Variable-sized encoding ('.') is the default (and only supported
    # format in v1), so it is assumed when no type character is given.
    if lead not in '.@<>':
        return '.', fmt
    return lead, fmt[1:]
def __unpack_iter_fmt(fmt):
    """Walk a format string, yielding one tuple per conversion character.

    Each yielded tuple is ``(offset, havesize, size, char)`` where:

    * ``offset`` is the index of ``char`` within ``fmt``;
    * ``havesize`` is 1 if decimal digits preceded ``char``, otherwise 0;
    * ``size`` is the value of those digits (0 when there were none);
    * ``char`` is the non-digit conversion character itself.

    Digits at the very end of the format, with no character after them,
    produce no tuple.
    """
    count = 0
    explicit = 0
    for position, code in enumerate(fmt):
        if code.isdigit():
            # Accumulate the decimal size prefix one digit at a time.
            count = count * 10 + int(code)
            explicit = 1
            continue
        yield position, explicit, count, code
        # Start afresh for the next conversion.
        count = explicit = 0
def unpack(fmt, s):
    """Decode the packed string ``s`` according to ``fmt``.

    ``fmt`` may start with a type character (see ``__get_type``); only the
    variable-length type ``'.'`` is supported.

    Returns an empty tuple when the format is empty, otherwise a list with
    one entry per value-carrying conversion, in format order.

    Raises ``ValueError`` if the format asks for a type other than ``'.'``.
    """
    tfmt, fmt = __get_type(fmt)
    if not fmt:
        return ()
    if tfmt != '.':
        raise ValueError('Only variable-length encoding is currently supported')
    result = []
    for offset, havesize, size, f in __unpack_iter_fmt(fmt):
        if f == 'x':
            # Padding: skip the bytes, there is no value to report.
            if not havesize:
                size = 1
            s = s[size:]
        elif f in 'SsUu':
            # Set when an unsized 'S' found its NUL terminator, which must
            # then be skipped in addition to the string itself.
            terminated = False
            if not havesize:
                if f == 's':
                    size = 1
                elif f == 'S':
                    size = s.find('\0')
                    if size < 0:
                        # No terminator in the buffer: take everything that
                        # is left rather than letting the -1 from find()
                        # chop the last character and consume nothing.
                        size = len(s)
                    else:
                        terminated = True
                elif f == 'u' and offset == len(fmt) - 1:
                    # A trailing 'u' has no stored length; it simply runs to
                    # the end of the buffer.
                    size = len(s)
                else:
                    # Note: 'U' is used internally, and may be exposed to us.
                    # It indicates that the size is always stored unless there
                    # is a size in the format.
                    size, s = unpack_int(s)
            result.append(s[:size])
            if terminated:
                size += 1
            s = s[size:]
        elif f in 't':
            # bit type, size is number of bits; always stored in one byte
            if not havesize:
                size = 1
            result.append(ord(s[0:1]))
            s = s[1:]
        elif f in 'Bb':
            # byte type, one byte per value
            if not havesize:
                size = 1
            for _ in xrange(size):
                v = ord(s[0:1])
                if f != 'B':
                    # Signed bytes are stored offset by 0x80.
                    v -= 0x80
                result.append(v)
                s = s[1:]
        else:
            # integral type, each value decoded by unpack_int
            if not havesize:
                size = 1
            for _ in xrange(size):
                v, s = unpack_int(s)
                result.append(v)
    return result
def __pack_iter_fmt(fmt, values):
    """Pair each conversion in ``fmt`` with the value it should encode.

    Yields ``(offset, havesize, size, char, value)`` tuples built on top of
    ``__unpack_iter_fmt``:

    * ``'x'`` (padding) consumes no value and is yielded with ``None``;
    * the conversions in ``'SsUut'`` consume one value each and keep the
      size parsed from the format;
    * every other conversion is treated as an integral type: a size prefix
      is a repeat count, so one tuple with ``size`` 1 is yielded per value.
    """
    cursor = 0
    for offset, havesize, size, char in __unpack_iter_fmt(fmt):
        if char == 'x':
            # padding, no value
            yield offset, havesize, size, char, None
            continue
        if char in 'SsUut':
            yield offset, havesize, size, char, values[cursor]
            cursor += 1
            continue
        # integral type: expand the repeat count into individual items
        repeat = 1
        if havesize:
            repeat = size
        stop = cursor + repeat
        while cursor < stop:
            yield offset, havesize, 1, char, values[cursor]
            cursor += 1
def pack(fmt, *values):
    """Encode ``values`` according to ``fmt`` and return the packed string.

    ``fmt`` may start with a type character (see ``__get_type``); only the
    variable-length type ``'.'`` is supported.  An empty format packs to
    the empty string.

    Raises ``ValueError`` if the format asks for a type other than ``'.'``,
    if a ``'t'`` conversion has more than 8 bits or a value that does not
    fit in its bit count, or if a ``'B'``/``'b'`` value does not fit in a
    single byte.
    """
    tfmt, fmt = __get_type(fmt)
    if not fmt:
        # Keep the return type consistent: the packed form of nothing is an
        # empty string, not a tuple.
        return ''
    if tfmt != '.':
        raise ValueError('Only variable-length encoding is currently supported')
    # Plain concatenation is kept on purpose so that non-str values (for
    # example unicode passed to 'u') combine exactly as they always have.
    result = ''
    for offset, havesize, size, f, val in __pack_iter_fmt(fmt, values):
        if f == 'x':
            # Padding: emit NUL bytes, no value is consumed.
            if not havesize:
                result += '\0'
            else:
                result += '\0' * size
        elif f in 'SsUu':
            # An 'S' value stops at its first embedded NUL.
            if f == 'S' and '\0' in val:
                l = val.find('\0')
            else:
                l = len(val)
            if havesize:
                # Explicit size: truncate here, pad further down.
                if l > size:
                    l = size
            elif f == 's':
                havesize = size = 1
            elif (f == 'u' and offset != len(fmt) - 1) or f == 'U':
                # Unsized byte arrays store their length first, except for
                # a 'u' in the last position of the format.
                result += pack_int(l)
            if isinstance(val, unicode) and f in 'Ss':
                result += str(val[:l])
            else:
                result += val[:l]
            if f == 'S' and not havesize:
                result += '\0'
            elif size > l:
                result += '\0' * (size - l)
        elif f in 't':
            # bit type, size is number of bits
            if not havesize:
                size = 1
            if size > 8:
                raise ValueError("bit count cannot be greater than 8 for 't' encoding")
            mask = (1 << size) - 1
            if (mask & val) != val:
                raise ValueError("value out of range for 't' encoding")
            result += chr(val)
        elif f in 'Bb':
            # byte type: __pack_iter_fmt has already expanded any repeat
            # count, so exactly one value is encoded here.
            if f == 'B':
                v = val
            else:
                # Translate to maintain ordering with the sign bit.
                v = val + 0x80
            if v > 255 or v < 0:
                raise ValueError("value out of range for '%s' encoding" % f)
            result += chr(v)
        else:
            # integral type
            result += pack_int(val)
    return result