Files
mongo/buildscripts/gdb/wt_dump_table.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

232 lines
6.8 KiB
Python
Raw Normal View History

import os
import sys
from pathlib import Path
from pprint import pprint
import bson
import gdb
if not gdb:
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo import lookup_type
DEBUGGING = False
"""
Public API to be called by users. The input `ident` is a string of the form:
'collection-2--4547167393143767234'.
From within gdb type:
python dump_pages_for_table('collection-2--4547167393143767234')
Some behaviors/limitations:
* Disk images of data are not deserialized into their separate key/value pairs.
* If update chain WT_UPDATEs are valid bson, the values will be parsed and output as BSON maps.
* If updates are not bson (e.g: index entries), they will be output as a raw byte array.
* WT_UPDATE structures have a pretty printer registered. Disabling pretty printers will result in
more raw output.
* Any `file:*.wt` can be output, e.g: `_mdb_catalog` or `WiredTiger`. Though the output may be less
supported/of lower quality.
"""
def dump_pages_for_table(ident):
    """Dump the pages of the table whose data handle is named `file:<ident>.wt`.

    The connection is located by evaluating `session->iface->connection` in
    the currently selected gdb frame, so a `session` variable must be visible
    there. When a prerequisite is missing (the WT_CONNECTION_IMPL type, a
    usable session, or the data handle itself) a hint is printed and the
    function returns without dumping anything.

    Args:
        ident: table ident without the `file:` prefix and `.wt` suffix,
            e.g. 'collection-2--4547167393143767234'.
    """
    impl_type = lookup_type("WT_CONNECTION_IMPL")
    if not impl_type:
        print(
            "WT_CONNECTION_IMPL type not found. Try invoking this function from a different "
            "thread and frame."
        )
        return

    impl_ptr_type = impl_type.pointer()
    dbg("impl", impl_ptr_type)

    # The expression only resolves when the selected frame has a `session`
    # variable in scope; a gdb.error is treated the same as "not found".
    try:
        session_conn = gdb.parse_and_eval("session->iface->connection")
    except gdb.error:
        session_conn = None

    if not session_conn or not session_conn.address:
        print(
            "Failed to find a suitable `WT_SESSION session` object to extract a connection object "
            "from. Try finding an eviction thread and frame, e.g: `__wt_evict_thread_run`. If the session is "
            "optimized out, try going up stack frames until the variable is in a local scope rather than a "
            "function input."
        )
        return

    conn = session_conn.reinterpret_cast(impl_ptr_type).dereference()
    dbg("conn", conn)

    wanted_name = "file:{}.wt".format(ident)
    handle, known_idents = get_data_handle(conn, wanted_name)
    if handle:
        dump_handle(handle)
        return

    # No match: show every file ident that was seen so the caller can pick one.
    print("Data handle not found for ident. Ident: `{}`".format(ident))
    print("All known data handles:")
    pprint(known_idents)
# Private API.
def dbg(ident, var):
    """Print diagnostic details about `var` when the DEBUGGING flag is set.

    Does nothing unless the module-level DEBUGGING flag is truthy. Otherwise
    prints a separator, a heading (including the type and address for a
    gdb.Value), the Python type of `var`, every attribute name that does not
    start with a double underscore and, for a gdb.Value, its string form
    indented with tabs.

    Args:
        ident: label printed in the heading.
        var: the object to describe; a gdb.Value gets the extended output.
    """
    if not DEBUGGING:
        return

    # Exact type match (not isinstance), evaluated once for both sections.
    is_gdb_value = type(var) == gdb.Value

    print("----------")
    if is_gdb_value:
        print("{}: ({}*){}".format(ident, var.type, var.address))
    else:
        print(ident)

    print(" " + str(type(var)))
    for attr_name in dir(var):
        if attr_name.startswith("__"):
            continue
        print(" " + attr_name)

    if is_gdb_value:
        print("\n Fields:")
        rendered_lines = str(var).split("\n")
        print("\t" + "\n\t".join(rendered_lines))
def walk_wt_list(lst):
    """Collect every element of a linked list into a Python list.

    Starts at `lst["tqh_first"]` and follows each node's `q.tqe_next` pointer
    until a null pointer is reached.

    Args:
        lst: gdb.Value for the list head (must expose a `tqh_first` field).

    Returns:
        A list of dereferenced nodes, in traversal order.
    """
    cursor = lst["tqh_first"]
    dbg("node", cursor)

    elements = []
    while cursor:
        elements.append(cursor.dereference())
        cursor = cursor["q"]["tqe_next"]
    return elements
def get_data_handle(conn, handle_name):
    """Look up a data handle by name in the connection's handle list.

    Args:
        conn: gdb.Value for the connection; its `dhqh` list is walked.
        handle_name: full handle name to match, e.g. `file:<ident>.wt`.

    Returns:
        A `(handle, idents)` tuple. `handle` is the last list entry whose name
        equals `handle_name`, or None if there is none. `idents` holds, for
        every handle whose name starts with `file:`, that name with its first
        five and last three characters removed.
    """
    handle_queue = conn["dhqh"]
    dbg("datahandles", handle_queue)

    found = None
    file_idents = []
    for dhandle in walk_wt_list(handle_queue):
        name = dhandle["name"].string()
        if name.startswith("file:"):
            # Drop the leading "file:" and the trailing three characters
            # (presumably the ".wt" suffix).
            file_idents.append(name[5:-3])
        if name == handle_name:
            # No early exit: a later entry with the same name overrides.
            found = dhandle
    return found, file_idents
def get_btree_handle(dhandle):
    """Return the `handle` field of a data handle viewed as a WT_BTREE.

    Args:
        dhandle: gdb.Value for a data handle exposing a `handle` field.

    Returns:
        The gdb.Value obtained by casting `dhandle["handle"]` to a WT_BTREE
        pointer and dereferencing it.
    """
    btree_ptr_type = lookup_type("WT_BTREE").pointer()
    opaque_handle = dhandle["handle"]
    return opaque_handle.reinterpret_cast(btree_ptr_type).dereference()
def dump_update_chain(update_chain):
    """Print every update in a chain, decoding BSON payloads when possible.

    Follows `next` pointers from `update_chain` until a null pointer is
    reached. Each update is printed as its gdb string form followed by the
    decoded BSON document, or `None` when the payload was not decoded.

    Args:
        update_chain: gdb.Value pointer to the first update; may be null.
    """
    # Only updates whose `type` field equals 3 are attempted as BSON
    # (presumably WT_UPDATE_STANDARD -- verify against WiredTiger's btmem.h).
    bson_candidate_type = 3

    while update_chain:
        dbg("update", update_chain)
        wt_val = update_chain.dereference()
        dbg("wt_val", wt_val)

        obj = None
        if wt_val["type"] == bson_candidate_type:
            # Read the payload only when it will be decoded, so a failing
            # read on an update of another type cannot abort the dump.
            val_bytes = gdb.selected_inferior().read_memory(wt_val["data"], wt_val["size"])
            try:
                obj = bson.decode_all(val_bytes)[0]
            except Exception:
                # Deliberate best effort: a payload that is not valid BSON
                # (e.g. an index entry) is shown as None. Unlike a bare
                # `except:`, this lets KeyboardInterrupt/SystemExit through
                # so a long dump can still be interrupted.
                pass

        print(" " + "\n ".join(str(wt_val).split("\n")) + " " + str(obj) + " =>")
        update_chain = update_chain["next"]

    print(" λ (End of update chain)")
def dump_insert_list(wt_insert):
    """Print the key of one insert entry followed by its update chain.

    The key bytes are read from the inferior at the entry's own address plus
    `u.key.offset`, for `u.key.size` bytes.

    Args:
        wt_insert: dereferenced gdb.Value for the insert entry.
    """
    key_info = wt_insert["u"]["key"]
    # The key offset is relative to the start of the insert entry itself.
    key_address = int(wt_insert.address) + key_info["offset"]
    raw_key = gdb.selected_inferior().read_memory(key_address, key_info["size"])

    print("Key: " + str(raw_key.tobytes()))
    print("Value:")
    dump_update_chain(wt_insert["upd"])
def dump_skip_list(wt_insert_head):
    """Dump every insert entry reachable from slot 0 of an insert head.

    Traversal starts at `head[0]` and advances through each entry's
    `next[0]` pointer until a null pointer is reached. Nothing is printed
    when the address of the `head` field is null.

    Args:
        wt_insert_head: dereferenced gdb.Value exposing a `head` array.
    """
    heads = wt_insert_head["head"]
    if not heads.address:
        return

    position = 0
    current = heads[0]
    while current:
        entry = current.dereference()
        dump_insert_list(entry)
        dbg("insert" + str(position), entry)
        position += 1
        current = current["next"][0]
def dump_modified(leaf_page):
    """Print the insert list and update list hanging off a page's `modify`.

    Prints "No modifies" and returns when the page's `modify` pointer is
    null. Otherwise the `u2.row_leaf.insert` and `u2.row_leaf.update` fields
    are each dumped, or reported as absent when null.

    Args:
        leaf_page: dereferenced gdb.Value for the page.
    """
    print("Modify:")
    modify_ptr = leaf_page["modify"]
    if not modify_ptr:
        print("No modifies")
        return

    modify = modify_ptr.dereference()
    dbg("modify", modify)
    row_leaf = modify["u2"]["row_leaf"]

    inserts = row_leaf["insert"]
    dbg("row store", inserts)
    if inserts:
        print("Insert list:")
        # Two dereferences: only the first slot that `insert` points at is
        # dumped.
        dump_skip_list(inserts.dereference().dereference())
    else:
        print("No insert list")

    updates = row_leaf["update"]
    if updates:
        print("Update list:")
        # One update chain per page entry.
        entry_count = int(leaf_page["entries"])
        for slot in range(entry_count):
            dump_update_chain(updates[slot])
    else:
        print("No update list")
def dump_disk(leaf_page):
    """Print the raw bytes of a page's disk image, excluding its headers.

    Prints "No page loaded from disk." and returns when the page's `dsk`
    pointer is null. Otherwise the bytes following the page and block headers
    are read from the inferior and printed as a Python bytes literal.

    Args:
        leaf_page: dereferenced gdb.Value for the in-memory page.
    """
    dbg("in-memory page:", leaf_page)
    dsk = leaf_page["dsk"].dereference()
    if int(dsk.address) == 0:
        print("No page loaded from disk.")
        return
    dbg("on-disk page:", dsk)

    wt_page_header_size = 28
    wt_block_header_size = 12
    header_size = wt_page_header_size + wt_block_header_size

    # The read starts `header_size` bytes into the image, so the length must
    # shrink by the same amount; using the full `mem_size` read 40 bytes past
    # the end of the image. This relies on `mem_size` counting the headers
    # (as WiredTiger's WT_PAGE_HEADER documents) -- confirm against the
    # WiredTiger version being debugged.
    payload_size = max(int(dsk["mem_size"]) - header_size, 0)
    if payload_size:
        page_bytes = (
            gdb.selected_inferior()
            .read_memory(int(dsk.address) + header_size, payload_size)
            .tobytes()
        )
    else:
        # Nothing beyond the headers; avoid asking gdb for a zero-length read.
        page_bytes = b""
    print("Dsk:\n" + str(page_bytes))
def dump_handle(dhandle):
    """Dump the disk image and modifications of each child of the root page.

    Resolves the btree behind `dhandle`, reads the root page's
    `u.intl.__index` and, for each of its `entries` children, calls
    `dump_disk` and `dump_modified` on the child's page. Every child is
    handled as if it were a leaf page; deeper trees are not descended.

    Args:
        dhandle: dereferenced gdb.Value for the data handle to dump.
    """
    print("Dumping: " + dhandle["name"].string())
    btree = get_btree_handle(dhandle)
    root = btree["root"]
    root_page = root["page"].dereference()
    dbg("btree", btree)
    dbg("root", btree["root"])
    dbg("root page", root_page)

    rpindex = root_page["u"]["intl"]["__index"].dereference()
    child_count = int(rpindex["entries"])
    for idx in range(child_count):
        dbg("rpindex", rpindex)
        dbg("rp-pre-index", rpindex["index"].dereference().dereference())

        page_ptr = rpindex["index"][idx].dereference()["page"]
        if not page_ptr:
            # A null page pointer (presumably a child that is not resident in
            # the cache) cannot be dereferenced; doing so raised a gdb memory
            # error and aborted the dump of all remaining children.
            print("Child {} has no in-memory page; skipping.".format(idx))
            continue

        leaf_page = page_ptr.dereference()
        dbg("leaf", leaf_page)
        dump_disk(leaf_page)
        dump_modified(leaf_page)