many changes
* fix bugs in traceConv scripts and improve usability
* add lcs_reader.py
* change block trace to use LBN as object_id
1a1a11a committed Dec 16, 2024
1 parent 082b3b0 commit 1deb8c3
Showing 10 changed files with 809 additions and 214 deletions.
189 changes: 133 additions & 56 deletions scripts/lcs_reader.py
@@ -74,27 +74,64 @@


# typedef struct __attribute__((packed)) lcs_req_v3 {
-# int64_t clock_time;
# uint32_t clock_time;
# uint64_t obj_id;
# int64_t obj_size;
# uint32_t op : 8;
# uint32_t tenant : 24;
# int32_t ttl;
# int64_t next_access_vtime;
# } lcs_req_v3_t;


import struct

from utils.const import OP_NAMES

LCS_FORMAT_NAME = [None] + [f"lcs_v{i}" for i in range(1, 9)]

LCS_FORMAT_STR = [
None,
"<IQIq",
"<IQIIq",
"<IQqIIq",
"<IQqIIqI",
"<IQqIIqII",
"<IQqIIqIIII",
"<IQqIIqIIIIIIII",
"<IQqIIq" + "I" * 16,
]
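# each format string mirrors the packed struct of its version: e.g., lcs_v3
# ("<IQqIIq") is uint32 clock_time, uint64 obj_id, int64 obj_size, one uint32
# packing op:8 and tenant:24, ttl (int32 in the struct, decoded as uint32),
# and int64 next_access_vtime; v4-v8 append 1, 2, 4, 8, or 16 uint32 features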

BASIC_INFO = [
"clock_time",
"obj_id",
"obj_size",
"ttl",
"op",
"tenant",
"next_access_vtime",
]


LCS_REQUEST_HEADER = [
None,
["clock_time", "obj_id", "obj_size", "next_access_vtime"],
["clock_time", "obj_id", "obj_size", "op", "tenant", "next_access_vtime"],
BASIC_INFO,
BASIC_INFO[:-1] + [f"feature_{i}" for i in range(1)] + BASIC_INFO[-1:],
BASIC_INFO[:-1] + [f"feature_{i}" for i in range(2)] + BASIC_INFO[-1:],
BASIC_INFO[:-1] + [f"feature_{i}" for i in range(4)] + BASIC_INFO[-1:],
BASIC_INFO[:-1] + [f"feature_{i}" for i in range(8)] + BASIC_INFO[-1:],
BASIC_INFO[:-1] + [f"feature_{i}" for i in range(16)] + BASIC_INFO[-1:],
]

LCS_HEADER_SIZE = 1024 * 8
LCS_TRACE_STAT_SIZE = 1000 * 8
LCS_START_MAGIC = 0x123456789ABCDEF0
LCS_END_MAGIC = 0x123456789ABCDEF0
N_MOST_COMMON = 16

N_MOST_COMMON_PRINT = 4

def parse_stat(b, print_stat=True):

# basic info
(
ver,
@@ -130,9 +167,34 @@ def parse_stat(b, print_stat=True):
)
skewness = struct.unpack("<d", b[544 : 544 + 8])[0]

# tenant
n_tenant = struct.unpack(
"<I", b[544 + 8 : 544 + 8 + 4]
)[0]
most_common_tenant = struct.unpack(
"<" + "I" * N_MOST_COMMON, b[556 : 556 + N_MOST_COMMON * 4]
)
most_common_tenant_ratio = struct.unpack(
"<" + "f" * N_MOST_COMMON, b[620 : 620 + N_MOST_COMMON * 4]
)

# ttl
n_ttl, smallest_ttl, largest_ttl = struct.unpack(
"<III", b[684: 684 + 12]
)
most_common_ttl = struct.unpack(
"<" + "I" * N_MOST_COMMON, b[696 : 696 + N_MOST_COMMON * 4]
)
most_common_ttl_ratio = struct.unpack(
"<" + "f" * N_MOST_COMMON, b[760 : 760 + N_MOST_COMMON * 4]
)



if print_stat:
print("####################### trace stat ########################")
print(
f"version: {ver}, n_req: {n_req}, n_obj: {n_obj}, n_req_byte: {n_req_byte}, n_obj_byte: {n_obj_byte}"
f"stat version: {ver}, n_req: {n_req}, n_obj: {n_obj}, n_req_byte: {n_req_byte}, n_obj_byte: {n_obj_byte}"
)
print(
f"start_ts: {start_ts}, end_ts: {end_ts}, duration: {(end_ts-start_ts)/86400:.2f} days, n_read: {n_read}, n_write: {n_write}, n_delete: {n_delete}"
Expand All @@ -141,22 +203,40 @@ def parse_stat(b, print_stat=True):
f"smallest_obj_size: {smallest_obj_size}, largest_obj_size: {largest_obj_size}"
)
print(f"most_common_obj_sizes: ", end="")
-for i in range(N_MOST_COMMON):
for i in range(N_MOST_COMMON_PRINT):
if most_common_obj_size_ratio[i] == 0:
break
print(
f"{most_common_obj_sizes[i]}({most_common_obj_size_ratio[i]:.4f}), ",
end="",
)
-print()
print("....")

print(f"highest_freq: {highest_freq}, skewness: {skewness:.4f}")
print(f"most_common_freq: ", end="")
-for i in range(N_MOST_COMMON):
for i in range(N_MOST_COMMON_PRINT):
if most_common_freq_ratio[i] == 0:
break
print(f"{most_common_freq[i]}({most_common_freq_ratio[i]:.4f}), ", end="")
-print()
print("....")

if n_tenant > 0:
print(f"n_tenant: {n_tenant}, most_common_tenant: ", end="")
for i in range(N_MOST_COMMON_PRINT):
if most_common_tenant_ratio[i] == 0:
break
print(f"{most_common_tenant[i]}({most_common_tenant_ratio[i]:.4f}), ", end="")
print("....")

if n_ttl > 1:
print(f"n_ttl: {n_ttl}, smallest_ttl: {smallest_ttl}, largest_ttl: {largest_ttl}")
print(f"most_common_ttl: ", end="")
for i in range(N_MOST_COMMON_PRINT):
if most_common_ttl_ratio[i] == 0:
break
print(f"{most_common_ttl[i]}({most_common_ttl_ratio[i]:.4f}), ", end="")
print("....")
print("###########################################################")


@@ -168,12 +248,13 @@ def read_header(ifile, print_stat=True):
if end_magic != LCS_END_MAGIC:
raise RuntimeError(f"Invalid trace file end magic {end_magic:016x}")

print("lcs format version:", version)
parse_stat(header[16:-176], print_stat=print_stat)

return version


-def read_trace(ifilepath, n_max_req=-1):
def print_trace(ifilepath, n_max_req=-1, print_stat=True, print_header=True):
if ifilepath.endswith(".zst"):
import zstandard as zstd

@@ -183,65 +264,61 @@ def read_trace(ifilepath, n_max_req=-1):
ifile = open(ifilepath, "rb")
reader = ifile

-version = read_header(reader)
-s = [
-    struct.Struct("<IQIq"),
-    struct.Struct("<IQIIq"),
-    struct.Struct("<qQqIq"),
-][version - 1]
version = read_header(reader, print_stat)
s = struct.Struct(LCS_FORMAT_STR[version])

-n_req = 0
if print_header:
print(",".join(LCS_REQUEST_HEADER[version]))

n_req = 0
while True:
b = reader.read(s.size)
if not b:
break
req = s.unpack(b)
-print(req)
ts, obj, size = req[:3]
print(f"{ts},{obj},{size}", end="")
if version == 1:
    # v1 ("<IQIq") carries no op/tenant/ttl; req[3] is next_access_vtime
    next_access_vtime = req[3]
elif version == 2:
op = OP_NAMES[req[3] & 0xFF]
tenant = (req[3]>>8) & 0xFFFFFF
next_access_vtime = req[4]
print(f",{op},{tenant}", end="")
elif version >= 3:
op = OP_NAMES[req[3] & 0xFF]
tenant = (req[3]>>8) & 0xFFFFFF
ttl = req[4]
next_access_vtime = req[5]
features = req[6:]
print(f",{ttl},{op},{tenant}", end="")
for f in features:
print(f",{f}", end="")

print(f",{next_access_vtime}")

n_req += 1
if n_max_req > 0 and n_req >= n_max_req:
break

reader.close()


def test_block_trace(ifilepath):
if ifilepath.endswith(".zst"):
import zstandard as zstd

decompressor = zstd.ZstdDecompressor()
reader = decompressor.stream_reader(open(ifilepath, "rb"))
else:
ifile = open(ifilepath, "rb")
reader = ifile

version = read_header(reader, print_stat=False)
# use the shared format table so every lcs version is handled
s = struct.Struct(LCS_FORMAT_STR[version])

while True:
b = reader.read(s.size)
if not b:
break
req = s.unpack(b)
if req[1] % 4096 != 0:
raise RuntimeError(
f"lba is not multiple of block size {req[1]} % 4096 = {req[1] % 4096} {ifilepath}"
)

reader.close()
print(f"LBA test passed {ifilepath}")


if __name__ == "__main__":
import sys

-if len(sys.argv) < 2:
-    print(f"Usage: {sys.argv[0]} /path/trace [n_req]")
-    sys.exit(1)
from argparse import ArgumentParser

p = ArgumentParser()
p.add_argument("trace", help="trace file path")
p.add_argument(
"-n",
type=int,
help="number of requests to read",
required=False,
default=-1,
)
p.add_argument("--print-stat", action="store_true", help="print stat", default=True)
p.add_argument(
"--print-header", action="store_true", help="print header", default=True
)
args = p.parse_args()

-test_block_trace(sys.argv[1])
-# read_trace(sys.argv[1], int(sys.argv[2]) if len(sys.argv) > 2 else -1)
# test_block_trace(sys.argv[1])
print_trace(args.trace, args.n, args.print_stat, args.print_header)
27 changes: 27 additions & 0 deletions scripts/traceConv/README.md
@@ -0,0 +1,27 @@

## What is this about
This folder contains the scripts that convert the original traces and produce an [lcs](https://github.com/1a1a11a/libCacheSim/blob/develop/libCacheSim/traceReader/customizedReader/lcs.h) trace.

The conversion process has two steps:
1. Pre-process the original trace and normalize it into a human-readable csv file
2. Run `traceConv` to convert the csv trace to the lcs format
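
For reference, the normalized csv from step 1 looks like the lines below (illustrative values; the `op`, `tenant`, and `ttl` columns are optional):

```
1717171200,obj1,4096,get,0,86400
1717171201,obj2,8192,set,1,3600
```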


The pre-processing is the major part of the scripts and performs the following functions:
* converts traces of any format into csv files with a standard schema, i.e., "timestamp, id, size, op, tenant, ttl", where `op`, `tenant`, and `ttl` are optional
* changes the timestamp to use seconds as the time unit
* for key-value traces
  * it expands a line into multiple requests if the trace has an `op_cnt` field
  * it backfills the object size of cache misses from a later set request, because a cache miss has size zero; if an object is never SET in the trace, we filter out its size-zero requests (see the first sketch after this list)
  * it backfills the ttl from a later set request (we use the last object size to avoid problems when an object's size changes)
  * it provides a sampling function to produce sampled lcs traces
* for block traces
  * it splits a large request that spans multiple blocks into 4K block requests (see the second sketch after this list)
  * it uses the logical block address (LBA) as the object id, aligns the LBA to 4K blocks, and uses bytes as the request size. Note that some traces use the logical block number (LBN) as the id, which we convert to an LBA by multiplying by BLOCK_SIZE, and some use the number of sectors as the request size
  * it maps the same LBA from different volumes to distinct LBAs by adding vol_id * 100 TiB
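
The two sketches below illustrate the key-value backfill and the block-trace normalization rules described above. They are minimal illustrations, not the actual `traceConv` code; the helper names (`backfill_kv`, `normalize_block_req`, `VOL_OFFSET`) and the 512-byte sector size are assumptions made for the example.

```python
def backfill_kv(requests):
    """Backfill miss sizes and ttl from the last set request.

    requests: list of dicts with keys ts, id, size, op, ttl.
    """
    last_set = {}
    for r in requests:
        if r["op"] == "set":
            last_set[r["id"]] = (r["size"], r["ttl"])
    out = []
    for r in requests:
        if r["size"] == 0:
            if r["id"] not in last_set:
                continue  # the object is never SET: filter out the size-zero request
            r["size"], r["ttl"] = last_set[r["id"]]
        out.append(r)
    return out


BLOCK_SIZE = 4096
VOL_OFFSET = 100 * 1024**4  # 100 TiB keeps LBA ranges of different volumes disjoint


def normalize_block_req(ts, addr, size, vol_id, id_is_lbn=False, size_in_sectors=False):
    """Yield one (timestamp, object_id, size) request per 4K block."""
    lba = addr * BLOCK_SIZE if id_is_lbn else addr    # LBN -> LBA
    lba -= lba % BLOCK_SIZE                           # align to a 4K boundary
    n_byte = size * 512 if size_in_sectors else size  # sectors -> bytes (assuming 512B sectors)
    lba += vol_id * VOL_OFFSET                        # separate volumes
    for offset in range(0, n_byte, BLOCK_SIZE):
        yield ts, lba + offset, BLOCK_SIZE
```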


To print a trace, you can use `bin/tracePrint` from libCacheSim or `scripts/lcs_reader.py`.
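
For example, with the reader in this repository (`-n` caps the number of requests printed; `.zst`-compressed traces are read directly):

```
python3 scripts/lcs_reader.py /path/to/trace -n 10
```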



