Skip to content

Commit

Permalink
update reader
Browse files Browse the repository at this point in the history
  • Loading branch information
1a1a11a committed Dec 14, 2024
1 parent 8be32b5 commit 7ae1fac
Show file tree
Hide file tree
Showing 15 changed files with 259 additions and 555 deletions.
2 changes: 1 addition & 1 deletion libCacheSim/include/libCacheSim/prefetchAlgo/PG.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <glib.h>

#include "../../../dataStructure/pqueue.h"
#include "../../../traceReader/generalReader/readerInternal.h"
#include "../../../traceReader/readerInternal.h"
#include "../cache.h"

#ifdef __cplusplus
Expand Down
2 changes: 2 additions & 0 deletions libCacheSim/include/libCacheSim/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ typedef struct {
int32_t op_field;
int32_t ttl_field;
int32_t cnt_field;
int32_t tenant_field;
int32_t next_access_vtime_field;

// block cache, 0 and -1 means ignore this field, 1 is also invalid
Expand Down Expand Up @@ -158,6 +159,7 @@ static inline void set_default_reader_init_params(reader_init_param_t *params) {
params->obj_size_field = 0;
params->op_field = 0;
params->ttl_field = 0;
params->tenant_field = 0;
params->next_access_vtime_field = 0;

params->has_header = false;
Expand Down
2 changes: 1 addition & 1 deletion libCacheSim/traceReader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set(source
generalReader/csv.c
generalReader/txt.c
generalReader/libcsv.c
generalReader/lcs.c
customizedReader/lcs.c
reader.c
sampling/spatial.c
sampling/temporal.c
Expand Down
178 changes: 178 additions & 0 deletions libCacheSim/traceReader/customizedReader/lcs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@

#include "lcs.h"

#include <assert.h>
#include <stdio.h>

#include "../customizedReader/binaryUtils.h"
#include "../readerInternal.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Check whether an LCS trace header describes a trace this reader accepts.
 *
 * @param header  header read from the beginning of the trace file; may be NULL
 * @return true when the header is present, both magic numbers match, the
 *         version does not exceed MAX_LCS_VERSION and the request/object
 *         counts are non-negative; false otherwise (the reason is reported
 *         through ERROR)
 */
static bool verify(lcs_trace_header_t *header) {
  if (header == NULL) {
    ERROR("invalid trace file, cannot read the lcs header\n");
    return false;
  }

  if (header->start_magic != LCS_TRACE_START_MAGIC) {
    ERROR("invalid trace file, start magic is wrong 0x%llx\n", (unsigned long long)header->start_magic);
    return false;
  }

  if (header->end_magic != LCS_TRACE_END_MAGIC) {
    ERROR("invalid trace file, end magic is wrong 0x%llx\n", (unsigned long long)header->end_magic);
    return false;
  }

  if (header->version > MAX_LCS_VERSION) {
    ERROR("invalid trace file, lcs version %llu is not supported\n", (unsigned long long)header->version);
    return false;
  }

  lcs_trace_stat_t *stat = &(header->stat);
  if (stat->n_req < 0 || stat->n_obj < 0) {
    // the counts are signed: print them as signed so the offending negative
    // value is visible instead of a huge unsigned number
    ERROR("invalid trace file, n_req %lld, n_obj %lld\n", (long long)stat->n_req, (long long)stat->n_obj);
    return false;
  }

  return true;
}

/**
 * Configure `reader` for an LCS trace by parsing the header that
 * read_bytes() returns for the next sizeof(lcs_trace_header_t) bytes.
 *
 * Sets the trace type/format, the offset at which requests start, and the
 * per-request record size that matches the header's version.
 *
 * @param reader  reader to configure
 * @return 0 on success; the process exits with status 1 when the header
 *         cannot be read, fails verification, or carries a version this
 *         function has no record layout for
 */
int lcsReader_setup(reader_t *reader) {
  char *data = read_bytes(reader, sizeof(lcs_trace_header_t));
  if (data == NULL) {
    // read_bytes() returns NULL when the bytes are not available (e.g. the
    // file is shorter than the header); do not dereference it
    ERROR("invalid trace file, cannot read the lcs header\n");
    exit(1);
  }
  lcs_trace_header_t *header = (lcs_trace_header_t *)data;

  if (!verify(header)) {
    exit(1);
  }

  reader->lcs_ver = header->version;
  reader->trace_type = LCS_TRACE;
  reader->trace_format = BINARY_TRACE_FORMAT;
  reader->trace_start_offset = sizeof(lcs_trace_header_t);
  reader->obj_id_is_num = true;

  // each version has its own fixed-size on-disk request record
  if (reader->lcs_ver == 1) {
    reader->item_size = sizeof(lcs_req_v1_t);
  } else if (reader->lcs_ver == 2) {
    reader->item_size = sizeof(lcs_req_v2_t);
  } else if (reader->lcs_ver == 3) {
    reader->item_size = sizeof(lcs_req_v3_t);
  } else {
    ERROR("invalid lcs version %lu\n", (unsigned long)reader->lcs_ver);
    exit(1);
  }

  return 0;
}

/**
 * Read the next request from an LCS trace into `req`.
 *
 * Records whose object size is 0 are skipped when the reader has
 * ignore_size_zero_req set and is reading forward. Skipping is done in a
 * loop rather than by recursion so that a long run of zero-size records
 * cannot exhaust the stack.
 *
 * @param reader  reader previously configured for an LCS trace
 * @param req     request to fill in
 * @return 0 on success; 1 when no further record can be read (req->valid is
 *         set to FALSE) or when reader->lcs_ver is not a known version
 */
int lcs_read_one_req(reader_t *reader, request_t *req) {
  for (;;) {
    char *record = read_bytes(reader, reader->item_size);

    if (record == NULL) {
      req->valid = FALSE;
      return 1;
    }

    if (reader->lcs_ver == 1) {
      // v1 records carry neither tenant nor op
      lcs_req_v1_t *req_v1 = (lcs_req_v1_t *)record;
      req->clock_time = req_v1->clock_time;
      req->obj_id = req_v1->obj_id;
      req->obj_size = req_v1->obj_size;
      req->next_access_vtime = req_v1->next_access_vtime;
    } else if (reader->lcs_ver == 2) {
      lcs_req_v2_t *req_v2 = (lcs_req_v2_t *)record;
      req->clock_time = req_v2->clock_time;
      req->obj_id = req_v2->obj_id;
      req->obj_size = req_v2->obj_size;
      req->next_access_vtime = req_v2->next_access_vtime;
      req->tenant_id = req_v2->tenant;
      req->op = req_v2->op;
    } else if (reader->lcs_ver == 3) {
      lcs_req_v3_t *req_v3 = (lcs_req_v3_t *)record;
      req->clock_time = req_v3->clock_time;
      req->obj_id = req_v3->obj_id;
      req->obj_size = req_v3->obj_size;
      req->next_access_vtime = req_v3->next_access_vtime;
      req->tenant_id = req_v3->tenant;
      req->op = req_v3->op;
    } else {
      ERROR("invalid lcs version %lu\n", (unsigned long)reader->lcs_ver);
      return 1;
    }

    // -1 and INT64_MAX are both normalised to MAX_REUSE_DISTANCE
    if (req->next_access_vtime == -1 || req->next_access_vtime == INT64_MAX) {
      req->next_access_vtime = MAX_REUSE_DISTANCE;
    }

    if (req->obj_size == 0 && reader->ignore_size_zero_req && reader->read_direction == READ_FORWARD) {
      continue;  // skip this record and read the next one
    }
    return 0;
  }
}

/**
 * Print the statistics stored in the header of an LCS trace to stdout.
 *
 * The header is re-read through a clone of `reader` positioned at offset 0;
 * the clone is closed before returning. Read/write/delete counts, tenant
 * information and TTL information are printed only when the header reports
 * them (n_read > 0, n_tenant > 1, n_ttl > 1 respectively).
 *
 * @param reader  reader opened on an LCS trace
 */
void lcs_print_trace_stat(reader_t *reader) {
  // the header lives at the start of the file, so read it through a clone
  reader_t *cloned_reader = clone_reader(reader);
#ifdef SUPPORT_ZSTD_TRACE
  // NOTE(review): this rewinds the ORIGINAL reader's zstd input file, not the
  // clone's — confirm that this is intended
  if (reader->is_zstd_file) {
    fseek(reader->zstd_reader_p->ifile, 0, SEEK_SET);
  }
#endif
  cloned_reader->mmap_offset = 0;

  char *data = read_bytes(cloned_reader, sizeof(lcs_trace_header_t));
  if (data == NULL) {
    // read_bytes() returns NULL when the bytes are not available
    ERROR("cannot read the lcs trace header\n");
    close_reader(cloned_reader);
    return;
  }
  lcs_trace_header_t *header = (lcs_trace_header_t *)data;
  lcs_trace_stat_t *stat = &(header->stat);

  printf("trace stat: n_req %lld, n_obj %lld, n_byte %lld (%.2lf GiB), n_uniq_byte %lld (%.2lf GiB)\n",
         (long long)stat->n_req, (long long)stat->n_obj, (long long)stat->n_req_byte, (double)stat->n_req_byte / GiB,
         (long long)stat->n_obj_byte, (double)stat->n_obj_byte / GiB);

  if (stat->n_read > 0) {
    printf("n_read %lld, n_write %lld, n_delete %lld\n", (long long)stat->n_read, (long long)stat->n_write,
           (long long)stat->n_delete);
  }
  printf("start time %lld, end time %lld, duration %lld seconds %.2lf days\n", (long long)stat->start_timestamp,
         (long long)stat->end_timestamp, (long long)(stat->end_timestamp - stat->start_timestamp),
         (double)(stat->end_timestamp - stat->start_timestamp) / (24 * 3600.0));

  printf("object size: smallest %lld, largest %lld\n", (long long)stat->smallest_obj_size,
         (long long)stat->largest_obj_size);
  // 64-bit values are cast to long long and printed with %lld so the
  // specifier matches the argument on every platform
  printf("most common object sizes (req fraction): %lld(%.4lf) %lld(%.4lf) %lld(%.4lf) %lld(%.4lf)...\n",
         (long long)stat->most_common_obj_sizes[0], stat->most_common_obj_size_ratio[0],
         (long long)stat->most_common_obj_sizes[1], stat->most_common_obj_size_ratio[1],
         (long long)stat->most_common_obj_sizes[2], stat->most_common_obj_size_ratio[2],
         (long long)stat->most_common_obj_sizes[3], stat->most_common_obj_size_ratio[3]);

  printf("highest freq: %lld %lld %lld %lld skewness %.4lf\n", (long long)stat->highest_freq[0],
         (long long)stat->highest_freq[1], (long long)stat->highest_freq[2], (long long)stat->highest_freq[3],
         stat->skewness);
  printf("most common freq (req fraction): %d(%.4lf) %d(%.4lf) %d(%.4lf) %d(%.4lf)...\n", stat->most_common_freq[0],
         stat->most_common_freq_ratio[0], stat->most_common_freq[1], stat->most_common_freq_ratio[1],
         stat->most_common_freq[2], stat->most_common_freq_ratio[2], stat->most_common_freq[3],
         stat->most_common_freq_ratio[3]);

  if (stat->n_tenant > 1) {
    printf("#tenant: %ld\n", (long)stat->n_tenant);
    printf("most common tenants (req fraction): %d(%.4lf) %d(%.4lf) %d(%.4lf) %d(%.4lf)...\n",
           stat->most_common_tenants[0], stat->most_common_tenant_ratio[0], stat->most_common_tenants[1],
           stat->most_common_tenant_ratio[1], stat->most_common_tenants[2], stat->most_common_tenant_ratio[2],
           stat->most_common_tenants[3], stat->most_common_tenant_ratio[3]);
  }

  if (stat->n_ttl > 1) {
    printf("#ttl: %ld\n", (long)stat->n_ttl);
    printf("smallest ttl: %ld, largest ttl: %ld\n", (long)stat->smallest_ttl, (long)stat->largest_ttl);
    printf("most common ttls (req fraction): %d(%.4lf) %d(%.4lf) %d(%.4lf) %d(%.4lf)...\n", stat->most_common_ttls[0],
           stat->most_common_ttl_ratio[0], stat->most_common_ttls[1], stat->most_common_ttl_ratio[1],
           stat->most_common_ttls[2], stat->most_common_ttl_ratio[2], stat->most_common_ttls[3],
           stat->most_common_ttl_ratio[3]);
  }

  close_reader(cloned_reader);
}

#ifdef __cplusplus
}
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -26,51 +26,68 @@ extern "C" {
#define LCS_TRACE_END_MAGIC 0x123456789abcdef0

// highest lcs request-format version the reader accepts; the header check
// rejects anything larger, so this must cover every lcs_req_vN layout the
// reader implements (v1, v2 and v3)
#define MAX_LCS_VERSION 3
#define N_MOST_COMMON 16

/******************************************************************************/
/** lcs trace stat header (512 bytes) **/
/** lcs trace stat header (1000 * 8 bytes) **/
/** this stores information of the trace as part of the lcs trace header **/
/** note that some fields that are added later will have 0 value **/
/** if the trace was generated before the format update **/
/** so we should avoid using 0 as the default value **/
/******************************************************************************/
typedef struct lcs_trace_stat {
int64_t version;

/**** v1 ****/
int64_t version; // version of the stat
int64_t n_req; // number of requests
int64_t n_obj; // number of objects
int64_t n_req_byte; // number of bytes requested
int64_t n_obj_byte; // number of bytes of objects
int64_t n_obj_byte; // number of unique bytes

int64_t start_timestamp; // in seconds
int64_t end_timestamp; // in seconds

int64_t n_read; // number of read requests
int64_t n_write; // number of write requests
int64_t n_delete; // number of delete requests

// 10 * 8 bytes so far

// object size
int64_t smallest_obj_size;
int64_t largest_obj_size;
int64_t most_common_obj_sizes[N_MOST_COMMON];
float most_common_obj_size_ratio[N_MOST_COMMON];
// (10 + 26) * 8 bytes so far

// popularity
// the request count of the most popular objects
int64_t highest_freq[N_MOST_COMMON];
// unpopular objects:
int32_t most_common_freq[N_MOST_COMMON];
float most_common_freq_ratio[N_MOST_COMMON];
// zipf alpha
double skewness;
// (10 + 26 + 33) * 8 bytes so far

// tenant info
int32_t n_tenant;

// block cache specific
int32_t block_size; // used in block trace, block size in bytes
int32_t most_common_tenants[N_MOST_COMMON];
float most_common_tenant_ratio[N_MOST_COMMON];
// (10 + 26 + 33 + 16.5) * 8 bytes so far

// key-value cache and object cache specific
int32_t n_ttl;
int32_t smallest_ttl;
int32_t largest_ttl;
int32_t most_common_ttls[N_MOST_COMMON];
float most_common_ttl_ratio[N_MOST_COMMON];
// (10 + 26 + 33 + 16.5 + 17.5) * 8 bytes so far

int32_t time_unit; // 1: seconds, 2: milliseconds, 3: microseconds, 4: nanoseconds
int32_t trace_type; // 1: block, 2: key-value, 3: object, 4: file
int32_t unused1;

int64_t unused[49];
} lcs_trace_stat_t;
int64_t unused[897];
} __attribute__((packed)) lcs_trace_stat_t;
// assert the struct size at compile time
typedef char static_assert_lcs_trace_stat_size[(sizeof(struct lcs_trace_stat) == 512) ? 1 : -1];
typedef char static_assert_lcs_trace_stat_size[(sizeof(struct lcs_trace_stat) == 1000 * 8) ? 1 : -1];

/******************************************************************************/
/** lcs trace format header (1024 bytes) **/
/** lcs trace format header (8192 bytes) **/
/** start_magic and end_magic are used to make sure the trace is valid **/
/** the main field is **/
/** 1) version, which decides the request format **/
Expand All @@ -82,11 +99,11 @@ typedef struct lcs_trace_header {
uint64_t version;
struct lcs_trace_stat stat;

uint64_t unused[61];
uint64_t unused[21];
uint64_t end_magic;
} lcs_trace_header_t;
} __attribute__((packed)) lcs_trace_header_t;
// assert the struct size at compile time
typedef char static_assert_lcs_trace_header_size[(sizeof(struct lcs_trace_header) == 1024) ? 1 : -1];
typedef char static_assert_lcs_trace_header_size[(sizeof(struct lcs_trace_header) == 1024 * 8) ? 1 : -1];

/******************************************************************************/
/** v1 is the simplest trace format (same as oracleGeneral) **/
Expand All @@ -112,8 +129,6 @@ typedef char static_assert_lcs_v1_size[(sizeof(struct lcs_req_v1) == 24) ? 1 : -
/******************************************************************************/
typedef struct __attribute__((packed)) lcs_req_v2 {
uint32_t clock_time;
// this is the hash of key in key-value cache
// or the logical block address in block cache
uint64_t obj_id;
uint32_t obj_size;
uint32_t op : 8;
Expand All @@ -123,6 +138,20 @@ typedef struct __attribute__((packed)) lcs_req_v2 {
// assert the struct size at compile time
typedef char static_assert_lcs_v2_size[(sizeof(struct lcs_req_v2) == 28) ? 1 : -1];

/******************************************************************************/
/** v3 uses int64_t for object size and clock time **/
/******************************************************************************/
// On-disk request record for lcs version 3. The struct is packed, so the
// fields are laid out back to back: 8 + 8 + 8 + 4 + 8 = 36 bytes.
typedef struct __attribute__((packed)) lcs_req_v3 {
// request timestamp (64-bit in v3)
int64_t clock_time;
// numeric object id
uint64_t obj_id;
// object size (64-bit in v3)
int64_t obj_size;
// operation code, low 8 bits of a shared 32-bit word
uint32_t op : 8;
// tenant id, remaining 24 bits of the same 32-bit word
uint32_t tenant : 24;
// the reader maps -1 and INT64_MAX to MAX_REUSE_DISTANCE
int64_t next_access_vtime;
} lcs_req_v3_t;
// assert the struct size at compile time
// (the array size becomes -1, a compile error, if the record is not 36 bytes)
typedef char static_assert_lcs_v3_size[(sizeof(struct lcs_req_v3) == 36) ? 1 : -1];

int lcsReader_setup(reader_t *reader);

int lcs_read_one_req(reader_t *reader, request_t *req);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,3 @@ static inline int oracleGeneralBin_read_one_req(reader_t *reader, request_t *req
#ifdef __cplusplus
}
#endif

2 changes: 1 addition & 1 deletion libCacheSim/traceReader/generalReader/binary.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <string.h>

#include "readerInternal.h"
#include "../readerInternal.h"

#ifdef __cplusplus
extern "C" {
Expand Down
Loading

0 comments on commit 7ae1fac

Please sign in to comment.