diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index 30957260c..20d1dc832 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -231,9 +231,9 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &tool) knobs.report_histogram = op_reuse_distance_histogram.get_value(); knobs.distance_threshold = op_reuse_distance_threshold.get_value(); knobs.report_top = op_report_top.get_value(); - knobs.skip_list_distance = op_reuse_skip_dist.get_value(); + knobs.skip_list_distance = op_reuse_skip_dist_deprecated.get_value(); knobs.distance_limit = op_reuse_distance_limit.get_value(); - knobs.verify_skip = op_reuse_verify_skip.get_value(); + knobs.verify_skip = op_reuse_verify_skip_deprecated.get_value(); knobs.histogram_bin_multiplier = op_reuse_histogram_bin_multiplier.get_value(); if (knobs.histogram_bin_multiplier < 1.0) { ERRMSG("Usage error: reuse_histogram_bin_multiplier must be >= 1.0\n"); diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 0105dfb7d..06fa06529 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -800,23 +800,21 @@ droption_t op_reuse_distance_histogram( "Print the entire reuse distance histogram.", "By default only the mean, median, and standard deviation of the reuse distances " "are reported. This option prints out the full histogram of reuse distances."); -droption_t op_reuse_skip_dist( - DROPTION_SCOPE_FRONTEND, "reuse_skip_dist", 500, - "For performance tuning: distance between skip nodes.", - "Specifies the distance between nodes in the skip list. 
For optimal performance, "
-    "set this to a value close to the estimated average reuse distance of the dataset.");
+droption_t
+    op_reuse_skip_dist_deprecated(DROPTION_SCOPE_FRONTEND, "reuse_skip_dist", 500,
+                                  "For performance tuning: distance between skip nodes.",
+                                  "DEPRECATED: skip lists are no longer used.");
 droption_t op_reuse_distance_limit(
     DROPTION_SCOPE_FRONTEND, "reuse_distance_limit", 0,
     "If nonzero, restricts distance tracking to the specified maximum distance.",
     "Specifies the maximum length of the access history list used for distance "
     "calculation. Setting this limit can significantly improve performance "
     "and reduce memory consumption for very long traces.");
-droption_t op_reuse_verify_skip(
+droption_t op_reuse_verify_skip_deprecated(
     DROPTION_SCOPE_FRONTEND, "reuse_verify_skip", false,
     "Use full list walks to verify the skip list results.",
-    "Verifies every skip list-calculated reuse distance with a full list walk. "
-    "This incurs significant additional overhead. This option is only available "
-    "in debug builds.");
+    "Verifies every skip list-calculated reuse distance with a full list walk. "
+    "DEPRECATED: skip lists are no longer used.");
 droption_t op_reuse_histogram_bin_multiplier(
     DROPTION_SCOPE_FRONTEND, "reuse_histogram_bin_multiplier", 1.00,
     "When reporting histograms, grow bins geometrically by this multiplier.",
diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h
index c1b24e8a7..e0a1a3903 100644
--- a/clients/drcachesim/common/options.h
+++ b/clients/drcachesim/common/options.h
@@ -191,9 +191,9 @@ extern dynamorio::droption::droption_t op_add_noise_generator;
 extern dynamorio::droption::droption_t op_report_top;
 extern dynamorio::droption::droption_t op_reuse_distance_threshold;
 extern dynamorio::droption::droption_t op_reuse_distance_histogram;
-extern dynamorio::droption::droption_t op_reuse_skip_dist;
+extern dynamorio::droption::droption_t op_reuse_skip_dist_deprecated;
 extern dynamorio::droption::droption_t op_reuse_distance_limit;
-extern dynamorio::droption::droption_t op_reuse_verify_skip;
+extern dynamorio::droption::droption_t op_reuse_verify_skip_deprecated;
 extern dynamorio::droption::droption_t op_reuse_histogram_bin_multiplier;
 extern dynamorio::droption::droption_t op_view_syntax;
 extern dynamorio::droption::droption_t op_record_function;
diff --git a/clients/drcachesim/tests/reuse_distance_test.cpp b/clients/drcachesim/tests/reuse_distance_test.cpp
index aeb186015..d81f60e1d 100644
--- a/clients/drcachesim/tests/reuse_distance_test.cpp
+++ b/clients/drcachesim/tests/reuse_distance_test.cpp
@@ -507,6 +507,231 @@ data_histogram_test()
     }
 }
 
+// Asserts each node's size matches its subtree node count; returns that count.
+uint64_t
+verify_sizes(line_ref_node_t *node)
+{
+    if (node == nullptr)
+        return 0;
+    auto expected = verify_sizes(node->left) + verify_sizes(node->right) + 1;
+    assert(node->size == expected);
+    return expected;
+}
+
+// Test for splay tree insertion.
+void
+splay_tree_insert_test()
+{
+    std::cerr << "splay_tree_insert_test()\n";
+    constexpr uint64_t THRESHOLD = 4;
+    constexpr int NUM_NODES = 10;
+    constexpr uint64_t TEST_ADDRESS = 0x1000;
+    constexpr uint64_t TEST_DISTANCE_INCREMENT = 64;
+
+    line_ref_splay_t tree(THRESHOLD);
+    address_generator_t agen(TEST_ADDRESS, TEST_DISTANCE_INCREMENT);
+
+    // Each insertion must leave the new node as both root and head (most recent).
+    std::vector<line_ref_node_t *> nodes;
+    for (int i = 0; i < NUM_NODES; ++i) {
+        nodes.push_back(new line_ref_node_t(agen.next_address()));
+        tree.add_to_front(nodes.back());
+        assert(tree.root_ == nodes.back());
+        assert(tree.head_ == nodes.back());
+        assert(tree.tail_ == nodes[0]);
+        assert(tree.unique_lines_ == static_cast<uint64_t>(i) + 1);
+        assert(tree.root_->size == static_cast<uint64_t>(i) + 1);
+    }
+    verify_sizes(tree.root_);
+}
+
+// Test for splay tree move_to_front and reuse distance.
+void
+splay_tree_move_to_front_test()
+{
+    std::cerr << "splay_tree_move_to_front_test()\n";
+    constexpr uint64_t THRESHOLD = 4;
+    constexpr int NUM_NODES = 10;
+    constexpr uint64_t TEST_ADDRESS = 0x1000;
+    constexpr uint64_t TEST_DISTANCE_INCREMENT = 64;
+
+    line_ref_splay_t tree(THRESHOLD);
+    address_generator_t agen(TEST_ADDRESS, TEST_DISTANCE_INCREMENT);
+
+    std::vector<line_ref_node_t *> nodes;
+    for (int i = 0; i < NUM_NODES; ++i) {
+        nodes.push_back(new line_ref_node_t(agen.next_address()));
+        tree.add_to_front(nodes.back());
+    }
+
+    // nodes[5] has nodes[9..6] in front of it, hence a reuse distance of 4.
+    auto dist = tree.move_to_front(nodes[5]);
+    assert(dist == 4);
+    assert(tree.head_ == nodes[5]);
+    assert(nodes[5]->total_refs == 2);
+    verify_sizes(tree.root_);
+
+    dist = tree.move_to_front(nodes[0]);
+    assert(dist == NUM_NODES - 1);
+    assert(tree.head_ == nodes[0]);
+    verify_sizes(tree.root_);
+
+    dist = tree.move_to_front(nodes[0]);
+    assert(dist == 0);
+    assert(nodes[0]->total_refs == 3);
+}
+
+// Test for splay tree gate mechanism and distant reference tracking.
+void
+splay_tree_gate_test()
+{
+    std::cerr << "splay_tree_gate_test()\n";
+    constexpr uint64_t THRESHOLD = 4;
+    constexpr int NUM_NODES = 10;
+    constexpr uint64_t TEST_ADDRESS = 0x1000;
+    constexpr uint64_t TEST_DISTANCE_INCREMENT = 64;
+
+    line_ref_splay_t tree(THRESHOLD);
+    address_generator_t agen(TEST_ADDRESS, TEST_DISTANCE_INCREMENT);
+
+    std::vector<line_ref_node_t *> nodes;
+    for (int i = 0; i < NUM_NODES; ++i) {
+        nodes.push_back(new line_ref_node_t(agen.next_address()));
+        tree.add_to_front(nodes.back());
+    }
+
+    // Test gate mechanism and distant reference tracking.
+    assert(tree.gate_ != nullptr);
+    auto *distant_node = nodes[1]; // Should be beyond gate.
+    assert(tree.ref_is_distant(distant_node));
+    auto old_distant_refs = distant_node->distant_refs;
+    tree.move_to_front(distant_node);
+    assert(distant_node->distant_refs == old_distant_refs + 1);
+
+    auto *recent_node = tree.head_;
+    assert(!tree.ref_is_distant(recent_node));
+}
+
+// Test for splay tree rotations.
+void
+splay_tree_rotation_test()
+{
+    std::cerr << "splay_tree_rotation_test()\n";
+    constexpr uint64_t THRESHOLD = 4;
+    constexpr int NUM_NODES = 10;
+    constexpr uint64_t TEST_ADDRESS = 0x1000;
+    constexpr uint64_t TEST_DISTANCE_INCREMENT = 64;
+
+    line_ref_splay_t tree(THRESHOLD);
+    address_generator_t agen(TEST_ADDRESS, TEST_DISTANCE_INCREMENT);
+
+    std::vector<line_ref_node_t *> nodes;
+    for (int i = 0; i < NUM_NODES; ++i) {
+        nodes.push_back(new line_ref_node_t(agen.next_address()));
+        tree.add_to_front(nodes.back());
+    }
+
+    // Splaying must bring the node to the root and keep subtree sizes valid.
+    tree.splay(nodes[3]);
+    assert(tree.root_ == nodes[3]);
+    verify_sizes(tree.root_);
+    tree.splay(nodes[7]);
+    assert(tree.root_ == nodes[7]);
+    verify_sizes(tree.root_);
+}
+
+// Test for splay tree get_prev traversal.
+void
+splay_tree_traversal_test()
+{
+    std::cerr << "splay_tree_traversal_test()\n";
+    constexpr uint64_t THRESHOLD = 4;
+    constexpr int NUM_NODES = 10;
+    constexpr uint64_t TEST_ADDRESS = 0x1000;
+    constexpr uint64_t TEST_DISTANCE_INCREMENT = 64;
+
+    line_ref_splay_t tree(THRESHOLD);
+    address_generator_t agen(TEST_ADDRESS, TEST_DISTANCE_INCREMENT);
+
+    std::vector<line_ref_node_t *> nodes;
+    for (int i = 0; i < NUM_NODES; ++i) {
+        nodes.push_back(new line_ref_node_t(agen.next_address()));
+        tree.add_to_front(nodes.back());
+    }
+
+    // Walking get_prev from the tail must visit every node exactly once.
+    auto *current = tree.tail_;
+    int count = 0;
+    while (current != nullptr) {
+        current = tree.get_prev(current);
+        ++count;
+        assert(count <= NUM_NODES);
+    }
+    assert(count == NUM_NODES);
+    assert(tree.get_prev(nullptr) == nullptr);
+}
+
+// Test for splay tree removal.
+void
+splay_tree_remove_test()
+{
+    std::cerr << "splay_tree_remove_test()\n";
+    constexpr uint64_t THRESHOLD = 4;
+    constexpr int NUM_NODES = 10;
+    constexpr uint64_t TEST_ADDRESS = 0x1000;
+    constexpr uint64_t TEST_DISTANCE_INCREMENT = 64;
+
+    line_ref_splay_t tree(THRESHOLD);
+    address_generator_t agen(TEST_ADDRESS, TEST_DISTANCE_INCREMENT);
+
+    std::vector<line_ref_node_t *> nodes;
+    for (int i = 0; i < NUM_NODES; ++i) {
+        nodes.push_back(new line_ref_node_t(agen.next_address()));
+        tree.add_to_front(nodes.back());
+    }
+
+    // A removed node is detached, so the test (not the tree) must delete it.
+    auto *to_remove = nodes[4];
+    tree.remove(to_remove);
+    assert(tree.root_->size == NUM_NODES - 1);
+    assert(to_remove->parent == nullptr && to_remove->left == nullptr);
+    delete to_remove;
+    verify_sizes(tree.root_);
+}
+
+// Test for splay tree prune_tail.
+void +splay_tree_prune_test() +{ + std::cerr << "splay_tree_prune_test()\n"; + constexpr uint64_t THRESHOLD = 4; + constexpr int NUM_NODES = 10; + constexpr uint64_t TEST_ADDRESS = 0x1000; + constexpr uint64_t TEST_DISTANCE_INCREMENT = 64; + + line_ref_splay_t tree(THRESHOLD); + address_generator_t agen(TEST_ADDRESS, TEST_DISTANCE_INCREMENT); + + std::vector nodes; + for (int i = 0; i < NUM_NODES; ++i) { + nodes.push_back(new line_ref_node_t(agen.next_address())); + tree.add_to_front(nodes.back()); + } + + // Test prune_tail. + auto *old_tail = tree.tail_; + tree.prune_tail(); + assert(tree.tail_ != old_tail); + assert(tree.root_->size == NUM_NODES - 1); + delete old_tail; + verify_sizes(tree.root_); + + if (TEST_VERBOSE(1)) { + std::cerr << "Final tree size: " << tree.root_->size << "\n"; + std::cerr << "Unique lines: " << tree.unique_lines_ << "\n"; + } +} + int test_main(int argc, const char *argv[]) { @@ -516,6 +741,14 @@ test_main(int argc, const char *argv[]) simple_reuse_distance_test(); reuse_distance_limit_test(); data_histogram_test(); + splay_tree_insert_test(); + splay_tree_move_to_front_test(); + splay_tree_gate_test(); + splay_tree_rotation_test(); + splay_tree_traversal_test(); + splay_tree_remove_test(); + splay_tree_prune_test(); + return 0; } diff --git a/clients/drcachesim/tools/reuse_distance.cpp b/clients/drcachesim/tools/reuse_distance.cpp index 7c3cb7627..434b5d06e 100644 --- a/clients/drcachesim/tools/reuse_distance.cpp +++ b/clients/drcachesim/tools/reuse_distance.cpp @@ -99,12 +99,11 @@ reuse_distance_t::initialize_shard_type(shard_type_t shard_type) return ""; } -reuse_distance_t::shard_data_t::shard_data_t(uint64_t reuse_threshold, uint64_t skip_dist, - uint32_t distance_limit, bool verify) +reuse_distance_t::shard_data_t::shard_data_t(uint64_t reuse_threshold, + uint32_t distance_limit) : distance_limit(distance_limit) { - ref_list = std::unique_ptr( - new line_ref_list_t(reuse_threshold, skip_dist, verify)); + ref_list = 
std::unique_ptr(new line_ref_splay_t(reuse_threshold)); } bool @@ -117,8 +116,7 @@ void * reuse_distance_t::parallel_shard_init_stream(int shard_index, void *worker_data, memtrace_stream_t *stream) { - auto shard = new shard_data_t(knobs_.distance_threshold, knobs_.skip_list_distance, - knobs_.distance_limit, knobs_.verify_skip); + auto shard = new shard_data_t(knobs_.distance_threshold, knobs_.distance_limit); std::lock_guard guard(shard_map_mutex_); shard->core = stream->get_output_cpuid(); shard->tid = stream->get_tid(); @@ -166,12 +164,12 @@ reuse_distance_t::parallel_shard_memref(void *shard_data, const memref_t &memref ++shard->data_refs; } addr_t tag = memref.data.addr >> line_size_bits_; - std::unordered_map::iterator it = + std::unordered_map::iterator it = shard->cache_map.find(tag); if (it == shard->cache_map.end()) { - line_ref_t *ref = new line_ref_t(tag); + line_ref_node_t *ref = new line_ref_node_t(tag); // insert into the map - shard->cache_map.insert(std::pair(tag, ref)); + shard->cache_map.insert(std::pair(tag, ref)); // insert into the list shard->ref_list->add_to_front(ref); // See if the line we're adding was previously removed. 
@@ -215,8 +213,7 @@ reuse_distance_t::process_memref(const memref_t &memref) int shard_index = serial_stream_->get_shard_index(); const auto &lookup = shard_map_.find(shard_index); if (lookup == shard_map_.end()) { - shard = new shard_data_t(knobs_.distance_threshold, knobs_.skip_list_distance, - knobs_.distance_limit, knobs_.verify_skip); + shard = new shard_data_t(knobs_.distance_threshold, knobs_.distance_limit); shard->core = serial_stream_->get_output_cpuid(); shard->tid = serial_stream_->get_tid(); shard_map_[shard_index] = shard; @@ -237,8 +234,8 @@ cmp_dist_key(const reuse_distance_t::distance_map_pair_t &l, } static bool -cmp_total_refs(const std::pair &l, - const std::pair &r) +cmp_total_refs(const std::pair &l, + const std::pair &r) { if (l.second->total_refs > r.second->total_refs) return true; @@ -252,8 +249,8 @@ cmp_total_refs(const std::pair &l, } static bool -cmp_distant_refs(const std::pair &l, - const std::pair &r) +cmp_distant_refs(const std::pair &l, + const std::pair &r) { if (l.second->distant_refs > r.second->distant_refs) return true; @@ -324,7 +321,7 @@ reuse_distance_t::print_shard_results(const shard_data_t *shard) std::cerr << "\n"; std::cerr << "Reuse distance threshold = " << knobs_.distance_threshold << " cache lines\n"; - std::vector> top(knobs_.report_top); + std::vector> top(knobs_.report_top); std::partial_sort_copy(shard->cache_map.begin(), shard->cache_map.end(), top.begin(), top.end(), cmp_total_refs); std::cerr << "Top " << top.size() << " frequently referenced cache lines\n"; @@ -332,7 +329,7 @@ reuse_distance_t::print_shard_results(const shard_data_t *shard) << ": " << std::setw(17) << "#references " << std::setw(14) << "#distant refs" << "\n"; - for (std::vector>::iterator it = top.begin(); + for (std::vector>::iterator it = top.begin(); it != top.end(); ++it) { if (it->second == NULL) // Very small app. 
break; @@ -350,7 +347,7 @@ reuse_distance_t::print_shard_results(const shard_data_t *shard) << ": " << std::setw(17) << "#references " << std::setw(14) << "#distant refs" << "\n"; - for (std::vector>::iterator it = top.begin(); + for (std::vector>::iterator it = top.begin(); it != top.end(); ++it) { if (it->second == NULL) // Very small app. break; @@ -452,8 +449,7 @@ reuse_distance_t::get_aggregated_results() // Otherwise, aggregate the per-shard data to get whole-trace data. aggregated_results_ = std::unique_ptr( - new shard_data_t(knobs_.distance_threshold, knobs_.skip_list_distance, - knobs_.distance_limit, knobs_.verify_skip)); + new shard_data_t(knobs_.distance_threshold, knobs_.distance_limit)); for (auto &shard : shard_map_) { aggregated_results_->total_refs += shard.second->total_refs; aggregated_results_->data_refs += shard.second->data_refs; @@ -484,11 +480,11 @@ reuse_distance_t::get_aggregated_results() } for (const auto &entry : shard.second->cache_map) { const auto &existing = aggregated_results_->cache_map.find(entry.first); - line_ref_t *ref; + line_ref_node_t *ref; if (existing == aggregated_results_->cache_map.end()) { - ref = new line_ref_t(entry.first); + ref = new line_ref_node_t(entry.first); aggregated_results_->cache_map.insert( - std::pair(entry.first, ref)); + std::pair(entry.first, ref)); ref->total_refs = 0; } else { ref = existing->second; @@ -507,7 +503,7 @@ reuse_distance_t::print_results() std::cerr << TOOL_NAME << " aggregated results:\n"; print_shard_results(get_aggregated_results()); - // For regular shards the line_ref_t's are deleted in ~line_ref_list_t. + // For regular shards the line_ref_node_t's are deleted in ~line_ref_splay_t. 
for (auto &iter : get_aggregated_results()->cache_map) { delete iter.second; } diff --git a/clients/drcachesim/tools/reuse_distance.h b/clients/drcachesim/tools/reuse_distance.h index 449930c23..410be0286 100644 --- a/clients/drcachesim/tools/reuse_distance.h +++ b/clients/drcachesim/tools/reuse_distance.h @@ -72,8 +72,8 @@ namespace drmemtrace { # define IF_DEBUG_VERBOSE(level, action) #endif -struct line_ref_list_t; -struct line_ref_t; +struct line_ref_node_t; +struct line_ref_splay_t; class reuse_distance_t : public analysis_tool_t { public: @@ -114,9 +114,8 @@ class reuse_distance_t : public analysis_tool_t { // the shards we're given. This is for simplicity and to give the user a method // for computing over different units if for some reason that was desired. struct shard_data_t { - shard_data_t(uint64_t reuse_threshold, uint64_t skip_dist, - unsigned int distance_limit, bool verify); - std::unordered_map cache_map; + shard_data_t(uint64_t reuse_threshold, unsigned int distance_limit); + std::unordered_map cache_map; std::unordered_set pruned_addresses; // These are our reuse distance histograms: one for all accesses and one // only for data references. An instruction histogram can be computed by @@ -131,7 +130,7 @@ class reuse_distance_t : public analysis_tool_t { distance_histogram_t dist_map; distance_histogram_t dist_map_data; bool dist_map_is_instr_only = true; - std::unique_ptr ref_list; + std::unique_ptr ref_list; int64_t total_refs = 0; int64_t data_refs = 0; // Non-instruction reference count. memref_tid_t tid = 0; // For SHARD_BY_THREAD. 
@@ -169,85 +168,88 @@ class reuse_distance_t : public analysis_tool_t { memtrace_stream_t *serial_stream_ = nullptr; }; -/* A doubly linked list node for the cache line reference info */ -struct line_ref_t { - struct line_ref_t *prev; // the prev line_ref in the list - struct line_ref_t *next; // the next line_ref in the list - uint64_t time_stamp; // the most recent reference time stamp on this line - uint64_t total_refs; // the total number of references on this line - uint64_t distant_refs; // the total number of distant references on this line +/* A splay tree node for the cache line reference info */ +struct line_ref_node_t { + struct line_ref_node_t *left; // the left child of the node + struct line_ref_node_t *right; // the right child of the node + struct line_ref_node_t *parent; // the parent of the node + uint64_t size; // Size of the subtree + uint64_t time_stamp; // the most recent reference time stamp on this line + uint64_t total_refs; // the total number of references on this line + uint64_t distant_refs; // the total number of distant references on this line addr_t tag; - // We have a one-layer skip list for more efficient depth computation. - // We inline the fields in every node for simplicity and to reduce allocs. - struct line_ref_t *prev_skip; // the prev line_ref in the skip list - struct line_ref_t *next_skip; // the next line_ref in the skip list - int64_t depth; // only valid for skip list nodes; -1 for others - - line_ref_t(addr_t val) - : prev(NULL) - , next(NULL) + line_ref_node_t(addr_t val) + : left(nullptr) + , right(nullptr) + , parent(nullptr) + , size(1) , total_refs(1) , distant_refs(0) , tag(val) - , prev_skip(NULL) - , next_skip(NULL) - , depth(-1) { } }; -// We use a doubly linked list to keep track of the cache line reuse distance. -// The head of the list is the most recently accessed cache line. -// The earlier a cache line was accessed last time, the deeper that cache line -// is in the list. 
+// We use a splay tree to keep track of the cache line reuse distance. +// The leftmost node of the splay tree is the most recently accessed cache line. +// The earlier the cache line was last accessed, the more to the right this cache line +// is in the splay tree. // If a cache line is accessed, its time stamp is set as current, and it is -// added/moved to the front of the list. The cache line reference -// reuse distance is the cache line position in the list before moving. +// added/moved to the left of the splay tree. The cache line reference +// reuse distance is the cache line position in the splay tree before moving. // We also keep a pointer (gate) pointing to the earliest cache // line referenced within the threshold. Thus, we can quickly check // whether a cache line is recently accessed by comparing the time // stamp of the referenced cache line and the gate cache line. -// -// We have a second doubly-linked list, a one-layer skip list, for -// more efficient computation of the depth. Each node in the skip -// list stores its depth from the front. -struct line_ref_list_t { - line_ref_t *head_; // the most recently accessed cache line - line_ref_t *gate_; // the earliest cache line refs within the threshold - line_ref_t *tail_; // the least recently accessed cache line - uint64_t cur_time_; // current time stamp - uint64_t unique_lines_; // the total number of unique cache lines accessed - uint64_t threshold_; // the reuse distance threshold - uint64_t skip_distance_; // distance between skip list nodes - bool verify_skip_; // check results using brute-force walks - - line_ref_list_t(uint64_t reuse_threshold_, uint64_t skip_dist, bool verify) - : head_(NULL) - , gate_(NULL) - , tail_(NULL) +// The splay tree is a binary search tree that uses the splay operation for balancing, +// which allows deleting and inserting an element at any position in O(log n) amortized +// time. 
+struct line_ref_splay_t {
+    line_ref_node_t *root_; // root of the splay tree
+    line_ref_node_t *gate_; // the earliest cache line refs within the threshold
+    line_ref_node_t *head_; // the most recently accessed cache line
+    line_ref_node_t *tail_; // the least recently accessed cache line
+    uint64_t cur_time_;     // current time stamp
+    uint64_t unique_lines_; // the total number of unique cache lines accessed
+    uint64_t threshold_;    // the reuse distance threshold
+
+    line_ref_splay_t(uint64_t reuse_threshold_)
+        : root_(nullptr)
+        , gate_(nullptr)
+        , head_(nullptr)
+        , tail_(nullptr)
         , cur_time_(0)
         , unique_lines_(0)
         , threshold_(reuse_threshold_)
-        , skip_distance_(skip_dist)
-        , verify_skip_(verify)
     {
     }
 
-    virtual ~line_ref_list_t()
+    virtual ~line_ref_splay_t()
     {
-        line_ref_t *ref;
-        line_ref_t *next;
-        if (head_ == NULL)
-            return;
-        for (ref = head_; ref != NULL; ref = next) {
-            next = ref->next;
-            delete ref;
+        line_ref_node_t *current = root_;
+        while (current != nullptr) {
+            if (current->left != nullptr) {
+                current = current->left;
+            } else if (current->right != nullptr) {
+                current = current->right;
+            } else {
+                line_ref_node_t *parent = current->parent;
+                if (parent != nullptr) {
+                    if (parent->left == current) {
+                        parent->left = nullptr;
+                    } else {
+                        parent->right = nullptr;
+                    }
+                }
+                delete current;
+                current = parent;
+            }
         }
     }
 
     bool
-    ref_is_distant(line_ref_t *ref)
+    ref_is_distant(line_ref_node_t *ref)
     {
         if (gate_ == NULL || ref->time_stamp >= gate_->time_stamp)
             return false;
@@ -255,200 +257,338 @@ struct line_ref_list_t {
     }
 
+    // Print one node: its tag, subtree size and the tags of its parent and children.
     void
-    print_list()
+    print_node(line_ref_node_t *node)
     {
-        std::cerr << "Reuse tag list:\n";
-        for (line_ref_t *node = head_; node != NULL; node = node->next) {
-            std::cerr << "\tTag 0x" << std::hex << node->tag;
-            if (node->depth != -1) {
-                std::cerr << " depth=" << std::dec << node->depth << " prev=" << std::hex
-                          << (node->prev_skip == NULL ? 0 : node->prev_skip->tag)
-                          << " next=" << std::hex
-                          << (node->next_skip == NULL ? 0 : node->next_skip->tag);
-                assert(node->next_skip == NULL || node->next_skip->prev_skip == node);
-            } else
-                assert(node->next_skip == NULL && node->prev_skip == NULL);
-            std::cerr << "\n";
-        }
+        assert(node != nullptr);
+        std::cerr << "\tTag 0x" << std::hex << node->tag << " size=" << std::dec
+                  << node->size << " parent=" << std::hex
+                  << (node->parent == nullptr ? 0 : node->parent->tag)
+                  << " left=" << std::hex << (node->left == nullptr ? 0 : node->left->tag)
+                  << " right=" << std::hex
+                  << (node->right == nullptr ? 0 : node->right->tag) << "\n";
     }
 
+    // Print every node in in-order sequence, i.e. from the most recently
+    // accessed line (leftmost) to the least recently accessed one (rightmost).
     void
-    move_skip_fields(line_ref_t *src, line_ref_t *dst)
+    print_list()
     {
-        dst->prev_skip = src->prev_skip;
-        dst->next_skip = src->next_skip;
-        dst->depth = src->depth;
-        if (src->prev_skip != NULL)
-            src->prev_skip->next_skip = dst;
-        if (src->next_skip != NULL)
-            src->next_skip->prev_skip = dst;
-        src->prev_skip = NULL;
-        src->next_skip = NULL;
-        src->depth = -1;
+        std::cerr << "Reuse tag list:\n";
+        line_ref_node_t *node = root_;
+        // Child we just climbed out of; nullptr while we are still descending.
+        line_ref_node_t *last = nullptr;
+
+        while (node != nullptr) {
+            assert(node->parent != nullptr || node == root_);
+            assert(get_size(node->left) + get_size(node->right) + 1 == node->size);
+
+            if (node->left != nullptr && last == nullptr) {
+                node = node->left;
+                last = nullptr;
+            } else if (node->right != nullptr && last == node->left) {
+                print_node(node);
+                node = node->right;
+                last = nullptr;
+            } else {
+                if (node->right == nullptr) {
+                    print_node(node);
+                }
+                last = node;
+                node = node->parent;
+            }
+        }
     }
 
-    // Add a new cache line to the front of the list.
+    // Add a new cache line to the front of the splay tree.
     // We may need to move gate_ forward if there are more cache lines
     // than the threshold so that the gate points to the earliest
     // referenced cache line within the threshold.
     void
-    add_to_front(line_ref_t *ref)
+    add_to_front(line_ref_node_t *ref)
     {
-        IF_DEBUG_VERBOSE(3, std::cerr << "Add tag 0x" << std::hex << ref->tag << "\n");
-        // update head_
-        ref->next = head_;
-        if (head_ != NULL)
-            head_->prev = ref;
-        head_ = ref;
-        if (gate_ == NULL)
-            gate_ = head_;
-        // move gate_ forward if necessary
-        if (unique_lines_ > threshold_)
-            gate_ = gate_->prev;
-        if (tail_ == NULL)
+        push_front(ref);
+        if (gate_ == nullptr) {
+            gate_ = ref;
+        }
+        if (tail_ == nullptr) {
             tail_ = ref;
-        unique_lines_++;
-        head_->time_stamp = cur_time_++;
-
-        // Add a new skip node if necessary.
-        // We don't bother keeping one right at the front: too much overhead_.
-        uint64_t count = 0;
-        line_ref_t *node, *skip = NULL;
-        for (node = head_; node != NULL && node->depth == -1; node = node->next) {
-            ++count;
-            if (count == skip_distance_)
-                skip = node;
         }
-        if (count >= 2 * skip_distance_ - 1) {
-            assert(skip != NULL);
-            IF_DEBUG_VERBOSE(3,
-                             std::cerr << "New skip node for tag 0x" << std::hex
-                                       << skip->tag << "\n");
-            skip->depth = skip_distance_ - 1;
-            if (node != NULL) {
-                assert(node->prev_skip == NULL);
-                node->prev_skip = skip;
-            }
-            skip->next_skip = node;
-            assert(skip->prev_skip == NULL);
+        // move gate_ forward if necessary
+        if (unique_lines_ > threshold_) {
+            gate_ = get_prev(gate_);
         }
-        // Update skip list depths.
-        for (; node != NULL; node = node->next_skip)
-            ++node->depth;
+
+        unique_lines_++;
+        ref->time_stamp = cur_time_++;
         IF_DEBUG_VERBOSE(3, print_list());
     }
 
-    // Remove the last entry from the distance list.
+    // Remove the last entry from the distance tree.
     void
     prune_tail()
     {
         // Make sure the tail pointers are legal.
-        assert(tail_ != NULL);
+        assert(tail_ != nullptr);
         assert(tail_ != head_);
-        assert(tail_->next == NULL);
-        assert(tail_->prev != NULL);
-
-        IF_DEBUG_VERBOSE(3,
-                         std::cerr << "Prune tag 0x" << std::hex << tail_->tag << "\n");
-        line_ref_t *new_tail = tail_->prev;
-        new_tail->next = NULL;
+        // Get new tail.
+        line_ref_node_t *new_tail = get_prev(tail_);
 
-        // If there's a prior skip, remove its ptr to tail.
-        if (tail_->depth != -1 && tail_->prev_skip != NULL) {
-            tail_->prev_skip->next_skip = NULL;
+        // Unlink tail_, handing its left subtree to its parent (or making that
+        // subtree the new root when tail_ itself is the root).
+        if (tail_->parent != nullptr) {
+            tail_->parent->right = tail_->left;
+        } else {
+            root_ = tail_->left;
+        }
+        if (tail_->left != nullptr) {
+            tail_->left->parent = tail_->parent;
         }
 
+        // Update sizes of all parents
+        line_ref_node_t *parent = tail_->parent;
+        while (parent != nullptr) {
+            parent->size--;
+            parent = parent->parent;
+        }
         if (tail_ == gate_) {
             // move gate_ if tail_ was the gate_.
-            gate_ = gate_->prev;
+            gate_ = new_tail;
         }
-
         // And finally, update tail_.
         tail_ = new_tail;
     }
 
-    // Move a referenced cache line to the front of the list.
+    // Move a referenced cache line to the front of the splay tree.
     // We need to move the gate_ pointer forward if the referenced cache
     // line is the gate_ cache line or any cache line after.
     // Returns the reuse distance of ref.
     int64_t
-    move_to_front(line_ref_t *ref)
+    move_to_front(line_ref_node_t *ref)
     {
-        IF_DEBUG_VERBOSE(
-            3, std::cerr << "Move tag 0x" << std::hex << ref->tag << " to front\n");
-        line_ref_t *prev;
-        line_ref_t *next;
-
         ref->total_refs++;
         if (ref == head_)
             return 0;
+        splay(ref);
+        // With ref at the root, its left subtree holds exactly the more recent lines.
+        int64_t dist = get_size(ref->left);
+
         if (ref_is_distant(ref)) {
             ref->distant_refs++;
-            gate_ = gate_->prev;
+            gate_ = get_prev(gate_);
         } else if (ref == gate_) {
             // move gate_ if ref is the gate_.
-            gate_ = gate_->prev;
+            gate_ = get_prev(gate_);
         }
         if (ref == tail_) {
-            tail_ = tail_->prev;
+            tail_ = get_prev(tail_);
         }
+        remove(ref);
+        push_front(ref);
+        ref->time_stamp = cur_time_++;
+        IF_DEBUG_VERBOSE(3, print_list());
+        return dist;
+    }
 
-        // Compute reuse distance.
-        int64_t dist = 0;
-        line_ref_t *skip;
-        for (skip = ref; skip != NULL && skip->depth == -1; skip = skip->prev)
-            ++dist;
-        if (skip != NULL)
-            dist += skip->depth;
+    // Link ref as the new leftmost (most recent) node and splay it to the root.
+    // ref is expected to be detached (no parent or children, size 1); head_ is
+    // updated here, while gate_, tail_ and the time stamp are left to the caller.
+    void
+    push_front(line_ref_node_t *ref)
+    {
+        // Link ref to the front of the head.
+        if (head_)
+            head_->left = ref;
         else
-            --dist; // Don't count self.
-
-        IF_DEBUG_VERBOSE(
-            0, if (verify_skip_) {
-                // Compute reuse distance with a full list walk as a sanity check.
-                // This is a debug-only option, so we guard with IF_DEBUG_VERBOSE(0).
-                // Yes, the option check branch shows noticeable overhead without it.
-                int64_t brute_dist = 0;
-                for (prev = head_; prev != ref; prev = prev->next)
-                    ++brute_dist;
-                if (brute_dist != dist) {
-                    std::cerr << "Mismatch! Brute=" << std::dec << brute_dist
-                              << " vs skip=" << dist << "\n";
-                    print_list();
-                    assert(false);
-                }
-            });
-
-        // Shift skip nodes between where ref was and head one earlier to
-        // maintain spacing. This means their depths remain the same.
-        if (skip != NULL) {
-            for (; skip != NULL; skip = next) {
-                next = skip->prev_skip;
-                assert(skip->prev != NULL);
-                move_skip_fields(skip, skip->prev);
-            }
-        } else
-            assert(ref->depth == -1);
-
-        // remove ref from the list
-        prev = ref->prev;
-        next = ref->next;
-        prev->next = next;
-        // ref could be the last
-        if (next != NULL)
-            next->prev = prev;
-        // move ref to the front
-        ref->prev = NULL;
-        ref->next = head_;
-        head_->prev = ref;
+            root_ = ref;
+        ref->parent = head_;
+        line_ref_node_t *parent = ref->parent;
+        // Update sizes of parents
+        while (parent != nullptr) {
+            parent->size++;
+            parent = parent->parent;
+        }
+
+        // Update head
         head_ = ref;
-        head_->time_stamp = cur_time_++;
+        splay(ref);
+    }
 
+    // Detach ref from the tree, leaving it with no links and size 1.
+    // Only root_ is maintained here: a caller removing head_, tail_ or gate_
+    // has to repoint those itself (move_to_front() does so before calling).
+    void
+    remove(line_ref_node_t *ref)
+    {
+        splay(ref);
+        // Replace ref with left child and link right child to the tail of left child
+        if (ref->left != nullptr) {
+            ref->left->parent = ref->parent;
+            if (ref->right != nullptr) {
+                line_ref_node_t *left_tail = get_tail(ref->left);
+                ref->right->parent = left_tail;
+                left_tail->right = ref->right;
+                // Update sizes of the left child's subtrees
+                do {
+                    left_tail->size += ref->right->size;
+                    left_tail = left_tail->parent;
+                } while (left_tail != ref->left->parent);
+            }
+            if (root_ == ref) {
+                root_ = ref->left;
+            }
+        } else {
+            if (root_ == ref)
+                root_ = ref->right;
+            // ref may be the only node, in which case there is no right child.
+            if (ref->right != nullptr)
+                ref->right->parent = ref->parent;
+        }
+        // Update sizes of the ref's parents
+        line_ref_node_t *parent = ref->parent;
+        while (parent != nullptr) {
+            parent->size--;
+            parent = parent->parent;
+        }
+        // Clear ref
+        ref->parent = ref->right = ref->left = nullptr;
+        ref->size = 1;
+    }
 
-        IF_DEBUG_VERBOSE(3, print_list());
-        // XXX: we should keep a running mean of the distance, and adjust
-        // knob_reuse_skip_dist to stay close to the mean, for best performance.
-        return dist;
+    // Find the tail of the splay tree.
+    // Returns a pointer to the tail.
+    line_ref_node_t *
+    get_tail(line_ref_node_t *root)
+    {
+        if (root == nullptr) {
+            return nullptr;
+        }
+        // Tail is the far right node in the tree
+        while (root->right != nullptr) {
+            root = root->right;
+        }
+        return root;
+    }
+
+    // Find previous element of ref in the splay tree.
+    // Returns a pointer to the previous node.
+    line_ref_node_t *
+    get_prev(line_ref_node_t *ref)
+    {
+        if (ref == nullptr)
+            return nullptr;
+        // the last visited node
+        line_ref_node_t *last = nullptr;
+        // Climb while ref has no unvisited left subtree and is itself a left child.
+        while ((ref->left == nullptr || ref->left == last) && ref->parent != nullptr &&
+               ref->parent->left == ref) {
+            last = ref;
+            ref = ref->parent;
+        }
+
+        // previous node is the far right node in left subtree
+        if (ref->left != nullptr && ref->left != last) {
+            return get_tail(ref->left);
+        }
+
+        // if the node is right child of another, the parent is the previous node
+        if (ref->parent != nullptr && ref->parent->right == ref)
+            return ref->parent;
+        return nullptr;
+    }
+
+    // Get size of node
+    // Returns the subtree size stored in node, or 0 if node is null.
+    uint64_t
+    get_size(line_ref_node_t *node)
+    {
+        if (node != nullptr)
+            return node->size;
+        return 0;
+    }
+
+    // Recalculate size of node.
+    void
+    recalc_size(line_ref_node_t *node)
+    {
+        node->size = get_size(node->left) + get_size(node->right) + 1;
+    }
+
+    // Make left rotate of the subtree.
+    void
+    left_rotate(line_ref_node_t *node)
+    {
+        line_ref_node_t *new_parent = node->right;
+        // Without a right child there is nothing to lift: leave the tree as is.
+        if (new_parent == nullptr)
+            return;
+        node->right = new_parent->left;
+        if (new_parent->left != nullptr)
+            new_parent->left->parent = node;
+        new_parent->parent = node->parent;
+
+        if (node->parent == nullptr)
+            root_ = new_parent;
+        else if (node == node->parent->left)
+            node->parent->left = new_parent;
+        else
+            node->parent->right = new_parent;
+        new_parent->left = node;
+        node->parent = new_parent;
+        recalc_size(node);
+        recalc_size(new_parent);
+    }
+
+    // Make right rotate of the subtree.
+    void
+    right_rotate(line_ref_node_t *node)
+    {
+        line_ref_node_t *new_parent = node->left;
+        // Without a left child there is nothing to lift: leave the tree as is.
+        if (new_parent == nullptr)
+            return;
+        node->left = new_parent->right;
+        if (new_parent->right != nullptr)
+            new_parent->right->parent = node;
+        new_parent->parent = node->parent;
+        if (node->parent == nullptr)
+            root_ = new_parent;
+        else if (node == node->parent->left)
+            node->parent->left = new_parent;
+        else
+            node->parent->right = new_parent;
+        new_parent->right = node;
+        node->parent = new_parent;
+        recalc_size(node);
+        recalc_size(new_parent);
+    }
+
+    // Move the node to the root using rotate operations.
+    void
+    splay(line_ref_node_t *node)
+    {
+        while (node->parent != nullptr) {
+            if (node->parent->parent == nullptr) {
+                if (node->parent->left == node)
+                    right_rotate(node->parent);
+                else
+                    left_rotate(node->parent);
+            } else if (node->parent->left == node &&
+                       node->parent->parent->left == node->parent) {
+                right_rotate(node->parent->parent);
+                right_rotate(node->parent);
+            } else if (node->parent->right == node &&
+                       node->parent->parent->right == node->parent) {
+                left_rotate(node->parent->parent);
+                left_rotate(node->parent);
+            } else if (node->parent->left == node &&
+                       node->parent->parent->right == node->parent) {
+                right_rotate(node->parent);
+                left_rotate(node->parent);
+            } else {
+                left_rotate(node->parent);
+                right_rotate(node->parent);
+            }
+        }
     }
 };