From cd3e1095b0c77e3a58feff259b7612e9701f6ce4 Mon Sep 17 00:00:00 2001
From: Rick Altherr
Date: Sat, 30 Jan 2016 19:25:35 -0800
Subject: [PATCH 1/5] rtlil: improve performance of SigSpec::remove2(SigSpec, SigSpec*)

---
Review note: the chunk loop now stops as soon as a bit has been erased, so
index i is never tested again after the vector has shifted. Hunk sizes and
the diffstat were updated for the added lines; the blob ids on the "index"
line are those of the series as posted and no longer match the result.

 kernel/rtlil.cc | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/kernel/rtlil.cc b/kernel/rtlil.cc
index 7878eaae..91b73715 100644
--- a/kernel/rtlil.cc
+++ b/kernel/rtlil.cc
@@ -2659,8 +2659,43 @@ void RTLIL::SigSpec::remove(const RTLIL::SigSpec &pattern, RTLIL::SigSpec *other
 
 void RTLIL::SigSpec::remove2(const RTLIL::SigSpec &pattern, RTLIL::SigSpec *other)
 {
-	pool<RTLIL::SigBit> pattern_bits = pattern.to_sigbit_pool();
-	remove2(pattern_bits, other);
+	// Remove from this signal every bit that is covered by a chunk of
+	// 'pattern'. When 'other' is given, the bits at the same positions
+	// are removed from it too, so both signals keep the same width.
+	if (other)
+		cover("kernel.rtlil.sigspec.remove_other");
+	else
+		cover("kernel.rtlil.sigspec.remove");
+
+	unpack();
+	if (other != NULL) {
+		log_assert(width_ == other->width_);
+		other->unpack();
+	}
+
+	// Iterate from the top so that erasing bit i only shifts bits that
+	// have already been visited.
+	for (int i = GetSize(bits_) - 1; i >= 0; i--) {
+		if (bits_[i].wire == NULL) continue;
+
+		for (auto &pattern_chunk : pattern.chunks()) {
+			if (bits_[i].wire == pattern_chunk.wire &&
+				bits_[i].offset >= pattern_chunk.offset &&
+				bits_[i].offset < pattern_chunk.offset + pattern_chunk.width) {
+				bits_.erase(bits_.begin() + i);
+				width_--;
+				if (other != NULL) {
+					other->bits_.erase(other->bits_.begin() + i);
+					other->width_--;
+				}
+				// bits_[i] no longer exists: stop before the remaining chunks
+				// are tested against whatever now sits at index i.
+				break;
+			}
+		}
+	}
+
+	check();
 }
 
 void RTLIL::SigSpec::remove(const pool<RTLIL::SigBit> &pattern)

From 34969d41405a1ad418b82caa394f880ea0f6243f Mon Sep 17 00:00:00 2001
From: Rick Altherr
Date: Sat, 30 Jan 2016 19:26:46 -0800
Subject: [PATCH 2/5] genrtlil: avoid converting SigSpec to set when going through removeSignalFromCaseTree()

---
 frontends/ast/genrtlil.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/frontends/ast/genrtlil.cc b/frontends/ast/genrtlil.cc
index 87e8379e..876bfbe3 100644
--- a/frontends/ast/genrtlil.cc
+++ b/frontends/ast/genrtlil.cc
@@ -379,7 +379,7 @@ struct AST_INTERNAL::ProcessGenerator
 	// e.g. when the last statement in the code "a = 23; if (b) a = 42; a = 0;" is processed this
 	// function is called to clean up the first two assignments as they are overwritten by
 	// the third assignment.
-	void removeSignalFromCaseTree(const std::set<RTLIL::SigBit> &pattern, RTLIL::CaseRule *cs)
+	void removeSignalFromCaseTree(const RTLIL::SigSpec &pattern, RTLIL::CaseRule *cs)
 	{
 		for (auto it = cs->actions.begin(); it != cs->actions.end(); it++)
 			it->first.remove2(pattern, &it->second);
@@ -434,7 +434,7 @@ struct AST_INTERNAL::ProcessGenerator
 						subst_rvalue_map.set(unmapped_lvalue[i], rvalue[i]);
 				}
 
-				removeSignalFromCaseTree(lvalue.to_sigbit_set(), current_case);
+				removeSignalFromCaseTree(lvalue, current_case);
 				remove_unwanted_lvalue_bits(lvalue, rvalue);
 				current_case->actions.push_back(RTLIL::SigSig(lvalue, rvalue));
 			}
@@ -511,7 +511,7 @@ struct AST_INTERNAL::ProcessGenerator
 					subst_rvalue_map.set(this_case_eq_lvalue[i], this_case_eq_ltemp[i]);
 
 				this_case_eq_lvalue.replace(subst_lvalue_map.stdmap());
-				removeSignalFromCaseTree(this_case_eq_lvalue.to_sigbit_set(), current_case);
+				removeSignalFromCaseTree(this_case_eq_lvalue, current_case);
 				addChunkActions(current_case->actions, this_case_eq_lvalue, this_case_eq_ltemp);
 			}
 			break;

From 89dc40f162a7f06d15ad489066dd0cc64937fbd7 Mon Sep 17 00:00:00 2001
From: Rick Altherr
Date: Sat, 30 Jan 2016 19:43:29 -0800
Subject: [PATCH 3/5] rtlil: improve performance of SigSpec::replace(SigSpec, SigSpec, SigSpec*)

---
Review note: the posted loop wrote through 'other' while still matching
against bits_, one pattern entry at a time. If both are the same SigSpec, a
bit that had just been replaced could be matched and replaced again (for
example when two bits are swapped). The loops are now nested the other way
round so that every bit is looked at exactly once. Hunk sizes and the
diffstat were updated; the blob ids on the "index" line are those of the
series as posted.

 kernel/rtlil.cc | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/kernel/rtlil.cc b/kernel/rtlil.cc
index 91b73715..ca448057 100644
--- a/kernel/rtlil.cc
+++ b/kernel/rtlil.cc
@@ -2584,18 +2584,30 @@ void RTLIL::SigSpec::replace(const RTLIL::SigSpec &pattern, const RTLIL::SigSpec
 
 void RTLIL::SigSpec::replace(const RTLIL::SigSpec &pattern, const RTLIL::SigSpec &with, RTLIL::SigSpec *other) const
 {
+	log_assert(other != NULL);
+	log_assert(width_ == other->width_);
 	log_assert(pattern.width_ == with.width_);
 
 	pattern.unpack();
 	with.unpack();
+	unpack();
+	other->unpack();
 
-	dict<RTLIL::SigBit, RTLIL::SigBit> rules;
+	// Each bit of this signal is compared once, using the value it has
+	// before anything is written, so a bit stored through 'other' is never
+	// matched again if 'other' refers to this same signal. Scanning the
+	// pattern from its end makes the last matching entry win, as it did
+	// when later entries overwrote earlier ones in the rules dict.
+	for (int j = 0; j < GetSize(bits_); j++) {
+		for (int i = GetSize(pattern.bits_) - 1; i >= 0; i--) {
+			if (pattern.bits_[i].wire != NULL && bits_[j] == pattern.bits_[i]) {
+				other->bits_[j] = with.bits_[i];
+				break;
+			}
+		}
+	}
 
-	for (int i = 0; i < GetSize(pattern.bits_); i++)
-		if (pattern.bits_[i].wire != NULL)
-			rules[pattern.bits_[i]] = with.bits_[i];
-
-	replace(rules, other);
+	other->check();
 }
 
 void RTLIL::SigSpec::replace(const dict<RTLIL::SigBit, RTLIL::SigBit> &rules)

From 0265d7b1006e0946eb4635d73a6d49d31b3e4235 Mon Sep 17 00:00:00 2001
From: Rick Altherr
Date: Sun, 31 Jan 2016 08:55:49 -0800
Subject: [PATCH 4/5] rtlil: speed up SigSpec::sort_and_unify()

std::set<> internally is often a red-black tree which is fairly
expensive to create but fast to look up. In the case of
sort_and_unify(), a set<> is constructed as a temporary object to
attempt to speed up lookups. Being a temporary, however, the cost of
creation far outweighs the lookup improvement and is a net performance
loss. Instead, sort the vector<> that already exists and then apply
std::unique().
---
 kernel/rtlil.cc | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/rtlil.cc b/kernel/rtlil.cc
index ca448057..ee0f44a3 100644
--- a/kernel/rtlil.cc
+++ b/kernel/rtlil.cc
@@ -2573,8 +2573,18 @@ void RTLIL::SigSpec::sort()
 
 void RTLIL::SigSpec::sort_and_unify()
 {
+	unpack();
 	cover("kernel.rtlil.sigspec.sort_and_unify");
-	*this = this->to_sigbit_set();
+
+	// A copy of the bits vector is used to prevent duplicating the logic from
+	// SigSpec::SigSpec(std::vector<RTLIL::SigBit>). This incurs an extra copy but
+	// that isn't showing up as significant in profiles.
+	std::vector<RTLIL::SigBit> unique_bits = bits_;
+	std::sort(unique_bits.begin(), unique_bits.end());
+	auto last = std::unique(unique_bits.begin(), unique_bits.end());
+	unique_bits.erase(last, unique_bits.end());
+
+	*this = unique_bits;
 }
 
 void RTLIL::SigSpec::replace(const RTLIL::SigSpec &pattern, const RTLIL::SigSpec &with)

From 3c48de8e218ff70f4b5f8e42a6794a48354ea062 Mon Sep 17 00:00:00 2001
From: Rick Altherr
Date: Sun, 31 Jan 2016 09:07:21 -0800
Subject: [PATCH 5/5] rtlil: Improve performance of SigSpec::extract(SigSpec, SigSpec*)

Converting to a pool is fairly expensive due to inserts somewhat
frequently causing rehashing. Instead, walk through the pattern
SigSpec directly on a chunk-by-chunk basis and apply it to this
SigSpec's individual bits. Using chunks for the pattern minimizes the
number of iterations in the outer loop.
---
Review note: the bit vector of 'other' is now built once instead of once
per pattern chunk, and the two copies of the match loop were merged; the
order in which bits are appended is unchanged. Hunk sizes, the hunk offset
(shifted by the lines added in patches 1 and 3) and the diffstat were
updated; the blob ids on the "index" line are those of the series as
posted.

 kernel/rtlil.cc | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/kernel/rtlil.cc b/kernel/rtlil.cc
index ee0f44a3..1f3f7899 100644
--- a/kernel/rtlil.cc
+++ b/kernel/rtlil.cc
@@ -2789,8 +2789,38 @@ void RTLIL::SigSpec::remove2(const std::set<RTLIL::SigBit> &pattern, RTLIL::SigS
 
 RTLIL::SigSpec RTLIL::SigSpec::extract(const RTLIL::SigSpec &pattern, const RTLIL::SigSpec *other) const
 {
-	pool<RTLIL::SigBit> pattern_bits = pattern.to_sigbit_pool();
-	return extract(pattern_bits, other);
+	// Return the non-constant bits of this signal that lie inside a chunk
+	// of 'pattern'; when 'other' is given, the bits of 'other' at those
+	// positions are returned instead.
+	if (other)
+		cover("kernel.rtlil.sigspec.extract_other");
+	else
+		cover("kernel.rtlil.sigspec.extract");
+
+	log_assert(other == NULL || width_ == other->width_);
+
+	RTLIL::SigSpec ret;
+	std::vector<RTLIL::SigBit> bits_match = to_sigbit_vector();
+
+	// The result bits come from 'other' when it is given and from this
+	// signal otherwise. The vector is built once, before the chunk loop,
+	// instead of once per pattern chunk.
+	std::vector<RTLIL::SigBit> bits_other;
+	if (other)
+		bits_other = other->to_sigbit_vector();
+	const std::vector<RTLIL::SigBit> &bits_result = other ? bits_other : bits_match;
+
+	for (auto& pattern_chunk : pattern.chunks()) {
+		for (int i = 0; i < width_; i++)
+			if (bits_match[i].wire &&
+				bits_match[i].wire == pattern_chunk.wire &&
+				bits_match[i].offset >= pattern_chunk.offset &&
+				bits_match[i].offset < pattern_chunk.offset + pattern_chunk.width)
+				ret.append_bit(bits_result[i]);
+	}
+
+	ret.check();
+	return ret;
 }
 
 RTLIL::SigSpec RTLIL::SigSpec::extract(const pool<RTLIL::SigBit> &pattern, const RTLIL::SigSpec *other) const