diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc index 061b60042ec..d16cfd5baf0 100644 --- a/libstdc++-v3/include/bits/regex.tcc +++ b/libstdc++-v3/include/bits/regex.tcc @@ -61,26 +61,19 @@ namespace __detail __m._M_resize(__re._M_automaton->_M_sub_count()); bool __ret; + bool __use_dfs = true; if ((__re.flags() & regex_constants::__polynomial) || (__policy == _RegexExecutorPolicy::_S_alternate && !__re._M_automaton->_M_has_backref)) - { - _Executor<_BiIter, _Alloc, _TraitsT, false> - __executor(__s, __e, __res, __re, __flags); - if (__match_mode) - __ret = __executor._M_match(); - else - __ret = __executor._M_search(); - } + __use_dfs = false; + + _Executor<_BiIter, _Alloc, _TraitsT, true> + __executor(__s, __e, __res, __re, __flags, __use_dfs); + if (__match_mode) + __ret = __executor._M_match(); else - { - _Executor<_BiIter, _Alloc, _TraitsT, true> - __executor(__s, __e, __res, __re, __flags); - if (__match_mode) - __ret = __executor._M_match(); - else - __ret = __executor._M_search(); - } + __ret = __executor._M_search(); + if (__ret) { for (auto& __it : __res) diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 94048e1f632..2638df4d5af 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) bool __dfs_mode> class _Executor { - using __search_mode = integral_constant; - using __dfs = true_type; - using __bfs = false_type; - + enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 }; enum class _Match_mode : unsigned char { _Exact, _Prefix }; public: @@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _BiIter __end, _ResultsVec& __results, const _RegexT& __re, - _FlagT __flags) + _FlagT __flags, + bool __use_dfs) : _M_cur_results(__results.get_allocator()), _M_begin(__begin), _M_end(__end), @@ -82,14 +80,21 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _M_nfa(*__re._M_automaton), _M_results(__results), _M_rep_count(_M_nfa.size()), - _M_states(_M_nfa._M_start(), _M_nfa.size()), - _M_flags(__flags) + _M_start(_M_nfa._M_start()), + _M_visited_states(nullptr), + _M_flags(__flags), + _M_search_mode(__use_dfs ? _Search_mode::_DFS : _Search_mode::_BFS) { using namespace regex_constants; if (__flags & match_prev_avail) // ignore not_bol and not_bow _M_flags &= ~(match_not_bol | match_not_bow); + if (_M_search_mode == _Search_mode::_BFS) + _M_visited_states = new bool[_M_nfa.size()]; } + ~_Executor() + { delete[] _M_visited_states; } + // Set matched when string exactly matches the pattern. bool _M_match() @@ -154,13 +159,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) bool _M_main(_Match_mode __match_mode) - { return _M_main_dispatch(__match_mode, __search_mode{}); } + { + if (_M_search_mode == _Search_mode::_DFS) + return _M_main_dfs(__match_mode); + else + return _M_main_bfs(__match_mode); + } bool - _M_main_dispatch(_Match_mode __match_mode, __dfs); + _M_main_dfs(_Match_mode __match_mode); bool - _M_main_dispatch(_Match_mode __match_mode, __bfs); + _M_main_bfs(_Match_mode __match_mode); bool _M_is_word(_CharT __ch) const @@ -239,62 +249,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) return (_M_re._M_automaton->_M_options() & __m) == __m; } - // Holds additional information used in BFS-mode. - template - struct _State_info; - - template - struct _State_info<__bfs, _ResultsVec> - { - explicit - _State_info(_StateIdT __start, size_t __n) - : _M_visited_states(new bool[__n]()), _M_start(__start) - { } - - ~_State_info() { delete[] _M_visited_states; } - - _State_info(const _State_info&) = delete; - _State_info& operator=(const _State_info&) = delete; - - bool _M_visited(_StateIdT __i) + bool + _M_visited(_StateIdT __i) + { + if (_M_visited_states) { if (_M_visited_states[__i]) return true; _M_visited_states[__i] = true; - return false; } + return false; + } - void _M_queue(_StateIdT __i, const _ResultsVec& __res) - { _M_match_queue.emplace_back(__i, __res); } - - // Dummy implementations for BFS mode. - _BiIter* _M_get_sol_pos() { return nullptr; } - - // Saves states that need to be considered for the next character. - _GLIBCXX_STD_C::vector> _M_match_queue; - // Indicates which states are already visited. - bool* _M_visited_states; - // To record current solution. - _StateIdT _M_start; - }; - - template - struct _State_info<__dfs, _ResultsVec> - { - explicit - _State_info(_StateIdT __start, size_t) : _M_start(__start) - { } - - // Dummy implementations for DFS mode. - bool _M_visited(_StateIdT) const { return false; } - void _M_queue(_StateIdT, const _ResultsVec&) { } - - _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } - - // To record current solution. - _StateIdT _M_start; - _BiIter _M_sol_pos; - }; + _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } public: _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames; @@ -306,8 +273,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) const _NFAT& _M_nfa; _ResultsVec& _M_results; _GLIBCXX_STD_C::vector> _M_rep_count; - _State_info<__search_mode, _ResultsVec> _M_states; + // To record current solution. + _StateIdT _M_start; + _BiIter _M_sol_pos; + // (BFS only) Saves states that need to be considered for the next character. + _GLIBCXX_STD_C::vector> _M_match_queue; + // (BFS only) Indicates which states are already visited. + bool* _M_visited_states; _FlagT _M_flags; + const _Search_mode _M_search_mode; // Do we have a solution so far? bool _M_has_sol; }; diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 19b5ad27df4..0ae7bdf6839 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) }; // The _M_main function operates in different modes, DFS mode or BFS mode, - // indicated by template parameter __dfs_mode, and dispatches to one of the - // _M_main_dispatch overloads. + // indicated by _M_search_mode, and dispatches to either _M_main_dfs or + // _M_main_bfs. // // ------------------------------------------------------------ // @@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) template bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_main_dispatch(_Match_mode __match_mode, __dfs) + _M_main_dfs(_Match_mode __match_mode) { _M_has_sol = false; - *_M_states._M_get_sol_pos() = _BiIter(); + *_M_get_sol_pos() = _BiIter(); _M_cur_results = _M_results; - _M_dfs(__match_mode, _M_states._M_start); + _M_dfs(__match_mode, _M_start); return _M_has_sol; } @@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // It first computes epsilon closure (states that can be achieved without // consuming characters) for every state that's still matching, // using the same DFS algorithm, but doesn't re-enter states (using - // _M_states._M_visited to check), nor follow _S_opcode_match. + // _M_visited to check), nor follow _S_opcode_match. // - // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue) + // Then apply DFS using every _S_opcode_match (in _M_match_queue) // as the start state. // // It significantly reduces potential duplicate states, so has a better @@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) template bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_main_dispatch(_Match_mode __match_mode, __bfs) + _M_main_bfs(_Match_mode __match_mode) { - _M_states._M_queue(_M_states._M_start, _M_results); + _M_match_queue.emplace_back(_M_start, _M_results); bool __ret = false; while (1) { _M_has_sol = false; - if (_M_states._M_match_queue.empty()) + if (_M_match_queue.empty()) break; - std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false); - auto __old_queue = std::move(_M_states._M_match_queue); + std::fill_n(_M_visited_states, _M_nfa.size(), false); + auto __old_queue = std::move(_M_match_queue); auto __alloc = _M_cur_results.get_allocator(); for (auto& __task : __old_queue) { @@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } if (__match_mode == _Match_mode::_Exact) __ret = _M_has_sol; - _M_states._M_match_queue.clear(); + _M_match_queue.clear(); return __ret; } @@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // We may want to make this faster by not copying, // but let's not be clever prematurely. _ResultsVec __what(_M_cur_results); - _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); - __sub._M_states._M_start = __next; + _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags, + bool(_M_search_mode)); + __sub._M_start = __next; if (__sub._M_search_from_first()) { for (size_t __i = 0; __i < __what.size(); __i++) @@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // Greedy. if (!__state._M_neg) { - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) // If it's DFS executor and already accepted, we're done. _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next, _M_current); @@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } else // Non-greedy mode { - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) { // vice-versa. _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i, @@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) const auto& __state = _M_nfa[__i]; if (_M_current == _M_end) return; - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) { if (__state._M_matches(*_M_current)) { @@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } else if (__state._M_matches(*_M_current)) - _M_states._M_queue(__state._M_next, _M_cur_results); + _M_match_queue.emplace_back(__state._M_next, _M_cur_results); } template @@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_backref(_Match_mode __match_mode, _StateIdT __i) { - static_assert(__dfs_mode, "this should never be instantiated"); + __glibcxx_assert(_M_search_mode == _Search_mode::_DFS); const auto& __state = _M_nfa[__i]; auto& __submatch = _M_cur_results[__state._M_backref_index]; @@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_accept(_Match_mode __match_mode, _StateIdT) { - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) { __glibcxx_assert(!_M_has_sol); if (__match_mode == _Match_mode::_Exact) @@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _M_results = _M_cur_results; else // POSIX { - __glibcxx_assert(_M_states._M_get_sol_pos()); + __glibcxx_assert(_M_get_sol_pos()); // Here's POSIX's logic: match the longest one. However // we never know which one (lhs or rhs of "|") is longer // unless we try both of them and compare the results. @@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // position of the last successful match. It's better // to be larger, because POSIX regex is always greedy. // TODO: This could be slow. - if (*_M_states._M_get_sol_pos() == _BiIter() - || std::distance(_M_begin, - *_M_states._M_get_sol_pos()) + if (*_M_get_sol_pos() == _BiIter() + || std::distance(_M_begin, *_M_get_sol_pos()) < std::distance(_M_begin, _M_current)) { - *_M_states._M_get_sol_pos() = _M_current; + *_M_get_sol_pos() = _M_current; _M_results = _M_cur_results; } } @@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_node(_Match_mode __match_mode, _StateIdT __i) { - if (_M_states._M_visited(__i)) + if (_M_visited(__i)) return; switch (_M_nfa[__i]._M_opcode()) @@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_opcode_match: _M_handle_match(__match_mode, __i); break; case _S_opcode_backref: - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) _M_handle_backref(__match_mode, __i); else __builtin_unreachable(); @@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } template - void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: + bool __dummy> + void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>:: _M_dfs(_Match_mode __match_mode, _StateIdT __start) { + const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS); _M_frames.emplace_back(_S_fopcode_next, __start); while (!_M_frames.empty()) @@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_fallback_next: if (_M_has_sol) break; - if constexpr (__dfs_mode) + if (__dfs_mode) _M_current = __frame._M_pos; [[__fallthrough__]]; case _S_fopcode_next: @@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_fallback_rep_once_more: if (_M_has_sol) break; - if constexpr (__dfs_mode) + if (__dfs_mode) _M_current = __frame._M_pos; [[__fallthrough__]]; case _S_fopcode_rep_once_more: @@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_posix_alternative: _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol); _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id); - if constexpr (__dfs_mode) + if (__dfs_mode) _M_current = __frame._M_pos; _M_has_sol = false; break;