libstdc++/regex: Replace __dfs_mode template parameter with run-time flag

That _Executor's __dfs_mode flag is a template parameter means
__regex_algo_impl has to instantiate two different (but largely the
same) versions of _Executor even though the BFS version is rarely used.
And it seems the compiler has trouble DCEing the unused version of
_Executor, which needlessly increases code size and burdens the optimizer.

This patch replaces the template parameter with a run-time data member
_M_search_mode.  We in turn need to inline the _Executor::_State_info
member functions and data members into _Executor so that they depend on
the run-time instead of compile-time flag.  This means _Executor is
three pointers bigger in DFS mode (due to the unused _M_match_queue and
_M_visited_states members), which we can reduce to one pointer if we
really want to, but that doesn't seem ver worthwhile.

The __dfs_mode template parameter is now unused but not yet removed,
that'll be done in the next patch.

After this patch, both code size and run time for the microbenchmark

    for (int i = 0; i < 10000; i++)
      regex_match(string(200, 'a'), regex("(a|b|c)*"));

decreases by about 15% each (with -O2).  Compile time/memory use decreases
by 5-10%.

libstdc++-v3/ChangeLog:

	* include/bits/regex.tcc (__regex_algo_impl): Pass __use_dfs
	parameter to _Executor's constructor.
	* include/bits/regex_executor.h (_Executor::_Search_mode): New.
	(_Executor::_Executor): Add __use_dfs parameter and initialize
	_M_search_mode.  Adjust after inlining _State_info members into
	_Executor.
	(_Executor::~_Executor): Free _M_visted_states.
	(_Executor::_M_main): Adjust after renaming _M_main_dispatch
	overloads to _M_main_dfs and _M_main_bfs.
	(_Executor::_State_info): Remove.
	(_Executor::_M_visited): Inlined from _State_info.
	(_Executor::_M_get_sol_pos): Likewise.
	(_Executor::_M_states): Remove.
	(_Executor::_M_start): Inlined from _State_info.
	(_Executor::_M_sol_pos): Likewise.
	(_Executor::_M_match_queue): Likewise.
	(_Executor::_M_search_mode): New.
	* include/bits/regex_executor.tcc (_Executor::_M_main_dispatch):
	Renamed to...
	(_Executor::_M_main_dfs, _Executor::_M_main_bfs): ... these.
	(_Executor::_M_*): Adjust after _M_states removal.
	(_Executor::_M_lookhead): Also adjust _Executor constructor
	call.

Reviewed-by: Tomasz Kamiński <tkaminsk@redhat.com>
Reviewed-by: Jonathan Wakely <jwakely@redhat.com>
This commit is contained in:
Patrick Palka
2026-02-09 20:04:00 -05:00
parent 0c763ab721
commit c49ce07bf0
3 changed files with 78 additions and 110 deletions

View File

@@ -61,26 +61,19 @@ namespace __detail
__m._M_resize(__re._M_automaton->_M_sub_count());
bool __ret;
bool __use_dfs = true;
if ((__re.flags() & regex_constants::__polynomial)
|| (__policy == _RegexExecutorPolicy::_S_alternate
&& !__re._M_automaton->_M_has_backref))
{
_Executor<_BiIter, _Alloc, _TraitsT, false>
__executor(__s, __e, __res, __re, __flags);
if (__match_mode)
__ret = __executor._M_match();
else
__ret = __executor._M_search();
}
__use_dfs = false;
_Executor<_BiIter, _Alloc, _TraitsT, true>
__executor(__s, __e, __res, __re, __flags, __use_dfs);
if (__match_mode)
__ret = __executor._M_match();
else
{
_Executor<_BiIter, _Alloc, _TraitsT, true>
__executor(__s, __e, __res, __re, __flags);
if (__match_mode)
__ret = __executor._M_match();
else
__ret = __executor._M_search();
}
__ret = __executor._M_search();
if (__ret)
{
for (auto& __it : __res)

View File

@@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
bool __dfs_mode>
class _Executor
{
using __search_mode = integral_constant<bool, __dfs_mode>;
using __dfs = true_type;
using __bfs = false_type;
enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
enum class _Match_mode : unsigned char { _Exact, _Prefix };
public:
@@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
_BiIter __end,
_ResultsVec& __results,
const _RegexT& __re,
_FlagT __flags)
_FlagT __flags,
bool __use_dfs)
: _M_cur_results(__results.get_allocator()),
_M_begin(__begin),
_M_end(__end),
@@ -82,14 +80,21 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
_M_nfa(*__re._M_automaton),
_M_results(__results),
_M_rep_count(_M_nfa.size()),
_M_states(_M_nfa._M_start(), _M_nfa.size()),
_M_flags(__flags)
_M_start(_M_nfa._M_start()),
_M_visited_states(nullptr),
_M_flags(__flags),
_M_search_mode(__use_dfs ? _Search_mode::_DFS : _Search_mode::_BFS)
{
using namespace regex_constants;
if (__flags & match_prev_avail) // ignore not_bol and not_bow
_M_flags &= ~(match_not_bol | match_not_bow);
if (_M_search_mode == _Search_mode::_BFS)
_M_visited_states = new bool[_M_nfa.size()];
}
~_Executor()
{ delete[] _M_visited_states; }
// Set matched when string exactly matches the pattern.
bool
_M_match()
@@ -154,13 +159,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
bool
_M_main(_Match_mode __match_mode)
{ return _M_main_dispatch(__match_mode, __search_mode{}); }
{
if (_M_search_mode == _Search_mode::_DFS)
return _M_main_dfs(__match_mode);
else
return _M_main_bfs(__match_mode);
}
bool
_M_main_dispatch(_Match_mode __match_mode, __dfs);
_M_main_dfs(_Match_mode __match_mode);
bool
_M_main_dispatch(_Match_mode __match_mode, __bfs);
_M_main_bfs(_Match_mode __match_mode);
bool
_M_is_word(_CharT __ch) const
@@ -239,62 +249,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
return (_M_re._M_automaton->_M_options() & __m) == __m;
}
// Holds additional information used in BFS-mode.
template<typename _SearchMode, typename _ResultsVec>
struct _State_info;
template<typename _ResultsVec>
struct _State_info<__bfs, _ResultsVec>
{
explicit
_State_info(_StateIdT __start, size_t __n)
: _M_visited_states(new bool[__n]()), _M_start(__start)
{ }
~_State_info() { delete[] _M_visited_states; }
_State_info(const _State_info&) = delete;
_State_info& operator=(const _State_info&) = delete;
bool _M_visited(_StateIdT __i)
bool
_M_visited(_StateIdT __i)
{
if (_M_visited_states)
{
if (_M_visited_states[__i])
return true;
_M_visited_states[__i] = true;
return false;
}
return false;
}
void _M_queue(_StateIdT __i, const _ResultsVec& __res)
{ _M_match_queue.emplace_back(__i, __res); }
// Dummy implementations for BFS mode.
_BiIter* _M_get_sol_pos() { return nullptr; }
// Saves states that need to be considered for the next character.
_GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
// Indicates which states are already visited.
bool* _M_visited_states;
// To record current solution.
_StateIdT _M_start;
};
template<typename _ResultsVec>
struct _State_info<__dfs, _ResultsVec>
{
explicit
_State_info(_StateIdT __start, size_t) : _M_start(__start)
{ }
// Dummy implementations for DFS mode.
bool _M_visited(_StateIdT) const { return false; }
void _M_queue(_StateIdT, const _ResultsVec&) { }
_BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
// To record current solution.
_StateIdT _M_start;
_BiIter _M_sol_pos;
};
_BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
public:
_GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames;
@@ -306,8 +273,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
const _NFAT& _M_nfa;
_ResultsVec& _M_results;
_GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
_State_info<__search_mode, _ResultsVec> _M_states;
// To record current solution.
_StateIdT _M_start;
_BiIter _M_sol_pos;
// (BFS only) Saves states that need to be considered for the next character.
_GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
// (BFS only) Indicates which states are already visited.
bool* _M_visited_states;
_FlagT _M_flags;
const _Search_mode _M_search_mode;
// Do we have a solution so far?
bool _M_has_sol;
};

View File

@@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
};
// The _M_main function operates in different modes, DFS mode or BFS mode,
// indicated by template parameter __dfs_mode, and dispatches to one of the
// _M_main_dispatch overloads.
// indicated by _M_search_mode, and dispatches to either _M_main_dfs or
// _M_main_bfs.
//
// ------------------------------------------------------------
//
@@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_main_dispatch(_Match_mode __match_mode, __dfs)
_M_main_dfs(_Match_mode __match_mode)
{
_M_has_sol = false;
*_M_states._M_get_sol_pos() = _BiIter();
*_M_get_sol_pos() = _BiIter();
_M_cur_results = _M_results;
_M_dfs(__match_mode, _M_states._M_start);
_M_dfs(__match_mode, _M_start);
return _M_has_sol;
}
@@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// It first computes epsilon closure (states that can be achieved without
// consuming characters) for every state that's still matching,
// using the same DFS algorithm, but doesn't re-enter states (using
// _M_states._M_visited to check), nor follow _S_opcode_match.
// _M_visited to check), nor follow _S_opcode_match.
//
// Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
// Then apply DFS using every _S_opcode_match (in _M_match_queue)
// as the start state.
//
// It significantly reduces potential duplicate states, so has a better
@@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_main_dispatch(_Match_mode __match_mode, __bfs)
_M_main_bfs(_Match_mode __match_mode)
{
_M_states._M_queue(_M_states._M_start, _M_results);
_M_match_queue.emplace_back(_M_start, _M_results);
bool __ret = false;
while (1)
{
_M_has_sol = false;
if (_M_states._M_match_queue.empty())
if (_M_match_queue.empty())
break;
std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
auto __old_queue = std::move(_M_states._M_match_queue);
std::fill_n(_M_visited_states, _M_nfa.size(), false);
auto __old_queue = std::move(_M_match_queue);
auto __alloc = _M_cur_results.get_allocator();
for (auto& __task : __old_queue)
{
@@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
if (__match_mode == _Match_mode::_Exact)
__ret = _M_has_sol;
_M_states._M_match_queue.clear();
_M_match_queue.clear();
return __ret;
}
@@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// We may want to make this faster by not copying,
// but let's not be clever prematurely.
_ResultsVec __what(_M_cur_results);
_Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
__sub._M_states._M_start = __next;
_Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags,
bool(_M_search_mode));
__sub._M_start = __next;
if (__sub._M_search_from_first())
{
for (size_t __i = 0; __i < __what.size(); __i++)
@@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// Greedy.
if (!__state._M_neg)
{
if constexpr (__dfs_mode)
if (_M_search_mode == _Search_mode::_DFS)
// If it's DFS executor and already accepted, we're done.
_M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
_M_current);
@@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
else // Non-greedy mode
{
if constexpr (__dfs_mode)
if (_M_search_mode == _Search_mode::_DFS)
{
// vice-versa.
_M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
@@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
const auto& __state = _M_nfa[__i];
if (_M_current == _M_end)
return;
if constexpr (__dfs_mode)
if (_M_search_mode == _Search_mode::_DFS)
{
if (__state._M_matches(*_M_current))
{
@@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
else
if (__state._M_matches(*_M_current))
_M_states._M_queue(__state._M_next, _M_cur_results);
_M_match_queue.emplace_back(__state._M_next, _M_cur_results);
}
template<typename _BiIter, typename _TraitsT>
@@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
{
static_assert(__dfs_mode, "this should never be instantiated");
__glibcxx_assert(_M_search_mode == _Search_mode::_DFS);
const auto& __state = _M_nfa[__i];
auto& __submatch = _M_cur_results[__state._M_backref_index];
@@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_accept(_Match_mode __match_mode, _StateIdT)
{
if constexpr (__dfs_mode)
if (_M_search_mode == _Search_mode::_DFS)
{
__glibcxx_assert(!_M_has_sol);
if (__match_mode == _Match_mode::_Exact)
@@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
_M_results = _M_cur_results;
else // POSIX
{
__glibcxx_assert(_M_states._M_get_sol_pos());
__glibcxx_assert(_M_get_sol_pos());
// Here's POSIX's logic: match the longest one. However
// we never know which one (lhs or rhs of "|") is longer
// unless we try both of them and compare the results.
@@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
// position of the last successful match. It's better
// to be larger, because POSIX regex is always greedy.
// TODO: This could be slow.
if (*_M_states._M_get_sol_pos() == _BiIter()
|| std::distance(_M_begin,
*_M_states._M_get_sol_pos())
if (*_M_get_sol_pos() == _BiIter()
|| std::distance(_M_begin, *_M_get_sol_pos())
< std::distance(_M_begin, _M_current))
{
*_M_states._M_get_sol_pos() = _M_current;
*_M_get_sol_pos() = _M_current;
_M_results = _M_cur_results;
}
}
@@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_node(_Match_mode __match_mode, _StateIdT __i)
{
if (_M_states._M_visited(__i))
if (_M_visited(__i))
return;
switch (_M_nfa[__i]._M_opcode())
@@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_opcode_match:
_M_handle_match(__match_mode, __i); break;
case _S_opcode_backref:
if constexpr (__dfs_mode)
if (_M_search_mode == _Search_mode::_DFS)
_M_handle_backref(__match_mode, __i);
else
__builtin_unreachable();
@@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
}
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
bool __dummy>
void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>::
_M_dfs(_Match_mode __match_mode, _StateIdT __start)
{
const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS);
_M_frames.emplace_back(_S_fopcode_next, __start);
while (!_M_frames.empty())
@@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_fopcode_fallback_next:
if (_M_has_sol)
break;
if constexpr (__dfs_mode)
if (__dfs_mode)
_M_current = __frame._M_pos;
[[__fallthrough__]];
case _S_fopcode_next:
@@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_fopcode_fallback_rep_once_more:
if (_M_has_sol)
break;
if constexpr (__dfs_mode)
if (__dfs_mode)
_M_current = __frame._M_pos;
[[__fallthrough__]];
case _S_fopcode_rep_once_more:
@@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
case _S_fopcode_posix_alternative:
_M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
_M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
if constexpr (__dfs_mode)
if (__dfs_mode)
_M_current = __frame._M_pos;
_M_has_sol = false;
break;