*Depending on algorithm used internally.
*There are many slightly different dialects (Perl, ECMA, Posix).
// compile the RE only once
std::regex re{"abc|[0-9]+"};
bool match(std::string_view sv) {
return std::regex_match(sv.begin(), sv.end(), re);
}
const unsigned char * p = "^[0-9]{4,16}?[aA]";
int err = 0;
size_t errOff = 0;
pcre2_code * re = pcre2_compile(p, PCRE2_ZERO_TERMINATED,
0, &err, &errOff, NULL);
int match(const char * str, size_t sz) {
return pcre2_match(re, str, sz, 0, 0, NULL, NULL);
}
// TODO: don't forget to call pcre2_code_free at the end ;)
You can write a finite state machine by yourself.
But that's hard to write properly.
You can easily introduce an error.
You can use an external utility to generate the parser.
bool match(std::string_view sv) noexcept {
return std::regex_match<"abc|[0-9]+">(sv);
}
This is possible to implement with C++20's language support
for Non Type Template Parameters.
bool match(std::string_view sv) noexcept {
return ctre::match<"abc|[0-9]+">(sv);
}
Disabled for compilers without class NTTP support.
static constexpr ctre::fixed_string
ptn = "abc|[0-9]+";
bool match(std::string_view sv) noexcept {
return ctre::match<ptn>(sv);
}
bool match(std::string_view sv) noexcept {
using namespace ctre::literals;
return "abc|[0-9]+"_ctre.match(sv);
}
static_assert(
ctre::match<"[0-9]+\\.[0-9]+">("123.456"sv)
);
static_assert(
ctre::match<"[0-9]+\\.([0-9]+)">("123.456"sv)
.get<1>() == "456"sv
);
struct date { unsigned year, month, day; };
std::optional<date> extract(std::string_view sv) noexcept {
if (auto re = ctre::match<"(\\n+)/(\\n{1,2})/(\\n{1,2})">(sv)) {
return date{
conv(re.get<0>()),
conv(re.get<1>()),
conv(re.get<2>())
};
}
return std::nullopt;
}
struct name { std::string_view first, family; };
std::optional<name> extract(std::string_view sv) noexcept {
if (auto [re,f,l] = ctre::match<"([A-Za-z]++),([A-Za-z]++)">(sv); re) {
return name{f,l};
} else {
return std::nullopt;
}
}
And a complete std::regex implementation is included :)
static inline constexpr ctre::fixed_string pattern = ")hello";
bool fnc(std::string_view view) {
return ctre::match<pattern>(view);
}
In file included from test.cpp:1: In file included from include/ctre.hpp:5: include/ctre/functions.hpp:48:2: error: static_assert failed due to requirement 'correct' "Regular Expression contains syntax error." static_assert(correct, "Regular Expression contains syntax error."); ^ ~~~~~~~ test.cpp:21:15: note: in instantiation of function template specialization 'ctre::match<&pattern>' requested here return ctre::match<pattern>(view); ^
( | ) | * | + | ? | \ | a | d | | | other | ε | |
---|---|---|---|---|---|---|---|---|---|---|---|
→ S | ( alt0 ) mod seq alt | \ esc mod seq alt | a char mod seq alt | d char mod seq alt | other char mod seq alt | ε | |||||
alt0 | ( alt0 ) mod seq alt | \ esc mod seq alt | a char mod seq alt | d char mod seq alt | other char mod seq alt | ||||||
alt | ε | | seq0 alt alt | ε | ||||||||
esc | a alpha | d digit | |||||||||
mod | ε | ε | * star | + plus | ? opt | ε | ε | ε | ε | ε | ε |
seq0 | ( alt0 ) mod seq | \ esc mod seq | a char mod seq | d char mod seq | other char mod seq | ||||||
seq | ( alt0 ) mod seq seq | ε | \ esc mod seq seq | a char mod seq seq | d char mod seq seq | ε | other char mod seq seq | ε | |||
( | pop | ||||||||||
) | pop | ||||||||||
* | pop | ||||||||||
+ | pop | ||||||||||
? | pop | ||||||||||
\ | pop | ||||||||||
a | pop | ||||||||||
d | pop | ||||||||||
| | pop | ||||||||||
other | pop | ||||||||||
Z0 | accept |
struct S {};
struct alt0 {};
struct alt {};
struct esc {};
struct mod {};
struct seq0 {};
struct seq {};
using start_symbol = S;
// f (symbol, char) → (...)
auto f(symbol, term<'c'>) -> list{...};
// f (symbol, symbol) → pop input
template <auto S> auto f(term<S>, term<S>) -> pop_input;
// f (symbol, char) → reject
auto f(...) -> reject;
// f (Z0, ε) → accept
auto f(empty_stack, epsilon) -> accept;
struct my_grammar {
struct S {};
struct alt0 {};
struct alt {};
// ...
using start_symbol = S;
auto f(...) -> reject;
// ...
}
constexpr bool ok = parser<my_grammar, "^hello">::correct;
template <typename Grammar, ...> struct parser {
//...
auto next_move = Grammar::f(top_of_stack, current_char);
//...
}
template <..., fixed_string Str> struct parser {
// ...
template <size_t Pos> constexpr auto get_character() const {
if constexpr (Pos < Str.size()) return term<Str[Pos]>{};
else return epsilon{};
}
// ...
};
template <typename CharT, size_t N> struct fixed_string {
std::array<CharT, N> data; // constexpr constructor from const char[N]
constexpr auto operator[](size_t i) const noexcept { return data[i]; }
constexpr size_t size() const noexcept { return N; }
constexpr auto operator<=>(const fixed_string &) = default;
};
template <typename CharT, size_t N>
fixed_string(const CharT[N]) -> fixed_string<CharT, N>;
// more info about class NTTP in p0732 by Jeff Snyder and Louis Dionne
template <typename... Ts> struct list { };
template <typename... Ts, typename... As>
constexpr auto push(list<Ts...>, As...) -> list<As..., Ts...>;
template <typename T, typename... As>
constexpr auto pop(list<T, Ts...>) -> list<Ts...>;
template <typename T, typename... Ts>
constexpr auto top(list<T, Ts...>) -> T;
struct empty { };
constexpr auto top(list<>) -> empty;
constexpr bool ok = parser<my_grammar, "there$">::correct;
template <typename Grammar, fixed_string Str> struct parser { static constexpr bool correct = parse(list<Grammar::start_symbol>());
// prepare each step and move to next
template <size_t Pos = 0, typename S>
static constexpr bool parse(S stack) {
using next_stack = decltype( pop(stack) );
using symbol = decltype( top(stack) ); using current = decltype( get_character<Pos>() ); using next_move = decltype( Grammar::f(symbol{}, current{}) ); return move<Pos>(next_move{}, next_stack{}); }
//...
// pushing something to stack (epsilon case included)
template <size_t Pos, typename... Push, typename Stack> static constexpr bool move(list<Push...>, Stack stack) { using next_stack = decltype( push(stack, Push{}...) )
return parse<Pos>(next_stack{});}
// move to next character
template <size_t Pos, typename Stack> static constexpr bool move(pop_input, Stack stack) { return parse<Pos+1>(stack);}
//...
template <size_t Pos, typename Stack> static constexpr bool move(reject, Stack) { return false;
}
template <size_t Pos, typename Stack> static constexpr bool move(accept, Stack) { return true;
}
}; // end of Parser struct
( | ) | * | + | ? | \ | a | d | | | other | ε | |
---|---|---|---|---|---|---|---|---|---|---|---|
→ S | ( alt0 ) mod seq alt | \ esc mod seq alt | a char mod seq alt | d char mod seq alt | other char mod seq alt | ε | |||||
alt0 | ( alt0 ) mod seq alt | \ esc mod seq alt | a char mod seq alt | d char mod seq alt | other char mod seq alt | ||||||
alt | ε | | seq0 alt alt | ε | ||||||||
esc | a alpha | d digit | |||||||||
mod | ε | ε | * star | + plus | ? opt | ε | ε | ε | ε | ε | ε |
seq0 | ( alt0 ) mod seq | \ esc mod seq | a char mod seq | d char mod seq | other char mod seq | ||||||
seq | ( alt0 ) mod seq seq | ε | \ esc mod seq seq | a char mod seq seq | d char mod seq seq | ε | other char mod seq seq | ε | |||
( | pop | ||||||||||
) | pop | ||||||||||
* | pop | ||||||||||
+ | pop | ||||||||||
? | pop | ||||||||||
\ | pop | ||||||||||
a | pop | ||||||||||
d | pop | ||||||||||
| | pop | ||||||||||
other | pop | ||||||||||
Z0 | accept |
struct my_grammar {
struct _char: action {};
struct alpha: action {};
struct digit: action {};
struct seq: action {};
struct star: action {};
struct plus: action {};
struct opt: action {};
// ...
}
template <typename Grammar, fixed_string Str> struct parser {
template <size_t Pos = 0, typename S, typename T = list<>>
static constexpr auto parse(S stack, T subject = {}) {
using next_stack = decltype( pop(stack) );
using symbol = decltype( top(stack) ); if constexpr (SemanticAction<symbol>) {
using previous = decltype( prev_character<Pos>() );
using next_subject = decltype( modify(symbol{}, previous{}, subject) );
return parse<Pos>(next_stack(), next_subject{});
} else { using current = decltype( get_character<Pos>() );
using next_move = decltype( Grammar::f(symbol{}, current{}) );
return move<Pos>(next_move{}, next_stack{}, subject);
}
}
// pushing something to stack (epsilon case included)
template <size_t Pos, typename... Push, typename Stack, typename T>
static constexpr auto move(list<Push...>, Stack stack, T subject) {
using next_stack = decltype( push(stack, Push{}...) )
return parse<Pos>(next_stack{}, subject);
}
// move to next character
template <size_t Pos, typename Stack, typename T>
static constexpr auto move(pop_input, Stack stack, T subject) {
return parse<Pos+1>(stack, subject);
}
template <size_t Pos, typename Stack, typename T>
static constexpr auto move(reject, Stack, T subject) { return pair{false, subject};}
template <size_t Pos, typename Stack, typename T>
static constexpr auto move(accept, Stack, T subject) { return pair{true, subject};}
}; // end of Parser struct
template <typename Grammar, fixed_string Str> struct parser {
static constexpr pair result
= parse(list<Grammar::start_symbol>(), MYSTERY);
static constexpr auto correct = result.first;
using output_type = decltype(result.second);
// ...
};
using T = parser<my_grammar, "^wow$">::output_type;
template <auto C, typename... Ts>
auto modify(my_grammar::_char, term<C>, list<Ts...>)
-> list<ch<C>, Ts...>;
(Notice the term and ch types.)
template <auto C, typename Opt, typename... Ts>
auto modify(my_grammar::opt, term<C>, list<Opt, Ts...>)
-> list<opt<Opt>, Ts...>;
template <auto C, typename Plus, typename... Ts>
auto modify(my_grammar::plus, term<C>, list<Plus, Ts...>)
-> list<plus<Plus>, Ts...>;
template <auto C, typename Star, typename... Ts>
auto modify(my_grammar::star, term<C>, list<Star, Ts...>)
-> list<star<Star>, Ts...>;
template <auto C, typename A, typename B, typename... Ts>
auto modify(my_grammar::seq, term<C>, list<B, A, Ts...>)
-> list<seq<A, B>, Ts...>;
(Notice the switched order of A & B on the stack.)
template <auto C, typename... As, typename B, typename... Ts>
auto modify(my_grammar::seq, term<C>, list<B, seq<As...>, Ts...>)
-> list<seq<As..., B>, Ts...>;
template <auto C, typename A, typename B, typename... Ts>
auto modify(my_grammar::alt, term<C>, list<B, A, Ts...>)
-> list<alt<A, B>, Ts...>;
(Notice the switched order of A & B on the stack.)
template <auto C, typename... As, typename B, typename... Ts>
auto modify(my_grammar::alt, term<C>, list<B, alt<As...>, Ts...>)
-> list<alt<As..., B>, Ts...>;
template <auto C, typename... Ts>
auto modify(my_grammar::alpha, term<C>, list<Ts...>)
-> list<alpha, Ts...>;
template <auto C, typename... Ts>
auto modify(my_grammar::digit, term<C>, list<Ts...>)
-> list<digit, Ts...>;
static_assert(std::is_same_v<
,
parser<my_grammar,"">::output_type
>);
match<"\\a\\d+">("a42"sv);
template <fixed_string re> bool match(std::string_view sv) { static_assert(parser<my_grammar, re>::correct); using RE = parser<my_grammar, re>::output_type;
return match(sv.begin(), sv.begin(), sv.end(), list<RE>{}).success;}
template <ForwardIterator It> struct result {
ForwardIterator it;
bool success;
// constexpr constructor etc...
};
template <ForwardIterator It, auto C, typename... Ts>
result<It> match(It begin, It it, It end, list<ch<C>, Ts...>) {
if (it == end) return {it, false}; if (*it != C) return {it, false};
return match(begin, std::next(it), end, list<Ts...>{});
}
template <ForwardIterator It, typename... Ts>
result<It> match(It begin, It it, It end, list<digit, Ts...>) { if (it == end) return {it, false}; if (!(*it >= '0' && *it <= '9')) return {it, false}; return match(begin, std::next(it), end, list<Ts...>{});}
template <ForwardIterator It, typename... Ts>
result<It> match(It begin, It it, It end, list<alpha, Ts...>) { if (it == end) return {it, false}; if (!(*it >= 'a' && *it <= 'z')) return {it, false}; return match(begin, std::next(it), end, list<Ts...>{});}
template <ForwardIterator It, typename... Seq, typename... Ts>
result<It> match(It begin, It it, It end, list<seq<Seq...>, Ts...>) {
return match(begin, it, end, list<Seq..., Ts...>{});}
template <ForwardIterator It, typename... Opt, typename... Ts>
result<It> match(It begin, It it, It end, list<opt<Opt...>, Ts...>) {
if (auto out = match(begin, it, end, list<Opt..., Ts...>{})) {
return out;
} else {
// try it without the content of opt<...>
return match(begin, it, end, list<Ts...>{}));
}
}
template <ForwardIterator It, typename Head, typename... Tail, typename... Ts>
result<It> match(It begin, It it, It end, list<alt<Head, Tail...>, Ts...>) {
if (auto out = match(begin, it, end, list<Head, Ts...>{})) {
return out;
} else {
// try the next one
return match(begin, it, end, list<alt<Tail...>, Ts...>{}));
}
}
template <ForwardIterator It, typename... Ts>
result<It> match(It begin, It it, It end, list<alt<>, Ts...>) {
// no option from alternation was successful
return {it, false};
}
template <ForwardIterator It, typename First, typename... Alt, typename... Ts>
result<It> match(It begin, It it, It end, list<plus<Plus...>, Ts...>) {
for (;;) {
if (auto inner = match(begin, it, end, list<Plus..., end_of_cycle>{})) {
if (auto out = match(begin, it, end, list<Ts...>{})) {
return out;
} else {
it = inner.it;
}
} else return {it, false};
}
}
struct end_of_cycle {};
template <ForwardIterator It>
result<It> match(It, It it, It, list<end_of_cycle>) {
return {it, true};
}
(The cycle is lazy.)
template <ForwardIterator It, typename First, typename... Alt, typename... Ts>
result<It> match(It begin, It it, It end, list<star<Star...>, Ts...>) {
for (;;) { if (auto out = match(begin, it, end, list<Ts...>{})) {
return out; } else if (auto inner = match(begin, it, end, list<Star..., end_of_cycle>{})) {
if (auto out = match(begin, it, end, list<Ts...>{})) {
return out;
} else {
it = inner.it;
}
} else return {it, false};
}
}
(The cycle is lazy.)
// and at the end... we need to check for the end :)
template <ForwardIterator It>
result<It> match(It begin, It it, It end, list<>{}) {
// if we are at the end input we should accept
return {it, it == end};
}
(Helper for finishing the regex matching.)
int main(int argc, char ** argv) { auto re = PREPARE("PATTERN"); std::ifstream stream{argv[1], std::ifstream::in};
std::string line;
while (std::getline(stream, line)) { if (re.MATCH(line)) { std::cout << line << '\n';
}
}
}
enum class decision {
undecided,
correct,
wrong
};
// move to next character
template <size_t Pos, typename Stack, typename T>
static constexpr auto move(pop_input, Stack stack, T subject) {
// return parse<Pos+1>(stack, subject);
return state<Pos+1, Stack, T, decision::undecided>();
}
// instead of returning pair with subject and boolean flag
template <size_t Pos, typename Stack, typename T>
static constexpr auto move(reject, Stack s, T subject) {
return state<Pos, Stack, T, decision::correct>();
}
template <size_t Pos, typename Stack, typename T>
static constexpr auto move(accept, Stack s, T subject) {
return state<Pos, Stack, T, decision::wrong>();
}
struct placeholder {};
template <size_t Pos, typename Stack, typename Subject, decision Decision>
struct state {
auto operator+(placeholder) {
if (Decision == decision::undecided) {
return parse<Pos>(Stack(), Subject());
} else {
return *this;
}
}
}
template <size_t Idx> using index_placeholder = placeholder;
template <typename Subject> auto tr_parse(Subject s) {
return tr_parse(s, std::make_index_sequence<input.length()>());
}
template <typename Subject, size_t... Idx>
auto tr_parse(Subject s, std::index_sequence<Idx...>) {
return pair(parse<0>(s) + ... + index_placeholder<Idx>());
}
You can find the slides and sources at:
compile-time.re