From bff50816118175cf199ffb753a4949526fc65853 Mon Sep 17 00:00:00 2001 From: Snaipe Date: Sun, 3 May 2015 00:41:58 +0200 Subject: [PATCH 1/3] [Issue #24] Implemented extended shell pattern using a PCRE translator --- Makefile.am | 4 +- samples/Makefile.am | 2 +- src/extmatch.c | 200 ++++++++++++++++++++++++++++++++++++++++++++ src/extmatch.h | 6 ++ src/report.c | 4 +- 5 files changed, 212 insertions(+), 4 deletions(-) create mode 100644 src/extmatch.c create mode 100644 src/extmatch.h diff --git a/Makefile.am b/Makefile.am index 87b4321..63fc36e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,7 +18,7 @@ libcriterion_la_CFLAGS = \ libcriterion_la_LDFLAGS = $(COVERAGE_LDFLAGS) -version-info 1:0:0 # dirty but unless someone has a better alternative... -libcriterion_la_LIBADD = dependencies/csptr/src/libcsptr_la-*.lo +libcriterion_la_LIBADD = dependencies/csptr/src/libcsptr_la-*.lo -lpcre EXTRA_DIST = config.rpath LICENSE @@ -59,6 +59,8 @@ libcriterion_la_SOURCES = \ src/i18n.h \ src/ordered-set.c \ src/posix-compat.c \ + src/extmatch.c \ + src/extmatch.h \ src/main.c TARGET = $(PACKAGE)-$(VERSION) diff --git a/samples/Makefile.am b/samples/Makefile.am index 7ed5f12..e5c5a06 100644 --- a/samples/Makefile.am +++ b/samples/Makefile.am @@ -14,7 +14,7 @@ TESTS_ENVIRONMENT = CRITERION_ALWAYS_SUCCEED=1 check_PROGRAMS := $(BIN_TESTS) CFLAGS = -I$(top_srcdir)/include/ -std=c99 -Wall -Wextra -pedantic -LDADD = -L$(top_srcdir)/ -lcriterion +LDADD = -L$(top_srcdir)/ -lcriterion -lpcre if ENABLE_RT_TESTS BIN_TESTS += with-time diff --git a/src/extmatch.c b/src/extmatch.c new file mode 100644 index 0000000..e76ebec --- /dev/null +++ b/src/extmatch.c @@ -0,0 +1,200 @@ +#include +#include +#include + +#include +#include "criterion/common.h" + +struct context { + int depth; + char *dst; + const char *src; + char old, cur; + int eos; +}; + +void transform_impl(struct context *ctx); + +static inline void transform_rec(struct context *ctx) { + struct context new_ctx = { + .depth = 
ctx->depth + 1, + .dst = ctx->dst, + .src = ctx->src, + .old = ctx->old, + .eos = ctx->eos, + }; + transform_impl(&new_ctx); + ctx->dst = new_ctx.dst; + ctx->src = new_ctx.src; + ctx->old = new_ctx.old; + ctx->eos = new_ctx.eos; +} + +static inline char read_char(struct context *ctx) { + char c = *ctx->src; + ctx->old = ctx->cur; + ctx->cur = c; + if (c == '\0') + ctx->eos = 1; + ++ctx->src; + return c; +} + +static inline char peek_char(struct context *ctx) { + return *ctx->src; +} + +static inline void copy_char(struct context *ctx, char src) { + *ctx->dst = src; + ++ctx->dst; +} + +static inline void dup_char(struct context *ctx) { + copy_char(ctx, read_char(ctx)); +} + +static inline void copy_str(struct context *ctx, const char *src) { + size_t len = strlen(src); + strncpy(ctx->dst, src, len); + ctx->dst += len; +} + +#define PREPREFIX 0 +#define POSTPREFIX 1 +#define PRESUFFIX 2 +#define POSTSUFFIX 3 +#define ELSESTR 4 + +typedef struct { + int (*validator)(struct context *ctx); + char *str; +} handler_arg; + +static int active() { return 1; } +static int inactive() { return 0; } + +static int is_eos(struct context *ctx) { + return peek_char(ctx) == '\0'; +} + +static inline void handle_special(struct context *ctx, handler_arg strs[5]) { + if (peek_char(ctx) == '(') { + if ((strs[0].validator ?: inactive)(ctx)) + copy_str(ctx, strs[0].str); + dup_char(ctx); + if ((strs[1].validator ?: inactive)(ctx)) + copy_str(ctx, strs[1].str); + + transform_rec(ctx); + + if ((strs[2].validator ?: inactive)(ctx)) + copy_str(ctx,strs[2].str); + copy_char(ctx, ')'); + if ((strs[3].validator ?: inactive)(ctx)) + copy_str(ctx, strs[3].str); + } else if ((strs[4].validator ?: inactive)(ctx)) { + copy_str(ctx, strs[4].str); + } +} + +# define Handler(Name, ...) 
\ + static void Name(struct context *ctx, UNUSED char c) { \ + handle_special(ctx, (handler_arg[5]) { \ + __VA_ARGS__ \ + }); \ + } + +# define _ active +Handler(handle_plus, [POSTSUFFIX] = {_, "+"}, [ELSESTR] = {_, "+" }); +Handler(handle_star, [POSTSUFFIX] = {_, "*"}, [ELSESTR] = {_, ".*"}); +Handler(handle_wild, [POSTSUFFIX] = {_, "?"}, [ELSESTR] = {_, "." }); +Handler(handle_excl, + [POSTPREFIX] = {_, "?!"}, + [PRESUFFIX] = {is_eos, "$" }, + [POSTSUFFIX] = {_, ".*"}, + [ELSESTR] = {_, "!" } + ); +Handler(handle_at, [ELSESTR] = {_, "@"}); +# undef _ + +static void handle_rbra(struct context *ctx, UNUSED char c) { + copy_char(ctx, c); + if (peek_char(ctx) == '!') { + read_char(ctx); + copy_char(ctx, '^'); + } +} + +static void escape_char(struct context *ctx, char c) { + copy_char(ctx, '\\'); + copy_char(ctx, c); +} + +static void escape_pipe(struct context *ctx, UNUSED char c) { + if (ctx->depth == 0) + copy_char(ctx, '\\'); + copy_char(ctx, '|'); +} + +typedef void (*f_handler)(struct context *, char); + +void transform_impl(struct context *ctx) { + static f_handler handlers[] = { + ['+'] = handle_plus, + ['*'] = handle_star, + ['?'] = handle_wild, + ['!'] = handle_excl, + ['['] = handle_rbra, + ['@'] = handle_at, + + ['.'] = escape_char, + ['('] = escape_char, + [')'] = escape_char, + ['|'] = escape_pipe, + }; + for (char c = read_char(ctx); !ctx->eos; c = read_char(ctx)) { + f_handler handler = handlers[(unsigned char) c]; + + if (ctx->old == '\\') + handler = copy_char; + + if (c == ')' && ctx->depth > 0) + return; + + (handler ?: copy_char)(ctx, c); + + if (ctx->eos) + return; + } + if (ctx->depth > 0) { + puts("pattern error: mismatching parenthesis"); + exit(1); + } +} + +static void transform(const char *pattern, char *result) { + struct context ctx = { + .src = pattern, + .dst = result, + }; + copy_char(&ctx, '^'); + transform_impl(&ctx); + copy_char(&ctx, '$'); + copy_char(&ctx, '\0'); +} + +int extmatch(const char *pattern, const char *string) { + 
char regex[strlen(pattern) * 2]; + transform(pattern, regex); + + const char *errmsg; + int erroffset; + pcre *preg = pcre_compile(regex, 0, &errmsg, &erroffset, NULL); + if (preg) { + int res = pcre_exec(preg, NULL, string, strlen(string), 0, 0, NULL, 0); + pcre_free(preg); + return res; + } + printf("pattern error: %s\n", errmsg); + exit(1); +} diff --git a/src/extmatch.h b/src/extmatch.h new file mode 100644 index 0000000..3c6f528 --- /dev/null +++ b/src/extmatch.h @@ -0,0 +1,6 @@ +#ifndef EXTMATCH_H_ +# define EXTMATCH_H_ + +int extmatch(const char *pattern, const char *string); + +#endif /* !EXTMATCH_H_ */ diff --git a/src/report.c b/src/report.c index ea13806..847600d 100644 --- a/src/report.c +++ b/src/report.c @@ -33,7 +33,7 @@ #include "config.h" #ifdef HAVE_FNMATCH -#include +#include "extmatch.h" #endif #define IMPL_CALL_REPORT_HOOKS(Kind) \ @@ -60,7 +60,7 @@ void disable_unmatching(struct criterion_test_set *set) { continue; FOREACH_SET(struct criterion_test *test, s->tests) { - if (fnmatch(criterion_options.pattern, test->data->identifier_, 0)) + if (extmatch(criterion_options.pattern, test->data->identifier_)) test->data->disabled = true; } } From 1dcba90eb3b3d8f2315426cebf655d8fd3fa5ceb Mon Sep 17 00:00:00 2001 From: Snaipe Date: Sun, 3 May 2015 16:02:26 +0200 Subject: [PATCH 2/3] Added optimal max pattern length and proof, added better error handling --- src/extmatch.c | 94 ++++++++++++++++++++++++++++++++++++++------------ src/extmatch.h | 2 +- src/report.c | 8 ++++- 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/src/extmatch.c b/src/extmatch.c index e76ebec..486a612 100644 --- a/src/extmatch.c +++ b/src/extmatch.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "criterion/common.h" @@ -11,6 +12,8 @@ struct context { const char *src; char old, cur; int eos; + const char **errmsg; + jmp_buf jmp; }; void transform_impl(struct context *ctx); @@ -167,34 +170,81 @@ void transform_impl(struct context *ctx) { 
return; } if (ctx->depth > 0) { - puts("pattern error: mismatching parenthesis"); - exit(1); + *ctx->errmsg = "mismatching parenthesis"; + longjmp(ctx->jmp, -1); // abort operation } } -static void transform(const char *pattern, char *result) { +static int transform(const char *pattern, char *result, const char **errmsg) { struct context ctx = { .src = pattern, .dst = result, + .errmsg = errmsg, }; - copy_char(&ctx, '^'); - transform_impl(&ctx); - copy_char(&ctx, '$'); - copy_char(&ctx, '\0'); -} - -int extmatch(const char *pattern, const char *string) { - char regex[strlen(pattern) * 2]; - transform(pattern, regex); - - const char *errmsg; - int erroffset; - pcre *preg = pcre_compile(regex, 0, &errmsg, &erroffset, NULL); - if (preg) { - int res = pcre_exec(preg, NULL, string, strlen(string), 0, 0, NULL, 0); - pcre_free(preg); - return res; + if (!setjmp(ctx.jmp)) { + copy_char(&ctx, '^'); + transform_impl(&ctx); + copy_char(&ctx, '$'); + copy_char(&ctx, '\0'); + return 0; } - printf("pattern error: %s\n", errmsg); - exit(1); + return -1; +} + +/* + * let T be the transformation function, + * let diff be the function that yields the greatest character difference + * before and after its parameter has been transformed by T. + * + * T('*') = '.*'; diff('*') = 1 + * T('!()') = '(?!).*' or '(?!$).*'; diff('!()') = 4 + * T('@') = '' or '@'; diff('@') = 0 + * T('|') = '|' or '\|'; diff('|') = 1 + * T('.') = '\.'; diff('.') = 1 + * T('(') = '\('; diff('(') = 1 + * T(')') = '\)'; diff(')') = 1 + * for every other 1 character string s, we have T(s) = s; hence diff(s) = 0 + * + * let num be the function that yields the number of occurrences of a string. + * let spec be the set {(s, num(s)) | ∀s} + * ∀s, length(T(s)) = length(s) + Σ((c_i, n_i) ∈ spec, n_i * diff(c_i)) + * + * let S = {'*', '!()', '|', '.', '(', ')'}. 
+ * since ∀s ∉ S, diff(s) = 0, we can simplify the above equation as: + * + * ∀s, length(T(s)) = length(s) + num('*') + num('|') + num('.') + * + num('(') + num(')') + 4 * num('!()'). + * + * We must now find the maximal length L such as ∀s, L >= length(T(s)) + * + * It is immediately apparent that the largest string will depend on the number + * of occurrences of '!()'. Hence, let s be a string that is a repeating + * sequence of '!()', + * + * let N = floor(length(s) / 3), + * let Q = length(s) mod 3, + * ∀s, num('!()') = N (1) + * + * ∀s, length(T(s)) <= length(s) + 4 * N + * <= 3 * N + Q + 4 * N + * <= 7 * N + 2 + * <= 7 * floor(length(s) / 3) + 2 + * + */ +static inline size_t max_length(size_t len) { + return 7 * len / 3 + 2; +} + +int extmatch(const char *pattern, const char *string, const char **errmsg) { + char regex[max_length(strlen(pattern))]; + if (transform(pattern, regex, errmsg) != -1) { + int erroffset; + pcre *preg = pcre_compile(regex, 0, errmsg, &erroffset, NULL); + if (preg) { + int res = pcre_exec(preg, NULL, string, strlen(string), 0, 0, NULL, 0); + pcre_free(preg); + return res; + } + } + return -10; } diff --git a/src/extmatch.h b/src/extmatch.h index 3c6f528..d65eeb1 100644 --- a/src/extmatch.h +++ b/src/extmatch.h @@ -1,6 +1,6 @@ #ifndef EXTMATCH_H_ # define EXTMATCH_H_ -int extmatch(const char *pattern, const char *string); +int extmatch(const char *pattern, const char *string, const char **errmsg); #endif /* !EXTMATCH_H_ */ diff --git a/src/report.c b/src/report.c index 847600d..4ebbd8f 100644 --- a/src/report.c +++ b/src/report.c @@ -60,8 +60,14 @@ void disable_unmatching(struct criterion_test_set *set) { continue; FOREACH_SET(struct criterion_test *test, s->tests) { - if (extmatch(criterion_options.pattern, test->data->identifier_)) + const char *errmsg; + int ret = extmatch(criterion_options.pattern, test->data->identifier_, &errmsg); + if (ret == -10) { + printf("pattern error: %s\n", errmsg); + exit(1); + } else if (ret < 0) { 
test->data->disabled = true; + } } } } From e8443cd07126425aad3f97fd734582e48e4bf712 Mon Sep 17 00:00:00 2001 From: Snaipe Date: Sun, 3 May 2015 16:19:23 +0200 Subject: [PATCH 3/3] Fixed wrong step in max_length proof --- src/extmatch.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/extmatch.c b/src/extmatch.c index 486a612..f03154d 100644 --- a/src/extmatch.c +++ b/src/extmatch.c @@ -218,21 +218,26 @@ static int transform(const char *pattern, char *result, const char **errmsg) { * We must now find the maximal length L such as ∀s, L >= length(T(s)) * * It is immediately apparent that the largest string will depend on the number - * of occurrences of '!()'. Hence, let s be a string that is a repeating - * sequence of '!()', + * of occurrences of '!()'. Hence, let u be a string that is a repeating + * sequence of '!()' padded by '.' to a multiple of 3, * - * let N = floor(length(s) / 3), - * let Q = length(s) mod 3, - * ∀s, num('!()') = N (1) + * let N = floor(length(u) / 3), + * let Q = length(u) mod 3, + * hence num('!()') = N. * - * ∀s, length(T(s)) <= length(s) + 4 * N - * <= 3 * N + Q + 4 * N - * <= 7 * N + 2 - * <= 7 * floor(length(s) / 3) + 2 + * ∀s | length(s) = length(u), + * length(T(s)) <= length(T(u)) + * <= length(u) | the original length + * + 4 * N | the expansion of all '!()' + * + Q * diff('.') | the expansion of Q '.' + * <= 3 * N + Q + 4 * N + Q + * <= 7 * N + 4 + * <= 7 * floor(length(u) / 3) + 4 + * <= 7 * floor(length(s) / 3) + 4 * */ static inline size_t max_length(size_t len) { - return 7 * len / 3 + 2; + return 7 * len / 3 + 4; } int extmatch(const char *pattern, const char *string, const char **errmsg) {