[augeas-devel] [PATCH 4/6] Remove character ranges from regexps if we are not in the C locale
David Lutterkort
lutter at redhat.com
Fri Oct 23 17:30:11 UTC 2009
Since re_compile_pattern uses the current locale when expanding character
ranges like [a-z], we need to be careful that such ranges are expanded in
the C locale.
* configure.ac: check for uselocale
* src/internal.[ch] (regexp_c_locale): new function
* src/lens.c (digits_string): spell out the digits
* src/lens.c (restrict_regexp): expand char ranges in restricted regexp
* src/lexer.l (regexp_literal): run regexp literals through
regexp_c_locale
* src/regexp.c (regexp_escape): compress character ranges for printing
* src/regexp.c (regexp_minus): expand character ranges
---
src/internal.c | 36 ++++++++++++++++++++++++++++++++++++
src/internal.h | 10 ++++++++++
src/lens.c | 8 +++++++-
src/lexer.l | 14 +++++++++++++-
src/regexp.c | 23 ++++++++++++++++++++++-
5 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/src/internal.c b/src/internal.c
index 3ac5350..318c79a 100644
--- a/src/internal.c
+++ b/src/internal.c
@@ -25,9 +25,11 @@
#include <ctype.h>
#include <stdio.h>
#include <stdarg.h>
+#include <locale.h>
#include "internal.h"
#include "memory.h"
+#include "fa.h"
#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -402,6 +404,40 @@ void calc_line_ofs(const char *text, size_t pos, size_t *line, size_t *ofs) {
}
}
+#if HAVE_USELOCALE
+/* Nothing to do on systems with uselocale: we make sure that we switch to
+ * the "C" locale any time we enter through the public API. Both arguments
+ * are ignored and 0 is always returned.
+ */
+int regexp_c_locale(ATTRIBUTE_UNUSED char **u, ATTRIBUTE_UNUSED size_t *len) {
+    return 0;
+}
+#else
+/* Without uselocale, character ranges in the regexp *U are expanded with
+ * fa_expand_char_ranges.
+ *
+ * LEN may be NULL, in which case the length of *U is taken from strlen.
+ * Returns 1 when expansion succeeded (the old string is freed), 0 when
+ * fa_expand_char_ranges reported a positive result, and -1 when it
+ * reported a negative one. In the latter two cases *U and *LEN are reset
+ * to the values they had on entry.
+ */
+int regexp_c_locale(char **u, size_t *len) {
+    char *orig = *u;
+    size_t orig_len;
+    size_t scratch_len;
+    int rc;
+
+    if (len != NULL) {
+        orig_len = *len;
+    } else {
+        /* No length supplied: measure the string and give the expansion
+         * a local place to store the resulting length */
+        orig_len = strlen(orig);
+        len = &scratch_len;
+    }
+
+    rc = fa_expand_char_ranges(orig, orig_len, u, len);
+    if (rc == 0) {
+        /* The expanded regexp now lives in *U; drop the old string */
+        free(orig);
+        return 1;
+    }
+
+    /* Hand the caller back exactly what was passed in */
+    *u = orig;
+    *len = orig_len;
+
+    /* Positive results are not treated as failures here: syntax errors
+     * will be caught when the result is compiled */
+    return (rc < 0) ? -1 : 0;
+}
+#endif
+
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/src/internal.h b/src/internal.h
index 55aa551..fe5b716 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -271,6 +271,16 @@ int xasprintf(char **strp, const char *format, ...);
/* Calculate line and column number of character POS in TEXT */
void calc_line_ofs(const char *text, size_t pos, size_t *line, size_t *ofs);
+/* Take the first LEN characters from the regexp *U and expand any
+ * character ranges in it. If expansion is performed, the expanded regexp
+ * is stored in U and the old string is freed. If expansion is not needed
+ * or an error happens, U will be unchanged. LEN may be NULL, in which
+ * case the length of *U is determined with strlen.
+ *
+ * Return 0 if expansion is not necessary (this includes regexps with
+ * syntax errors, which are reported when the regexp is compiled), -1 if
+ * an error occurs, and 1 if the regexp was expanded.
+ */
+int regexp_c_locale(char **u, size_t *len);
+
/* Struct: augeas
* The data structure representing a connection to Augeas. */
struct augeas {
diff --git a/src/lens.c b/src/lens.c
index 9a9b6e2..d4a8b4b 100644
--- a/src/lens.c
+++ b/src/lens.c
@@ -50,7 +50,7 @@ static const char *const tags[] = {
};
static const struct string digits_string = {
- .ref = REF_MAX, .str = (char *) "[0-9]+"
+ .ref = REF_MAX, .str = (char *) "[0123456789]+"
};
static const struct string *const digits_pat = &digits_string;
@@ -312,6 +312,12 @@ static struct regexp *restrict_regexp(struct regexp *r) {
if (ret != 0)
return NULL;
+ ret = regexp_c_locale(&nre, &nre_len);
+ if (ret < 0) {
+ free(nre);
+ return NULL;
+ }
+
r = make_regexp(r->info, nre);
if (regexp_compile(r) != 0)
abort();
diff --git a/src/lexer.l b/src/lexer.l
index a62d2c7..a690f07 100644
--- a/src/lexer.l
+++ b/src/lexer.l
@@ -52,6 +52,18 @@ static void loc_update(YYLTYPE *yylloc, const char *s, int len) {
}
}
}
+
+/* Unescape the LEN characters starting at S (the text of a regexp
+ * literal) and run the result through regexp_c_locale.
+ *
+ * Returns the resulting string, or NULL if unescape returns NULL.
+ */
+static char *regexp_literal(const char *s, int len) {
+    char *u = unescape(s, len);
+    size_t u_len;
+
+    /* Must be checked before measuring the string: calling strlen on a
+     * NULL pointer is undefined behavior */
+    if (u == NULL)
+        return NULL;
+
+    u_len = strlen(u);
+
+    /* The return value is deliberately ignored: regexp_c_locale leaves U
+     * unchanged when expansion is not needed or fails, so U is a usable
+     * string either way */
+    regexp_c_locale(&u, &u_len);
+
+    return u;
+}
%}
DIGIT [0-9]
@@ -84,7 +96,7 @@ ARROW ->
\/([^/]|\\\/)*\/ {
loc_update(yylloc, yytext, yyleng);
- yylval->string = unescape(yytext+1, yyleng-2);
+ yylval->string = regexp_literal(yytext+1, yyleng-2);
return REGEXP;
}
diff --git a/src/regexp.c b/src/regexp.c
index 3748de7..795e6f0 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -34,7 +34,25 @@ static const struct string empty_pattern_string = {
static const struct string *const empty_pattern = &empty_pattern_string;
char *regexp_escape(const struct regexp *r) {
- char *pat = escape(r->pattern->str, -1);
+ char *pat = NULL;
+
+#if !HAVE_USELOCALE
+ char *nre = NULL;
+ int ret;
+ size_t nre_len;
+
+ /* Use a range with from > to to force conversion of ranges into
+ * short form */
+ ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
+ &nre, &nre_len, 2, 1);
+ if (ret == 0) {
+ pat = escape(nre, nre_len);
+ free(nre);
+ }
+#endif
+
+ if (pat == NULL)
+ pat = escape(r->pattern->str, -1);
if (pat == NULL)
return NULL;
@@ -233,6 +251,9 @@ regexp_minus(struct info *info, struct regexp *r1, struct regexp *r2) {
goto error;
}
+ if (regexp_c_locale(&s, NULL) < 0)
+ goto error;
+
result = make_regexp(info, s);
s = NULL;
--
1.6.2.5
More information about the augeas-devel
mailing list