pcre: enable UCP, UTF-8 (if available), extend t/19-match

2011-09-11 11:40:51 +01:00 · 2011-09-11 11:40:51 +01:00 · c1c1730516
commit c1c1730516
parent 1a91c695e7
2 changed files with 51 additions and 8 deletions
--- a/src/regex.c
+++ b/src/regex.c
@@ -21,16 +21,31 @@
 */
 struct regex *regex_new(const char *pattern) {
    const char *error;
-    int offset;
+    int errorcode, offset;
    struct regex *re = scalloc(sizeof(struct regex));
    re->pattern = sstrdup(pattern);
-    if (!(re->regex = pcre_compile(pattern, 0, &error, &offset, NULL))) {
+    /* We use PCRE_UCP so that \B, \b, \D, \d, \S, \s, \W, \w and some POSIX
-        ELOG("PCRE regular expression compilation failed at %d: %s",
+     * character classes play nicely with Unicode */
    int options = PCRE_UCP | PCRE_UTF8;
    while (!(re->regex = pcre_compile2(pattern, options, &errorcode, &error, &offset, NULL))) {
        /* If the error is that PCRE was not compiled with UTF-8 support we
         * disable it and try again */
        if (errorcode == 32) {
            options &= ~PCRE_UTF8;
            continue;
        }
        ELOG("PCRE regular expression compilation failed at %d: %s\n",
             offset, error);
        return NULL;
    }
    re->extra = pcre_study(re->regex, 0, &error);
    /* If an error happened, we print the error message, but continue.
     * Studying the regular expression leads to faster matching, but it’s not
     * absolutely necessary. */
    if (error) {
        ELOG("PCRE regular expression studying failed: %s\n", error);
    }
    return re;
 }
@@ -43,8 +58,8 @@ struct regex *regex_new(const char *pattern) {
 bool regex_matches(struct regex *regex, const char *input) {
    int rc;
-    /* TODO: is strlen(input) correct for UTF-8 matching? */
+    /* We use strlen() because pcre_exec() expects the length of the input
-    /* TODO: enable UTF-8 */
+     * string in bytes */
    if ((rc = pcre_exec(regex->regex, regex->extra, input, strlen(input), 0, 0, NULL, 0)) == 0) {
        LOG("Regular expression \"%s\" matches \"%s\"\n",
            regex->pattern, input);
@@ -57,7 +72,7 @@ bool regex_matches(struct regex *regex, const char *input) {
        return false;
    }
-    /* TODO: handle the other error codes */
+    ELOG("PCRE error %d while trying to use regular expression \"%s\" on input \"%s\", see pcreapi(3)\n",
-    LOG("PCRE error\n");
+         rc, regex->pattern, input);
    return false;
 }
--- a/testcases/t/19-match.t
+++ b/testcases/t/19-match.t
@@ -139,7 +139,7 @@ sleep 0.25;
 # one window should be here
 $content = get_ws_content($tmp);
-ok(@{$content} == 1, 'two windows opened');
+ok(@{$content} == 1, 'window opened');
 cmd '[class="^special[0-9]$"] kill';
@@ -148,5 +148,33 @@ sleep 0.25;
 $content = get_ws_content($tmp);
 is(@{$content}, 0, 'window killed');
 ######################################################################
 # check that UTF-8 works when matching
 ######################################################################
 $tmp = fresh_workspace;
 $left = $x->root->create_child(
    class => WINDOW_CLASS_INPUT_OUTPUT,
    rect => [ 0, 0, 30, 30 ],
    background_color => '#0000ff',
 );
 $left->_create;
 set_wm_class($left->id, 'special7', 'special7');
 $left->name('ä 3');
 $left->map;
 sleep 0.25;
 # one window should be here
 $content = get_ws_content($tmp);
 ok(@{$content} == 1, 'window opened');
 cmd '[title="^\w [3]$"] kill';
 sleep 0.25;
 $content = get_ws_content($tmp);
 is(@{$content}, 0, 'window killed');
 done_testing;