i3/src/config_parser.c

#undef I3__FILE__
#define I3__FILE__ "config_parser.c"
/*
 * vim:ts=4:sw=4:expandtab
 *
 * i3 - an improved dynamic tiling window manager
 * © 2009-2012 Michael Stapelberg and contributors (see also: LICENSE)
 *
 * config_parser.c: hand-written parser to parse configuration directives.
 *
 * See also src/commands_parser.c for rationale on why we use a custom parser.
 *
 * This parser works VERY MUCH like src/commands_parser.c, so read that first.
 * The differences are:
 *
 * 1. config_parser supports the 'number' token type (in addition to 'word' and
 *    'string'). Numbers are referred to using &num (like $str).
 *
 * 2. Criteria are not executed immediately, they are just stored.
 *
 * 3. config_parser recognizes \n and \r as 'end' token, while commands_parser
 *    ignores them.
 *
 * 4. config_parser skips the current line on invalid inputs and follows the
 *    nearest <error> token.
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdint.h>

#include "all.h"

// Macros to make the YAJL API a bit easier to use.
#define y(x, ...) yajl_gen_ ## x (command_output.json_gen, ##__VA_ARGS__)
#define ystr(str) yajl_gen_string(command_output.json_gen, (unsigned char*)str, strlen(str))

/*******************************************************************************
 * The data structures used for parsing. Essentially the current state and a
 * list of tokens for that state.
 *
 * The GENERATED_* files are generated by generate-commands-parser.pl with the
 * input parser-specs/configs.spec.
 ******************************************************************************/

#include "GENERATED_config_enums.h"

typedef struct token {
    char *name;
    char *identifier;
    /* This might be __CALL */
    cmdp_state next_state;
    union {
        uint16_t call_identifier;
    } extra;
} cmdp_token;

typedef struct tokenptr {
    cmdp_token *array;
    int n;
} cmdp_token_ptr;

#include "GENERATED_config_tokens.h"

/*******************************************************************************
 * The (small) stack where identified literals are stored during the parsing
 * of a single command (like $workspace).
 ******************************************************************************/

struct stack_entry {
    /* Just a pointer, not dynamically allocated. */
    const char *identifier;
    enum {
        STACK_STR = 0,
        STACK_LONG = 1,
    } type;
    union {
        char *str;
        long num;
    } val;
};

/* 10 entries should be enough for everybody. */
static struct stack_entry stack[10];

/*
 * Pushes a string (identified by 'identifier') on the stack. We simply use a
 * single array, since the number of entries we have to store is very small.
 *
 */
static void push_string(const char *identifier, const char *str) {
    for (int c = 0; c < 10; c++) {
        if (stack[c].identifier != NULL &&
            strcmp(stack[c].identifier, identifier) != 0)
            continue;
        if (stack[c].identifier == NULL) {
            /* Found a free slot, let’s store it here. */
            stack[c].identifier = identifier;
            stack[c].val.str = sstrdup(str);
            stack[c].type = STACK_STR;
        } else {
            /* Append the value. */
            char *prev = stack[c].val.str;
            sasprintf(&(stack[c].val.str), "%s,%s", prev, str);
            free(prev);
        }
        return;
    }

    /* When we arrive here, the stack is full. This should not happen and
     * means there’s either a bug in this parser or the specification
     * contains a command with more than 10 identified tokens. */
    fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
                    "in the code, or a new command which contains more than "
                    "10 identified tokens.\n");
    exit(1);
}

static void push_long(const char *identifier, long num) {
    for (int c = 0; c < 10; c++) {
        if (stack[c].identifier != NULL)
            continue;
        /* Found a free slot, let’s store it here. */
        stack[c].identifier = identifier;
        stack[c].val.num = num;
        stack[c].type = STACK_LONG;
        return;
    }

    /* When we arrive here, the stack is full. This should not happen and
     * means there’s either a bug in this parser or the specification
     * contains a command with more than 10 identified tokens. */
    fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
                    "in the code, or a new command which contains more than "
                    "10 identified tokens.\n");
    exit(1);

}

static const char *get_string(const char *identifier) {
    for (int c = 0; c < 10; c++) {
        if (stack[c].identifier == NULL)
            break;
        if (strcmp(identifier, stack[c].identifier) == 0)
            return stack[c].val.str;
    }
    return NULL;
}

static const long get_long(const char *identifier) {
    for (int c = 0; c < 10; c++) {
        if (stack[c].identifier == NULL)
            break;
        if (strcmp(identifier, stack[c].identifier) == 0)
            return stack[c].val.num;
    }
    return 0;
}

static void clear_stack(void) {
    for (int c = 0; c < 10; c++) {
        if (stack[c].type == STACK_STR && stack[c].val.str != NULL)
            free(stack[c].val.str);
        stack[c].identifier = NULL;
        stack[c].val.str = NULL;
        stack[c].val.num = 0;
    }
}

// TODO: remove this if it turns out we don’t need it for testing.
#if 0
/*******************************************************************************
 * A dynamically growing linked list which holds the criteria for the current
 * command.
 ******************************************************************************/

typedef struct criterion {
    char *type;
    char *value;

    TAILQ_ENTRY(criterion) criteria;
} criterion;

static TAILQ_HEAD(criteria_head, criterion) criteria =
  TAILQ_HEAD_INITIALIZER(criteria);

/*
 * Stores the given type/value in the list of criteria.
 * Accepts a pointer as first argument, since it is 'call'ed by the parser.
 *
 */
static void push_criterion(void *unused_criteria, const char *type,
                           const char *value) {
    struct criterion *criterion = malloc(sizeof(struct criterion));
    criterion->type = strdup(type);
    criterion->value = strdup(value);
    TAILQ_INSERT_TAIL(&criteria, criterion, criteria);
}

/*
 * Clears the criteria linked list.
 * Accepts a pointer as first argument, since it is 'call'ed by the parser.
 *
 */
static void clear_criteria(void *unused_criteria) {
    struct criterion *criterion;
    while (!TAILQ_EMPTY(&criteria)) {
        criterion = TAILQ_FIRST(&criteria);
        free(criterion->type);
        free(criterion->value);
        TAILQ_REMOVE(&criteria, criterion, criteria);
        free(criterion);
    }
}
#endif

/*******************************************************************************
 * The parser itself.
 ******************************************************************************/

static cmdp_state state;
static Match current_match;
static struct ConfigResult subcommand_output;
static struct ConfigResult command_output;

/* A list which contains the states that lead to the current state, e.g.
 * INITIAL, WORKSPACE_LAYOUT.
 * When jumping back to INITIAL, statelist_idx will simply be set to 1
 * (likewise for other states, e.g. MODE or BAR).
 * This list is used to process the nearest error token. */
static cmdp_state statelist[10] = { INITIAL };
/* NB: statelist_idx points to where the next entry will be inserted */
static int statelist_idx = 1;

#include "GENERATED_config_call.h"


static void next_state(const cmdp_token *token) {
    cmdp_state _next_state = token->next_state;

	//printf("token = name %s identifier %s\n", token->name, token->identifier);
	//printf("next_state = %d\n", token->next_state);
    if (token->next_state == __CALL) {
        subcommand_output.json_gen = command_output.json_gen;
        GENERATED_call(token->extra.call_identifier, &subcommand_output);
        _next_state = subcommand_output.next_state;
        clear_stack();
    }

    state = _next_state;
    if (state == INITIAL) {
        clear_stack();
    }

    /* See if we are jumping back to a state in which we were in previously
     * (statelist contains INITIAL) and just move statelist_idx accordingly. */
    for (int i = 0; i < statelist_idx; i++) {
        if (statelist[i] != _next_state)
            continue;
        statelist_idx = i+1;
        return;
    }

    /* Otherwise, the state is new and we add it to the list */
    statelist[statelist_idx++] = _next_state;
}

/*
 * Returns a pointer to the start of the line (one byte after the previous \r,
 * \n) or the start of the input, if this is the first line.
 *
 */
static const char *start_of_line(const char *walk, const char *beginning) {
    while (*walk != '\n' && *walk != '\r' && walk >= beginning) {
        walk--;
    }

    return walk + 1;
}

/*
 * Copies the line and terminates it at the next \n, if any.
 *
 * The caller has to free() the result.
 *
 */
static char *single_line(const char *start) {
    char *result = sstrdup(start);
    char *end = strchr(result, '\n');
    if (end != NULL)
        *end = '\0';
    return result;
}

struct ConfigResult *parse_config(const char *input, struct context *context) {
    /* Dump the entire config file into the debug log. We cannot just use
     * DLOG("%s", input); because one log message must not exceed 4 KiB. */
    const char *dumpwalk = input;
    int linecnt = 1;
    while (*dumpwalk != '\0') {
        char *next_nl = strchr(dumpwalk, '\n');
        if (next_nl != NULL) {
            DLOG("CONFIG(line %3d): %.*s\n", linecnt, (int)(next_nl - dumpwalk), dumpwalk);
            dumpwalk = next_nl + 1;
        } else {
            DLOG("CONFIG(line %3d): %s\n", linecnt, dumpwalk);
            break;
        }
        linecnt++;
    }
    state = INITIAL;
    statelist_idx = 1;

/* A YAJL JSON generator used for formatting replies. */
#if YAJL_MAJOR >= 2
    command_output.json_gen = yajl_gen_alloc(NULL);
#else
    command_output.json_gen = yajl_gen_alloc(NULL, NULL);
#endif

    y(array_open);

    const char *walk = input;
    const size_t len = strlen(input);
    int c;
    const cmdp_token *token;
    bool token_handled;
    linecnt = 1;

    // TODO: make this testable
#ifndef TEST_PARSER
    cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
#endif

    /* The "<=" operator is intentional: We also handle the terminating 0-byte
     * explicitly by looking for an 'end' token. */
    while ((walk - input) <= len) {
        /* Skip whitespace before every token, newlines are relevant since they
         * separate configuration directives. */
        while ((*walk == ' ' || *walk == '\t') && *walk != '\0')
            walk++;

		//printf("remaining input: %s\n", walk);

        cmdp_token_ptr *ptr = &(tokens[state]);
        token_handled = false;
        for (c = 0; c < ptr->n; c++) {
            token = &(ptr->array[c]);

            /* A literal. */
            if (token->name[0] == '\'') {
                if (strncasecmp(walk, token->name + 1, strlen(token->name) - 1) == 0) {
                    if (token->identifier != NULL)
                        push_string(token->identifier, token->name + 1);
                    walk += strlen(token->name) - 1;
                    next_state(token);
                    token_handled = true;
                    break;
                }
                continue;
            }

            if (strcmp(token->name, "number") == 0) {
                /* Handle numbers. We only accept decimal numbers for now. */
                char *end = NULL;
                errno = 0;
                long int num = strtol(walk, &end, 10);
                if ((errno == ERANGE && (num == LONG_MIN || num == LONG_MAX)) ||
                    (errno != 0 && num == 0))
                    continue;

                /* No valid numbers found */
                if (end == walk)
                    continue;

                if (token->identifier != NULL)
                    push_long(token->identifier, num);

                /* Set walk to the first non-number character */
                walk = end;
                next_state(token);
                token_handled = true;
                break;
            }

            if (strcmp(token->name, "string") == 0 ||
                strcmp(token->name, "word") == 0) {
                const char *beginning = walk;
                /* Handle quoted strings (or words). */
                if (*walk == '"') {
                    beginning++;
                    walk++;
                    while (*walk != '\0' && (*walk != '"' || *(walk-1) == '\\'))
                        walk++;
                } else {
                    if (token->name[0] == 's') {
                        while (*walk != '\0' && *walk != '\r' && *walk != '\n')
                            walk++;
                    } else {
                        /* For a word, the delimiters are white space (' ' or
                         * '\t'), closing square bracket (]), comma (,) and
                         * semicolon (;). */
                        while (*walk != ' ' && *walk != '\t' &&
                               *walk != ']' && *walk != ',' &&
                               *walk !=  ';' && *walk != '\r' &&
                               *walk != '\n' && *walk != '\0')
                            walk++;
                    }
                }
                if (walk != beginning) {
                    char *str = scalloc(walk-beginning + 1);
                    /* We copy manually to handle escaping of characters. */
                    int inpos, outpos;
                    for (inpos = 0, outpos = 0;
                         inpos < (walk-beginning);
                         inpos++, outpos++) {
                        /* We only handle escaped double quotes to not break
                         * backwards compatibility with people using \w in
                         * regular expressions etc. */
                        if (beginning[inpos] == '\\' && beginning[inpos+1] == '"')
                            inpos++;
                        str[outpos] = beginning[inpos];
                    }
                    if (token->identifier)
                        push_string(token->identifier, str);
                    free(str);
                    /* If we are at the end of a quoted string, skip the ending
                     * double quote. */
                    if (*walk == '"')
                        walk++;
                    next_state(token);
                    token_handled = true;
                    break;
                }
            }

            if (strcmp(token->name, "end") == 0) {
                //printf("checking for end: *%s*\n", walk);
                if (*walk == '\0' || *walk == '\n' || *walk == '\r') {
                    next_state(token);
                    token_handled = true;
                    /* To make sure we start with an appropriate matching
                     * datastructure for commands which do *not* specify any
                     * criteria, we re-initialize the criteria system after
                     * every command. */
                    // TODO: make this testable
#ifndef TEST_PARSER
                    cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
#endif
                    linecnt++;
                    walk++;
                    break;
               }
           }
        }

        if (!token_handled) {
            /* Figure out how much memory we will need to fill in the names of
             * all tokens afterwards. */
            int tokenlen = 0;
            for (c = 0; c < ptr->n; c++)
                tokenlen += strlen(ptr->array[c].name) + strlen("'', ");

            /* Build up a decent error message. We include the problem, the
             * full input, and underline the position where the parser
             * currently is. */
            char *errormessage;
            char *possible_tokens = smalloc(tokenlen + 1);
            char *tokenwalk = possible_tokens;
            for (c = 0; c < ptr->n; c++) {
                token = &(ptr->array[c]);
                if (token->name[0] == '\'') {
                    /* A literal is copied to the error message enclosed with
                     * single quotes. */
                    *tokenwalk++ = '\'';
                    strcpy(tokenwalk, token->name + 1);
                    tokenwalk += strlen(token->name + 1);
                    *tokenwalk++ = '\'';
                } else {
                    /* Skip error tokens in error messages, they are used
                     * internally only and might confuse users. */
                    if (strcmp(token->name, "error") == 0)
                        continue;
                    /* Any other token is copied to the error message enclosed
                     * with angle brackets. */
                    *tokenwalk++ = '<';
                    strcpy(tokenwalk, token->name);
                    tokenwalk += strlen(token->name);
                    *tokenwalk++ = '>';
                }
                if (c < (ptr->n - 1)) {
                    *tokenwalk++ = ',';
                    *tokenwalk++ = ' ';
                }
            }
            *tokenwalk = '\0';
            sasprintf(&errormessage, "Expected one of these tokens: %s",
                      possible_tokens);
            free(possible_tokens);


            /* Go back to the beginning of the line */
            const char *error_line = start_of_line(walk, input);

            /* Contains the same amount of characters as 'input' has, but with
             * the unparseable part highlighted using ^ characters. */
            char *position = scalloc(strlen(error_line) + 1);
            const char *copywalk;
            for (copywalk = error_line;
                 *copywalk != '\n' && *copywalk != '\r' && *copywalk != '\0';
                 copywalk++)
                position[(copywalk - error_line)] = (copywalk >= walk ? '^' : (*copywalk == '\t' ? '\t' : ' '));
            position[(copywalk - error_line)] = '\0';

            ELOG("CONFIG: %s\n", errormessage);
            ELOG("CONFIG: (in file %s)\n", context->filename);
            char *error_copy = single_line(error_line);

            /* Print context lines *before* the error, if any. */
            if (linecnt > 1) {
                const char *context_p1_start = start_of_line(error_line-2, input);
                char *context_p1_line = single_line(context_p1_start);
                if (linecnt > 2) {
                    const char *context_p2_start = start_of_line(context_p1_start-2, input);
                    char *context_p2_line = single_line(context_p2_start);
                    ELOG("CONFIG: Line %3d: %s\n", linecnt - 2, context_p2_line);
                    free(context_p2_line);
                }
                ELOG("CONFIG: Line %3d: %s\n", linecnt - 1, context_p1_line);
                free(context_p1_line);
            }
            ELOG("CONFIG: Line %3d: %s\n", linecnt, error_copy);
            ELOG("CONFIG:           %s\n", position);
            free(error_copy);
            /* Print context lines *after* the error, if any. */
            for (int i = 0; i < 2; i++) {
                char *error_line_end = strchr(error_line, '\n');
                if (error_line_end != NULL && *(error_line_end + 1) != '\0') {
                    error_line = error_line_end + 1;
                    error_copy = single_line(error_line);
                    ELOG("CONFIG: Line %3d: %s\n", linecnt + i + 1, error_copy);
                    free(error_copy);
                }
            }

            context->has_errors = true;

            /* Format this error message as a JSON reply. */
            y(map_open);
            ystr("success");
            y(bool, false);
            /* We set parse_error to true to distinguish this from other
             * errors. i3-nagbar is spawned upon keypresses only for parser
             * errors. */
            ystr("parse_error");
            y(bool, true);
            ystr("error");
            ystr(errormessage);
            ystr("input");
            ystr(input);
            ystr("errorposition");
            ystr(position);
            y(map_close);

            /* Skip the rest of this line, but continue parsing. */
            while ((walk - input) <= len && *walk != '\n')
                walk++;

            free(position);
            free(errormessage);
            clear_stack();

            /* To figure out in which state to go (e.g. MODE or INITIAL),
             * we find the nearest state which contains an <error> token
             * and follow that one. */
            bool error_token_found = false;
            for (int i = statelist_idx-1; (i >= 0) && !error_token_found; i--) {
                cmdp_token_ptr *errptr = &(tokens[statelist[i]]);
                for (int j = 0; j < errptr->n; j++) {
                    if (strcmp(errptr->array[j].name, "error") != 0)
                        continue;
                    next_state(&(errptr->array[j]));
                    error_token_found = true;
                    break;
                }
            }

            assert(error_token_found);
        }
    }

    y(array_close);

    return &command_output;
}

/*******************************************************************************
 * Code for building the stand-alone binary test.commands_parser which is used
 * by t/187-commands-parser.t.
 ******************************************************************************/

#ifdef TEST_PARSER

/*
 * Logs the given message to stdout while prefixing the current time to it,
 * but only if debug logging was activated.
 * This is to be called by DLOG() which includes filename/linenumber
 *
 */
void debuglog(char *fmt, ...) {
    va_list args;

    va_start(args, fmt);
    fprintf(stdout, "# ");
    vfprintf(stdout, fmt, args);
    va_end(args);
}

void errorlog(char *fmt, ...) {
    va_list args;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}

static int criteria_next_state;

void cfg_criteria_init(I3_CFG, int _state) {
    criteria_next_state = _state;
}

void cfg_criteria_add(I3_CFG, const char *ctype, const char *cvalue) {
}

void cfg_criteria_pop_state(I3_CFG) {
    result->next_state = criteria_next_state;
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Syntax: %s <command>\n", argv[0]);
        return 1;
    }
    struct context context;
    context.filename = "<stdin>";
    parse_config(argv[1], &context);
}
#endif