i3/src/config_parser.c
Emil Mikulic d2b533328d Fix memory leaks in config_parser.
push_token() doesn't take ownership of its str argument.
2012-11-24 17:39:52 +01:00

653 lines
22 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#undef I3__FILE__
#define I3__FILE__ "config_parser.c"
/*
* vim:ts=4:sw=4:expandtab
*
* i3 - an improved dynamic tiling window manager
* © 2009-2012 Michael Stapelberg and contributors (see also: LICENSE)
*
* config_parser.c: hand-written parser to parse configuration directives.
*
* See also src/commands_parser.c for rationale on why we use a custom parser.
*
* This parser works VERY MUCH like src/commands_parser.c, so read that first.
* The differences are:
*
* 1. config_parser supports the 'number' token type (in addition to 'word' and
* 'string'). Numbers are referred to using &num (like $str).
*
* 2. Criteria are not executed immediately, they are just stored.
*
* 3. config_parser recognizes \n and \r as 'end' token, while commands_parser
* ignores them.
*
* 4. config_parser skips the current line on invalid inputs and follows the
* nearest <error> token.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdint.h>
#include "all.h"
// Macros to make the YAJL API a bit easier to use.
#define y(x, ...) yajl_gen_ ## x (command_output.json_gen, ##__VA_ARGS__)
#define ystr(str) yajl_gen_string(command_output.json_gen, (unsigned char*)str, strlen(str))
/*******************************************************************************
* The data structures used for parsing. Essentially the current state and a
* list of tokens for that state.
*
* The GENERATED_* files are generated by generate-commands-parser.pl with the
* input parser-specs/configs.spec.
******************************************************************************/
#include "GENERATED_config_enums.h"
typedef struct token {
char *name;
char *identifier;
/* This might be __CALL */
cmdp_state next_state;
union {
uint16_t call_identifier;
} extra;
} cmdp_token;
typedef struct tokenptr {
cmdp_token *array;
int n;
} cmdp_token_ptr;
#include "GENERATED_config_tokens.h"
/*******************************************************************************
* The (small) stack where identified literals are stored during the parsing
* of a single command (like $workspace).
******************************************************************************/
struct stack_entry {
/* Just a pointer, not dynamically allocated. */
const char *identifier;
enum {
STACK_STR = 0,
STACK_LONG = 1,
} type;
union {
char *str;
long num;
} val;
};
/* 10 entries should be enough for everybody. */
static struct stack_entry stack[10];
/*
* Pushes a string (identified by 'identifier') on the stack. We simply use a
* single array, since the number of entries we have to store is very small.
*
*/
static void push_string(const char *identifier, const char *str) {
for (int c = 0; c < 10; c++) {
if (stack[c].identifier != NULL &&
strcmp(stack[c].identifier, identifier) != 0)
continue;
if (stack[c].identifier == NULL) {
/* Found a free slot, lets store it here. */
stack[c].identifier = identifier;
stack[c].val.str = sstrdup(str);
stack[c].type = STACK_STR;
} else {
/* Append the value. */
char *prev = stack[c].val.str;
sasprintf(&(stack[c].val.str), "%s,%s", prev, str);
free(prev);
}
return;
}
/* When we arrive here, the stack is full. This should not happen and
* means theres either a bug in this parser or the specification
* contains a command with more than 10 identified tokens. */
fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
"in the code, or a new command which contains more than "
"10 identified tokens.\n");
exit(1);
}
static void push_long(const char *identifier, long num) {
for (int c = 0; c < 10; c++) {
if (stack[c].identifier != NULL)
continue;
/* Found a free slot, lets store it here. */
stack[c].identifier = identifier;
stack[c].val.num = num;
stack[c].type = STACK_LONG;
return;
}
/* When we arrive here, the stack is full. This should not happen and
* means theres either a bug in this parser or the specification
* contains a command with more than 10 identified tokens. */
fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
"in the code, or a new command which contains more than "
"10 identified tokens.\n");
exit(1);
}
static const char *get_string(const char *identifier) {
for (int c = 0; c < 10; c++) {
if (stack[c].identifier == NULL)
break;
if (strcmp(identifier, stack[c].identifier) == 0)
return stack[c].val.str;
}
return NULL;
}
static const long get_long(const char *identifier) {
for (int c = 0; c < 10; c++) {
if (stack[c].identifier == NULL)
break;
if (strcmp(identifier, stack[c].identifier) == 0)
return stack[c].val.num;
}
return 0;
}
static void clear_stack(void) {
for (int c = 0; c < 10; c++) {
if (stack[c].type == STACK_STR && stack[c].val.str != NULL)
free(stack[c].val.str);
stack[c].identifier = NULL;
stack[c].val.str = NULL;
stack[c].val.num = 0;
}
}
// TODO: remove this if it turns out we dont need it for testing.
#if 0
/*******************************************************************************
* A dynamically growing linked list which holds the criteria for the current
* command.
******************************************************************************/
typedef struct criterion {
char *type;
char *value;
TAILQ_ENTRY(criterion) criteria;
} criterion;
static TAILQ_HEAD(criteria_head, criterion) criteria =
TAILQ_HEAD_INITIALIZER(criteria);
/*
* Stores the given type/value in the list of criteria.
* Accepts a pointer as first argument, since it is 'call'ed by the parser.
*
*/
static void push_criterion(void *unused_criteria, const char *type,
const char *value) {
struct criterion *criterion = malloc(sizeof(struct criterion));
criterion->type = strdup(type);
criterion->value = strdup(value);
TAILQ_INSERT_TAIL(&criteria, criterion, criteria);
}
/*
* Clears the criteria linked list.
* Accepts a pointer as first argument, since it is 'call'ed by the parser.
*
*/
static void clear_criteria(void *unused_criteria) {
struct criterion *criterion;
while (!TAILQ_EMPTY(&criteria)) {
criterion = TAILQ_FIRST(&criteria);
free(criterion->type);
free(criterion->value);
TAILQ_REMOVE(&criteria, criterion, criteria);
free(criterion);
}
}
#endif
/*******************************************************************************
* The parser itself.
******************************************************************************/
static cmdp_state state;
static Match current_match;
static struct ConfigResult subcommand_output;
static struct ConfigResult command_output;
/* A list which contains the states that lead to the current state, e.g.
* INITIAL, WORKSPACE_LAYOUT.
* When jumping back to INITIAL, statelist_idx will simply be set to 1
* (likewise for other states, e.g. MODE or BAR).
* This list is used to process the nearest error token. */
static cmdp_state statelist[10] = { INITIAL };
/* NB: statelist_idx points to where the next entry will be inserted */
static int statelist_idx = 1;
#include "GENERATED_config_call.h"
static void next_state(const cmdp_token *token) {
cmdp_state _next_state = token->next_state;
//printf("token = name %s identifier %s\n", token->name, token->identifier);
//printf("next_state = %d\n", token->next_state);
if (token->next_state == __CALL) {
subcommand_output.json_gen = command_output.json_gen;
GENERATED_call(token->extra.call_identifier, &subcommand_output);
_next_state = subcommand_output.next_state;
clear_stack();
}
state = _next_state;
if (state == INITIAL) {
clear_stack();
}
/* See if we are jumping back to a state in which we were in previously
* (statelist contains INITIAL) and just move statelist_idx accordingly. */
for (int i = 0; i < statelist_idx; i++) {
if (statelist[i] != _next_state)
continue;
statelist_idx = i+1;
return;
}
/* Otherwise, the state is new and we add it to the list */
statelist[statelist_idx++] = _next_state;
}
/*
* Returns a pointer to the start of the line (one byte after the previous \r,
* \n) or the start of the input, if this is the first line.
*
*/
static const char *start_of_line(const char *walk, const char *beginning) {
while (*walk != '\n' && *walk != '\r' && walk >= beginning) {
walk--;
}
return walk + 1;
}
/*
* Copies the line and terminates it at the next \n, if any.
*
* The caller has to free() the result.
*
*/
static char *single_line(const char *start) {
char *result = sstrdup(start);
char *end = strchr(result, '\n');
if (end != NULL)
*end = '\0';
return result;
}
struct ConfigResult *parse_config(const char *input, struct context *context) {
/* Dump the entire config file into the debug log. We cannot just use
* DLOG("%s", input); because one log message must not exceed 4 KiB. */
const char *dumpwalk = input;
int linecnt = 1;
while (*dumpwalk != '\0') {
char *next_nl = strchr(dumpwalk, '\n');
if (next_nl != NULL) {
DLOG("CONFIG(line %3d): %.*s\n", linecnt, (int)(next_nl - dumpwalk), dumpwalk);
dumpwalk = next_nl + 1;
} else {
DLOG("CONFIG(line %3d): %s\n", linecnt, dumpwalk);
break;
}
linecnt++;
}
state = INITIAL;
statelist_idx = 1;
/* A YAJL JSON generator used for formatting replies. */
#if YAJL_MAJOR >= 2
command_output.json_gen = yajl_gen_alloc(NULL);
#else
command_output.json_gen = yajl_gen_alloc(NULL, NULL);
#endif
y(array_open);
const char *walk = input;
const size_t len = strlen(input);
int c;
const cmdp_token *token;
bool token_handled;
linecnt = 1;
// TODO: make this testable
#ifndef TEST_PARSER
cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
#endif
/* The "<=" operator is intentional: We also handle the terminating 0-byte
* explicitly by looking for an 'end' token. */
while ((walk - input) <= len) {
/* Skip whitespace before every token, newlines are relevant since they
* separate configuration directives. */
while ((*walk == ' ' || *walk == '\t') && *walk != '\0')
walk++;
//printf("remaining input: %s\n", walk);
cmdp_token_ptr *ptr = &(tokens[state]);
token_handled = false;
for (c = 0; c < ptr->n; c++) {
token = &(ptr->array[c]);
/* A literal. */
if (token->name[0] == '\'') {
if (strncasecmp(walk, token->name + 1, strlen(token->name) - 1) == 0) {
if (token->identifier != NULL)
push_string(token->identifier, token->name + 1);
walk += strlen(token->name) - 1;
next_state(token);
token_handled = true;
break;
}
continue;
}
if (strcmp(token->name, "number") == 0) {
/* Handle numbers. We only accept decimal numbers for now. */
char *end = NULL;
errno = 0;
long int num = strtol(walk, &end, 10);
if ((errno == ERANGE && (num == LONG_MIN || num == LONG_MAX)) ||
(errno != 0 && num == 0))
continue;
/* No valid numbers found */
if (end == walk)
continue;
if (token->identifier != NULL)
push_long(token->identifier, num);
/* Set walk to the first non-number character */
walk = end;
next_state(token);
token_handled = true;
break;
}
if (strcmp(token->name, "string") == 0 ||
strcmp(token->name, "word") == 0) {
const char *beginning = walk;
/* Handle quoted strings (or words). */
if (*walk == '"') {
beginning++;
walk++;
while (*walk != '\0' && (*walk != '"' || *(walk-1) == '\\'))
walk++;
} else {
if (token->name[0] == 's') {
while (*walk != '\0' && *walk != '\r' && *walk != '\n')
walk++;
} else {
/* For a word, the delimiters are white space (' ' or
* '\t'), closing square bracket (]), comma (,) and
* semicolon (;). */
while (*walk != ' ' && *walk != '\t' &&
*walk != ']' && *walk != ',' &&
*walk != ';' && *walk != '\r' &&
*walk != '\n' && *walk != '\0')
walk++;
}
}
if (walk != beginning) {
char *str = scalloc(walk-beginning + 1);
/* We copy manually to handle escaping of characters. */
int inpos, outpos;
for (inpos = 0, outpos = 0;
inpos < (walk-beginning);
inpos++, outpos++) {
/* We only handle escaped double quotes to not break
* backwards compatibility with people using \w in
* regular expressions etc. */
if (beginning[inpos] == '\\' && beginning[inpos+1] == '"')
inpos++;
str[outpos] = beginning[inpos];
}
if (token->identifier)
push_string(token->identifier, str);
free(str);
/* If we are at the end of a quoted string, skip the ending
* double quote. */
if (*walk == '"')
walk++;
next_state(token);
token_handled = true;
break;
}
}
if (strcmp(token->name, "end") == 0) {
//printf("checking for end: *%s*\n", walk);
if (*walk == '\0' || *walk == '\n' || *walk == '\r') {
next_state(token);
token_handled = true;
/* To make sure we start with an appropriate matching
* datastructure for commands which do *not* specify any
* criteria, we re-initialize the criteria system after
* every command. */
// TODO: make this testable
#ifndef TEST_PARSER
cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
#endif
linecnt++;
walk++;
break;
}
}
}
if (!token_handled) {
/* Figure out how much memory we will need to fill in the names of
* all tokens afterwards. */
int tokenlen = 0;
for (c = 0; c < ptr->n; c++)
tokenlen += strlen(ptr->array[c].name) + strlen("'', ");
/* Build up a decent error message. We include the problem, the
* full input, and underline the position where the parser
* currently is. */
char *errormessage;
char *possible_tokens = smalloc(tokenlen + 1);
char *tokenwalk = possible_tokens;
for (c = 0; c < ptr->n; c++) {
token = &(ptr->array[c]);
if (token->name[0] == '\'') {
/* A literal is copied to the error message enclosed with
* single quotes. */
*tokenwalk++ = '\'';
strcpy(tokenwalk, token->name + 1);
tokenwalk += strlen(token->name + 1);
*tokenwalk++ = '\'';
} else {
/* Skip error tokens in error messages, they are used
* internally only and might confuse users. */
if (strcmp(token->name, "error") == 0)
continue;
/* Any other token is copied to the error message enclosed
* with angle brackets. */
*tokenwalk++ = '<';
strcpy(tokenwalk, token->name);
tokenwalk += strlen(token->name);
*tokenwalk++ = '>';
}
if (c < (ptr->n - 1)) {
*tokenwalk++ = ',';
*tokenwalk++ = ' ';
}
}
*tokenwalk = '\0';
sasprintf(&errormessage, "Expected one of these tokens: %s",
possible_tokens);
free(possible_tokens);
/* Go back to the beginning of the line */
const char *error_line = start_of_line(walk, input);
/* Contains the same amount of characters as 'input' has, but with
* the unparseable part highlighted using ^ characters. */
char *position = scalloc(strlen(error_line) + 1);
const char *copywalk;
for (copywalk = error_line;
*copywalk != '\n' && *copywalk != '\r' && *copywalk != '\0';
copywalk++)
position[(copywalk - error_line)] = (copywalk >= walk ? '^' : (*copywalk == '\t' ? '\t' : ' '));
position[(copywalk - error_line)] = '\0';
ELOG("CONFIG: %s\n", errormessage);
ELOG("CONFIG: (in file %s)\n", context->filename);
char *error_copy = single_line(error_line);
/* Print context lines *before* the error, if any. */
if (linecnt > 1) {
const char *context_p1_start = start_of_line(error_line-2, input);
char *context_p1_line = single_line(context_p1_start);
if (linecnt > 2) {
const char *context_p2_start = start_of_line(context_p1_start-2, input);
char *context_p2_line = single_line(context_p2_start);
ELOG("CONFIG: Line %3d: %s\n", linecnt - 2, context_p2_line);
free(context_p2_line);
}
ELOG("CONFIG: Line %3d: %s\n", linecnt - 1, context_p1_line);
free(context_p1_line);
}
ELOG("CONFIG: Line %3d: %s\n", linecnt, error_copy);
ELOG("CONFIG: %s\n", position);
free(error_copy);
/* Print context lines *after* the error, if any. */
for (int i = 0; i < 2; i++) {
char *error_line_end = strchr(error_line, '\n');
if (error_line_end != NULL && *(error_line_end + 1) != '\0') {
error_line = error_line_end + 1;
error_copy = single_line(error_line);
ELOG("CONFIG: Line %3d: %s\n", linecnt + i + 1, error_copy);
free(error_copy);
}
}
context->has_errors = true;
/* Format this error message as a JSON reply. */
y(map_open);
ystr("success");
y(bool, false);
/* We set parse_error to true to distinguish this from other
* errors. i3-nagbar is spawned upon keypresses only for parser
* errors. */
ystr("parse_error");
y(bool, true);
ystr("error");
ystr(errormessage);
ystr("input");
ystr(input);
ystr("errorposition");
ystr(position);
y(map_close);
/* Skip the rest of this line, but continue parsing. */
while ((walk - input) <= len && *walk != '\n')
walk++;
free(position);
free(errormessage);
clear_stack();
/* To figure out in which state to go (e.g. MODE or INITIAL),
* we find the nearest state which contains an <error> token
* and follow that one. */
bool error_token_found = false;
for (int i = statelist_idx-1; (i >= 0) && !error_token_found; i--) {
cmdp_token_ptr *errptr = &(tokens[statelist[i]]);
for (int j = 0; j < errptr->n; j++) {
if (strcmp(errptr->array[j].name, "error") != 0)
continue;
next_state(&(errptr->array[j]));
error_token_found = true;
break;
}
}
assert(error_token_found);
}
}
y(array_close);
return &command_output;
}
/*******************************************************************************
* Code for building the stand-alone binary test.commands_parser which is used
* by t/187-commands-parser.t.
******************************************************************************/
#ifdef TEST_PARSER
/*
* Logs the given message to stdout while prefixing the current time to it,
* but only if debug logging was activated.
* This is to be called by DLOG() which includes filename/linenumber
*
*/
void debuglog(char *fmt, ...) {
va_list args;
va_start(args, fmt);
fprintf(stdout, "# ");
vfprintf(stdout, fmt, args);
va_end(args);
}
void errorlog(char *fmt, ...) {
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
}
static int criteria_next_state;
void cfg_criteria_init(I3_CFG, int _state) {
criteria_next_state = _state;
}
void cfg_criteria_add(I3_CFG, const char *ctype, const char *cvalue) {
}
void cfg_criteria_pop_state(I3_CFG) {
result->next_state = criteria_next_state;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
fprintf(stderr, "Syntax: %s <command>\n", argv[0]);
return 1;
}
struct context context;
context.filename = "<stdin>";
parse_config(argv[1], &context);
}
#endif