Merge pull request #13197 from HendrikVE/shell-tokenizer-refactor

sys/shell: refactor tokenizer code
This commit is contained in:
Francisco 2020-06-24 12:32:51 +02:00 committed by GitHub
commit af80e863b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 255 additions and 138 deletions

View File

@ -42,12 +42,6 @@
#define BS '\x08' /** ASCII "Backspace" */ #define BS '\x08' /** ASCII "Backspace" */
#define DEL '\x7f' /** ASCII "Delete" */ #define DEL '\x7f' /** ASCII "Delete" */
#ifdef MODULE_SHELL_COMMANDS
#define MORE_COMMANDS _shell_command_list
#else
#define MORE_COMMANDS
#endif /* MODULE_SHELL_COMMANDS */
#ifdef MODULE_NEWLIB #ifdef MODULE_NEWLIB
#define flush_if_needed() fflush(stdout) #define flush_if_needed() fflush(stdout)
#else #else
@ -66,160 +60,241 @@
#define PROMPT_ON 0 #define PROMPT_ON 0
#endif /* SHELL_NO_PROMPT */ #endif /* SHELL_NO_PROMPT */
static shell_command_handler_t find_handler(const shell_command_t *command_list, char *command) #ifdef MODULE_SHELL_COMMANDS
#define _builtin_cmds _shell_command_list
#else
#define _builtin_cmds NULL
#endif
/* Characters with special meaning to the tokenizer. */
#define SQUOTE '\''
#define DQUOTE '"'
#define ESCAPECHAR '\\'
#define SPACE ' '
#define TAB '\t'
/*
 * Bit that separates an escaped parse state from its plain counterpart
 * (e.g. PARSE_UNQUOTED ^ PARSE_ESCAPE_MASK == PARSE_UNQUOTED_ESC).
 * Deliberately defined without a trailing semicolon so that the macro can be
 * used inside larger expressions.
 */
#define PARSE_ESCAPE_MASK 0x4
/**
 * States of the command line tokenizer.
 *
 * Bit 2 (0x4) marks the escaped variant of a state: every *_ESC value is its
 * base state with that bit set. PARSE_BLANK has no escaped variant.
 */
enum parse_state {
    /* plain states */
    PARSE_BLANK           = 0,
    PARSE_UNQUOTED        = 1,
    PARSE_SINGLEQUOTE     = 2,
    PARSE_DOUBLEQUOTE     = 3,
    /* escaped states: base state with bit 2 set */
    PARSE_UNQUOTED_ESC    = 4 | PARSE_UNQUOTED,
    PARSE_SINGLEQUOTE_ESC = 4 | PARSE_SINGLEQUOTE,
    PARSE_DOUBLEQUOTE_ESC = 4 | PARSE_DOUBLEQUOTE,
};
static enum parse_state escape_toggle(enum parse_state s)
{ {
const shell_command_t *command_lists[] = { return s ^ PARSE_ESCAPE_MASK;
command_list, }
MORE_COMMANDS
};
/* iterating over command_lists */ static shell_command_handler_t search_commands(const shell_command_t *entry,
for (unsigned int i = 0; i < ARRAY_SIZE(command_lists); i++) { char *command)
{
const shell_command_t *entry; for (; entry->name != NULL; entry++) {
if (strcmp(entry->name, command) == 0) {
if ((entry = command_lists[i])) { return entry->handler;
/* iterating over commands in command_lists entry */
while (entry->name != NULL) {
if (strcmp(entry->name, command) == 0) {
return entry->handler;
}
else {
entry++;
}
}
} }
} }
return NULL; return NULL;
} }
/**
 * Look up the handler for @p command.
 *
 * The caller-supplied @p command_list (if not NULL) is searched first; only
 * when it yields no match is the built-in list (_builtin_cmds, if not NULL)
 * searched.
 *
 * @return the matching handler, or NULL if neither list contains the command.
 */
static shell_command_handler_t find_handler(
        const shell_command_t *command_list, char *command)
{
    if (command_list != NULL) {
        shell_command_handler_t found = search_commands(command_list, command);

        if (found != NULL) {
            return found;
        }
    }

    if (_builtin_cmds != NULL) {
        return search_commands(_builtin_cmds, command);
    }

    return NULL;
}
/**
 * Print one "name description" line for every command in the list starting at
 * @p entry. The list ends at the first element whose name is NULL.
 */
static void print_commands(const shell_command_t *entry)
{
    while (entry->name != NULL) {
        printf("%-20s %s\n", entry->name, entry->desc);
        entry++;
    }
}
static void print_help(const shell_command_t *command_list) static void print_help(const shell_command_t *command_list)
{ {
printf("%-20s %s\n", "Command", "Description"); puts("Command Description"
puts("---------------------------------------"); "\n---------------------------------------");
if (command_list != NULL) {
print_commands(command_list);
}
const shell_command_t *command_lists[] = { if (_builtin_cmds != NULL) {
command_list, print_commands(_builtin_cmds);
MORE_COMMANDS
};
/* iterating over command_lists */
for (unsigned int i = 0; i < ARRAY_SIZE(command_lists); i++) {
const shell_command_t *entry;
if ((entry = command_lists[i])) {
/* iterating over commands in command_lists entry */
while (entry->name != NULL) {
printf("%-20s %s\n", entry->name, entry->desc);
entry++;
}
}
} }
} }
/**
* Break input line into words, create argv and call the command handler.
*
* Words are broken up at blanks (spaces or tabs). A backslash escapes the
* character that comes after it (meaning it is taken literally and, if it is
* a blank, it does not break the word). Blanks can also be protected by
* quoting with double or single quotes.
*
* There are two unquoted states (PARSE_BLANK and PARSE_UNQUOTED) and two quoted
* states (PARSE_SINGLEQUOTE and PARSE_DOUBLEQUOTE). In addition, every state
* (except PARSE_BLANK) has an escaped pair state (e.g. PARSE_SINGLEQUOTE and
* PARSE_SINGLEQUOTE_ESC).
*
* For the following let's define some things
* - Function transit(character, state) to change to 'state' after
* 'character' was read. The order of a list of transit-functions matters.
* - A BLANK is either SPACE or TAB
* - '*' means any character
*
* PARSE_BLANK
* transit(SQUOTE, PARSE_SINGLEQUOTE)
* transit(DQUOTE, PARSE_DOUBLEQUOTE)
* transit(ESCAPECHAR, PARSE_UNQUOTED_ESC)
* transit(BLANK, PARSE_BLANK)
* transit(*, PARSE_UNQUOTED) -> store character
*
* PARSE_UNQUOTED
* transit(SQUOTE, PARSE_SINGLEQUOTE)
* transit(DQUOTE, PARSE_DOUBLEQUOTE)
* transit(BLANK, PARSE_BLANK)
* transit(ESCAPECHAR, PARSE_UNQUOTED_ESC)
* transit(*, PARSE_UNQUOTED) -> store character
*
* PARSE_UNQUOTED_ESC
* transit(*, PARSE_UNQUOTED) -> store character
*
* PARSE_SINGLEQUOTE
* transit(SQUOTE, PARSE_UNQUOTED)
* transit(ESCAPECHAR, PARSE_SINGLEQUOTE_ESC)
* transit(*, PARSE_SINGLEQUOTE) -> store character
*
* PARSE_SINGLEQUOTE_ESC
* transit(*, PARSE_SINGLEQUOTE) -> store character
*
* PARSE_DOUBLEQUOTE
* transit(DQUOTE, PARSE_UNQUOTED)
* transit(ESCAPECHAR, PARSE_DOUBLEQUOTE_ESC)
* transit(*, PARSE_DOUBLEQUOTE) -> store character
*
* PARSE_DOUBLEQUOTE_ESC
* transit(*, PARSE_DOUBLEQUOTE) -> store character
*
*
*/
static void handle_input_line(const shell_command_t *command_list, char *line) static void handle_input_line(const shell_command_t *command_list, char *line)
{ {
static const char *INCORRECT_QUOTING = "shell: incorrect quoting";
/* first we need to calculate the number of arguments */ /* first we need to calculate the number of arguments */
unsigned argc = 0; int argc = 0;
char *pos = line; char *readpos = line;
int contains_esc_seq = 0; char *writepos = readpos;
while (1) {
if ((unsigned char) *pos > ' ') { uint8_t pstate = PARSE_BLANK;
/* found an argument */
if (*pos == '"' || *pos == '\'') { for (; *readpos != '\0'; readpos++) {
/* it's a quoted argument */
const char quote_char = *pos; char wordbreak = SPACE;
do { bool is_wordbreak = false;
++pos;
if (!*pos) { switch (pstate) {
puts(INCORRECT_QUOTING);
return; case PARSE_BLANK:
} if (*readpos != SPACE && *readpos != TAB) {
else if (*pos == '\\') { argc++;
/* skip over the next character */ }
++contains_esc_seq;
++pos; if (*readpos == SQUOTE) {
if (!*pos) { pstate = PARSE_SINGLEQUOTE;
puts(INCORRECT_QUOTING); }
return; else if (*readpos == DQUOTE) {
} pstate = PARSE_DOUBLEQUOTE;
continue; }
} else if (*readpos == ESCAPECHAR) {
} while (*pos != quote_char); pstate = PARSE_UNQUOTED_ESC;
if ((unsigned char) pos[1] > ' ') { }
puts(INCORRECT_QUOTING); else if (*readpos != SPACE && *readpos != TAB) {
return; pstate = PARSE_UNQUOTED;
*writepos++ = *readpos;
}
break;
case PARSE_UNQUOTED:
if (*readpos == SQUOTE) {
pstate = PARSE_SINGLEQUOTE;
}
else if (*readpos == DQUOTE) {
pstate = PARSE_DOUBLEQUOTE;
}
else if (*readpos == ESCAPECHAR) {
pstate = escape_toggle(pstate);
}
else if (*readpos == SPACE || *readpos == TAB) {
pstate = PARSE_BLANK;
*writepos++ = '\0';
}
else {
*writepos++ = *readpos;
}
break;
case PARSE_SINGLEQUOTE:
wordbreak = SQUOTE;
is_wordbreak = true;
break;
case PARSE_DOUBLEQUOTE:
wordbreak = DQUOTE;
is_wordbreak = true;
break;
default: /* QUOTED state */
pstate = escape_toggle(pstate);
*writepos++ = *readpos;
break;
}
if (is_wordbreak) {
if (*readpos == wordbreak) {
if (wordbreak == SQUOTE || wordbreak == DQUOTE) {
pstate = PARSE_UNQUOTED;
} }
} }
else { else if (*readpos == ESCAPECHAR) {
/* it's an unquoted argument */ pstate = escape_toggle(pstate);
do { }
if (*pos == '\\') { else {
/* skip over the next character */ *writepos++ = *readpos;
++contains_esc_seq;
++pos;
if (!*pos) {
puts(INCORRECT_QUOTING);
return;
}
}
++pos;
if (*pos == '"') {
puts(INCORRECT_QUOTING);
return;
}
} while ((unsigned char) *pos > ' ');
} }
/* count the number of arguments we got */
++argc;
}
/* zero out current position (space or quotation mark) and advance */
if (*pos > 0) {
*pos = 0;
++pos;
}
else {
break;
} }
} }
if (!argc) { *writepos = '\0';
if (pstate != PARSE_BLANK && pstate != PARSE_UNQUOTED) {
puts("shell: incorrect quoting");
return;
}
if (argc == 0) {
return; return;
} }
/* then we fill the argv array */ /* then we fill the argv array */
char *argv[argc + 1]; int collected;
argv[argc] = NULL; char *argv[argc];
pos = line;
for (unsigned i = 0; i < argc; ++i) { readpos = line;
while (!*pos) { for (collected = 0; collected < argc; collected++) {
++pos; argv[collected] = readpos;
} readpos += strlen(readpos) + 1;
if (*pos == '"' || *pos == '\'') {
++pos;
}
argv[i] = pos;
while (*pos) {
++pos;
}
}
for (char **arg = argv; contains_esc_seq && *arg; ++arg) {
for (char *c = *arg; *c; ++c) {
if (*c != '\\') {
continue;
}
for (char *d = c; *d; ++d) {
*d = d[1];
}
if (--contains_esc_seq == 0) {
break;
}
}
} }
/* then we call the appropriate handler */ /* then we call the appropriate handler */

View File

@ -43,19 +43,61 @@ CONTROL_D = DLE+'\x04'
PROMPT = '> ' PROMPT = '> '
CMDS = ( CMDS = (
# test start
('start_test', '[TEST_START]'), ('start_test', '[TEST_START]'),
(CONTROL_C, PROMPT), (CONTROL_C, PROMPT),
('\n', PROMPT), ('\n', PROMPT),
# test simple word separation
('echo a string', '"echo""a""string"'),
('echo multiple spaces between argv', '"echo""multiple""spaces""between""argv"'),
('echo \t tabs\t\t processed \t\tlike\t \t\tspaces', '"echo""tabs""processed""like""spaces"'),
# test long line
('123456789012345678901234567890123456789012345678901234567890', ('123456789012345678901234567890123456789012345678901234567890',
'shell: command not found: ' 'shell: command not found: '
'123456789012345678901234567890123456789012345678901234567890'), '123456789012345678901234567890123456789012345678901234567890'),
('unknown_command', 'shell: command not found: unknown_command'), ('unknown_command', 'shell: command not found: unknown_command'),
# test leading/trailing BLANK
(' echo leading spaces', '"echo""leading""spaces"'),
('\t\t\t\t\techo leading tabs', '"echo""leading""tabs"'),
('echo trailing spaces ', '"echo""trailing""spaces"'),
('echo trailing tabs\t\t\t\t\t', '"echo""trailing""tabs"'),
# test backspace
('hello-willy\b\b\b\borld', 'shell: command not found: hello-world'), ('hello-willy\b\b\b\borld', 'shell: command not found: hello-world'),
('\b\b\b\becho', '\"echo\"'), ('\b\b\b\becho', '"echo"'),
('help', EXPECTED_HELP),
('echo a string', '\"echo\"\"a\"\"string\"'), # test escaping
('echo \\\'', '"echo""\'"'),
('echo \\"', '"echo""""'),
('echo escaped\\ space', '"echo""escaped space"'),
('echo escape within \'\\s\\i\\n\\g\\l\\e\\q\\u\\o\\t\\e\'', '"echo""escape""within""singlequote"'),
('echo escape within "\\d\\o\\u\\b\\l\\e\\q\\u\\o\\t\\e"', '"echo""escape""within""doublequote"'),
("""echo "t\e st" "\\"" '\\'' a\ b""", '"echo""te st"""""\'""a b"'),
# test correct quoting
('echo "hello"world', '"echo""helloworld"'),
('echo hel"lowo"rld', '"echo""helloworld"'),
('echo hello"world"', '"echo""helloworld"'),
('echo quoted space " "', '"echo""quoted""space"" "'),
('echo abc"def\'ghijk"lmn', '"echo""abcdef\'ghijklmn"'),
('echo abc\'def"ghijk\'lmn', '"echo""abcdef"ghijklmn"'),
('echo "\'" \'"\'', '"echo""\'""""'),
# test incorrect quoting
('echo a\\', 'shell: incorrect quoting'),
('echo "', 'shell: incorrect quoting'),
('echo \'', 'shell: incorrect quoting'),
('echo abcdef"ghijklmn', 'shell: incorrect quoting'),
('echo abcdef\'ghijklmn', 'shell: incorrect quoting'),
# test default commands
('ps', EXPECTED_PS), ('ps', EXPECTED_PS),
('help', EXPECTED_HELP), ('help', EXPECTED_HELP),
# test end
('reboot', 'test_shell.'), ('reboot', 'test_shell.'),
('end_test', '[TEST_END]'), ('end_test', '[TEST_END]'),
) )