Merge pull request #13197 from HendrikVE/shell-tokenizer-refactor
sys/shell: refactor tokenizer code
This commit is contained in:
commit
af80e863b3
@ -42,12 +42,6 @@
|
|||||||
#define BS '\x08' /** ASCII "Backspace" */
|
#define BS '\x08' /** ASCII "Backspace" */
|
||||||
#define DEL '\x7f' /** ASCII "Delete" */
|
#define DEL '\x7f' /** ASCII "Delete" */
|
||||||
|
|
||||||
#ifdef MODULE_SHELL_COMMANDS
|
|
||||||
#define MORE_COMMANDS _shell_command_list
|
|
||||||
#else
|
|
||||||
#define MORE_COMMANDS
|
|
||||||
#endif /* MODULE_SHELL_COMMANDS */
|
|
||||||
|
|
||||||
#ifdef MODULE_NEWLIB
|
#ifdef MODULE_NEWLIB
|
||||||
#define flush_if_needed() fflush(stdout)
|
#define flush_if_needed() fflush(stdout)
|
||||||
#else
|
#else
|
||||||
@ -66,160 +60,241 @@
|
|||||||
#define PROMPT_ON 0
|
#define PROMPT_ON 0
|
||||||
#endif /* SHELL_NO_PROMPT */
|
#endif /* SHELL_NO_PROMPT */
|
||||||
|
|
||||||
static shell_command_handler_t find_handler(const shell_command_t *command_list, char *command)
|
#ifdef MODULE_SHELL_COMMANDS
|
||||||
{
|
#define _builtin_cmds _shell_command_list
|
||||||
const shell_command_t *command_lists[] = {
|
#else
|
||||||
command_list,
|
#define _builtin_cmds NULL
|
||||||
MORE_COMMANDS
|
#endif
|
||||||
|
|
||||||
|
#define SQUOTE '\''
|
||||||
|
#define DQUOTE '"'
|
||||||
|
#define ESCAPECHAR '\\'
|
||||||
|
#define SPACE ' '
|
||||||
|
#define TAB '\t'
|
||||||
|
|
||||||
|
/* Bit XOR-ed into a parse state by escape_toggle() to switch between a state
 * and its escaped pair (e.g. PARSE_UNQUOTED 0x1 <-> PARSE_UNQUOTED_ESC 0x5).
 * No trailing semicolon, so the macro can be used inside any expression. */
#define PARSE_ESCAPE_MASK 0x4
|
||||||
|
|
||||||
|
enum parse_state {
|
||||||
|
PARSE_BLANK = 0x0,
|
||||||
|
|
||||||
|
PARSE_UNQUOTED = 0x1,
|
||||||
|
PARSE_SINGLEQUOTE = 0x2,
|
||||||
|
PARSE_DOUBLEQUOTE = 0x3,
|
||||||
|
|
||||||
|
PARSE_UNQUOTED_ESC = 0x5,
|
||||||
|
PARSE_SINGLEQUOTE_ESC = 0x6,
|
||||||
|
PARSE_DOUBLEQUOTE_ESC = 0x7,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* iterating over command_lists */
|
static enum parse_state escape_toggle(enum parse_state s)
|
||||||
for (unsigned int i = 0; i < ARRAY_SIZE(command_lists); i++) {
|
{
|
||||||
|
return s ^ PARSE_ESCAPE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
const shell_command_t *entry;
|
static shell_command_handler_t search_commands(const shell_command_t *entry,
|
||||||
|
char *command)
|
||||||
if ((entry = command_lists[i])) {
|
{
|
||||||
/* iterating over commands in command_lists entry */
|
for (; entry->name != NULL; entry++) {
|
||||||
while (entry->name != NULL) {
|
|
||||||
if (strcmp(entry->name, command) == 0) {
|
if (strcmp(entry->name, command) == 0) {
|
||||||
return entry->handler;
|
return entry->handler;
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
entry++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
static shell_command_handler_t find_handler(
|
||||||
|
const shell_command_t *command_list, char *command)
|
||||||
|
{
|
||||||
|
shell_command_handler_t handler = NULL;
|
||||||
|
if (command_list != NULL) {
|
||||||
|
handler = search_commands(command_list, command);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (handler == NULL && _builtin_cmds != NULL) {
|
||||||
|
handler = search_commands(_builtin_cmds, command);
|
||||||
|
}
|
||||||
|
|
||||||
|
return handler;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void print_commands(const shell_command_t *entry)
|
||||||
|
{
|
||||||
|
for (; entry->name != NULL; entry++) {
|
||||||
|
printf("%-20s %s\n", entry->name, entry->desc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_help(const shell_command_t *command_list)
|
static void print_help(const shell_command_t *command_list)
|
||||||
{
|
{
|
||||||
printf("%-20s %s\n", "Command", "Description");
|
puts("Command Description"
|
||||||
puts("---------------------------------------");
|
"\n---------------------------------------");
|
||||||
|
if (command_list != NULL) {
|
||||||
const shell_command_t *command_lists[] = {
|
print_commands(command_list);
|
||||||
command_list,
|
|
||||||
MORE_COMMANDS
|
|
||||||
};
|
|
||||||
|
|
||||||
/* iterating over command_lists */
|
|
||||||
for (unsigned int i = 0; i < ARRAY_SIZE(command_lists); i++) {
|
|
||||||
|
|
||||||
const shell_command_t *entry;
|
|
||||||
|
|
||||||
if ((entry = command_lists[i])) {
|
|
||||||
/* iterating over commands in command_lists entry */
|
|
||||||
while (entry->name != NULL) {
|
|
||||||
printf("%-20s %s\n", entry->name, entry->desc);
|
|
||||||
entry++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (_builtin_cmds != NULL) {
|
||||||
|
print_commands(_builtin_cmds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Break input line into words, create argv and call the command handler.
|
||||||
|
*
|
||||||
|
* Words are broken up at spaces. A backslash escapes the character that comes
|
||||||
|
* after (meaning it is taken literally and, if it is a space, it does not break
|
||||||
|
* the word). Spaces can also be protected by quoting with double or single
|
||||||
|
* quotes.
|
||||||
|
*
|
||||||
|
* There are two unquoted states (PARSE_BLANK and PARSE_UNQUOTED) and two quoted
|
||||||
|
* states (PARSE_SINGLEQUOTE and PARSE_DOUBLEQUOTE). In addition, every state
|
||||||
|
* (except PARSE_BLANK) has an escaped pair state (e.g. PARSE_SINGLEQUOTE and
|
||||||
|
* PARSE_SINGLEQUOTE_ESC).
|
||||||
|
*
|
||||||
|
* For the following let's define some things
|
||||||
|
* - Function transit(character, state) to change to 'state' after
|
||||||
|
* 'character' was read. The order of a list of transit-functions matters.
|
||||||
|
* - A BLANK is either SPACE or TAB
|
||||||
|
* - '*' means any character
|
||||||
|
*
|
||||||
|
* PARSE_BLANK
|
||||||
|
* transit(SQUOTE, PARSE_SINGLEQUOTE)
|
||||||
|
* transit(DQUOTE, PARSE_DOUBLEQUOTE)
|
||||||
|
* transit(ESCAPECHAR, PARSE_UNQUOTED_ESC)
|
||||||
|
* transit(BLANK, PARSE_BLANK)
|
||||||
|
* transit(*, PARSE_UNQUOTED) -> store character
|
||||||
|
*
|
||||||
|
* PARSE_UNQUOTED
|
||||||
|
* transit(SQUOTE, PARSE_SINGLEQUOTE)
|
||||||
|
* transit(DQUOTE, PARSE_DOUBLEQUOTE)
|
||||||
|
* transit(BLANK, PARSE_BLANK)
|
||||||
|
* transit(ESCAPECHAR, PARSE_UNQUOTED_ESC)
|
||||||
|
* transit(*, PARSE_UNQUOTED) -> store character
|
||||||
|
*
|
||||||
|
* PARSE_UNQUOTED_ESC
|
||||||
|
* transit(*, PARSE_UNQUOTED) -> store character
|
||||||
|
*
|
||||||
|
* PARSE_SINGLEQUOTE
|
||||||
|
* transit(SQUOTE, PARSE_UNQUOTED)
|
||||||
|
* transit(ESCAPECHAR, PARSE_SINGLEQUOTE_ESC)
|
||||||
|
* transit(*, PARSE_SINGLEQUOTE) -> store character
|
||||||
|
*
|
||||||
|
* PARSE_SINGLEQUOTE_ESC
|
||||||
|
* transit(*, PARSE_SINGLEQUOTE) -> store character
|
||||||
|
*
|
||||||
|
* PARSE_DOUBLEQUOTE
|
||||||
|
* transit(DQUOTE, PARSE_UNQUOTED)
|
||||||
|
* transit(ESCAPECHAR, PARSE_DOUBLEQUOTE_ESC)
|
||||||
|
* transit(*, PARSE_DOUBLEQUOTE) -> store character
|
||||||
|
*
|
||||||
|
* PARSE_DOUBLEQUOTE_ESC
|
||||||
|
* transit(*, PARSE_DOUBLEQUOTE) -> store character
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
static void handle_input_line(const shell_command_t *command_list, char *line)
|
static void handle_input_line(const shell_command_t *command_list, char *line)
|
||||||
{
|
{
|
||||||
static const char *INCORRECT_QUOTING = "shell: incorrect quoting";
|
|
||||||
|
|
||||||
/* first we need to calculate the number of arguments */
|
/* first we need to calculate the number of arguments */
|
||||||
unsigned argc = 0;
|
int argc = 0;
|
||||||
char *pos = line;
|
char *readpos = line;
|
||||||
int contains_esc_seq = 0;
|
char *writepos = readpos;
|
||||||
while (1) {
|
|
||||||
if ((unsigned char) *pos > ' ') {
|
uint8_t pstate = PARSE_BLANK;
|
||||||
/* found an argument */
|
|
||||||
if (*pos == '"' || *pos == '\'') {
|
for (; *readpos != '\0'; readpos++) {
|
||||||
/* it's a quoted argument */
|
|
||||||
const char quote_char = *pos;
|
char wordbreak = SPACE;
|
||||||
do {
|
bool is_wordbreak = false;
|
||||||
++pos;
|
|
||||||
if (!*pos) {
|
switch (pstate) {
|
||||||
puts(INCORRECT_QUOTING);
|
|
||||||
return;
|
case PARSE_BLANK:
|
||||||
}
|
if (*readpos != SPACE && *readpos != TAB) {
|
||||||
else if (*pos == '\\') {
|
argc++;
|
||||||
/* skip over the next character */
|
|
||||||
++contains_esc_seq;
|
|
||||||
++pos;
|
|
||||||
if (!*pos) {
|
|
||||||
puts(INCORRECT_QUOTING);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} while (*pos != quote_char);
|
|
||||||
if ((unsigned char) pos[1] > ' ') {
|
|
||||||
puts(INCORRECT_QUOTING);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* it's an unquoted argument */
|
|
||||||
do {
|
|
||||||
if (*pos == '\\') {
|
|
||||||
/* skip over the next character */
|
|
||||||
++contains_esc_seq;
|
|
||||||
++pos;
|
|
||||||
if (!*pos) {
|
|
||||||
puts(INCORRECT_QUOTING);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
++pos;
|
|
||||||
if (*pos == '"') {
|
|
||||||
puts(INCORRECT_QUOTING);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} while ((unsigned char) *pos > ' ');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* count the number of arguments we got */
|
if (*readpos == SQUOTE) {
|
||||||
++argc;
|
pstate = PARSE_SINGLEQUOTE;
|
||||||
}
|
}
|
||||||
|
else if (*readpos == DQUOTE) {
|
||||||
|
pstate = PARSE_DOUBLEQUOTE;
|
||||||
|
}
|
||||||
|
else if (*readpos == ESCAPECHAR) {
|
||||||
|
pstate = PARSE_UNQUOTED_ESC;
|
||||||
|
}
|
||||||
|
else if (*readpos != SPACE && *readpos != TAB) {
|
||||||
|
pstate = PARSE_UNQUOTED;
|
||||||
|
*writepos++ = *readpos;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/* zero out current position (space or quotation mark) and advance */
|
case PARSE_UNQUOTED:
|
||||||
if (*pos > 0) {
|
if (*readpos == SQUOTE) {
|
||||||
*pos = 0;
|
pstate = PARSE_SINGLEQUOTE;
|
||||||
++pos;
|
}
|
||||||
|
else if (*readpos == DQUOTE) {
|
||||||
|
pstate = PARSE_DOUBLEQUOTE;
|
||||||
|
}
|
||||||
|
else if (*readpos == ESCAPECHAR) {
|
||||||
|
pstate = escape_toggle(pstate);
|
||||||
|
}
|
||||||
|
else if (*readpos == SPACE || *readpos == TAB) {
|
||||||
|
pstate = PARSE_BLANK;
|
||||||
|
*writepos++ = '\0';
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
*writepos++ = *readpos;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PARSE_SINGLEQUOTE:
|
||||||
|
wordbreak = SQUOTE;
|
||||||
|
is_wordbreak = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PARSE_DOUBLEQUOTE:
|
||||||
|
wordbreak = DQUOTE;
|
||||||
|
is_wordbreak = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: /* QUOTED state */
|
||||||
|
pstate = escape_toggle(pstate);
|
||||||
|
*writepos++ = *readpos;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_wordbreak) {
|
||||||
|
if (*readpos == wordbreak) {
|
||||||
|
if (wordbreak == SQUOTE || wordbreak == DQUOTE) {
|
||||||
|
pstate = PARSE_UNQUOTED;
|
||||||
}
|
}
|
||||||
if (!argc) {
|
}
|
||||||
|
else if (*readpos == ESCAPECHAR) {
|
||||||
|
pstate = escape_toggle(pstate);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*writepos++ = *readpos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*writepos = '\0';
|
||||||
|
|
||||||
|
if (pstate != PARSE_BLANK && pstate != PARSE_UNQUOTED) {
|
||||||
|
puts("shell: incorrect quoting");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argc == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* then we fill the argv array */
|
/* then we fill the argv array */
|
||||||
char *argv[argc + 1];
|
int collected;
|
||||||
argv[argc] = NULL;
|
char *argv[argc];
|
||||||
pos = line;
|
|
||||||
for (unsigned i = 0; i < argc; ++i) {
|
readpos = line;
|
||||||
while (!*pos) {
|
for (collected = 0; collected < argc; collected++) {
|
||||||
++pos;
|
argv[collected] = readpos;
|
||||||
}
|
readpos += strlen(readpos) + 1;
|
||||||
if (*pos == '"' || *pos == '\'') {
|
|
||||||
++pos;
|
|
||||||
}
|
|
||||||
argv[i] = pos;
|
|
||||||
while (*pos) {
|
|
||||||
++pos;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (char **arg = argv; contains_esc_seq && *arg; ++arg) {
|
|
||||||
for (char *c = *arg; *c; ++c) {
|
|
||||||
if (*c != '\\') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for (char *d = c; *d; ++d) {
|
|
||||||
*d = d[1];
|
|
||||||
}
|
|
||||||
if (--contains_esc_seq == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* then we call the appropriate handler */
|
/* then we call the appropriate handler */
|
||||||
|
|||||||
@ -43,19 +43,61 @@ CONTROL_D = DLE+'\x04'
|
|||||||
PROMPT = '> '
|
PROMPT = '> '
|
||||||
|
|
||||||
CMDS = (
|
CMDS = (
|
||||||
|
# test start
|
||||||
('start_test', '[TEST_START]'),
|
('start_test', '[TEST_START]'),
|
||||||
(CONTROL_C, PROMPT),
|
(CONTROL_C, PROMPT),
|
||||||
('\n', PROMPT),
|
('\n', PROMPT),
|
||||||
|
|
||||||
|
# test simple word separation
|
||||||
|
('echo a string', '"echo""a""string"'),
|
||||||
|
('echo multiple spaces between argv', '"echo""multiple""spaces""between""argv"'),
|
||||||
|
('echo \t tabs\t\t processed \t\tlike\t \t\tspaces', '"echo""tabs""processed""like""spaces"'),
|
||||||
|
|
||||||
|
# test long line
|
||||||
('123456789012345678901234567890123456789012345678901234567890',
|
('123456789012345678901234567890123456789012345678901234567890',
|
||||||
'shell: command not found: '
|
'shell: command not found: '
|
||||||
'123456789012345678901234567890123456789012345678901234567890'),
|
'123456789012345678901234567890123456789012345678901234567890'),
|
||||||
('unknown_command', 'shell: command not found: unknown_command'),
|
('unknown_command', 'shell: command not found: unknown_command'),
|
||||||
|
|
||||||
|
# test leading/trailing BLANK
|
||||||
|
(' echo leading spaces', '"echo""leading""spaces"'),
|
||||||
|
('\t\t\t\t\techo leading tabs', '"echo""leading""tabs"'),
|
||||||
|
('echo trailing spaces ', '"echo""trailing""spaces"'),
|
||||||
|
('echo trailing tabs\t\t\t\t\t', '"echo""trailing""tabs"'),
|
||||||
|
|
||||||
|
# test backspace
|
||||||
('hello-willy\b\b\b\borld', 'shell: command not found: hello-world'),
|
('hello-willy\b\b\b\borld', 'shell: command not found: hello-world'),
|
||||||
('\b\b\b\becho', '\"echo\"'),
|
('\b\b\b\becho', '"echo"'),
|
||||||
('help', EXPECTED_HELP),
|
|
||||||
('echo a string', '\"echo\"\"a\"\"string\"'),
|
# test escaping
|
||||||
|
('echo \\\'', '"echo""\'"'),
|
||||||
|
('echo \\"', '"echo""""'),
|
||||||
|
('echo escaped\\ space', '"echo""escaped space"'),
|
||||||
|
('echo escape within \'\\s\\i\\n\\g\\l\\e\\q\\u\\o\\t\\e\'', '"echo""escape""within""singlequote"'),
|
||||||
|
('echo escape within "\\d\\o\\u\\b\\l\\e\\q\\u\\o\\t\\e"', '"echo""escape""within""doublequote"'),
|
||||||
|
("""echo "t\e st" "\\"" '\\'' a\ b""", '"echo""te st"""""\'""a b"'),
|
||||||
|
|
||||||
|
# test correct quoting
|
||||||
|
('echo "hello"world', '"echo""helloworld"'),
|
||||||
|
('echo hel"lowo"rld', '"echo""helloworld"'),
|
||||||
|
('echo hello"world"', '"echo""helloworld"'),
|
||||||
|
('echo quoted space " "', '"echo""quoted""space"" "'),
|
||||||
|
('echo abc"def\'ghijk"lmn', '"echo""abcdef\'ghijklmn"'),
|
||||||
|
('echo abc\'def"ghijk\'lmn', '"echo""abcdef"ghijklmn"'),
|
||||||
|
('echo "\'" \'"\'', '"echo""\'""""'),
|
||||||
|
|
||||||
|
# test incorrect quoting
|
||||||
|
('echo a\\', 'shell: incorrect quoting'),
|
||||||
|
('echo "', 'shell: incorrect quoting'),
|
||||||
|
('echo \'', 'shell: incorrect quoting'),
|
||||||
|
('echo abcdef"ghijklmn', 'shell: incorrect quoting'),
|
||||||
|
('echo abcdef\'ghijklmn', 'shell: incorrect quoting'),
|
||||||
|
|
||||||
|
# test default commands
|
||||||
('ps', EXPECTED_PS),
|
('ps', EXPECTED_PS),
|
||||||
('help', EXPECTED_HELP),
|
('help', EXPECTED_HELP),
|
||||||
|
|
||||||
|
# test end
|
||||||
('reboot', 'test_shell.'),
|
('reboot', 'test_shell.'),
|
||||||
('end_test', '[TEST_END]'),
|
('end_test', '[TEST_END]'),
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user