Am 19.09.2013 01:29 schrieb Stefan Tauner:
Add an optional sub-parameter to the -i parameter to allow building the image to be written from multiple files. This will also allow regions to be read from flash and written to separate image files in a later patch. Existing function read_buf_from_file() is refined and reused to read the file.
Document the whole layout handling including the new features a bit better and refine wording regarding files, images, layouts and regions as discussed.
Also, finally introduce helpers.c: a module containing utility functions to be shared which do not fit elsewhere. The first function to be added is unquote_string() which is used throughout this patch.
Abort for non-write operations if a layout file is given.
based on chromiumos' d0ea9ed71e7f86bb8e8db2ca7c32a96de25343d8 Signed-off-by: David Hendricks dhendrix@chromium.org Signed-off-by: Stefan Tauner stefan.tauner@student.tuwien.ac.at
Counter-proposal for helpers.c, addressing all my complaints: - The first token must either be completely enclosed in double quotes or not quoted at all. - There must be end-of-string or delimiter after the closing quote. - The first token shall not have stray quotes anywhere.
Function description is yours.
Tested with the following inputs (input/output in single quotes):
# ./unqote '' Input '' ret=1 First token '', remaining text '' # ./unqote '""' Input '""' ret=1 First token '', remaining text '' # ./unqote 'foo' Input 'foo' ret=0 First token 'foo', remaining text '' # ./unqote ' foo ' Input ' foo ' ret=0 First token 'foo', remaining text '' # ./unqote '"foo"' Input '"foo"' ret=0 First token 'foo', remaining text '' # ./unqote ' "foo" ' Input ' "foo" ' ret=0 First token 'foo', remaining text ' ' # ./unqote ' " foo " ' Input ' " foo " ' ret=0 First token ' foo ', remaining text ' ' # ./unqote 'foo bar' Input 'foo bar' ret=0 First token 'foo', remaining text 'bar' # ./unqote ' foo bar ' Input ' foo bar ' ret=0 First token 'foo', remaining text ' bar ' # ./unqote ' "foo" bar ' Input ' "foo" bar ' ret=0 First token 'foo', remaining text ' bar ' # ./unqote ' "foo bar ' Input ' "foo bar ' ret=-1 Aborting. # ./unqote ' "foo"" bar ' Input ' "foo"" bar ' ret=-1 Aborting. # ./unqote ' "foo"""' Input ' "foo"""' ret=-1 Aborting. # ./unqote ' "foo"bar' Input ' "foo"bar' ret=-1 Aborting.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
--- /dev/null 2013-08-19 11:01:03.588011829 +0200 +++ helpers.c 2013-09-20 03:31:28.000000000 +0200 @@ -0,0 +1,86 @@ +/* + * This file is part of the flashrom project. + * + * Copyright (C) 2013 Carl-Daniel Hailfinger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> + +#include <string.h> +#include "flash.h" + +/** Parse a \em possibly quoted string. + * + * The function expects \a startp to point to the string to be parsed without a prefix. If the + * string starts with a quotation mark it looks for the second one and removes both in-place, if not then it + * looks for the first character contained in \a delimiters and ends the wanted string there by replacing the + * character with '\0'. If \a delimiters is NULL an empty set is assumed, hence the whole start string equals + * the wanted string. + * + * After returning \a startp will point to a string that is either the first quoted part of the + * original string with the quotation marks removed, or the first word of that string before any delimiter. + * If \a endp is not NULL it will be set to point to the first character after the parsed string, or to the + * '\0' at the end of the string pointed to by \a startp if there are no more subsequent characters. + * + * @param start Points to the input string. + * @param end Is set to the first char following the input string. + * @return -1 if a quotation mark is not matched, + * 0 on success, + * 1 if the parsed string is empty. + */ +int unquote_string(char **startp, char **endp, const char *delimiters) +{ + size_t len; + + /* Strip leading delimiters. */ + *startp += strspn(*startp, delimiters); + + /* Check for empty string. */ + if (strlen(*startp) == 0) { + *endp = *startp; + return 1; + } + + /* Handle unqoted string. */ + if (**startp != '"') { + len = strcspn(*startp, delimiters); + /* Check for quotes in the middle. */ + if (strcspn(*startp, """) < len) + return -1; + /* Check if there is anything after this string. */ + if (strlen(*startp + len) > 0) + *endp = *startp + len + 1; + else + *endp = *startp + len; + (*startp)[len] = '\0'; + return 0; + } + + /* Handle quoted string. */ + (*startp)++; + len = strcspn(*startp, """); + /* Catch unclosed quotes and closing quotes not followed immediately by delimiter or end-of-string. */ + if ((*startp)[len] == '\0' || strcspn(*startp + len + 1, delimiters) > 0) + return -1; + /* Overwrite closing quote. */ + (*startp)[len] = '\0'; + *endp = *startp + len + 1; + /* Check for empty string. */ + if (strlen(*startp) == 0) + return 1; + return 0; +}
On Fri, 20 Sep 2013 03:48:15 +0200 Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net wrote:
Am 19.09.2013 01:29 schrieb Stefan Tauner:
Add an optional sub-parameter to the -i parameter to allow building the image to be written from multiple files. This will also allow regions to be read from flash and written to separate image files in a later patch. Existing function read_buf_from_file() is refined and reused to read the file.
Document the whole layout handling including the new features a bit better and refine wording regarding files, images, layouts and regions as discussed.
Also, finally introduce helpers.c: a module containing utility functions to be shared which do not fit elsewhere. The first function to be added is unquote_string() which is used throughout this patch.
Abort for non-write operations if a layout file is given.
based on chromiumos' d0ea9ed71e7f86bb8e8db2ca7c32a96de25343d8 Signed-off-by: David Hendricks dhendrix@chromium.org Signed-off-by: Stefan Tauner stefan.tauner@student.tuwien.ac.at
Counter-proposal for helpers.c, addressing all my complaints:
- The first token must either be completely enclosed in double quotes or
not quoted at all.
- There must be end-of-string or delimiter after the closing quote.
- The first token shall not have stray quotes anywhere.
Function description is yours.
Tested with the following inputs (input/output in single quotes):
# ./unqote '' Input '' ret=1 First token '', remaining text '' # ./unqote '""' Input '""' ret=1 First token '', remaining text '' # ./unqote 'foo' Input 'foo' ret=0 First token 'foo', remaining text '' # ./unqote ' foo ' Input ' foo ' ret=0 First token 'foo', remaining text '' # ./unqote '"foo"' Input '"foo"' ret=0 First token 'foo', remaining text '' # ./unqote ' "foo" ' Input ' "foo" ' ret=0 First token 'foo', remaining text ' ' # ./unqote ' " foo " ' Input ' " foo " ' ret=0 First token ' foo ', remaining text ' ' # ./unqote 'foo bar' Input 'foo bar' ret=0 First token 'foo', remaining text 'bar' # ./unqote ' foo bar ' Input ' foo bar ' ret=0 First token 'foo', remaining text ' bar ' # ./unqote ' "foo" bar ' Input ' "foo" bar ' ret=0 First token 'foo', remaining text ' bar ' # ./unqote ' "foo bar ' Input ' "foo bar ' ret=-1 Aborting. # ./unqote ' "foo"" bar ' Input ' "foo"" bar ' ret=-1 Aborting. # ./unqote ' "foo"""' Input ' "foo"""' ret=-1 Aborting. # ./unqote ' "foo"bar' Input ' "foo"bar' ret=-1 Aborting.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
--- /dev/null 2013-08-19 11:01:03.588011829 +0200 +++ helpers.c 2013-09-20 03:31:28.000000000 +0200 @@ -0,0 +1,86 @@ +/*
- This file is part of the flashrom project.
- Copyright (C) 2013 Carl-Daniel Hailfinger
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
+#include <string.h>
+#include <string.h> +#include "flash.h"
+/** Parse a \em possibly quoted string.
- The function expects \a startp to point to the string to be parsed without a prefix. If the
- string starts with a quotation mark it looks for the second one and removes both in-place, if not then it
- looks for the first character contained in \a delimiters and ends the wanted string there by replacing the
- character with '\0'. If \a delimiters is NULL an empty set is assumed, hence the whole start string equals
- the wanted string.
- After returning \a startp will point to a string that is either the first quoted part of the
- original string with the quotation marks removed, or the first word of that string before any delimiter.
- If \a endp is not NULL it will be set to point to the first character after the parsed string, or to the
- '\0' at the end of the string pointed to by \a startp if there are no more subsequent characters.
- @param start Points to the input string.
- @param end Is set to the first char following the input string.
- @return -1 if a quotation mark is not matched,
0 on success,
1 if the parsed string is empty.
- */
+int unquote_string(char **startp, char **endp, const char *delimiters) +{
- size_t len;
- /* Strip leading delimiters. */
- *startp += strspn(*startp, delimiters);
Above explodes for delimiters == NULL. It makes it also completely useless when the caller needs to track the number of fields (defined by the delimiters).
- /* Check for empty string. */
- if (strlen(*startp) == 0) {
*endp = *startp;
return 1;
- }
- /* Handle unqoted string. */
- if (**startp != '"') {
len = strcspn(*startp, delimiters);
/* Check for quotes in the middle. */
if (strcspn(*startp, "\"") < len)
return -1;
/* Check if there is anything after this string. */
if (strlen(*startp + len) > 0)
*endp = *startp + len + 1;
Explodes for endp == NULL
else
*endp = *startp + len;
Likewise.
(*startp)[len] = '\0';
return 0;
- }
- /* Handle quoted string. */
- (*startp)++;
- len = strcspn(*startp, """);
- /* Catch unclosed quotes and closing quotes not followed immediately by delimiter or end-of-string. */
- if ((*startp)[len] == '\0' || strcspn(*startp + len + 1, delimiters) > 0)
This will trigger on """ for example (and explode on delimiters == NULL).
return -1;
- /* Overwrite closing quote. */
- (*startp)[len] = '\0';
- *endp = *startp + len + 1;
- /* Check for empty string. */
- if (strlen(*startp) == 0)
return 1;
- return 0;
+}
All in all I think it does way more than a generic unquoting function to be used in parsers should do.
Am 21.09.2013 18:17 schrieb Stefan Tauner:
On Fri, 20 Sep 2013 03:48:15 +0200 Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net wrote:
Am 19.09.2013 01:29 schrieb Stefan Tauner:
Add an optional sub-parameter to the -i parameter to allow building the image to be written from multiple files. This will also allow regions to be read from flash and written to separate image files in a later patch. Existing function read_buf_from_file() is refined and reused to read the file.
Document the whole layout handling including the new features a bit better and refine wording regarding files, images, layouts and regions as discussed.
Also, finally introduce helpers.c: a module containing utility functions to be shared which do not fit elsewhere. The first function to be added is unquote_string() which is used throughout this patch.
Abort for non-write operations if a layout file is given.
based on chromiumos' d0ea9ed71e7f86bb8e8db2ca7c32a96de25343d8 Signed-off-by: David Hendricks dhendrix@chromium.org Signed-off-by: Stefan Tauner stefan.tauner@student.tuwien.ac.at
Counter-proposal for helpers.c, addressing all my complaints:
- The first token must either be completely enclosed in double quotes or
not quoted at all.
- There must be end-of-string or delimiter after the closing quote.
- The first token shall not have stray quotes anywhere.
Function description is yours.
Tested with the following inputs (input/output in single quotes):
# ./unqote '' Input '' ret=1 First token '', remaining text '' # ./unqote '""' Input '""' ret=1 First token '', remaining text '' # ./unqote 'foo' Input 'foo' ret=0 First token 'foo', remaining text '' # ./unqote ' foo ' Input ' foo ' ret=0 First token 'foo', remaining text '' # ./unqote '"foo"' Input '"foo"' ret=0 First token 'foo', remaining text '' # ./unqote ' "foo" ' Input ' "foo" ' ret=0 First token 'foo', remaining text ' ' # ./unqote ' " foo " ' Input ' " foo " ' ret=0 First token ' foo ', remaining text ' ' # ./unqote 'foo bar' Input 'foo bar' ret=0 First token 'foo', remaining text 'bar' # ./unqote ' foo bar ' Input ' foo bar ' ret=0 First token 'foo', remaining text ' bar ' # ./unqote ' "foo" bar ' Input ' "foo" bar ' ret=0 First token 'foo', remaining text ' bar ' # ./unqote ' "foo bar ' Input ' "foo bar ' ret=-1 Aborting. # ./unqote ' "foo"" bar ' Input ' "foo"" bar ' ret=-1 Aborting. # ./unqote ' "foo"""' Input ' "foo"""' ret=-1 Aborting. # ./unqote ' "foo"bar' Input ' "foo"bar' ret=-1 Aborting.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
--- /dev/null 2013-08-19 11:01:03.588011829 +0200 +++ helpers.c 2013-09-20 03:31:28.000000000 +0200 @@ -0,0 +1,86 @@ +/*
- This file is part of the flashrom project.
- Copyright (C) 2013 Carl-Daniel Hailfinger
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
+#include <string.h>
+#include <string.h> +#include "flash.h"
+/** Parse a \em possibly quoted string.
- The function expects \a startp to point to the string to be parsed without a prefix. If the
- string starts with a quotation mark it looks for the second one and removes both in-place, if not then it
- looks for the first character contained in \a delimiters and ends the wanted string there by replacing the
- character with '\0'. If \a delimiters is NULL an empty set is assumed, hence the whole start string equals
- the wanted string.
Let me reword this so it fits the way my variant works.
The behaviour is somewhat similar to a strtok_r variant with quoting support. A quoted token has to have exactly one doublequote each at the beginning and the end and has to be followed by end-of-string ('\0') or a delimiter. \a *startp points to the string which should be parsed. If the string does not start with a quote, it is terminated at the first character contained in \a delimiters by replacing it with '\0'. If the string starts with a quote, it is terminated at the second (terminating) quote by replacing it with '\0'.
- After returning \a startp will point to a string that is either the first quoted part of the
- original string with the quotation marks removed, or the first word of that string before any delimiter.
- If \a endp is not NULL it will be set to point to the first character after the parsed string, or to the
- '\0' at the end of the string pointed to by \a startp if there are no more subsequent characters.
Your variant and my variant don't really conform to this description in case of delimiters after the closing quote. Will fix mine.
- @param start Points to the input string.
- @param end Is set to the first char following the input string.
- @return -1 if a quotation mark is not matched,
0 on success,
1 if the parsed string is empty.
- */
+int unquote_string(char **startp, char **endp, const char *delimiters) +{
- size_t len;
- /* Strip leading delimiters. */
- *startp += strspn(*startp, delimiters);
Above explodes for delimiters == NULL. It makes it also completely useless when the caller needs to track the number of fields (defined by the delimiters).
It would be useful as a tokenizer for layout files, though. I can make this optional or kill it, but the handling of arbitrary amounts of whitespace between start:end and regionname needs to be done somewhere.
- /* Check for empty string. */
- if (strlen(*startp) == 0) {
*endp = *startp;
return 1;
- }
- /* Handle unqoted string. */
- if (**startp != '"') {
len = strcspn(*startp, delimiters);
/* Check for quotes in the middle. */
if (strcspn(*startp, "\"") < len)
return -1;
/* Check if there is anything after this string. */
if (strlen(*startp + len) > 0)
*endp = *startp + len + 1;
Explodes for endp == NULL
Thanks for catching that.
else
*endp = *startp + len;
Likewise.
(*startp)[len] = '\0';
return 0;
- }
- /* Handle quoted string. */
- (*startp)++;
- len = strcspn(*startp, """);
- /* Catch unclosed quotes and closing quotes not followed immediately by delimiter or end-of-string. */
- if ((*startp)[len] == '\0' || strcspn(*startp + len + 1, delimiters) > 0)
This will trigger on """ for example
That is intentional. " is not a delimiter, and that means we are not allowed to break up a string there. As such, """ is as invalid as ""foo.
(and explode on delimiters == NULL).
return -1;
- /* Overwrite closing quote. */
- (*startp)[len] = '\0';
- *endp = *startp + len + 1;
- /* Check for empty string. */
- if (strlen(*startp) == 0)
return 1;
- return 0;
+}
All in all I think it does way more than a generic unquoting function to be used in parsers should do.
Yes, my variant is a tokenizer with quoting support. Your variant is a tokenizer which only cares about the supplied delimiters if there is no quoting and implicitly replaces the specified delimiters with " if there is quoting.
New unqote as standalone program for easier testing. It now should handle all the fun stuff you can throw at it. Suggested example inputs (single quotes are there to protect against the shell): ' foo ' ' "foo" ' 'foo ' '"foo" ' 'foo' '"foo"' 'foo bar' '"foo bar"' '' '""' and some inputs which should throw an error: '"' '"""' '"foo' 'foo"' '"foo"bar'
--- /dev/null 2013-08-19 11:01:03.588011829 +0200 +++ unquote.c 2013-09-22 02:39:31.000000000 +0200 @@ -0,0 +1,83 @@ +#include <stdio.h> +#include <string.h> +#include <stdbool.h> + +int unquote_string(char **startp, char **endp, const char *delimiters, bool strip_leading_delimiters) +{ + size_t len; + + if (delimiters == NULL) + delimiters = ""; + + if (strip_leading_delimiters) { + /* Strip leading delimiters. */ + *startp += strspn(*startp, delimiters); + } + + /* Check for empty string. */ + if (strlen(*startp) == 0) { + if (endp != NULL) + *endp = *startp; + return 1; + } + + /* Handle unqoted string. */ + if (**startp != '"') { + len = strcspn(*startp, delimiters); + /* Check for quotes in the middle. */ + if (strcspn(*startp, """) < len) + return -1; + if (endp != NULL) { + /* Check if there is anything after this string. */ + if (strlen(*startp + len) > 0) + *endp = *startp + len + 1; + else + *endp = *startp + len; + } + (*startp)[len] = '\0'; + return 0; + } + + /* Handle quoted string. */ + (*startp)++; + len = strcspn(*startp, """); + /* Catch unclosed quotes and closing quotes not followed immediately by delimiter or end-of-string. */ + if ((*startp)[len] == '\0' || strcspn(*startp + len + 1, delimiters) > 0) + return -1; + /* Overwrite closing quote. */ + (*startp)[len] = '\0'; + if (endp != NULL) { + *endp = *startp + len + 1; + if (strspn(*endp, delimiters) > 0) + (*endp)++; + } + /* Check for empty string. */ + if (strlen(*startp) == 0) + return 1; + return 0; +} + +const char *delim = " \t\r\n"; + +int main(int argc, const char* argv[]) +{ + int ret; + char *oldstartp; + char *startp; + char *endp; + + if (argc < 2) + return 1; + oldstartp = startp = strdup(argv[1]); + printf ("Input '%s'\n", startp); + ret = unquote_string(&startp, &endp, delim, 0); + printf("ret=%i\n", ret); + if (ret < 0) { + printf("Aborting.\n"); + return 1; + } + + //printf("startp moved %i bytes, endp is %i bytes from oldstartp\n", startp-oldstartp, endp-oldstartp); + printf("First token '%s', remaining text '%s'\n", startp, endp); + return 0; +}
Regards, Carl-Daniel