From: Andrew Dunstan Date: Sat, 8 Mar 2008 01:16:26 +0000 (+0000) Subject: Improve efficiency of attribute scanning in CopyReadAttributesCSV. X-Git-Tag: REL8_4_BETA1~1866 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=95c238d941216feca4116359f2b4632fb9a9081d;p=postgresql Improve efficiency of attribute scanning in CopyReadAttributesCSV. The loop is split into two parts, inside quotes, and outside quotes, saving some instructions in both parts. Heikki Linnakangas --- diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 2fb2a31271..194f3b380a 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.295 2008/01/01 19:45:48 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.296 2008/03/08 01:16:26 adunstan Exp $ * *------------------------------------------------------------------------- */ @@ -2913,7 +2913,6 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals) for (;;) { bool found_delim = false; - bool in_quote = false; bool saw_quote = false; char *start_ptr; char *end_ptr; @@ -2929,72 +2928,87 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals) start_ptr = cur_ptr; fieldvals[fieldno] = output_ptr; - /* Scan data for field */ + /* Scan data for field, + * + * The loop starts in "not quote" mode and then toggles between + * that and "in quote" mode. + * The loop exits normally if it is in "not quote" mode and a + * delimiter or line end is seen. + */ for (;;) { char c; - end_ptr = cur_ptr; - if (cur_ptr >= line_end_ptr) - break; - c = *cur_ptr++; - /* unquoted field delimiter */ - if (c == delimc && !in_quote) - { - found_delim = true; - break; - } - /* start of quoted field (or part of field) */ - if (c == quotec && !in_quote) + /* Not in quote */ + for (;;) { - saw_quote = true; - in_quote = true; - continue; + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + goto endfield; + c = *cur_ptr++; + /* unquoted field delimiter */ + if (c == delimc) + { + found_delim = true; + goto endfield; + } + /* start of quoted field (or part of field) */ + if (c == quotec) + { + saw_quote = true; + break; + } + /* Add c to output string */ + *output_ptr++ = c; } - /* escape within a quoted field */ - if (c == escapec && in_quote) + + /* In quote */ + for (;;) { - /* - * peek at the next char if available, and escape it if it is - * an escape char or a quote char - */ - if (cur_ptr < line_end_ptr) - { - char nextc = *cur_ptr; + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unterminated CSV quoted field"))); + + c = *cur_ptr++; - if (nextc == escapec || nextc == quotec) + /* escape within a quoted field */ + if (c == escapec) + { + /* + * peek at the next char if available, and escape it if it is + * an escape char or a quote char + */ + if (cur_ptr < line_end_ptr) { - *output_ptr++ = nextc; - cur_ptr++; - continue; + char nextc = *cur_ptr; + + if (nextc == escapec || nextc == quotec) + { + *output_ptr++ = nextc; + cur_ptr++; + continue; + } } } - } + /* + * end of quoted field. Must do this test after testing for escape + * in case quote char and escape char are the same (which is the + * common case). + */ + if (c == quotec) + break; - /* - * end of quoted field. Must do this test after testing for escape - * in case quote char and escape char are the same (which is the - * common case). - */ - if (c == quotec && in_quote) - { - in_quote = false; - continue; + /* Add c to output string */ + *output_ptr++ = c; } - - /* Add c to output string */ - *output_ptr++ = c; } + endfield: /* Terminate attribute value in output area */ *output_ptr++ = '\0'; - /* Shouldn't still be in quote mode */ - if (in_quote) - ereport(ERROR, - (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), - errmsg("unterminated CSV quoted field"))); - /* Check whether raw input matched null marker */ input_len = end_ptr - start_ptr; if (!saw_quote && input_len == cstate->null_print_len &&