]> granicus.if.org Git - postgresql/commitdiff
Skip text->binary conversion of unnecessary columns in contrib/file_fdw.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 12 Jul 2012 20:26:59 +0000 (16:26 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 12 Jul 2012 20:26:59 +0000 (16:26 -0400)
When reading from a text- or CSV-format file in file_fdw, the datatype
input routines can consume a significant fraction of the runtime.
Often, the query does not need all the columns, so we can get a useful
speed boost by skipping I/O conversion for unnecessary columns.

To support this, add a "convert_selectively" option to the core COPY code.
This is undocumented and not accessible from SQL (for now, anyway).

Etsuro Fujita, reviewed by KaiGai Kohei

contrib/file_fdw/file_fdw.c
src/backend/commands/copy.c

index e3b9223b3ee7a102efef7e1c18192eb64494864b..7c7fedfcdbbcebd608a860754533d72c5c13bd76 100644 (file)
@@ -16,6 +16,7 @@
 #include <unistd.h>
 
 #include "access/reloptions.h"
+#include "access/sysattr.h"
 #include "catalog/pg_foreign_table.h"
 #include "commands/copy.h"
 #include "commands/defrem.h"
@@ -29,6 +30,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
+#include "optimizer/var.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
@@ -136,6 +138,9 @@ static bool is_valid_option(const char *option, Oid context);
 static void fileGetOptions(Oid foreigntableid,
                           char **filename, List **other_options);
 static List *get_file_fdw_attribute_options(Oid relid);
+static bool check_selective_binary_conversion(RelOptInfo *baserel,
+                                                                                         Oid foreigntableid,
+                                                                                         List **columns);
 static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
                          FileFdwPlanState *fdw_private);
 static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
@@ -457,12 +462,25 @@ fileGetForeignPaths(PlannerInfo *root,
        FileFdwPlanState *fdw_private = (FileFdwPlanState *) baserel->fdw_private;
        Cost            startup_cost;
        Cost            total_cost;
+       List       *columns;
+       List       *coptions = NIL;
+
+       /* Decide whether to selectively perform binary conversion */
+       if (check_selective_binary_conversion(baserel,
+                                                                                 foreigntableid,
+                                                                                 &columns))
+               coptions = list_make1(makeDefElem("convert_selectively",
+                                                                                 (Node *) columns));
 
        /* Estimate costs */
        estimate_costs(root, baserel, fdw_private,
                                   &startup_cost, &total_cost);
 
-       /* Create a ForeignPath node and add it as only possible path */
+       /*
+        * Create a ForeignPath node and add it as only possible path.  We use the
+        * fdw_private list of the path to carry the convert_selectively option;
+        * it will be propagated into the fdw_private list of the Plan node.
+        */
        add_path(baserel, (Path *)
                         create_foreignscan_path(root, baserel,
                                                                         baserel->rows,
@@ -470,7 +488,7 @@ fileGetForeignPaths(PlannerInfo *root,
                                                                         total_cost,
                                                                         NIL,           /* no pathkeys */
                                                                         NULL,          /* no outer rel either */
-                                                                        NIL));         /* no fdw_private data */
+                                                                        coptions));
 
        /*
         * If data file was sorted, and we knew it somehow, we could insert
@@ -507,7 +525,7 @@ fileGetForeignPlan(PlannerInfo *root,
                                                        scan_clauses,
                                                        scan_relid,
                                                        NIL,    /* no expressions to evaluate */
-                                                       NIL);           /* no private state either */
+                                                       best_path->fdw_private);
 }
 
 /*
@@ -544,6 +562,7 @@ fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
 static void
 fileBeginForeignScan(ForeignScanState *node, int eflags)
 {
+       ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
        char       *filename;
        List       *options;
        CopyState       cstate;
@@ -559,6 +578,9 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
        fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
                                   &filename, &options);
 
+       /* Add any options from the plan (currently only convert_selectively) */
+       options = list_concat(options, plan->fdw_private);
+
        /*
         * Create CopyState from FDW options.  We always acquire all columns, so
         * as to match the expected ScanTupleSlot signature.
@@ -694,6 +716,125 @@ fileAnalyzeForeignTable(Relation relation,
        return true;
 }
 
+/*
+ * check_selective_binary_conversion
+ *
+ * Check to see if it's useful to convert only a subset of the file's columns
+ * to binary.  If so, construct a list of the column names to be converted,
+ * return that at *columns, and return TRUE.  (Note that it's possible to
+ * determine that no columns need be converted, for instance with a COUNT(*)
+ * query.  So we can't use returning a NIL list to indicate failure.)
+ */
+static bool
+check_selective_binary_conversion(RelOptInfo *baserel,
+                                                                 Oid foreigntableid,
+                                                                 List **columns)
+{
+       ForeignTable *table;
+       ListCell   *lc;
+       Relation        rel;
+       TupleDesc       tupleDesc;
+       AttrNumber      attnum;
+       Bitmapset  *attrs_used = NULL;
+       bool            has_wholerow = false;
+       int                     numattrs;
+       int                     i;
+
+       *columns = NIL;                         /* default result */
+
+       /*
+        * Check format of the file.  If binary format, this is irrelevant.
+        */
+       table = GetForeignTable(foreigntableid);
+       foreach(lc, table->options)
+       {
+               DefElem    *def = (DefElem *) lfirst(lc);
+
+               if (strcmp(def->defname, "format") == 0)
+               {
+                       char       *format = defGetString(def);
+
+                       if (strcmp(format, "binary") == 0)
+                               return false;
+                       break;
+               }
+       }
+
+       /* Collect all the attributes needed for joins or final output. */
+       pull_varattnos((Node *) baserel->reltargetlist, baserel->relid,
+                                  &attrs_used);
+
+       /* Add all the attributes used by restriction clauses. */
+       foreach(lc, baserel->baserestrictinfo)
+       {
+               RestrictInfo   *rinfo = (RestrictInfo *) lfirst(lc);
+
+               pull_varattnos((Node *) rinfo->clause, baserel->relid,
+                                          &attrs_used);
+       }
+
+       /* Convert attribute numbers to column names. */
+       rel = heap_open(foreigntableid, AccessShareLock);
+       tupleDesc = RelationGetDescr(rel);
+
+       while ((attnum = bms_first_member(attrs_used)) >= 0)
+       {
+               /* Adjust for system attributes. */
+               attnum += FirstLowInvalidHeapAttributeNumber;
+
+               if (attnum == 0)
+               {
+                       has_wholerow = true;
+                       break;
+               }
+
+               /* Ignore system attributes. */
+               if (attnum < 0)
+                       continue;
+
+               /* Get user attributes. */
+               if (attnum > 0)
+               {
+                       Form_pg_attribute attr = tupleDesc->attrs[attnum - 1];
+                       char       *attname = NameStr(attr->attname);
+
+                       /* Skip dropped attributes (probably shouldn't see any here). */
+                       if (attr->attisdropped)
+                               continue;
+                       *columns = lappend(*columns, makeString(pstrdup(attname)));
+               }
+       }
+
+       /* Count non-dropped user attributes while we have the tupdesc. */
+       numattrs = 0;
+       for (i = 0; i < tupleDesc->natts; i++)
+       {
+               Form_pg_attribute attr = tupleDesc->attrs[i];
+
+               if (attr->attisdropped)
+                       continue;
+               numattrs++;
+       }
+
+       heap_close(rel, AccessShareLock);
+
+       /* If there's a whole-row reference, fail: we need all the columns. */
+       if (has_wholerow)
+       {
+               *columns = NIL;
+               return false;
+       }
+
+       /* If all the user attributes are needed, fail. */
+       if (numattrs == list_length(*columns))
+       {
+               *columns = NIL;
+               return false;
+       }
+
+       return true;
+}
+
 /*
  * Estimate size of a foreign table.
  *
index 98bcb2fcf3370c72b0f0a7c0df76ebe4512e9ab0..e8a125b1a584180400175d103fbb9e2bae36d553 100644 (file)
@@ -121,6 +121,9 @@ typedef struct CopyStateData
        bool       *force_quote_flags;          /* per-column CSV FQ flags */
        List       *force_notnull;      /* list of column names */
        bool       *force_notnull_flags;        /* per-column CSV FNN flags */
+       bool            convert_selectively;    /* do selective binary conversion? */
+       List       *convert_select;     /* list of column names (can be NIL) */
+       bool       *convert_select_flags;       /* per-column CSV/TEXT CS flags */
 
        /* these are just for error messages, see CopyFromErrorCallback */
        const char *cur_relname;        /* table name for error messages */
@@ -961,6 +964,26 @@ ProcessCopyOptions(CopyState cstate,
                                                 errmsg("argument to option \"%s\" must be a list of column names",
                                                                defel->defname)));
                }
+               else if (strcmp(defel->defname, "convert_selectively") == 0)
+               {
+                       /*
+                        * Undocumented, not-accessible-from-SQL option: convert only
+                        * the named columns to binary form, storing the rest as NULLs.
+                        * It's allowed for the column list to be NIL.
+                        */
+                       if (cstate->convert_selectively)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("conflicting or redundant options")));
+                       cstate->convert_selectively = true;
+                       if (defel->arg == NULL || IsA(defel->arg, List))
+                               cstate->convert_select = (List *) defel->arg;
+                       else
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("argument to option \"%s\" must be a list of column names",
+                                                               defel->defname)));
+               }
                else if (strcmp(defel->defname, "encoding") == 0)
                {
                        if (cstate->file_encoding >= 0)
@@ -1307,6 +1330,29 @@ BeginCopy(bool is_from,
                }
        }
 
+       /* Convert convert_selectively name list to per-column flags */
+       if (cstate->convert_selectively)
+       {
+               List       *attnums;
+               ListCell   *cur;
+
+               cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+
+               attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->convert_select);
+
+               foreach(cur, attnums)
+               {
+                       int                     attnum = lfirst_int(cur);
+
+                       if (!list_member_int(cstate->attnumlist, attnum))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                                errmsg_internal("selected column \"%s\" not referenced by COPY",
+                                                                                NameStr(tupDesc->attrs[attnum - 1]->attname))));
+                       cstate->convert_select_flags[attnum - 1] = true;
+               }
+       }
+
        /* Use client encoding when ENCODING option is not specified. */
        if (cstate->file_encoding < 0)
                cstate->file_encoding = pg_get_client_encoding();
@@ -2565,6 +2611,13 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
                                                                NameStr(attr[m]->attname))));
                        string = field_strings[fieldno++];
 
+                       if (cstate->convert_select_flags &&
+                               !cstate->convert_select_flags[m])
+                       {
+                               /* ignore input field, leaving column as NULL */
+                               continue;
+                       }
+
                        if (cstate->csv_mode && string == NULL &&
                                cstate->force_notnull_flags[m])
                        {