]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/chunk.c
Final cleanup.
[postgresql] / src / backend / utils / adt / chunk.c
1 /*-------------------------------------------------------------------------
2  *
3  * chunk.c
4  *
5  * Copyright (c) 1994, Regents of the University of California
6  *
7  *
8  * IDENTIFICATION
9  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.22 1999/07/16 05:00:05 momjian Exp $
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <fcntl.h>
16
17 #include "postgres.h"
18 #ifndef HAVE_MEMMOVE
19 #include "regex/utils.h"
20 #else
21 #include <string.h>
22 #endif
23
24 #include "catalog/pg_type.h"
25 #include "fmgr.h"
26 #include "libpq/be-fsstubs.h"
27 #include "libpq/libpq-fs.h"
28 #include "optimizer/internal.h"
29 #include "utils/array.h"
30 #include "utils/memutils.h"
31
32 #define INFTY 500000000
33 #define MANY  10000
34 #define MAXPAT 20
35 #define quot_ceil(x,y)  (((x)+(y)-1)/(y))
36 #define min(x,y)                (((x) < (y))? (x) : (y))
37 #define max(x,y)                (((x) > (y))? (x) : (y))
38
39 static CHUNK_INFO cInfo;
40
41 /* non-export function prototypes */
42 static int _FindBestChunk(int size, int *dmax, int *dbest, int dim,
43                            int A[MAXPAT][MAXDIM + 1], int N);
44 static int      get_next(int *d, int k, int C, int *dmax);
45 static void initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk);
46
47 #ifdef LOARRAY
48 static void _ConvertToChunkFile(int n, int baseSize, int *dim, int *C,
49                                         int srcfd, int destfd);
50 static void read_chunk(int *chunk_no, int *C, char *a_chunk, int srcfd,
51                    int n, int baseSize, int *PX, int *dist);
52 static int      write_chunk(struct varlena * a_chunk, int ofile);
53 static int      seek_and_read(int pos, int size, char *buff, int fp, int from);
54
55 #endif
56 static int GetChunkSize(FILE *fd, int ndim, int dim[MAXDIM], int baseSize,
57                          int d[MAXDIM]);
58
59 /*------------------------------------------------------------------------
60  * _ChunkArray ---
61  *         converts an input array to chunked format using the information
62  *         provided by the access pattern.
63  * Results:
64  *         creates a new file that stores the chunked array and returns
65  *         information about the chunked file
66  *-----------------------------------------------------------------------
67  */
68 char *
69 _ChunkArray(int fd,
70                         FILE *afd,
71                         int ndim,
72                         int *dim,
73                         int baseSize,
74                         int *nbytes,
75                         char *chunkfile)
76 {
77 #ifdef LOARRAY
78         int                     cfd = 0;
79
80 #endif
81         int                     chunk[MAXDIM],
82                                 csize;
83         bool            reorgFlag;
84
85         if (chunkfile == NULL)
86                 reorgFlag = true;
87         else
88                 reorgFlag = false;
89
90 #ifdef LOARRAY
91         if (reorgFlag)
92                 /* create new LO for chunked file */
93                 chunkfile = _array_newLO(&cfd, fileFlag);
94         else
95                 cfd = LOopen(chunkfile, O_RDONLY);
96         if (cfd < 0)
97                 elog(ERROR, "Unable to open chunk file");
98 #endif
99
100         strcpy(cInfo.lo_name, chunkfile);
101
102         /* find chunk size */
103         csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
104
105 #ifdef LOARRAY
106         if (reorgFlag)
107                 /* copy data from input file to chunked file */
108                 _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
109 #endif
110
111         initialize_info(&cInfo, ndim, dim, chunk);
112         *nbytes = sizeof(CHUNK_INFO);
113         return (char *) &cInfo;
114 }
115
116 /*--------------------------------------------------------------------------
117  * GetChunkSize
118  *                given an access pattern and array dimensionality etc, this program
119  *              returns the dimensions of the chunk in "d"
120  *-----------------------------------------------------------------------
121  */
122 static int
123 GetChunkSize(FILE *fd,
124                          int ndim,
125                          int dim[MAXDIM],
126                          int baseSize,
127                          int d[MAXDIM])
128 {
129         int                     N,
130                                 i,
131                                 j,
132                                 csize;
133         int                     A[MAXPAT][MAXDIM + 1],
134                                 dmax[MAXDIM];
135
136         /*
137          * ----------- read input ------------
138          */
139         fscanf(fd, "%d", &N);
140         if (N > MAXPAT)
141                 elog(ERROR, "array_in: too many access pattern elements");
142         for (i = 0; i < N; i++)
143                 for (j = 0; j < ndim + 1; j++)
144                         if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
145                                 elog(ERROR, "array_in: bad access pattern input");
146
147         /*
148          * estimate chunk size
149          */
150         for (i = 0; i < ndim; i++)
151                 for (j = 0, dmax[i] = 1; j < N; j++)
152                         if (dmax[i] < A[j][i])
153                                 dmax[i] = A[j][i];
154         csize = BLCKSZ / baseSize;
155
156         _FindBestChunk(csize, dmax, d, ndim, A, N);
157
158         return csize;
159 }
160
161 /*-------------------------------------------------------------------------
162  * _FindBestChunk
163  *                This routine does most of the number crunching to compute the
164  *                optimal chunk shape.
165  * Called by GetChunkSize
166  *------------------------------------------------------------------------
167  */
168 static int
169 _FindBestChunk(int size,
170                            int *dmax,
171                            int *dbest,
172                            int dim,
173                            int A[MAXPAT][MAXDIM + 1],
174                            int N)
175 {
176         int                     d[MAXDIM];
177         int                     tc,
178                                 mintc = INFTY;
179
180         d[0] = 0;
181         mintc = INFTY;
182         while (get_next(d, dim, size, dmax))
183         {
184
185                 /*
186                  * compute the number of page fetches for a given chunk size (*d)
187                  * and access pattern (**A)
188                  */
189                 int                     i,
190                                         j,
191                                         nc;
192
193                 for (i = 0, tc = 0; i < N; i++)
194                 {
195                         for (j = 0, nc = 1; j < dim; j++)
196                                 nc *= quot_ceil(A[i][j], d[j]);
197                         nc *= A[i][dim];
198                         tc += nc;
199                 }
200
201                 /*
202                  * tc holds the total number of page fetches
203                  */
204                 if (mintc >= tc)
205                 {
206                         mintc = tc;
207                         for (j = 0; j < dim; dbest[j] = d[j], j++)
208                                 ;
209                 }
210         }
211         return mintc;
212 }
213
214 /*----------------------------------------------------------------------
215  * get_next
216  *       Called by _GetBestChunk to get the next tuple in the lexicographic order
217  *---------------------------------------------------------------------
218  */
219 static int
220 get_next(int *d, int k, int C, int *dmax)
221 {
222         int                     i,
223                                 j,
224                                 temp;
225
226         if (!d[0])
227         {
228                 temp = C;
229                 for (j = k - 1; j >= 0; j--)
230                 {
231                         d[j] = min(temp, dmax[j]);
232                         temp = max(1, temp / d[j]);
233                 }
234                 return 1;
235         }
236
237         for (j = 0, temp = 1; j < k; j++)
238                 temp *= d[j];
239
240         for (i = k - 1; i >= 0; i--)
241         {
242                 temp = temp / d[i];
243                 if (((temp * (d[i] + 1)) < C) && (d[i] + 1 <= dmax[i]))
244                         break;
245         }
246         if (i < 0)
247                 return 0;
248
249         d[i]++;
250         j = C / temp;
251         d[i] = min(dmax[i], j / (j / d[i]));
252         temp = temp * d[i];
253         temp = C / temp;
254
255         for (j = k - 1; j > i; j--)
256         {
257                 d[j] = min(temp, dmax[j]);
258                 temp = max(1, temp / d[j]);
259         }
260         return 1;
261 }
262
263 #ifdef LOARRAY
264 static char a_chunk[BLCKSZ + VARHDRSZ]; /* VARHDRSZ since a_chunk is in
265                                                                                  * varlena format */
266
267 #endif
268
269 static void
270 initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk)
271 {
272         int                     i;
273
274         for (i = 0; i < ndim; i++)
275                 A->C[i] = chunk[i];
276 }
277
278 /*--------------------------------------------------------------------------
279  * Procedure reorganize_data():
280  *        This procedure reads the input multidimensional array that is organised
281  *        in the order specified by array "X" and breaks it up into chunks of
282  *        dimensions specified in "C".
283  *
284  *        This is a very slow process, since reading and writing of LARGE files
285  *        may be involved.
286  *
287  *-------------------------------------------------------------------------
288  */
289 #ifdef LOARRAY
290 static void
291 _ConvertToChunkFile(int n,
292                                         int baseSize,
293                                         int *dim,
294                                         int *C,
295                                         int srcfd,
296                                         int destfd)
297 {
298         int                     max_chunks[MAXDIM],
299                                 chunk_no[MAXDIM];
300         int                     PX[MAXDIM],
301                                 dist[MAXDIM];
302         int                     csize = 1,
303                                 i,
304                                 temp;
305
306         for (i = 0; i < n; chunk_no[i++] = 0)
307         {
308                 max_chunks[i] = dim[i] / C[i];
309                 csize *= C[i];
310         }
311         csize *= baseSize;
312         temp = csize + VARHDRSZ;
313         memmove(a_chunk, &temp, VARHDRSZ);
314
315         mda_get_prod(n, dim, PX);
316         mda_get_offset_values(n, dist, PX, C);
317         for (i = 0; i < n; dist[i] *= baseSize, i++)
318                 ;
319         do
320         {
321                 read_chunk(chunk_no, C, &(a_chunk[VARHDRSZ]), srcfd, n, baseSize, PX, dist);
322                 write_chunk((struct varlena *) a_chunk, destfd);
323         } while (next_tuple(n, chunk_no, max_chunks) != -1);
324 }
325
326 /*--------------------------------------------------------------------------
327  * read_chunk
328  *        reads a chunk from the input files into a_chunk, the position of the
329  *        chunk is specified by chunk_no
330  *--------------------------------------------------------------------------
331  */
332 static void
333 read_chunk(int *chunk_no,
334                    int *C,
335                    char *a_chunk,
336                    int srcfd,
337                    int n,
338                    int baseSize,
339                    int *PX,
340                    int *dist)
341 {
342         int                     i,
343                                 j,
344                                 cp,
345                                 unit_transfer;
346         int                     start_pos,
347                                 pos[MAXDIM];
348         int                     indx[MAXDIM];
349         int                     fpOff;
350
351         for (i = start_pos = 0; i < n; i++)
352         {
353                 pos[i] = chunk_no[i] * C[i];
354                 start_pos += pos[i] * PX[i];
355         }
356         start_pos *= baseSize;
357
358         /* Read a block of dimesion C starting at co-ordinates pos */
359         unit_transfer = C[n - 1] * baseSize;
360
361         for (i = 0; i < n; indx[i++] = 0)
362                 ;
363         fpOff = start_pos;
364         seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
365         fpOff += unit_transfer;
366         cp = unit_transfer;
367
368         while ((j = next_tuple(n - 1, indx, C)) != -1)
369         {
370                 fpOff += dist[j];
371                 seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
372                 cp += unit_transfer;
373                 fpOff += unit_transfer;
374         }
375 }
376
377 /*--------------------------------------------------------------------------
378  * write_chunk()
379  *        writes a chunk of size csize into the output file
380  *--------------------------------------------------------------------------
381  */
382 static int
383 write_chunk(struct varlena * a_chunk, int ofile)
384 {
385         int                     got_n = 0;
386
387 #ifdef LOARRAY
388         got_n = LOwrite(ofile, a_chunk);
389 #endif
390         return got_n;
391 }
392
393 /*--------------------------------------------------------------------------
394  * seek_and_read()
395  *        seeks to the asked location in the input file and reads the
396  *        appropriate number of blocks
397  *       Called By: read_chunk()
398  *--------------------------------------------------------------------------
399  */
400 static int
401 seek_and_read(int pos, int size, char *buff, int fp, int from)
402 {
403         struct varlena *v = NULL;
404
405         /* Assuming only one file */
406         if (lo_lseek(fp, pos, from) < 0)
407                 elog(ERROR, "File seek error");
408 #ifdef LOARRAY
409         v = (struct varlena *) LOread(fp, size);
410 #endif
411         if (VARSIZE(v) - VARHDRSZ < size)
412                 elog(ERROR, "File read error");
413         memmove(buff, VARDATA(v), size);
414         pfree(v);
415         return 1;
416
417 }
418
419 #endif   /* LOARRAY */
420
421 /*----------------------------------------------------------------------------
422  * _ReadChunkArray
423  *                returns the subarray specified bu the range indices "st" and "endp"
424  *                from the chunked array stored in file "fp"
425  *---------------------------------------------------------------------------
426  */
427 int
428 _ReadChunkArray(int *st,
429                                 int *endp,
430                                 int bsize,
431                                 int fp,
432                                 char *destfp,
433                                 ArrayType *array,
434                                 int isDestLO,
435                                 bool *isNull)
436 {
437         int                     i,
438                                 j,
439                                 jj;
440         int                     n,
441                                 temp,
442                                 words_read;
443         int                     chunk_span[MAXDIM],
444                                 chunk_off[MAXDIM];
445         int                     chunk_st[MAXDIM],
446                                 chunk_end[MAXDIM];
447         int                     block_seek;
448
449         int                     bptr,
450                            *C,
451                                 csize,
452                            *dim,
453                            *lb;
454         int                     range_st[MAXDIM],
455                                 range_end[MAXDIM],
456                                 range[MAXDIM],
457                                 array_span[MAXDIM];
458         int                     PA[MAXDIM],
459                                 PCHUNK[MAXDIM],
460                                 PC[MAXDIM];
461         int                     to_read;
462         int                     cdist[MAXDIM],
463                                 adist[MAXDIM];
464         int                     dist[MAXDIM],
465                                 temp_seek;
466
467         int                     srcOff;                 /* Needed since LO don't understand
468                                                                  * SEEK_CUR */
469         char       *baseDestFp = (char *) destfp;
470
471         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
472
473         n = ARR_NDIM(array);
474         dim = ARR_DIMS(array);
475         lb = ARR_LBOUND(array);
476         C = A->C;
477
478         csize = C[n - 1];
479         PC[n - 1] = 1;
480         temp = dim[n - 1] / C[n - 1];
481         for (i = n - 2; i >= 0; i--)
482         {
483                 PC[i] = PC[i + 1] * temp;
484                 temp = dim[i] / C[i];
485                 csize *= C[i];
486         }
487
488         for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
489                 ;
490         mda_get_prod(n, C, PCHUNK);
491         mda_get_range(n, array_span, st, endp);
492         mda_get_prod(n, array_span, PA);
493
494         array2chunk_coord(n, C, st, chunk_st);
495         array2chunk_coord(n, C, endp, chunk_end);
496         mda_get_range(n, chunk_span, chunk_st, chunk_end);
497         mda_get_offset_values(n, dist, PC, chunk_span);
498
499         for (i = 0; i < n; i++)
500         {
501                 range_st[i] = st[i];
502                 range_end[i] = min(chunk_st[i] * C[i] + C[i] - 1, endp[i]);
503         }
504
505         for (i = j = 0; i < n; i++)
506                 j += chunk_st[i] * PC[i];
507         temp_seek = srcOff = j * csize * bsize;
508         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
509                 RETURN_NULL;
510
511         jj = n - 1;
512         for (i = 0; i < n; chunk_off[i++] = 0)
513                 ;
514         words_read = 0;
515         temp_seek = 0;
516         do
517         {
518                 /* Write chunk (chunk_st) to output buffer */
519                 mda_get_range(n, array_span, range_st, range_end);
520                 mda_get_offset_values(n, adist, PA, array_span);
521                 mda_get_offset_values(n, cdist, PCHUNK, array_span);
522                 for (i = 0; i < n; range[i] = range_st[i] - st[i], i++);
523                 bptr = tuple2linear(n, range, PA);
524                 for (i = 0; i < n; range[i++] = 0);
525                 j = n - 1;
526                 bptr *= bsize;
527                 if (isDestLO)
528                 {
529                         if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
530                                 RETURN_NULL;
531                 }
532                 else
533                         destfp = baseDestFp + bptr;
534                 for (i = 0, block_seek = 0; i < n; i++)
535                         block_seek += (range_st[i] - (chunk_st[i] + chunk_off[i])
536                                                    * C[i]) * PCHUNK[i];
537                 if (dist[jj] + block_seek + temp_seek)
538                 {
539                         temp = (dist[jj] * csize + block_seek + temp_seek) * bsize;
540                         srcOff += temp;
541                         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
542                                 RETURN_NULL;
543                 }
544                 for (i = n - 1, to_read = bsize; i >= 0;
545                          to_read *= min(C[i], array_span[i]), i--)
546                         if (cdist[i] || adist[i])
547                                 break;
548                 do
549                 {
550                         if (cdist[j])
551                         {
552                                 srcOff += (cdist[j] * bsize);
553                                 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
554                                         RETURN_NULL;
555                         }
556                         block_seek += cdist[j];
557                         bptr += adist[j] * bsize;
558                         if (isDestLO)
559                         {
560                                 if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
561                                         RETURN_NULL;
562                         }
563                         else
564                                 destfp = baseDestFp + bptr;
565                         temp = _LOtransfer((char **) &destfp, to_read, 1, (char **) &fp, 1, isDestLO);
566                         if (temp < to_read)
567                                 RETURN_NULL;
568                         srcOff += to_read;
569                         words_read += to_read;
570                         bptr += to_read;
571                         block_seek += (to_read / bsize);
572
573                         /*
574                          * compute next tuple in *range
575                          */
576                         {
577                                 int                     x;
578
579                                 if (!(i + 1))
580                                         j = -1;
581                                 else
582                                 {
583                                         range[i] = (range[i] + 1) % array_span[i];
584                                         for (x = i; x * (!range[x]); x--)
585                                                 range[x - 1] = (range[x - 1] + 1) % array_span[x - 1];
586                                         if (x)
587                                                 j = x;
588                                         else
589                                         {
590                                                 if (range[0])
591                                                         j = 0;
592                                                 else
593                                                         j = -1;
594                                         }
595                                 }
596                         }
597
598                         /*
599                          * end of compute next tuple -- j is set to -1 if tuple
600                          * generation is over
601                          */
602                 } while (j != -1);
603
604                 block_seek = csize - block_seek;
605                 temp_seek = block_seek;
606                 jj = next_tuple(n, chunk_off, chunk_span);
607                 if (jj == -1)
608                         break;
609                 range_st[jj] = (chunk_st[jj] + chunk_off[jj]) * C[jj];
610                 range_end[jj] = min(range_st[jj] + C[jj] - 1, endp[jj]);
611
612                 for (i = jj + 1; i < n; i++)
613                 {
614                         range_st[i] = st[i];
615                         range_end[i] = min((chunk_st[i] + chunk_off[i]) * C[i] + C[i] - 1, endp[i]);
616                 }
617         } while (jj != -1);
618         return words_read;
619 }
620
621 /*------------------------------------------------------------------------
622  * _ReadChunkArray1El
623  *               returns one element of the chunked array as specified by the index "st"
624  *               the chunked file descriptor is "fp"
625  *-------------------------------------------------------------------------
626  */
627 struct varlena *
628 _ReadChunkArray1El(int *st,
629                                    int bsize,
630                                    int fp,
631                                    ArrayType *array,
632                                    bool *isNull)
633 {
634         int                     i,
635                                 j,
636                                 n,
637                                 temp,
638                                 srcOff;
639         int                     chunk_st[MAXDIM];
640
641         int                *C,
642                                 csize,
643                            *dim,
644                            *lb;
645         int                     PCHUNK[MAXDIM],
646                                 PC[MAXDIM];
647
648         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
649
650         n = ARR_NDIM(array);
651         lb = ARR_LBOUND(array);
652         C = A->C;
653         dim = ARR_DIMS(array);
654
655         csize = C[n - 1];
656         PC[n - 1] = 1;
657         temp = dim[n - 1] / C[n - 1];
658         for (i = n - 2; i >= 0; i--)
659         {
660                 PC[i] = PC[i + 1] * temp;
661                 temp = dim[i] / C[i];
662                 csize *= C[i];
663         }
664
665         for (i = 0; i < n; st[i] -= lb[i], i++);
666         mda_get_prod(n, C, PCHUNK);
667
668         array2chunk_coord(n, C, st, chunk_st);
669
670         for (i = j = 0; i < n; i++)
671                 j += chunk_st[i] * PC[i];
672         srcOff = j * csize;
673
674         for (i = 0; i < n; i++)
675                 srcOff += (st[i] - chunk_st[i] * C[i]) * PCHUNK[i];
676
677         srcOff *= bsize;
678         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
679                 RETURN_NULL;
680 #ifdef LOARRAY
681         return (struct varlena *) LOread(fp, bsize);
682 #endif
683         return (struct varlena *) 0;
684 }