]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/chunk.c
pgindent run before 6.3 release, with Thomas' requested changes.
[postgresql] / src / backend / utils / adt / chunk.c
1 /*-------------------------------------------------------------------------
2  *
3  * chunk.c--
4  *
5  * Copyright (c) 1994, Regents of the University of California
6  *
7  *
8  * IDENTIFICATION
9  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.16 1998/02/26 04:36:56 momjian Exp $
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <fcntl.h>
16
17 #include "postgres.h"
18 #include "fmgr.h"
19
20 #include "catalog/pg_type.h"
21 #include "libpq/be-fsstubs.h"
22 #include "libpq/libpq-fs.h"
23 #include "optimizer/internal.h"
24 #include "utils/array.h"
25 #include "utils/memutils.h"
26
27 #ifndef HAVE_MEMMOVE
28 #include <regex/utils.h>
29 #else
30 #include <string.h>
31 #endif
32
33 #define INFTY 500000000
34 #define MANY  10000
35 #define MAXPAT 20
36 #define quot_ceil(x,y)  (((x)+(y)-1)/(y))
37 #define min(x,y)                (((x) < (y))? (x) : (y))
38 #define max(x,y)                (((x) > (y))? (x) : (y))
39
40 static CHUNK_INFO cInfo;
41
42 /* non-export function prototypes */
43 static int
44 _FindBestChunk(int size, int dmax[], int dbest[], int dim,
45                            int A[MAXPAT][MAXDIM + 1], int N);
46 static int      get_next(int d[], int k, int C, int dmax[]);
47 static void initialize_info(CHUNK_INFO *A, int ndim, int dim[], int chunk[]);
48
49 #ifdef LOARRAY
50 static void
51 _ConvertToChunkFile(int n, int baseSize, int dim[], int C[],
52                                         int srcfd, int destfd);
53 static void
54 read_chunk(int chunk_no[], int C[], char a_chunk[], int srcfd,
55                    int n, int baseSize, int PX[], int dist[]);
56 static int      write_chunk(struct varlena * a_chunk, int ofile);
57 static int      seek_and_read(int pos, int size, char buff[], int fp, int from);
58
59 #endif
60 static int
61 GetChunkSize(FILE *fd, int ndim, int dim[MAXDIM], int baseSize,
62                          int d[MAXDIM]);
63
64 /*------------------------------------------------------------------------
65  * _ChunkArray ---
66  *         converts an input array to chunked format using the information
67  *         provided by the access pattern.
68  * Results:
69  *         creates a new file that stores the chunked array and returns
70  *         information about the chunked file
71  *-----------------------------------------------------------------------
72  */
73 char *
74 _ChunkArray(int fd,
75                         FILE *afd,
76                         int ndim,
77                         int dim[],
78                         int baseSize,
79                         int *nbytes,
80                         char *chunkfile)
81 {
82 #ifdef LOARRAY
83         int                     cfd = 0;
84
85 #endif
86         int                     chunk[MAXDIM],
87                                 csize;
88         bool            reorgFlag;
89
90         if (chunkfile == NULL)
91                 reorgFlag = true;
92         else
93                 reorgFlag = false;
94
95 #ifdef LOARRAY
96         if (reorgFlag)
97                 /* create new LO for chunked file */
98                 chunkfile = _array_newLO(&cfd, fileFlag);
99         else
100                 cfd = LOopen(chunkfile, O_RDONLY);
101         if (cfd < 0)
102                 elog(ERROR, "Unable to open chunk file");
103 #endif
104
105         strcpy(cInfo.lo_name, chunkfile);
106
107         /* find chunk size */
108         csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
109
110 #ifdef LOARRAY
111         if (reorgFlag)
112                 /* copy data from input file to chunked file */
113                 _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
114 #endif
115
116         initialize_info(&cInfo, ndim, dim, chunk);
117         *nbytes = sizeof(CHUNK_INFO);
118         return (char *) &cInfo;
119 }
120
121 /*--------------------------------------------------------------------------
122  * GetChunkSize --
123  *                given an access pattern and array dimensionality etc, this program
124  *              returns the dimensions of the chunk in "d"
125  *-----------------------------------------------------------------------
126  */
127 static int
128 GetChunkSize(FILE *fd,
129                          int ndim,
130                          int dim[MAXDIM],
131                          int baseSize,
132                          int d[MAXDIM])
133 {
134         int                     N,
135                                 i,
136                                 j,
137                                 csize;
138         int                     A[MAXPAT][MAXDIM + 1],
139                                 dmax[MAXDIM];
140
141         /*
142          * ----------- read input ------------
143          */
144         fscanf(fd, "%d", &N);
145         if (N > MAXPAT)
146                 elog(ERROR, "array_in: too many access pattern elements");
147         for (i = 0; i < N; i++)
148                 for (j = 0; j < ndim + 1; j++)
149                         if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
150                                 elog(ERROR, "array_in: bad access pattern input");
151
152         /*
153          * estimate chunk size
154          */
155         for (i = 0; i < ndim; i++)
156                 for (j = 0, dmax[i] = 1; j < N; j++)
157                         if (dmax[i] < A[j][i])
158                                 dmax[i] = A[j][i];
159         csize = BLCKSZ / baseSize;
160
161         _FindBestChunk(csize, dmax, d, ndim, A, N);
162
163         return csize;
164 }
165
166 /*-------------------------------------------------------------------------
167  * _FindBestChunk --
168  *                This routine does most of the number crunching to compute the
169  *                optimal chunk shape.
170  * Called by GetChunkSize
171  *------------------------------------------------------------------------
172  */
173 static int
174 _FindBestChunk(int size,
175                            int dmax[],
176                            int dbest[],
177                            int dim,
178                            int A[MAXPAT][MAXDIM + 1],
179                            int N)
180 {
181         int                     d[MAXDIM];
182         int                     tc,
183                                 mintc = INFTY;
184
185         d[0] = 0;
186         mintc = INFTY;
187         while (get_next(d, dim, size, dmax))
188         {
189
190                 /*
191                  * compute the number of page fetches for a given chunk size (d[])
192                  * and access pattern (A[][])
193                  */
194                 int                     i,
195                                         j,
196                                         nc;
197
198                 for (i = 0, tc = 0; i < N; i++)
199                 {
200                         for (j = 0, nc = 1; j < dim; j++)
201                                 nc *= quot_ceil(A[i][j], d[j]);
202                         nc *= A[i][dim];
203                         tc += nc;
204                 }
205
206                 /*
207                  * tc holds the total number of page fetches
208                  */
209                 if (mintc >= tc)
210                 {
211                         mintc = tc;
212                         for (j = 0; j < dim; dbest[j] = d[j], j++)
213                                 ;
214                 }
215         }
216         return (mintc);
217 }
218
219 /*----------------------------------------------------------------------
220  * get_next --
221  *       Called by _GetBestChunk to get the next tuple in the lexicographic order
222  *---------------------------------------------------------------------
223  */
224 static int
225 get_next(int d[], int k, int C, int dmax[])
226 {
227         int                     i,
228                                 j,
229                                 temp;
230
231         if (!d[0])
232         {
233                 temp = C;
234                 for (j = k - 1; j >= 0; j--)
235                 {
236                         d[j] = min(temp, dmax[j]);
237                         temp = max(1, temp / d[j]);
238                 }
239                 return (1);
240         }
241
242         for (j = 0, temp = 1; j < k; j++)
243                 temp *= d[j];
244
245         for (i = k - 1; i >= 0; i--)
246         {
247                 temp = temp / d[i];
248                 if (((temp * (d[i] + 1)) < C) && (d[i] + 1 <= dmax[i]))
249                         break;
250         }
251         if (i < 0)
252                 return (0);
253
254         d[i]++;
255         j = C / temp;
256         d[i] = min(dmax[i], j / (j / d[i]));
257         temp = temp * d[i];
258         temp = C / temp;
259
260         for (j = k - 1; j > i; j--)
261         {
262                 d[j] = min(temp, dmax[j]);
263                 temp = max(1, temp / d[j]);
264         }
265         return (1);
266 }
267
268 #ifdef LOARRAY
269 static char a_chunk[BLCKSZ + VARHDRSZ]; /* VARHDRSZ since a_chunk is in
270                                                                                  * varlena format */
271
272 #endif
273
274 static void
275 initialize_info(CHUNK_INFO *A, int ndim, int dim[], int chunk[])
276 {
277         int                     i;
278
279         for (i = 0; i < ndim; i++)
280                 A->C[i] = chunk[i];
281 }
282
283 /*--------------------------------------------------------------------------
284  * Procedure reorganize_data():
285  *        This procedure reads the input multidimensional array that is organised
286  *        in the order specified by array "X" and breaks it up into chunks of
287  *        dimensions specified in "C".
288  *
289  *        This is a very slow process, since reading and writing of LARGE files
290  *        may be involved.
291  *
292  *-------------------------------------------------------------------------
293  */
294 #ifdef LOARRAY
295 static void
296 _ConvertToChunkFile(int n,
297                                         int baseSize,
298                                         int dim[],
299                                         int C[],
300                                         int srcfd,
301                                         int destfd)
302 {
303         int                     max_chunks[MAXDIM],
304                                 chunk_no[MAXDIM];
305         int                     PX[MAXDIM],
306                                 dist[MAXDIM];
307         int                     csize = 1,
308                                 i,
309                                 temp;
310
311         for (i = 0; i < n; chunk_no[i++] = 0)
312         {
313                 max_chunks[i] = dim[i] / C[i];
314                 csize *= C[i];
315         }
316         csize *= baseSize;
317         temp = csize + VARHDRSZ;
318         memmove(a_chunk, &temp, VARHDRSZ);
319
320         mda_get_prod(n, dim, PX);
321         mda_get_offset_values(n, dist, PX, C);
322         for (i = 0; i < n; dist[i] *= baseSize, i++)
323                 ;
324         do
325         {
326                 read_chunk(chunk_no, C, &(a_chunk[VARHDRSZ]), srcfd, n, baseSize, PX, dist);
327                 write_chunk((struct varlena *) a_chunk, destfd);
328         } while (next_tuple(n, chunk_no, max_chunks) != -1);
329 }
330
331 /*--------------------------------------------------------------------------
332  * read_chunk
333  *        reads a chunk from the input files into a_chunk, the position of the
334  *        chunk is specified by chunk_no
335  *--------------------------------------------------------------------------
336  */
337 static void
338 read_chunk(int chunk_no[],
339                    int C[],
340                    char a_chunk[],
341                    int srcfd,
342                    int n,
343                    int baseSize,
344                    int PX[],
345                    int dist[])
346 {
347         int                     i,
348                                 j,
349                                 cp,
350                                 unit_transfer;
351         int                     start_pos,
352                                 pos[MAXDIM];
353         int                     indx[MAXDIM];
354         int                     fpOff;
355
356         for (i = start_pos = 0; i < n; i++)
357         {
358                 pos[i] = chunk_no[i] * C[i];
359                 start_pos += pos[i] * PX[i];
360         }
361         start_pos *= baseSize;
362
363         /* Read a block of dimesion C starting at co-ordinates pos */
364         unit_transfer = C[n - 1] * baseSize;
365
366         for (i = 0; i < n; indx[i++] = 0)
367                 ;
368         fpOff = start_pos;
369         seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
370         fpOff += unit_transfer;
371         cp = unit_transfer;
372
373         while ((j = next_tuple(n - 1, indx, C)) != -1)
374         {
375                 fpOff += dist[j];
376                 seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
377                 cp += unit_transfer;
378                 fpOff += unit_transfer;
379         }
380 }
381
382 /*--------------------------------------------------------------------------
383  * write_chunk()
384  *        writes a chunk of size csize into the output file
385  *--------------------------------------------------------------------------
386  */
387 static int
388 write_chunk(struct varlena * a_chunk, int ofile)
389 {
390         int                     got_n = 0;
391
392 #ifdef LOARRAY
393         got_n = LOwrite(ofile, a_chunk);
394 #endif
395         return (got_n);
396 }
397
398 /*--------------------------------------------------------------------------
399  * seek_and_read()
400  *        seeks to the asked location in the input file and reads the
401  *        appropriate number of blocks
402  *       Called By: read_chunk()
403  *--------------------------------------------------------------------------
404  */
405 static int
406 seek_and_read(int pos, int size, char buff[], int fp, int from)
407 {
408         struct varlena *v = NULL;
409
410         /* Assuming only one file */
411         if (lo_lseek(fp, pos, from) < 0)
412                 elog(ERROR, "File seek error");
413 #ifdef LOARRAY
414         v = (struct varlena *) LOread(fp, size);
415 #endif
416         if (VARSIZE(v) - VARHDRSZ < size)
417                 elog(ERROR, "File read error");
418         memmove(buff, VARDATA(v), size);
419         pfree(v);
420         return (1);
421
422 }
423
424 #endif                                                  /* LOARRAY */
425
426 /*----------------------------------------------------------------------------
427  * _ReadChunkArray --
428  *                returns the subarray specified bu the range indices "st" and "endp"
429  *                from the chunked array stored in file "fp"
430  *---------------------------------------------------------------------------
431  */
432 int
433 _ReadChunkArray(int st[],
434                                 int endp[],
435                                 int bsize,
436                                 int fp,
437                                 char *destfp,
438                                 ArrayType *array,
439                                 int isDestLO,
440                                 bool *isNull)
441 {
442         int                     i,
443                                 j,
444                                 jj;
445         int                     n,
446                                 temp,
447                                 words_read;
448         int                     chunk_span[MAXDIM],
449                                 chunk_off[MAXDIM];
450         int                     chunk_st[MAXDIM],
451                                 chunk_end[MAXDIM];
452         int                     block_seek;
453
454         int                     bptr,
455                            *C,
456                                 csize,
457                            *dim,
458                            *lb;
459         int                     range_st[MAXDIM],
460                                 range_end[MAXDIM],
461                                 range[MAXDIM],
462                                 array_span[MAXDIM];
463         int                     PA[MAXDIM],
464                                 PCHUNK[MAXDIM],
465                                 PC[MAXDIM];
466         int                     to_read;
467         int                     cdist[MAXDIM],
468                                 adist[MAXDIM];
469         int                     dist[MAXDIM],
470                                 temp_seek;
471
472         int                     srcOff;                 /* Needed since LO don't understand
473                                                                  * SEEK_CUR */
474         char       *baseDestFp = (char *) destfp;
475
476         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
477
478         n = ARR_NDIM(array);
479         dim = ARR_DIMS(array);
480         lb = ARR_LBOUND(array);
481         C = A->C;
482
483         csize = C[n - 1];
484         PC[n - 1] = 1;
485         temp = dim[n - 1] / C[n - 1];
486         for (i = n - 2; i >= 0; i--)
487         {
488                 PC[i] = PC[i + 1] * temp;
489                 temp = dim[i] / C[i];
490                 csize *= C[i];
491         }
492
493         for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
494                 ;
495         mda_get_prod(n, C, PCHUNK);
496         mda_get_range(n, array_span, st, endp);
497         mda_get_prod(n, array_span, PA);
498
499         array2chunk_coord(n, C, st, chunk_st);
500         array2chunk_coord(n, C, endp, chunk_end);
501         mda_get_range(n, chunk_span, chunk_st, chunk_end);
502         mda_get_offset_values(n, dist, PC, chunk_span);
503
504         for (i = 0; i < n; i++)
505         {
506                 range_st[i] = st[i];
507                 range_end[i] = min(chunk_st[i] * C[i] + C[i] - 1, endp[i]);
508         }
509
510         for (i = j = 0; i < n; i++)
511                 j += chunk_st[i] * PC[i];
512         temp_seek = srcOff = j * csize * bsize;
513         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
514                 RETURN_NULL;
515
516         jj = n - 1;
517         for (i = 0; i < n; chunk_off[i++] = 0)
518                 ;
519         words_read = 0;
520         temp_seek = 0;
521         do
522         {
523                 /* Write chunk (chunk_st) to output buffer */
524                 mda_get_range(n, array_span, range_st, range_end);
525                 mda_get_offset_values(n, adist, PA, array_span);
526                 mda_get_offset_values(n, cdist, PCHUNK, array_span);
527                 for (i = 0; i < n; range[i] = range_st[i] - st[i], i++);
528                 bptr = tuple2linear(n, range, PA);
529                 for (i = 0; i < n; range[i++] = 0);
530                 j = n - 1;
531                 bptr *= bsize;
532                 if (isDestLO)
533                 {
534                         if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
535                                 RETURN_NULL;
536                 }
537                 else
538                         destfp = baseDestFp + bptr;
539                 for (i = 0, block_seek = 0; i < n; i++)
540                         block_seek += (range_st[i] - (chunk_st[i] + chunk_off[i])
541                                                    * C[i]) * PCHUNK[i];
542                 if (dist[jj] + block_seek + temp_seek)
543                 {
544                         temp = (dist[jj] * csize + block_seek + temp_seek) * bsize;
545                         srcOff += temp;
546                         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
547                                 RETURN_NULL;
548                 }
549                 for (i = n - 1, to_read = bsize; i >= 0;
550                          to_read *= min(C[i], array_span[i]), i--)
551                         if (cdist[i] || adist[i])
552                                 break;
553                 do
554                 {
555                         if (cdist[j])
556                         {
557                                 srcOff += (cdist[j] * bsize);
558                                 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
559                                         RETURN_NULL;
560                         }
561                         block_seek += cdist[j];
562                         bptr += adist[j] * bsize;
563                         if (isDestLO)
564                         {
565                                 if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
566                                         RETURN_NULL;
567                         }
568                         else
569                                 destfp = baseDestFp + bptr;
570                         temp = _LOtransfer((char **) &destfp, to_read, 1, (char **) &fp, 1, isDestLO);
571                         if (temp < to_read)
572                                 RETURN_NULL;
573                         srcOff += to_read;
574                         words_read += to_read;
575                         bptr += to_read;
576                         block_seek += (to_read / bsize);
577
578                         /*
579                          * compute next tuple in range[]
580                          */
581                         {
582                                 int                     x;
583
584                                 if (!(i + 1))
585                                         j = -1;
586                                 else
587                                 {
588                                         range[i] = (range[i] + 1) % array_span[i];
589                                         for (x = i; x * (!range[x]); x--)
590                                                 range[x - 1] = (range[x - 1] + 1) % array_span[x - 1];
591                                         if (x)
592                                                 j = x;
593                                         else
594                                         {
595                                                 if (range[0])
596                                                         j = 0;
597                                                 else
598                                                         j = -1;
599                                         }
600                                 }
601                         }
602
603                         /*
604                          * end of compute next tuple -- j is set to -1 if tuple
605                          * generation is over
606                          */
607                 } while (j != -1);
608
609                 block_seek = csize - block_seek;
610                 temp_seek = block_seek;
611                 jj = next_tuple(n, chunk_off, chunk_span);
612                 if (jj == -1)
613                         break;
614                 range_st[jj] = (chunk_st[jj] + chunk_off[jj]) * C[jj];
615                 range_end[jj] = min(range_st[jj] + C[jj] - 1, endp[jj]);
616
617                 for (i = jj + 1; i < n; i++)
618                 {
619                         range_st[i] = st[i];
620                         range_end[i] = min((chunk_st[i] + chunk_off[i]) * C[i] + C[i] - 1, endp[i]);
621                 }
622         } while (jj != -1);
623         return (words_read);
624 }
625
626 /*------------------------------------------------------------------------
627  * _ReadChunkArray1El --
628  *               returns one element of the chunked array as specified by the index "st"
629  *               the chunked file descriptor is "fp"
630  *-------------------------------------------------------------------------
631  */
632 struct varlena *
633 _ReadChunkArray1El(int st[],
634                                    int bsize,
635                                    int fp,
636                                    ArrayType *array,
637                                    bool *isNull)
638 {
639         int                     i,
640                                 j,
641                                 n,
642                                 temp,
643                                 srcOff;
644         int                     chunk_st[MAXDIM];
645
646         int                *C,
647                                 csize,
648                            *dim,
649                            *lb;
650         int                     PCHUNK[MAXDIM],
651                                 PC[MAXDIM];
652
653         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
654
655         n = ARR_NDIM(array);
656         lb = ARR_LBOUND(array);
657         C = A->C;
658         dim = ARR_DIMS(array);
659
660         csize = C[n - 1];
661         PC[n - 1] = 1;
662         temp = dim[n - 1] / C[n - 1];
663         for (i = n - 2; i >= 0; i--)
664         {
665                 PC[i] = PC[i + 1] * temp;
666                 temp = dim[i] / C[i];
667                 csize *= C[i];
668         }
669
670         for (i = 0; i < n; st[i] -= lb[i], i++);
671         mda_get_prod(n, C, PCHUNK);
672
673         array2chunk_coord(n, C, st, chunk_st);
674
675         for (i = j = 0; i < n; i++)
676                 j += chunk_st[i] * PC[i];
677         srcOff = j * csize;
678
679         for (i = 0; i < n; i++)
680                 srcOff += (st[i] - chunk_st[i] * C[i]) * PCHUNK[i];
681
682         srcOff *= bsize;
683         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
684                 RETURN_NULL;
685 #ifdef LOARRAY
686         return (struct varlena *) LOread(fp, bsize);
687 #endif
688         return (struct varlena *) 0;
689 }