]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/chunk.c
Another PGINDENT run that changes variable indenting and case label indenting. Also...
[postgresql] / src / backend / utils / adt / chunk.c
1 /*-------------------------------------------------------------------------
2  *
3  * chunk.c--
4  *
5  * Copyright (c) 1994, Regents of the University of California
6  *
7  *
8  * IDENTIFICATION
9  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.8 1997/09/08 02:30:31 momjian Exp $
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <fcntl.h>
16
17 #include "postgres.h"
18
19 #include <libpq/be-fsstubs.h>
20 #include "utils/memutils.h"
21 #include "libpq/libpq-fs.h"
22
23 #include "storage/fd.h"                 /* for SEEK_ */
24
25 #include "catalog/pg_type.h"
26
27 #include "fmgr.h"
28 #include "utils/array.h"
29
30 #include "optimizer/internal.h"
31 #ifndef HAVE_MEMMOVE
32 #include <regex/utils.h>
33 #else
34 #include <string.h>
35 #endif
36
37
38 #define INFTY 500000000
39 #define MANY  10000
40 #define MAXPAT 20
41 #define quot_ceil(x,y)  (((x)+(y)-1)/(y))
42 #define min(x,y)                (((x) < (y))? (x) : (y))
43 #define max(x,y)                (((x) > (y))? (x) : (y))
44
45 static CHUNK_INFO cInfo;
46
47 /* non-export function prototypes */
48 static int
49 _FindBestChunk(int size, int dmax[], int dbest[], int dim,
50                            int A[MAXPAT][MAXDIM + 1], int N);
51 static int      get_next(int d[], int k, int C, int dmax[]);
52 static void initialize_info(CHUNK_INFO * A, int ndim, int dim[], int chunk[]);
53
54 #ifdef LOARRAY
55 static void
56 _ConvertToChunkFile(int n, int baseSize, int dim[], int C[],
57                                         int srcfd, int destfd);
58 static void
59 read_chunk(int chunk_no[], int C[], char a_chunk[], int srcfd,
60                    int n, int baseSize, int PX[], int dist[]);
61 static int      write_chunk(struct varlena * a_chunk, int ofile);
62 static int      seek_and_read(int pos, int size, char buff[], int fp, int from);
63
64 #endif
65 static int
66 GetChunkSize(FILE * fd, int ndim, int dim[MAXDIM], int baseSize,
67                          int d[MAXDIM]);
68
69 /*------------------------------------------------------------------------
70  * _ChunkArray ---
71  *         converts an input array to chunked format using the information
72  *         provided by the access pattern.
73  * Results:
74  *         creates a new file that stores the chunked array and returns
75  *         information about the chunked file
76  *-----------------------------------------------------------------------
77  */
78 char       *
79 _ChunkArray(int fd,
80                         FILE * afd,
81                         int ndim,
82                         int dim[],
83                         int baseSize,
84                         int *nbytes,
85                         char *chunkfile)
86 {
87 #ifdef LOARRAY
88         int                     cfd = 0;
89
90 #endif
91         int                     chunk[MAXDIM],
92                                 csize;
93         bool            reorgFlag;
94
95         if (chunkfile == NULL)
96                 reorgFlag = true;
97         else
98                 reorgFlag = false;
99
100 #ifdef LOARRAY
101         if (reorgFlag)
102                 /* create new LO for chunked file */
103                 chunkfile = _array_newLO(&cfd, fileFlag);
104         else
105                 cfd = LOopen(chunkfile, O_RDONLY);
106         if (cfd < 0)
107                 elog(WARN, "Unable to open chunk file");
108 #endif
109
110         strcpy(cInfo.lo_name, chunkfile);
111
112         /* find chunk size */
113         csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
114
115 #ifdef LOARRAY
116         if (reorgFlag)
117                 /* copy data from input file to chunked file */
118                 _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
119 #endif
120
121         initialize_info(&cInfo, ndim, dim, chunk);
122         *nbytes = sizeof(CHUNK_INFO);
123         return (char *) &cInfo;
124 }
125
126 /*--------------------------------------------------------------------------
127  * GetChunkSize --
128  *                given an access pattern and array dimensionality etc, this program
129  *              returns the dimensions of the chunk in "d"
130  *-----------------------------------------------------------------------
131  */
132 static int
133 GetChunkSize(FILE * fd,
134                          int ndim,
135                          int dim[MAXDIM],
136                          int baseSize,
137                          int d[MAXDIM])
138 {
139         int                     N,
140                                 i,
141                                 j,
142                                 csize;
143         int                     A[MAXPAT][MAXDIM + 1],
144                                 dmax[MAXDIM];
145
146         /*
147          * ----------- read input ------------
148          */
149         fscanf(fd, "%d", &N);
150         if (N > MAXPAT)
151                 elog(WARN, "array_in: too many access pattern elements");
152         for (i = 0; i < N; i++)
153                 for (j = 0; j < ndim + 1; j++)
154                         if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
155                                 elog(WARN, "array_in: bad access pattern input");
156
157         /*
158          * estimate chunk size
159          */
160         for (i = 0; i < ndim; i++)
161                 for (j = 0, dmax[i] = 1; j < N; j++)
162                         if (dmax[i] < A[j][i])
163                                 dmax[i] = A[j][i];
164         csize = BLCKSZ / baseSize;
165
166         _FindBestChunk(csize, dmax, d, ndim, A, N);
167
168         return csize;
169 }
170
171 /*-------------------------------------------------------------------------
172  * _FindBestChunk --
173  *                This routine does most of the number crunching to compute the
174  *                optimal chunk shape.
175  * Called by GetChunkSize
176  *------------------------------------------------------------------------
177  */
178 static int
179 _FindBestChunk(int size,
180                            int dmax[],
181                            int dbest[],
182                            int dim,
183                            int A[MAXPAT][MAXDIM + 1],
184                            int N)
185 {
186         int                     d[MAXDIM];
187         int                     tc,
188                                 mintc = INFTY;
189
190         d[0] = 0;
191         mintc = INFTY;
192         while (get_next(d, dim, size, dmax))
193         {
194
195                 /*
196                  * compute the number of page fetches for a given chunk size (d[])
197                  * and access pattern (A[][])
198                  */
199                 register int i,
200                                         j,
201                                         nc;
202
203                 for (i = 0, tc = 0; i < N; i++)
204                 {
205                         for (j = 0, nc = 1; j < dim; j++)
206                                 nc *= quot_ceil(A[i][j], d[j]);
207                         nc *= A[i][dim];
208                         tc += nc;
209                 }
210
211                 /*
212                  * tc holds the total number of page fetches
213                  */
214                 if (mintc >= tc)
215                 {
216                         mintc = tc;
217                         for (j = 0; j < dim; dbest[j] = d[j], j++)
218                                 ;
219                 }
220         }
221         return (mintc);
222 }
223
224 /*----------------------------------------------------------------------
225  * get_next --
226  *       Called by _GetBestChunk to get the next tuple in the lexicographic order
227  *---------------------------------------------------------------------
228  */
229 static int
230 get_next(int d[], int k, int C, int dmax[])
231 {
232         register int i,
233                                 j,
234                                 temp;
235
236         if (!d[0])
237         {
238                 temp = C;
239                 for (j = k - 1; j >= 0; j--)
240                 {
241                         d[j] = min(temp, dmax[j]);
242                         temp = max(1, temp / d[j]);
243                 }
244                 return (1);
245         }
246
247         for (j = 0, temp = 1; j < k; j++)
248                 temp *= d[j];
249
250         for (i = k - 1; i >= 0; i--)
251         {
252                 temp = temp / d[i];
253                 if (((temp * (d[i] + 1)) < C) && (d[i] + 1 <= dmax[i]))
254                         break;
255         }
256         if (i < 0)
257                 return (0);
258
259         d[i]++;
260         j = C / temp;
261         d[i] = min(dmax[i], j / (j / d[i]));
262         temp = temp * d[i];
263         temp = C / temp;
264
265         for (j = k - 1; j > i; j--)
266         {
267                 d[j] = min(temp, dmax[j]);
268                 temp = max(1, temp / d[j]);
269         }
270         return (1);
271 }
272
273 #ifdef LOARRAY
274 static char a_chunk[BLCKSZ + 4];/* 4 since a_chunk is in varlena format */
275
276 #endif
277
278 static void
279 initialize_info(CHUNK_INFO * A, int ndim, int dim[], int chunk[])
280 {
281         int                     i;
282
283         for (i = 0; i < ndim; i++)
284                 A->C[i] = chunk[i];
285 }
286
287 /*--------------------------------------------------------------------------
288  * Procedure reorganize_data():
289  *        This procedure reads the input multidimensional array that is organised
290  *        in the order specified by array "X" and breaks it up into chunks of
291  *        dimensions specified in "C".
292  *
293  *        This is a very slow process, since reading and writing of LARGE files
294  *        may be involved.
295  *
296  *-------------------------------------------------------------------------
297  */
298 #ifdef LOARRAY
299 static void
300 _ConvertToChunkFile(int n,
301                                         int baseSize,
302                                         int dim[],
303                                         int C[],
304                                         int srcfd,
305                                         int destfd)
306 {
307         int                     max_chunks[MAXDIM],
308                                 chunk_no[MAXDIM];
309         int                     PX[MAXDIM],
310                                 dist[MAXDIM];
311         int                     csize = 1,
312                                 i,
313                                 temp;
314
315         for (i = 0; i < n; chunk_no[i++] = 0)
316         {
317                 max_chunks[i] = dim[i] / C[i];
318                 csize *= C[i];
319         }
320         csize *= baseSize;
321         temp = csize + 4;
322         memmove(a_chunk, &temp, 4);
323
324         mda_get_prod(n, dim, PX);
325         mda_get_offset_values(n, dist, PX, C);
326         for (i = 0; i < n; dist[i] *= baseSize, i++)
327                 ;
328         do
329         {
330                 read_chunk(chunk_no, C, &(a_chunk[4]), srcfd, n, baseSize, PX, dist);
331                 write_chunk((struct varlena *) a_chunk, destfd);
332         } while (next_tuple(n, chunk_no, max_chunks) != -1);
333 }
334
335 /*--------------------------------------------------------------------------
336  * read_chunk
337  *        reads a chunk from the input files into a_chunk, the position of the
338  *        chunk is specified by chunk_no
339  *--------------------------------------------------------------------------
340  */
341 static void
342 read_chunk(int chunk_no[],
343                    int C[],
344                    char a_chunk[],
345                    int srcfd,
346                    int n,
347                    int baseSize,
348                    int PX[],
349                    int dist[])
350 {
351         int                     i,
352                                 j,
353                                 cp,
354                                 unit_transfer;
355         int                     start_pos,
356                                 pos[MAXDIM];
357         int                     indx[MAXDIM];
358         int                     fpOff;
359
360         for (i = start_pos = 0; i < n; i++)
361         {
362                 pos[i] = chunk_no[i] * C[i];
363                 start_pos += pos[i] * PX[i];
364         }
365         start_pos *= baseSize;
366
367         /* Read a block of dimesion C starting at co-ordinates pos */
368         unit_transfer = C[n - 1] * baseSize;
369
370         for (i = 0; i < n; indx[i++] = 0)
371                 ;
372         fpOff = start_pos;
373         seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
374         fpOff += unit_transfer;
375         cp = unit_transfer;
376
377         while ((j = next_tuple(n - 1, indx, C)) != -1)
378         {
379                 fpOff += dist[j];
380                 seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
381                 cp += unit_transfer;
382                 fpOff += unit_transfer;
383         }
384 }
385
386 /*--------------------------------------------------------------------------
387  * write_chunk()
388  *        writes a chunk of size csize into the output file
389  *--------------------------------------------------------------------------
390  */
391 static int
392 write_chunk(struct varlena * a_chunk, int ofile)
393 {
394         int                     got_n = 0;
395
396 #ifdef LOARRAY
397         got_n = LOwrite(ofile, a_chunk);
398 #endif
399         return (got_n);
400 }
401
402 /*--------------------------------------------------------------------------
403  * seek_and_read()
404  *        seeks to the asked location in the input file and reads the
405  *        appropriate number of blocks
406  *       Called By: read_chunk()
407  *--------------------------------------------------------------------------
408  */
409 static int
410 seek_and_read(int pos, int size, char buff[], int fp, int from)
411 {
412         struct varlena *v = NULL;
413
414         /* Assuming only one file */
415         if (lo_lseek(fp, pos, from) < 0)
416                 elog(WARN, "File seek error");
417 #ifdef LOARRAY
418         v = (struct varlena *) LOread(fp, size);
419 #endif
420         if (VARSIZE(v) - 4 < size)
421                 elog(WARN, "File read error");
422         memmove(buff, VARDATA(v), size);
423         pfree(v);
424         return (1);
425
426 }
427
428 #endif                                                  /* LOARRAY */
429
430 /*----------------------------------------------------------------------------
431  * _ReadChunkArray --
432  *                returns the subarray specified bu the range indices "st" and "endp"
433  *                from the chunked array stored in file "fp"
434  *---------------------------------------------------------------------------
435  */
436 int
437 _ReadChunkArray(int st[],
438                                 int endp[],
439                                 int bsize,
440                                 int fp,
441                                 char *destfp,
442                                 ArrayType * array,
443                                 int isDestLO,
444                                 bool * isNull)
445 {
446         int                     i,
447                                 j,
448                                 jj;
449         int                     n,
450                                 temp,
451                                 words_read;
452         int                     chunk_span[MAXDIM],
453                                 chunk_off[MAXDIM];
454         int                     chunk_st[MAXDIM],
455                                 chunk_end[MAXDIM];
456         int                     block_seek;
457
458         int                     bptr,
459                            *C,
460                                 csize,
461                            *dim,
462                            *lb;
463         int                     range_st[MAXDIM],
464                                 range_end[MAXDIM],
465                                 range[MAXDIM],
466                                 array_span[MAXDIM];
467         int                     PA[MAXDIM],
468                                 PCHUNK[MAXDIM],
469                                 PC[MAXDIM];
470         int                     to_read;
471         int                     cdist[MAXDIM],
472                                 adist[MAXDIM];
473         int                     dist[MAXDIM],
474                                 temp_seek;
475
476         int                     srcOff;                 /* Needed since LO don't understand
477                                                                  * SEEK_CUR */
478         char       *baseDestFp = (char *) destfp;
479
480         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
481
482         n = ARR_NDIM(array);
483         dim = ARR_DIMS(array);
484         lb = ARR_LBOUND(array);
485         C = A->C;
486
487         csize = C[n - 1];
488         PC[n - 1] = 1;
489         temp = dim[n - 1] / C[n - 1];
490         for (i = n - 2; i >= 0; i--)
491         {
492                 PC[i] = PC[i + 1] * temp;
493                 temp = dim[i] / C[i];
494                 csize *= C[i];
495         }
496
497         for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
498                 ;
499         mda_get_prod(n, C, PCHUNK);
500         mda_get_range(n, array_span, st, endp);
501         mda_get_prod(n, array_span, PA);
502
503         array2chunk_coord(n, C, st, chunk_st);
504         array2chunk_coord(n, C, endp, chunk_end);
505         mda_get_range(n, chunk_span, chunk_st, chunk_end);
506         mda_get_offset_values(n, dist, PC, chunk_span);
507
508         for (i = 0; i < n; i++)
509         {
510                 range_st[i] = st[i];
511                 range_end[i] = min(chunk_st[i] * C[i] + C[i] - 1, endp[i]);
512         }
513
514         for (i = j = 0; i < n; i++)
515                 j += chunk_st[i] * PC[i];
516         temp_seek = srcOff = j * csize * bsize;
517         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
518                 RETURN_NULL;
519
520         jj = n - 1;
521         for (i = 0; i < n; chunk_off[i++] = 0)
522                 ;
523         words_read = 0;
524         temp_seek = 0;
525         do
526         {
527                 /* Write chunk (chunk_st) to output buffer */
528                 mda_get_range(n, array_span, range_st, range_end);
529                 mda_get_offset_values(n, adist, PA, array_span);
530                 mda_get_offset_values(n, cdist, PCHUNK, array_span);
531                 for (i = 0; i < n; range[i] = range_st[i] - st[i], i++);
532                 bptr = tuple2linear(n, range, PA);
533                 for (i = 0; i < n; range[i++] = 0);
534                 j = n - 1;
535                 bptr *= bsize;
536                 if (isDestLO)
537                 {
538                         if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
539                                 RETURN_NULL;
540                 }
541                 else
542                         destfp = baseDestFp + bptr;
543                 for (i = 0, block_seek = 0; i < n; i++)
544                         block_seek += (range_st[i] - (chunk_st[i] + chunk_off[i])
545                                                    * C[i]) * PCHUNK[i];
546                 if (dist[jj] + block_seek + temp_seek)
547                 {
548                         temp = (dist[jj] * csize + block_seek + temp_seek) * bsize;
549                         srcOff += temp;
550                         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
551                                 RETURN_NULL;
552                 }
553                 for (i = n - 1, to_read = bsize; i >= 0;
554                          to_read *= min(C[i], array_span[i]), i--)
555                         if (cdist[i] || adist[i])
556                                 break;
557                 do
558                 {
559                         if (cdist[j])
560                         {
561                                 srcOff += (cdist[j] * bsize);
562                                 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
563                                         RETURN_NULL;
564                         }
565                         block_seek += cdist[j];
566                         bptr += adist[j] * bsize;
567                         if (isDestLO)
568                         {
569                                 if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
570                                         RETURN_NULL;
571                         }
572                         else
573                                 destfp = baseDestFp + bptr;
574                         temp = _LOtransfer((char **) &destfp, to_read, 1, (char **) &fp, 1, isDestLO);
575                         if (temp < to_read)
576                                 RETURN_NULL;
577                         srcOff += to_read;
578                         words_read += to_read;
579                         bptr += to_read;
580                         block_seek += (to_read / bsize);
581
582                         /*
583                          * compute next tuple in range[]
584                          */
585                         {
586                                 int                     x;
587
588                                 if (!(i + 1))
589                                         j = -1;
590                                 else
591                                 {
592                                         range[i] = (range[i] + 1) % array_span[i];
593                                         for (x = i; x * (!range[x]); x--)
594                                                 range[x - 1] = (range[x - 1] + 1) % array_span[x - 1];
595                                         if (x)
596                                                 j = x;
597                                         else
598                                         {
599                                                 if (range[0])
600                                                         j = 0;
601                                                 else
602                                                         j = -1;
603                                         }
604                                 }
605                         }
606
607                         /*
608                          * end of compute next tuple -- j is set to -1 if tuple
609                          * generation is over
610                          */
611                 } while (j != -1);
612
613                 block_seek = csize - block_seek;
614                 temp_seek = block_seek;
615                 jj = next_tuple(n, chunk_off, chunk_span);
616                 if (jj == -1)
617                         break;
618                 range_st[jj] = (chunk_st[jj] + chunk_off[jj]) * C[jj];
619                 range_end[jj] = min(range_st[jj] + C[jj] - 1, endp[jj]);
620
621                 for (i = jj + 1; i < n; i++)
622                 {
623                         range_st[i] = st[i];
624                         range_end[i] = min((chunk_st[i] + chunk_off[i]) * C[i] + C[i] - 1, endp[i]);
625                 }
626         } while (jj != -1);
627         return (words_read);
628 }
629
630 /*------------------------------------------------------------------------
631  * _ReadChunkArray1El --
632  *               returns one element of the chunked array as specified by the index "st"
633  *               the chunked file descriptor is "fp"
634  *-------------------------------------------------------------------------
635  */
636 struct varlena *
637 _ReadChunkArray1El(int st[],
638                                    int bsize,
639                                    int fp,
640                                    ArrayType * array,
641                                    bool * isNull)
642 {
643         int                     i,
644                                 j,
645                                 n,
646                                 temp,
647                                 srcOff;
648         int                     chunk_st[MAXDIM];
649
650         int                *C,
651                                 csize,
652                            *dim,
653                            *lb;
654         int                     PCHUNK[MAXDIM],
655                                 PC[MAXDIM];
656
657         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
658
659         n = ARR_NDIM(array);
660         lb = ARR_LBOUND(array);
661         C = A->C;
662         dim = ARR_DIMS(array);
663
664         csize = C[n - 1];
665         PC[n - 1] = 1;
666         temp = dim[n - 1] / C[n - 1];
667         for (i = n - 2; i >= 0; i--)
668         {
669                 PC[i] = PC[i + 1] * temp;
670                 temp = dim[i] / C[i];
671                 csize *= C[i];
672         }
673
674         for (i = 0; i < n; st[i] -= lb[i], i++);
675         mda_get_prod(n, C, PCHUNK);
676
677         array2chunk_coord(n, C, st, chunk_st);
678
679         for (i = j = 0; i < n; i++)
680                 j += chunk_st[i] * PC[i];
681         srcOff = j * csize;
682
683         for (i = 0; i < n; i++)
684                 srcOff += (st[i] - chunk_st[i] * C[i]) * PCHUNK[i];
685
686         srcOff *= bsize;
687         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
688                 RETURN_NULL;
689 #ifdef LOARRAY
690         return (struct varlena *) LOread(fp, bsize);
691 #endif
692         return (struct varlena *) 0;
693 }