]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/chunk.c
Change #include's to use <> and "" as appropriate.
[postgresql] / src / backend / utils / adt / chunk.c
1 /*-------------------------------------------------------------------------
2  *
3  * chunk.c
4  *
5  * Copyright (c) 1994, Regents of the University of California
6  *
7  *
8  * IDENTIFICATION
9  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.21 1999/07/15 23:03:26 momjian Exp $
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <fcntl.h>
16
17 #include "postgres.h"
18 #include "fmgr.h"
19
20 #include "catalog/pg_type.h"
21 #include "libpq/be-fsstubs.h"
22 #include "libpq/libpq-fs.h"
23 #include "optimizer/internal.h"
24 #include "utils/array.h"
25 #include "utils/memutils.h"
26
27 #ifndef HAVE_MEMMOVE
28 #include "regex/utils.h"
29 #else
30 #include <string.h>
31 #endif
32
33 #define INFTY 500000000
34 #define MANY  10000
35 #define MAXPAT 20
36 #define quot_ceil(x,y)  (((x)+(y)-1)/(y))
37 #define min(x,y)                (((x) < (y))? (x) : (y))
38 #define max(x,y)                (((x) > (y))? (x) : (y))
39
40 static CHUNK_INFO cInfo;
41
42 /* non-export function prototypes */
43 static int _FindBestChunk(int size, int *dmax, int *dbest, int dim,
44                            int A[MAXPAT][MAXDIM + 1], int N);
45 static int      get_next(int *d, int k, int C, int *dmax);
46 static void initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk);
47
48 #ifdef LOARRAY
49 static void _ConvertToChunkFile(int n, int baseSize, int *dim, int *C,
50                                         int srcfd, int destfd);
51 static void read_chunk(int *chunk_no, int *C, char *a_chunk, int srcfd,
52                    int n, int baseSize, int *PX, int *dist);
53 static int      write_chunk(struct varlena * a_chunk, int ofile);
54 static int      seek_and_read(int pos, int size, char *buff, int fp, int from);
55
56 #endif
57 static int GetChunkSize(FILE *fd, int ndim, int dim[MAXDIM], int baseSize,
58                          int d[MAXDIM]);
59
60 /*------------------------------------------------------------------------
61  * _ChunkArray ---
62  *         converts an input array to chunked format using the information
63  *         provided by the access pattern.
64  * Results:
65  *         creates a new file that stores the chunked array and returns
66  *         information about the chunked file
67  *-----------------------------------------------------------------------
68  */
69 char *
70 _ChunkArray(int fd,
71                         FILE *afd,
72                         int ndim,
73                         int *dim,
74                         int baseSize,
75                         int *nbytes,
76                         char *chunkfile)
77 {
78 #ifdef LOARRAY
79         int                     cfd = 0;
80
81 #endif
82         int                     chunk[MAXDIM],
83                                 csize;
84         bool            reorgFlag;
85
86         if (chunkfile == NULL)
87                 reorgFlag = true;
88         else
89                 reorgFlag = false;
90
91 #ifdef LOARRAY
92         if (reorgFlag)
93                 /* create new LO for chunked file */
94                 chunkfile = _array_newLO(&cfd, fileFlag);
95         else
96                 cfd = LOopen(chunkfile, O_RDONLY);
97         if (cfd < 0)
98                 elog(ERROR, "Unable to open chunk file");
99 #endif
100
101         strcpy(cInfo.lo_name, chunkfile);
102
103         /* find chunk size */
104         csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
105
106 #ifdef LOARRAY
107         if (reorgFlag)
108                 /* copy data from input file to chunked file */
109                 _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
110 #endif
111
112         initialize_info(&cInfo, ndim, dim, chunk);
113         *nbytes = sizeof(CHUNK_INFO);
114         return (char *) &cInfo;
115 }
116
117 /*--------------------------------------------------------------------------
118  * GetChunkSize
119  *                given an access pattern and array dimensionality etc, this program
120  *              returns the dimensions of the chunk in "d"
121  *-----------------------------------------------------------------------
122  */
123 static int
124 GetChunkSize(FILE *fd,
125                          int ndim,
126                          int dim[MAXDIM],
127                          int baseSize,
128                          int d[MAXDIM])
129 {
130         int                     N,
131                                 i,
132                                 j,
133                                 csize;
134         int                     A[MAXPAT][MAXDIM + 1],
135                                 dmax[MAXDIM];
136
137         /*
138          * ----------- read input ------------
139          */
140         fscanf(fd, "%d", &N);
141         if (N > MAXPAT)
142                 elog(ERROR, "array_in: too many access pattern elements");
143         for (i = 0; i < N; i++)
144                 for (j = 0; j < ndim + 1; j++)
145                         if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
146                                 elog(ERROR, "array_in: bad access pattern input");
147
148         /*
149          * estimate chunk size
150          */
151         for (i = 0; i < ndim; i++)
152                 for (j = 0, dmax[i] = 1; j < N; j++)
153                         if (dmax[i] < A[j][i])
154                                 dmax[i] = A[j][i];
155         csize = BLCKSZ / baseSize;
156
157         _FindBestChunk(csize, dmax, d, ndim, A, N);
158
159         return csize;
160 }
161
162 /*-------------------------------------------------------------------------
163  * _FindBestChunk
164  *                This routine does most of the number crunching to compute the
165  *                optimal chunk shape.
166  * Called by GetChunkSize
167  *------------------------------------------------------------------------
168  */
169 static int
170 _FindBestChunk(int size,
171                            int *dmax,
172                            int *dbest,
173                            int dim,
174                            int A[MAXPAT][MAXDIM + 1],
175                            int N)
176 {
177         int                     d[MAXDIM];
178         int                     tc,
179                                 mintc = INFTY;
180
181         d[0] = 0;
182         mintc = INFTY;
183         while (get_next(d, dim, size, dmax))
184         {
185
186                 /*
187                  * compute the number of page fetches for a given chunk size (*d)
188                  * and access pattern (**A)
189                  */
190                 int                     i,
191                                         j,
192                                         nc;
193
194                 for (i = 0, tc = 0; i < N; i++)
195                 {
196                         for (j = 0, nc = 1; j < dim; j++)
197                                 nc *= quot_ceil(A[i][j], d[j]);
198                         nc *= A[i][dim];
199                         tc += nc;
200                 }
201
202                 /*
203                  * tc holds the total number of page fetches
204                  */
205                 if (mintc >= tc)
206                 {
207                         mintc = tc;
208                         for (j = 0; j < dim; dbest[j] = d[j], j++)
209                                 ;
210                 }
211         }
212         return mintc;
213 }
214
215 /*----------------------------------------------------------------------
216  * get_next
217  *       Called by _GetBestChunk to get the next tuple in the lexicographic order
218  *---------------------------------------------------------------------
219  */
220 static int
221 get_next(int *d, int k, int C, int *dmax)
222 {
223         int                     i,
224                                 j,
225                                 temp;
226
227         if (!d[0])
228         {
229                 temp = C;
230                 for (j = k - 1; j >= 0; j--)
231                 {
232                         d[j] = min(temp, dmax[j]);
233                         temp = max(1, temp / d[j]);
234                 }
235                 return 1;
236         }
237
238         for (j = 0, temp = 1; j < k; j++)
239                 temp *= d[j];
240
241         for (i = k - 1; i >= 0; i--)
242         {
243                 temp = temp / d[i];
244                 if (((temp * (d[i] + 1)) < C) && (d[i] + 1 <= dmax[i]))
245                         break;
246         }
247         if (i < 0)
248                 return 0;
249
250         d[i]++;
251         j = C / temp;
252         d[i] = min(dmax[i], j / (j / d[i]));
253         temp = temp * d[i];
254         temp = C / temp;
255
256         for (j = k - 1; j > i; j--)
257         {
258                 d[j] = min(temp, dmax[j]);
259                 temp = max(1, temp / d[j]);
260         }
261         return 1;
262 }
263
264 #ifdef LOARRAY
265 static char a_chunk[BLCKSZ + VARHDRSZ]; /* VARHDRSZ since a_chunk is in
266                                                                                  * varlena format */
267
268 #endif
269
270 static void
271 initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk)
272 {
273         int                     i;
274
275         for (i = 0; i < ndim; i++)
276                 A->C[i] = chunk[i];
277 }
278
279 /*--------------------------------------------------------------------------
280  * Procedure reorganize_data():
281  *        This procedure reads the input multidimensional array that is organised
282  *        in the order specified by array "X" and breaks it up into chunks of
283  *        dimensions specified in "C".
284  *
285  *        This is a very slow process, since reading and writing of LARGE files
286  *        may be involved.
287  *
288  *-------------------------------------------------------------------------
289  */
290 #ifdef LOARRAY
291 static void
292 _ConvertToChunkFile(int n,
293                                         int baseSize,
294                                         int *dim,
295                                         int *C,
296                                         int srcfd,
297                                         int destfd)
298 {
299         int                     max_chunks[MAXDIM],
300                                 chunk_no[MAXDIM];
301         int                     PX[MAXDIM],
302                                 dist[MAXDIM];
303         int                     csize = 1,
304                                 i,
305                                 temp;
306
307         for (i = 0; i < n; chunk_no[i++] = 0)
308         {
309                 max_chunks[i] = dim[i] / C[i];
310                 csize *= C[i];
311         }
312         csize *= baseSize;
313         temp = csize + VARHDRSZ;
314         memmove(a_chunk, &temp, VARHDRSZ);
315
316         mda_get_prod(n, dim, PX);
317         mda_get_offset_values(n, dist, PX, C);
318         for (i = 0; i < n; dist[i] *= baseSize, i++)
319                 ;
320         do
321         {
322                 read_chunk(chunk_no, C, &(a_chunk[VARHDRSZ]), srcfd, n, baseSize, PX, dist);
323                 write_chunk((struct varlena *) a_chunk, destfd);
324         } while (next_tuple(n, chunk_no, max_chunks) != -1);
325 }
326
327 /*--------------------------------------------------------------------------
328  * read_chunk
329  *        reads a chunk from the input files into a_chunk, the position of the
330  *        chunk is specified by chunk_no
331  *--------------------------------------------------------------------------
332  */
333 static void
334 read_chunk(int *chunk_no,
335                    int *C,
336                    char *a_chunk,
337                    int srcfd,
338                    int n,
339                    int baseSize,
340                    int *PX,
341                    int *dist)
342 {
343         int                     i,
344                                 j,
345                                 cp,
346                                 unit_transfer;
347         int                     start_pos,
348                                 pos[MAXDIM];
349         int                     indx[MAXDIM];
350         int                     fpOff;
351
352         for (i = start_pos = 0; i < n; i++)
353         {
354                 pos[i] = chunk_no[i] * C[i];
355                 start_pos += pos[i] * PX[i];
356         }
357         start_pos *= baseSize;
358
359         /* Read a block of dimesion C starting at co-ordinates pos */
360         unit_transfer = C[n - 1] * baseSize;
361
362         for (i = 0; i < n; indx[i++] = 0)
363                 ;
364         fpOff = start_pos;
365         seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
366         fpOff += unit_transfer;
367         cp = unit_transfer;
368
369         while ((j = next_tuple(n - 1, indx, C)) != -1)
370         {
371                 fpOff += dist[j];
372                 seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
373                 cp += unit_transfer;
374                 fpOff += unit_transfer;
375         }
376 }
377
378 /*--------------------------------------------------------------------------
379  * write_chunk()
380  *        writes a chunk of size csize into the output file
381  *--------------------------------------------------------------------------
382  */
383 static int
384 write_chunk(struct varlena * a_chunk, int ofile)
385 {
386         int                     got_n = 0;
387
388 #ifdef LOARRAY
389         got_n = LOwrite(ofile, a_chunk);
390 #endif
391         return got_n;
392 }
393
394 /*--------------------------------------------------------------------------
395  * seek_and_read()
396  *        seeks to the asked location in the input file and reads the
397  *        appropriate number of blocks
398  *       Called By: read_chunk()
399  *--------------------------------------------------------------------------
400  */
401 static int
402 seek_and_read(int pos, int size, char *buff, int fp, int from)
403 {
404         struct varlena *v = NULL;
405
406         /* Assuming only one file */
407         if (lo_lseek(fp, pos, from) < 0)
408                 elog(ERROR, "File seek error");
409 #ifdef LOARRAY
410         v = (struct varlena *) LOread(fp, size);
411 #endif
412         if (VARSIZE(v) - VARHDRSZ < size)
413                 elog(ERROR, "File read error");
414         memmove(buff, VARDATA(v), size);
415         pfree(v);
416         return 1;
417
418 }
419
420 #endif   /* LOARRAY */
421
422 /*----------------------------------------------------------------------------
423  * _ReadChunkArray
424  *                returns the subarray specified bu the range indices "st" and "endp"
425  *                from the chunked array stored in file "fp"
426  *---------------------------------------------------------------------------
427  */
428 int
429 _ReadChunkArray(int *st,
430                                 int *endp,
431                                 int bsize,
432                                 int fp,
433                                 char *destfp,
434                                 ArrayType *array,
435                                 int isDestLO,
436                                 bool *isNull)
437 {
438         int                     i,
439                                 j,
440                                 jj;
441         int                     n,
442                                 temp,
443                                 words_read;
444         int                     chunk_span[MAXDIM],
445                                 chunk_off[MAXDIM];
446         int                     chunk_st[MAXDIM],
447                                 chunk_end[MAXDIM];
448         int                     block_seek;
449
450         int                     bptr,
451                            *C,
452                                 csize,
453                            *dim,
454                            *lb;
455         int                     range_st[MAXDIM],
456                                 range_end[MAXDIM],
457                                 range[MAXDIM],
458                                 array_span[MAXDIM];
459         int                     PA[MAXDIM],
460                                 PCHUNK[MAXDIM],
461                                 PC[MAXDIM];
462         int                     to_read;
463         int                     cdist[MAXDIM],
464                                 adist[MAXDIM];
465         int                     dist[MAXDIM],
466                                 temp_seek;
467
468         int                     srcOff;                 /* Needed since LO don't understand
469                                                                  * SEEK_CUR */
470         char       *baseDestFp = (char *) destfp;
471
472         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
473
474         n = ARR_NDIM(array);
475         dim = ARR_DIMS(array);
476         lb = ARR_LBOUND(array);
477         C = A->C;
478
479         csize = C[n - 1];
480         PC[n - 1] = 1;
481         temp = dim[n - 1] / C[n - 1];
482         for (i = n - 2; i >= 0; i--)
483         {
484                 PC[i] = PC[i + 1] * temp;
485                 temp = dim[i] / C[i];
486                 csize *= C[i];
487         }
488
489         for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
490                 ;
491         mda_get_prod(n, C, PCHUNK);
492         mda_get_range(n, array_span, st, endp);
493         mda_get_prod(n, array_span, PA);
494
495         array2chunk_coord(n, C, st, chunk_st);
496         array2chunk_coord(n, C, endp, chunk_end);
497         mda_get_range(n, chunk_span, chunk_st, chunk_end);
498         mda_get_offset_values(n, dist, PC, chunk_span);
499
500         for (i = 0; i < n; i++)
501         {
502                 range_st[i] = st[i];
503                 range_end[i] = min(chunk_st[i] * C[i] + C[i] - 1, endp[i]);
504         }
505
506         for (i = j = 0; i < n; i++)
507                 j += chunk_st[i] * PC[i];
508         temp_seek = srcOff = j * csize * bsize;
509         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
510                 RETURN_NULL;
511
512         jj = n - 1;
513         for (i = 0; i < n; chunk_off[i++] = 0)
514                 ;
515         words_read = 0;
516         temp_seek = 0;
517         do
518         {
519                 /* Write chunk (chunk_st) to output buffer */
520                 mda_get_range(n, array_span, range_st, range_end);
521                 mda_get_offset_values(n, adist, PA, array_span);
522                 mda_get_offset_values(n, cdist, PCHUNK, array_span);
523                 for (i = 0; i < n; range[i] = range_st[i] - st[i], i++);
524                 bptr = tuple2linear(n, range, PA);
525                 for (i = 0; i < n; range[i++] = 0);
526                 j = n - 1;
527                 bptr *= bsize;
528                 if (isDestLO)
529                 {
530                         if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
531                                 RETURN_NULL;
532                 }
533                 else
534                         destfp = baseDestFp + bptr;
535                 for (i = 0, block_seek = 0; i < n; i++)
536                         block_seek += (range_st[i] - (chunk_st[i] + chunk_off[i])
537                                                    * C[i]) * PCHUNK[i];
538                 if (dist[jj] + block_seek + temp_seek)
539                 {
540                         temp = (dist[jj] * csize + block_seek + temp_seek) * bsize;
541                         srcOff += temp;
542                         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
543                                 RETURN_NULL;
544                 }
545                 for (i = n - 1, to_read = bsize; i >= 0;
546                          to_read *= min(C[i], array_span[i]), i--)
547                         if (cdist[i] || adist[i])
548                                 break;
549                 do
550                 {
551                         if (cdist[j])
552                         {
553                                 srcOff += (cdist[j] * bsize);
554                                 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
555                                         RETURN_NULL;
556                         }
557                         block_seek += cdist[j];
558                         bptr += adist[j] * bsize;
559                         if (isDestLO)
560                         {
561                                 if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
562                                         RETURN_NULL;
563                         }
564                         else
565                                 destfp = baseDestFp + bptr;
566                         temp = _LOtransfer((char **) &destfp, to_read, 1, (char **) &fp, 1, isDestLO);
567                         if (temp < to_read)
568                                 RETURN_NULL;
569                         srcOff += to_read;
570                         words_read += to_read;
571                         bptr += to_read;
572                         block_seek += (to_read / bsize);
573
574                         /*
575                          * compute next tuple in *range
576                          */
577                         {
578                                 int                     x;
579
580                                 if (!(i + 1))
581                                         j = -1;
582                                 else
583                                 {
584                                         range[i] = (range[i] + 1) % array_span[i];
585                                         for (x = i; x * (!range[x]); x--)
586                                                 range[x - 1] = (range[x - 1] + 1) % array_span[x - 1];
587                                         if (x)
588                                                 j = x;
589                                         else
590                                         {
591                                                 if (range[0])
592                                                         j = 0;
593                                                 else
594                                                         j = -1;
595                                         }
596                                 }
597                         }
598
599                         /*
600                          * end of compute next tuple -- j is set to -1 if tuple
601                          * generation is over
602                          */
603                 } while (j != -1);
604
605                 block_seek = csize - block_seek;
606                 temp_seek = block_seek;
607                 jj = next_tuple(n, chunk_off, chunk_span);
608                 if (jj == -1)
609                         break;
610                 range_st[jj] = (chunk_st[jj] + chunk_off[jj]) * C[jj];
611                 range_end[jj] = min(range_st[jj] + C[jj] - 1, endp[jj]);
612
613                 for (i = jj + 1; i < n; i++)
614                 {
615                         range_st[i] = st[i];
616                         range_end[i] = min((chunk_st[i] + chunk_off[i]) * C[i] + C[i] - 1, endp[i]);
617                 }
618         } while (jj != -1);
619         return words_read;
620 }
621
622 /*------------------------------------------------------------------------
623  * _ReadChunkArray1El
624  *               returns one element of the chunked array as specified by the index "st"
625  *               the chunked file descriptor is "fp"
626  *-------------------------------------------------------------------------
627  */
628 struct varlena *
629 _ReadChunkArray1El(int *st,
630                                    int bsize,
631                                    int fp,
632                                    ArrayType *array,
633                                    bool *isNull)
634 {
635         int                     i,
636                                 j,
637                                 n,
638                                 temp,
639                                 srcOff;
640         int                     chunk_st[MAXDIM];
641
642         int                *C,
643                                 csize,
644                            *dim,
645                            *lb;
646         int                     PCHUNK[MAXDIM],
647                                 PC[MAXDIM];
648
649         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
650
651         n = ARR_NDIM(array);
652         lb = ARR_LBOUND(array);
653         C = A->C;
654         dim = ARR_DIMS(array);
655
656         csize = C[n - 1];
657         PC[n - 1] = 1;
658         temp = dim[n - 1] / C[n - 1];
659         for (i = n - 2; i >= 0; i--)
660         {
661                 PC[i] = PC[i + 1] * temp;
662                 temp = dim[i] / C[i];
663                 csize *= C[i];
664         }
665
666         for (i = 0; i < n; st[i] -= lb[i], i++);
667         mda_get_prod(n, C, PCHUNK);
668
669         array2chunk_coord(n, C, st, chunk_st);
670
671         for (i = j = 0; i < n; i++)
672                 j += chunk_st[i] * PC[i];
673         srcOff = j * csize;
674
675         for (i = 0; i < n; i++)
676                 srcOff += (st[i] - chunk_st[i] * C[i]) * PCHUNK[i];
677
678         srcOff *= bsize;
679         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
680                 RETURN_NULL;
681 #ifdef LOARRAY
682         return (struct varlena *) LOread(fp, bsize);
683 #endif
684         return (struct varlena *) 0;
685 }