]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/chunk.c
Move some system includes into c.h, and remove duplicates.
[postgresql] / src / backend / utils / adt / chunk.c
1 /*-------------------------------------------------------------------------
2  *
3  * chunk.c
4  *
5  * Copyright (c) 1994, Regents of the University of California
6  *
7  *
8  * IDENTIFICATION
9  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.23 1999/07/17 20:17:54 momjian Exp $
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <fcntl.h>
16
17 #include "postgres.h"
18
19 #include "catalog/pg_type.h"
20 #include "fmgr.h"
21 #include "libpq/be-fsstubs.h"
22 #include "libpq/libpq-fs.h"
23 #include "optimizer/internal.h"
24 #include "utils/array.h"
25 #include "utils/memutils.h"
26
27 #define INFTY 500000000
28 #define MANY  10000
29 #define MAXPAT 20
30 #define quot_ceil(x,y)  (((x)+(y)-1)/(y))
31 #define min(x,y)                (((x) < (y))? (x) : (y))
32 #define max(x,y)                (((x) > (y))? (x) : (y))
33
34 static CHUNK_INFO cInfo;
35
36 /* non-export function prototypes */
37 static int _FindBestChunk(int size, int *dmax, int *dbest, int dim,
38                            int A[MAXPAT][MAXDIM + 1], int N);
39 static int      get_next(int *d, int k, int C, int *dmax);
40 static void initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk);
41
42 #ifdef LOARRAY
43 static void _ConvertToChunkFile(int n, int baseSize, int *dim, int *C,
44                                         int srcfd, int destfd);
45 static void read_chunk(int *chunk_no, int *C, char *a_chunk, int srcfd,
46                    int n, int baseSize, int *PX, int *dist);
47 static int      write_chunk(struct varlena * a_chunk, int ofile);
48 static int      seek_and_read(int pos, int size, char *buff, int fp, int from);
49
50 #endif
51 static int GetChunkSize(FILE *fd, int ndim, int dim[MAXDIM], int baseSize,
52                          int d[MAXDIM]);
53
54 /*------------------------------------------------------------------------
55  * _ChunkArray ---
56  *         converts an input array to chunked format using the information
57  *         provided by the access pattern.
58  * Results:
59  *         creates a new file that stores the chunked array and returns
60  *         information about the chunked file
61  *-----------------------------------------------------------------------
62  */
63 char *
64 _ChunkArray(int fd,
65                         FILE *afd,
66                         int ndim,
67                         int *dim,
68                         int baseSize,
69                         int *nbytes,
70                         char *chunkfile)
71 {
72 #ifdef LOARRAY
73         int                     cfd = 0;
74
75 #endif
76         int                     chunk[MAXDIM],
77                                 csize;
78         bool            reorgFlag;
79
80         if (chunkfile == NULL)
81                 reorgFlag = true;
82         else
83                 reorgFlag = false;
84
85 #ifdef LOARRAY
86         if (reorgFlag)
87                 /* create new LO for chunked file */
88                 chunkfile = _array_newLO(&cfd, fileFlag);
89         else
90                 cfd = LOopen(chunkfile, O_RDONLY);
91         if (cfd < 0)
92                 elog(ERROR, "Unable to open chunk file");
93 #endif
94
95         strcpy(cInfo.lo_name, chunkfile);
96
97         /* find chunk size */
98         csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
99
100 #ifdef LOARRAY
101         if (reorgFlag)
102                 /* copy data from input file to chunked file */
103                 _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
104 #endif
105
106         initialize_info(&cInfo, ndim, dim, chunk);
107         *nbytes = sizeof(CHUNK_INFO);
108         return (char *) &cInfo;
109 }
110
111 /*--------------------------------------------------------------------------
112  * GetChunkSize
113  *                given an access pattern and array dimensionality etc, this program
114  *              returns the dimensions of the chunk in "d"
115  *-----------------------------------------------------------------------
116  */
117 static int
118 GetChunkSize(FILE *fd,
119                          int ndim,
120                          int dim[MAXDIM],
121                          int baseSize,
122                          int d[MAXDIM])
123 {
124         int                     N,
125                                 i,
126                                 j,
127                                 csize;
128         int                     A[MAXPAT][MAXDIM + 1],
129                                 dmax[MAXDIM];
130
131         /*
132          * ----------- read input ------------
133          */
134         fscanf(fd, "%d", &N);
135         if (N > MAXPAT)
136                 elog(ERROR, "array_in: too many access pattern elements");
137         for (i = 0; i < N; i++)
138                 for (j = 0; j < ndim + 1; j++)
139                         if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
140                                 elog(ERROR, "array_in: bad access pattern input");
141
142         /*
143          * estimate chunk size
144          */
145         for (i = 0; i < ndim; i++)
146                 for (j = 0, dmax[i] = 1; j < N; j++)
147                         if (dmax[i] < A[j][i])
148                                 dmax[i] = A[j][i];
149         csize = BLCKSZ / baseSize;
150
151         _FindBestChunk(csize, dmax, d, ndim, A, N);
152
153         return csize;
154 }
155
156 /*-------------------------------------------------------------------------
157  * _FindBestChunk
158  *                This routine does most of the number crunching to compute the
159  *                optimal chunk shape.
160  * Called by GetChunkSize
161  *------------------------------------------------------------------------
162  */
163 static int
164 _FindBestChunk(int size,
165                            int *dmax,
166                            int *dbest,
167                            int dim,
168                            int A[MAXPAT][MAXDIM + 1],
169                            int N)
170 {
171         int                     d[MAXDIM];
172         int                     tc,
173                                 mintc = INFTY;
174
175         d[0] = 0;
176         mintc = INFTY;
177         while (get_next(d, dim, size, dmax))
178         {
179
180                 /*
181                  * compute the number of page fetches for a given chunk size (*d)
182                  * and access pattern (**A)
183                  */
184                 int                     i,
185                                         j,
186                                         nc;
187
188                 for (i = 0, tc = 0; i < N; i++)
189                 {
190                         for (j = 0, nc = 1; j < dim; j++)
191                                 nc *= quot_ceil(A[i][j], d[j]);
192                         nc *= A[i][dim];
193                         tc += nc;
194                 }
195
196                 /*
197                  * tc holds the total number of page fetches
198                  */
199                 if (mintc >= tc)
200                 {
201                         mintc = tc;
202                         for (j = 0; j < dim; dbest[j] = d[j], j++)
203                                 ;
204                 }
205         }
206         return mintc;
207 }
208
209 /*----------------------------------------------------------------------
210  * get_next
211  *       Called by _GetBestChunk to get the next tuple in the lexicographic order
212  *---------------------------------------------------------------------
213  */
214 static int
215 get_next(int *d, int k, int C, int *dmax)
216 {
217         int                     i,
218                                 j,
219                                 temp;
220
221         if (!d[0])
222         {
223                 temp = C;
224                 for (j = k - 1; j >= 0; j--)
225                 {
226                         d[j] = min(temp, dmax[j]);
227                         temp = max(1, temp / d[j]);
228                 }
229                 return 1;
230         }
231
232         for (j = 0, temp = 1; j < k; j++)
233                 temp *= d[j];
234
235         for (i = k - 1; i >= 0; i--)
236         {
237                 temp = temp / d[i];
238                 if (((temp * (d[i] + 1)) < C) && (d[i] + 1 <= dmax[i]))
239                         break;
240         }
241         if (i < 0)
242                 return 0;
243
244         d[i]++;
245         j = C / temp;
246         d[i] = min(dmax[i], j / (j / d[i]));
247         temp = temp * d[i];
248         temp = C / temp;
249
250         for (j = k - 1; j > i; j--)
251         {
252                 d[j] = min(temp, dmax[j]);
253                 temp = max(1, temp / d[j]);
254         }
255         return 1;
256 }
257
258 #ifdef LOARRAY
259 static char a_chunk[BLCKSZ + VARHDRSZ]; /* VARHDRSZ since a_chunk is in
260                                                                                  * varlena format */
261
262 #endif
263
264 static void
265 initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk)
266 {
267         int                     i;
268
269         for (i = 0; i < ndim; i++)
270                 A->C[i] = chunk[i];
271 }
272
273 /*--------------------------------------------------------------------------
274  * Procedure reorganize_data():
275  *        This procedure reads the input multidimensional array that is organised
276  *        in the order specified by array "X" and breaks it up into chunks of
277  *        dimensions specified in "C".
278  *
279  *        This is a very slow process, since reading and writing of LARGE files
280  *        may be involved.
281  *
282  *-------------------------------------------------------------------------
283  */
284 #ifdef LOARRAY
285 static void
286 _ConvertToChunkFile(int n,
287                                         int baseSize,
288                                         int *dim,
289                                         int *C,
290                                         int srcfd,
291                                         int destfd)
292 {
293         int                     max_chunks[MAXDIM],
294                                 chunk_no[MAXDIM];
295         int                     PX[MAXDIM],
296                                 dist[MAXDIM];
297         int                     csize = 1,
298                                 i,
299                                 temp;
300
301         for (i = 0; i < n; chunk_no[i++] = 0)
302         {
303                 max_chunks[i] = dim[i] / C[i];
304                 csize *= C[i];
305         }
306         csize *= baseSize;
307         temp = csize + VARHDRSZ;
308         memmove(a_chunk, &temp, VARHDRSZ);
309
310         mda_get_prod(n, dim, PX);
311         mda_get_offset_values(n, dist, PX, C);
312         for (i = 0; i < n; dist[i] *= baseSize, i++)
313                 ;
314         do
315         {
316                 read_chunk(chunk_no, C, &(a_chunk[VARHDRSZ]), srcfd, n, baseSize, PX, dist);
317                 write_chunk((struct varlena *) a_chunk, destfd);
318         } while (next_tuple(n, chunk_no, max_chunks) != -1);
319 }
320
321 /*--------------------------------------------------------------------------
322  * read_chunk
323  *        reads a chunk from the input files into a_chunk, the position of the
324  *        chunk is specified by chunk_no
325  *--------------------------------------------------------------------------
326  */
327 static void
328 read_chunk(int *chunk_no,
329                    int *C,
330                    char *a_chunk,
331                    int srcfd,
332                    int n,
333                    int baseSize,
334                    int *PX,
335                    int *dist)
336 {
337         int                     i,
338                                 j,
339                                 cp,
340                                 unit_transfer;
341         int                     start_pos,
342                                 pos[MAXDIM];
343         int                     indx[MAXDIM];
344         int                     fpOff;
345
346         for (i = start_pos = 0; i < n; i++)
347         {
348                 pos[i] = chunk_no[i] * C[i];
349                 start_pos += pos[i] * PX[i];
350         }
351         start_pos *= baseSize;
352
353         /* Read a block of dimesion C starting at co-ordinates pos */
354         unit_transfer = C[n - 1] * baseSize;
355
356         for (i = 0; i < n; indx[i++] = 0)
357                 ;
358         fpOff = start_pos;
359         seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
360         fpOff += unit_transfer;
361         cp = unit_transfer;
362
363         while ((j = next_tuple(n - 1, indx, C)) != -1)
364         {
365                 fpOff += dist[j];
366                 seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
367                 cp += unit_transfer;
368                 fpOff += unit_transfer;
369         }
370 }
371
372 /*--------------------------------------------------------------------------
373  * write_chunk()
374  *        writes a chunk of size csize into the output file
375  *--------------------------------------------------------------------------
376  */
377 static int
378 write_chunk(struct varlena * a_chunk, int ofile)
379 {
380         int                     got_n = 0;
381
382 #ifdef LOARRAY
383         got_n = LOwrite(ofile, a_chunk);
384 #endif
385         return got_n;
386 }
387
388 /*--------------------------------------------------------------------------
389  * seek_and_read()
390  *        seeks to the asked location in the input file and reads the
391  *        appropriate number of blocks
392  *       Called By: read_chunk()
393  *--------------------------------------------------------------------------
394  */
395 static int
396 seek_and_read(int pos, int size, char *buff, int fp, int from)
397 {
398         struct varlena *v = NULL;
399
400         /* Assuming only one file */
401         if (lo_lseek(fp, pos, from) < 0)
402                 elog(ERROR, "File seek error");
403 #ifdef LOARRAY
404         v = (struct varlena *) LOread(fp, size);
405 #endif
406         if (VARSIZE(v) - VARHDRSZ < size)
407                 elog(ERROR, "File read error");
408         memmove(buff, VARDATA(v), size);
409         pfree(v);
410         return 1;
411
412 }
413
414 #endif   /* LOARRAY */
415
416 /*----------------------------------------------------------------------------
417  * _ReadChunkArray
418  *                returns the subarray specified bu the range indices "st" and "endp"
419  *                from the chunked array stored in file "fp"
420  *---------------------------------------------------------------------------
421  */
422 int
423 _ReadChunkArray(int *st,
424                                 int *endp,
425                                 int bsize,
426                                 int fp,
427                                 char *destfp,
428                                 ArrayType *array,
429                                 int isDestLO,
430                                 bool *isNull)
431 {
432         int                     i,
433                                 j,
434                                 jj;
435         int                     n,
436                                 temp,
437                                 words_read;
438         int                     chunk_span[MAXDIM],
439                                 chunk_off[MAXDIM];
440         int                     chunk_st[MAXDIM],
441                                 chunk_end[MAXDIM];
442         int                     block_seek;
443
444         int                     bptr,
445                            *C,
446                                 csize,
447                            *dim,
448                            *lb;
449         int                     range_st[MAXDIM],
450                                 range_end[MAXDIM],
451                                 range[MAXDIM],
452                                 array_span[MAXDIM];
453         int                     PA[MAXDIM],
454                                 PCHUNK[MAXDIM],
455                                 PC[MAXDIM];
456         int                     to_read;
457         int                     cdist[MAXDIM],
458                                 adist[MAXDIM];
459         int                     dist[MAXDIM],
460                                 temp_seek;
461
462         int                     srcOff;                 /* Needed since LO don't understand
463                                                                  * SEEK_CUR */
464         char       *baseDestFp = (char *) destfp;
465
466         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
467
468         n = ARR_NDIM(array);
469         dim = ARR_DIMS(array);
470         lb = ARR_LBOUND(array);
471         C = A->C;
472
473         csize = C[n - 1];
474         PC[n - 1] = 1;
475         temp = dim[n - 1] / C[n - 1];
476         for (i = n - 2; i >= 0; i--)
477         {
478                 PC[i] = PC[i + 1] * temp;
479                 temp = dim[i] / C[i];
480                 csize *= C[i];
481         }
482
483         for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
484                 ;
485         mda_get_prod(n, C, PCHUNK);
486         mda_get_range(n, array_span, st, endp);
487         mda_get_prod(n, array_span, PA);
488
489         array2chunk_coord(n, C, st, chunk_st);
490         array2chunk_coord(n, C, endp, chunk_end);
491         mda_get_range(n, chunk_span, chunk_st, chunk_end);
492         mda_get_offset_values(n, dist, PC, chunk_span);
493
494         for (i = 0; i < n; i++)
495         {
496                 range_st[i] = st[i];
497                 range_end[i] = min(chunk_st[i] * C[i] + C[i] - 1, endp[i]);
498         }
499
500         for (i = j = 0; i < n; i++)
501                 j += chunk_st[i] * PC[i];
502         temp_seek = srcOff = j * csize * bsize;
503         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
504                 RETURN_NULL;
505
506         jj = n - 1;
507         for (i = 0; i < n; chunk_off[i++] = 0)
508                 ;
509         words_read = 0;
510         temp_seek = 0;
511         do
512         {
513                 /* Write chunk (chunk_st) to output buffer */
514                 mda_get_range(n, array_span, range_st, range_end);
515                 mda_get_offset_values(n, adist, PA, array_span);
516                 mda_get_offset_values(n, cdist, PCHUNK, array_span);
517                 for (i = 0; i < n; range[i] = range_st[i] - st[i], i++);
518                 bptr = tuple2linear(n, range, PA);
519                 for (i = 0; i < n; range[i++] = 0);
520                 j = n - 1;
521                 bptr *= bsize;
522                 if (isDestLO)
523                 {
524                         if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
525                                 RETURN_NULL;
526                 }
527                 else
528                         destfp = baseDestFp + bptr;
529                 for (i = 0, block_seek = 0; i < n; i++)
530                         block_seek += (range_st[i] - (chunk_st[i] + chunk_off[i])
531                                                    * C[i]) * PCHUNK[i];
532                 if (dist[jj] + block_seek + temp_seek)
533                 {
534                         temp = (dist[jj] * csize + block_seek + temp_seek) * bsize;
535                         srcOff += temp;
536                         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
537                                 RETURN_NULL;
538                 }
539                 for (i = n - 1, to_read = bsize; i >= 0;
540                          to_read *= min(C[i], array_span[i]), i--)
541                         if (cdist[i] || adist[i])
542                                 break;
543                 do
544                 {
545                         if (cdist[j])
546                         {
547                                 srcOff += (cdist[j] * bsize);
548                                 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
549                                         RETURN_NULL;
550                         }
551                         block_seek += cdist[j];
552                         bptr += adist[j] * bsize;
553                         if (isDestLO)
554                         {
555                                 if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
556                                         RETURN_NULL;
557                         }
558                         else
559                                 destfp = baseDestFp + bptr;
560                         temp = _LOtransfer((char **) &destfp, to_read, 1, (char **) &fp, 1, isDestLO);
561                         if (temp < to_read)
562                                 RETURN_NULL;
563                         srcOff += to_read;
564                         words_read += to_read;
565                         bptr += to_read;
566                         block_seek += (to_read / bsize);
567
568                         /*
569                          * compute next tuple in *range
570                          */
571                         {
572                                 int                     x;
573
574                                 if (!(i + 1))
575                                         j = -1;
576                                 else
577                                 {
578                                         range[i] = (range[i] + 1) % array_span[i];
579                                         for (x = i; x * (!range[x]); x--)
580                                                 range[x - 1] = (range[x - 1] + 1) % array_span[x - 1];
581                                         if (x)
582                                                 j = x;
583                                         else
584                                         {
585                                                 if (range[0])
586                                                         j = 0;
587                                                 else
588                                                         j = -1;
589                                         }
590                                 }
591                         }
592
593                         /*
594                          * end of compute next tuple -- j is set to -1 if tuple
595                          * generation is over
596                          */
597                 } while (j != -1);
598
599                 block_seek = csize - block_seek;
600                 temp_seek = block_seek;
601                 jj = next_tuple(n, chunk_off, chunk_span);
602                 if (jj == -1)
603                         break;
604                 range_st[jj] = (chunk_st[jj] + chunk_off[jj]) * C[jj];
605                 range_end[jj] = min(range_st[jj] + C[jj] - 1, endp[jj]);
606
607                 for (i = jj + 1; i < n; i++)
608                 {
609                         range_st[i] = st[i];
610                         range_end[i] = min((chunk_st[i] + chunk_off[i]) * C[i] + C[i] - 1, endp[i]);
611                 }
612         } while (jj != -1);
613         return words_read;
614 }
615
616 /*------------------------------------------------------------------------
617  * _ReadChunkArray1El
618  *               returns one element of the chunked array as specified by the index "st"
619  *               the chunked file descriptor is "fp"
620  *-------------------------------------------------------------------------
621  */
622 struct varlena *
623 _ReadChunkArray1El(int *st,
624                                    int bsize,
625                                    int fp,
626                                    ArrayType *array,
627                                    bool *isNull)
628 {
629         int                     i,
630                                 j,
631                                 n,
632                                 temp,
633                                 srcOff;
634         int                     chunk_st[MAXDIM];
635
636         int                *C,
637                                 csize,
638                            *dim,
639                            *lb;
640         int                     PCHUNK[MAXDIM],
641                                 PC[MAXDIM];
642
643         CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
644
645         n = ARR_NDIM(array);
646         lb = ARR_LBOUND(array);
647         C = A->C;
648         dim = ARR_DIMS(array);
649
650         csize = C[n - 1];
651         PC[n - 1] = 1;
652         temp = dim[n - 1] / C[n - 1];
653         for (i = n - 2; i >= 0; i--)
654         {
655                 PC[i] = PC[i + 1] * temp;
656                 temp = dim[i] / C[i];
657                 csize *= C[i];
658         }
659
660         for (i = 0; i < n; st[i] -= lb[i], i++);
661         mda_get_prod(n, C, PCHUNK);
662
663         array2chunk_coord(n, C, st, chunk_st);
664
665         for (i = j = 0; i < n; i++)
666                 j += chunk_st[i] * PC[i];
667         srcOff = j * csize;
668
669         for (i = 0; i < n; i++)
670                 srcOff += (st[i] - chunk_st[i] * C[i]) * PCHUNK[i];
671
672         srcOff *= bsize;
673         if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
674                 RETURN_NULL;
675 #ifdef LOARRAY
676         return (struct varlena *) LOread(fp, bsize);
677 #endif
678         return (struct varlena *) 0;
679 }