1 /*-------------------------------------------------------------------------
5 * Copyright (c) 1994, Regents of the University of California
9 * $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.21 1999/07/15 23:03:26 momjian Exp $
11 *-------------------------------------------------------------------------
14 #include <sys/types.h>
20 #include "catalog/pg_type.h"
21 #include "libpq/be-fsstubs.h"
22 #include "libpq/libpq-fs.h"
23 #include "optimizer/internal.h"
24 #include "utils/array.h"
25 #include "utils/memutils.h"
28 #include "regex/utils.h"
/* Large sentinel constant. NOTE(review): its use is not visible in this excerpt -- presumably an initial "worst case" cost; confirm. */
33 #define INFTY 500000000
/* Integer quotient of x / y rounded up; the formula is only a true ceiling for positive operands. */
36 #define quot_ceil(x,y) (((x)+(y)-1)/(y))
/* Smaller / larger of two values. Each argument is expanded twice, so do not pass expressions with side effects. */
37 #define min(x,y) (((x) < (y))? (x) : (y))
38 #define max(x,y) (((x) > (y))? (x) : (y))
/*
 * Descriptor of the chunk file built by the conversion entry point.  It has
 * static storage because its address is handed back to the caller, so every
 * conversion overwrites the contents seen by earlier callers.
 */
40 static CHUNK_INFO cInfo;
42 /* non-export function prototypes */
/* Chunk-shape search: _FindBestChunk evaluates the candidate shapes that get_next enumerates. */
43 static int _FindBestChunk(int size, int *dmax, int *dbest, int dim,
44 int A[MAXPAT][MAXDIM + 1], int N);
45 static int get_next(int *d, int k, int C, int *dmax);
/* Called with &cInfo after conversion; presumably records the dimensions and chunk shape -- confirm against the body. */
46 static void initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk);
/* Copies the data from the input file into the chunked file, one chunk at a time. */
49 static void _ConvertToChunkFile(int n, int baseSize, int *dim, int *C,
50 int srcfd, int destfd);
/* Per-chunk I/O helpers: read_chunk gathers one chunk via seek_and_read; write_chunk emits it. */
51 static void read_chunk(int *chunk_no, int *C, char *a_chunk, int srcfd,
52 int n, int baseSize, int *PX, int *dist);
53 static int write_chunk(struct varlena * a_chunk, int ofile);
54 static int seek_and_read(int pos, int size, char *buff, int fp, int from);
57 static int GetChunkSize(FILE *fd, int ndim, int dim[MAXDIM], int baseSize,
60 /*------------------------------------------------------------------------
62 * converts an input array to chunked format using the information
63 * provided by the access pattern.
65 * creates a new file that stores the chunked array and returns
66 * information about the chunked file
67 *-----------------------------------------------------------------------
86 if (chunkfile == NULL)
93 /* create new LO for chunked file */
94 chunkfile = _array_newLO(&cfd, fileFlag);
96 cfd = LOopen(chunkfile, O_RDONLY);
98 elog(ERROR, "Unable to open chunk file");
101 strcpy(cInfo.lo_name, chunkfile);
103 /* find chunk size */
104 csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
108 /* copy data from input file to chunked file */
109 _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
112 initialize_info(&cInfo, ndim, dim, chunk);
113 *nbytes = sizeof(CHUNK_INFO);
114 return (char *) &cInfo;
117 /*--------------------------------------------------------------------------
119 * given an access pattern and array dimensionality etc, this program
120 * returns the dimensions of the chunk in "d"
121 *-----------------------------------------------------------------------
124 GetChunkSize(FILE *fd,
134 int A[MAXPAT][MAXDIM + 1],
138 * ----------- read input ------------
140 fscanf(fd, "%d", &N);
142 elog(ERROR, "array_in: too many access pattern elements");
143 for (i = 0; i < N; i++)
144 for (j = 0; j < ndim + 1; j++)
145 if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
146 elog(ERROR, "array_in: bad access pattern input");
149 * estimate chunk size
151 for (i = 0; i < ndim; i++)
152 for (j = 0, dmax[i] = 1; j < N; j++)
153 if (dmax[i] < A[j][i])
155 csize = BLCKSZ / baseSize;
157 _FindBestChunk(csize, dmax, d, ndim, A, N);
162 /*-------------------------------------------------------------------------
164 * This routine does most of the number crunching to compute the
165 * optimal chunk shape.
166 * Called by GetChunkSize
167 *------------------------------------------------------------------------
170 _FindBestChunk(int size,
174 int A[MAXPAT][MAXDIM + 1],
183 while (get_next(d, dim, size, dmax))
187 * compute the number of page fetches for a given chunk size (*d)
188 * and access pattern (**A)
194 for (i = 0, tc = 0; i < N; i++)
196 for (j = 0, nc = 1; j < dim; j++)
197 nc *= quot_ceil(A[i][j], d[j]);
203 * tc holds the total number of page fetches
208 for (j = 0; j < dim; dbest[j] = d[j], j++)
215 /*----------------------------------------------------------------------
217 * Called by _FindBestChunk to get the next tuple in the lexicographic order
218 *---------------------------------------------------------------------
221 get_next(int *d, int k, int C, int *dmax)
230 for (j = k - 1; j >= 0; j--)
232 d[j] = min(temp, dmax[j]);
233 temp = max(1, temp / d[j]);
238 for (j = 0, temp = 1; j < k; j++)
241 for (i = k - 1; i >= 0; i--)
244 if (((temp * (d[i] + 1)) < C) && (d[i] + 1 <= dmax[i]))
252 d[i] = min(dmax[i], j / (j / d[i]));
256 for (j = k - 1; j > i; j--)
258 d[j] = min(temp, dmax[j]);
259 temp = max(1, temp / d[j]);
265 static char a_chunk[BLCKSZ + VARHDRSZ]; /* VARHDRSZ since a_chunk is in
271 initialize_info(CHUNK_INFO *A, int ndim, int *dim, int *chunk)
275 for (i = 0; i < ndim; i++)
279 /*--------------------------------------------------------------------------
280 * Procedure _ConvertToChunkFile():
281 * This procedure reads the input multidimensional array that is organised
282 * in the order specified by array "X" and breaks it up into chunks of
283 * dimensions specified in "C".
285 * This is a very slow process, since reading and writing of LARGE files
288 *-------------------------------------------------------------------------
292 _ConvertToChunkFile(int n,
299 int max_chunks[MAXDIM],
307 for (i = 0; i < n; chunk_no[i++] = 0)
309 max_chunks[i] = dim[i] / C[i];
313 temp = csize + VARHDRSZ;
314 memmove(a_chunk, &temp, VARHDRSZ);
316 mda_get_prod(n, dim, PX);
317 mda_get_offset_values(n, dist, PX, C);
318 for (i = 0; i < n; dist[i] *= baseSize, i++)
322 read_chunk(chunk_no, C, &(a_chunk[VARHDRSZ]), srcfd, n, baseSize, PX, dist);
323 write_chunk((struct varlena *) a_chunk, destfd);
324 } while (next_tuple(n, chunk_no, max_chunks) != -1);
327 /*--------------------------------------------------------------------------
329 * reads a chunk from the input files into a_chunk, the position of the
330 * chunk is specified by chunk_no
331 *--------------------------------------------------------------------------
334 read_chunk(int *chunk_no,
352 for (i = start_pos = 0; i < n; i++)
354 pos[i] = chunk_no[i] * C[i];
355 start_pos += pos[i] * PX[i];
357 start_pos *= baseSize;
359 /* Read a block of dimension C starting at co-ordinates pos */
360 unit_transfer = C[n - 1] * baseSize;
362 for (i = 0; i < n; indx[i++] = 0)
365 seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
366 fpOff += unit_transfer;
369 while ((j = next_tuple(n - 1, indx, C)) != -1)
372 seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
374 fpOff += unit_transfer;
378 /*--------------------------------------------------------------------------
380 * writes a chunk of size csize into the output file
381 *--------------------------------------------------------------------------
384 write_chunk(struct varlena * a_chunk, int ofile)
389 got_n = LOwrite(ofile, a_chunk);
394 /*--------------------------------------------------------------------------
396 * seeks to the asked location in the input file and reads the
397 * appropriate number of blocks
398 * Called By: read_chunk()
399 *--------------------------------------------------------------------------
402 seek_and_read(int pos, int size, char *buff, int fp, int from)
404 struct varlena *v = NULL;
406 /* Assuming only one file */
407 if (lo_lseek(fp, pos, from) < 0)
408 elog(ERROR, "File seek error");
410 v = (struct varlena *) LOread(fp, size);
412 if (VARSIZE(v) - VARHDRSZ < size)
413 elog(ERROR, "File read error");
414 memmove(buff, VARDATA(v), size);
422 /*----------------------------------------------------------------------------
424 * returns the subarray specified by the range indices "st" and "endp"
425 * from the chunked array stored in file "fp"
426 *---------------------------------------------------------------------------
429 _ReadChunkArray(int *st,
444 int chunk_span[MAXDIM],
446 int chunk_st[MAXDIM],
455 int range_st[MAXDIM],
468 int srcOff; /* Needed since LO don't understand
470 char *baseDestFp = (char *) destfp;
472 CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
475 dim = ARR_DIMS(array);
476 lb = ARR_LBOUND(array);
481 temp = dim[n - 1] / C[n - 1];
482 for (i = n - 2; i >= 0; i--)
484 PC[i] = PC[i + 1] * temp;
485 temp = dim[i] / C[i];
489 for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
491 mda_get_prod(n, C, PCHUNK);
492 mda_get_range(n, array_span, st, endp);
493 mda_get_prod(n, array_span, PA);
495 array2chunk_coord(n, C, st, chunk_st);
496 array2chunk_coord(n, C, endp, chunk_end);
497 mda_get_range(n, chunk_span, chunk_st, chunk_end);
498 mda_get_offset_values(n, dist, PC, chunk_span);
500 for (i = 0; i < n; i++)
503 range_end[i] = min(chunk_st[i] * C[i] + C[i] - 1, endp[i]);
506 for (i = j = 0; i < n; i++)
507 j += chunk_st[i] * PC[i];
508 temp_seek = srcOff = j * csize * bsize;
509 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
513 for (i = 0; i < n; chunk_off[i++] = 0)
519 /* Write chunk (chunk_st) to output buffer */
520 mda_get_range(n, array_span, range_st, range_end);
521 mda_get_offset_values(n, adist, PA, array_span);
522 mda_get_offset_values(n, cdist, PCHUNK, array_span);
523 for (i = 0; i < n; range[i] = range_st[i] - st[i], i++);
524 bptr = tuple2linear(n, range, PA);
525 for (i = 0; i < n; range[i++] = 0);
530 if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
534 destfp = baseDestFp + bptr;
535 for (i = 0, block_seek = 0; i < n; i++)
536 block_seek += (range_st[i] - (chunk_st[i] + chunk_off[i])
538 if (dist[jj] + block_seek + temp_seek)
540 temp = (dist[jj] * csize + block_seek + temp_seek) * bsize;
542 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
545 for (i = n - 1, to_read = bsize; i >= 0;
546 to_read *= min(C[i], array_span[i]), i--)
547 if (cdist[i] || adist[i])
553 srcOff += (cdist[j] * bsize);
554 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
557 block_seek += cdist[j];
558 bptr += adist[j] * bsize;
561 if (lo_lseek((int) destfp, bptr, SEEK_SET) < 0)
565 destfp = baseDestFp + bptr;
566 temp = _LOtransfer((char **) &destfp, to_read, 1, (char **) &fp, 1, isDestLO);
570 words_read += to_read;
572 block_seek += (to_read / bsize);
575 * compute next tuple in *range
584 range[i] = (range[i] + 1) % array_span[i];
585 for (x = i; x * (!range[x]); x--)
586 range[x - 1] = (range[x - 1] + 1) % array_span[x - 1];
600 * end of compute next tuple -- j is set to -1 if tuple
605 block_seek = csize - block_seek;
606 temp_seek = block_seek;
607 jj = next_tuple(n, chunk_off, chunk_span);
610 range_st[jj] = (chunk_st[jj] + chunk_off[jj]) * C[jj];
611 range_end[jj] = min(range_st[jj] + C[jj] - 1, endp[jj]);
613 for (i = jj + 1; i < n; i++)
616 range_end[i] = min((chunk_st[i] + chunk_off[i]) * C[i] + C[i] - 1, endp[i]);
622 /*------------------------------------------------------------------------
624 * returns one element of the chunked array as specified by the index "st"
625 * the chunked file descriptor is "fp"
626 *-------------------------------------------------------------------------
629 _ReadChunkArray1El(int *st,
640 int chunk_st[MAXDIM];
649 CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
652 lb = ARR_LBOUND(array);
654 dim = ARR_DIMS(array);
658 temp = dim[n - 1] / C[n - 1];
659 for (i = n - 2; i >= 0; i--)
661 PC[i] = PC[i + 1] * temp;
662 temp = dim[i] / C[i];
666 for (i = 0; i < n; st[i] -= lb[i], i++);
667 mda_get_prod(n, C, PCHUNK);
669 array2chunk_coord(n, C, st, chunk_st);
671 for (i = j = 0; i < n; i++)
672 j += chunk_st[i] * PC[i];
675 for (i = 0; i < n; i++)
676 srcOff += (st[i] - chunk_st[i] * C[i]) * PCHUNK[i];
679 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
682 return (struct varlena *) LOread(fp, bsize);
684 return (struct varlena *) 0;