1 /*-------------------------------------------------------------------------
5 * Copyright (c) 1994, Regents of the University of California
9 * $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.3 1996/11/06 10:30:40 scrappy Exp $
11 *-------------------------------------------------------------------------
16 #include <libpq/be-fsstubs.h>
17 #include "utils/memutils.h"
18 #include "libpq/libpq-fs.h"
20 #include "storage/fd.h" /* for SEEK_ */
22 #include "catalog/pg_type.h"
25 #include "utils/array.h"
27 #include "optimizer/internal.h"
29 # include <regex/utils.h>
35 #define INFTY 500000000
38 #define quot_ceil(x,y) (((x)+(y)-1)/(y))
39 #define min(x,y) (((x) < (y))? (x) : (y))
40 #define max(x,y) (((x) > (y))? (x) : (y))
42 static CHUNK_INFO cInfo;
44 /* non-export function prototypes */
45 static int _FindBestChunk(int size, int dmax[], int dbest[], int dim,
46 int A[MAXPAT][MAXDIM+1], int N);
47 static int get_next(int d[], int k, int C, int dmax[]);
48 static void initialize_info(CHUNK_INFO *A, int ndim, int dim[], int chunk[]);
49 static void _ConvertToChunkFile(int n, int baseSize, int dim[], int C[],
50 int srcfd, int destfd);
51 static void read_chunk(int chunk_no[], int C[], char a_chunk[], int srcfd,
52 int n, int baseSize, int PX[], int dist[]);
53 static int write_chunk(struct varlena * a_chunk, int ofile);
54 static int seek_and_read(int pos, int size, char buff[], int fp, int from);
56 /*------------------------------------------------------------------------
58 * converts an input array to chunked format using the information
59 * provided by the access pattern.
61 * creates a new file that stores the chunked array and returns
62 * information about the chunked file
63 *-----------------------------------------------------------------------
75 int chunk[MAXDIM], csize;
78 if (chunkfile == NULL)
85 /* create new LO for chunked file */
86 chunkfile = _array_newLO( &cfd, fileFlag );
88 cfd = LOopen(chunkfile, O_RDONLY);
91 elog(WARN, "Enable to open chunk file");
92 strcpy (cInfo.lo_name, chunkfile);
95 csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
98 /* copy data from input file to chunked file */
99 _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
101 initialize_info(&cInfo, ndim, dim, chunk);
102 *nbytes = sizeof(CHUNK_INFO);
103 return (char *) &cInfo ;
106 /*--------------------------------------------------------------------------
108 * given an access pattern and array dimensionality etc, this program
109 * returns the dimensions of the chunk in "d"
110 *-----------------------------------------------------------------------
113 GetChunkSize(FILE *fd,
120 int A[MAXPAT][MAXDIM+1], dmax[MAXDIM];
123 * ----------- read input ------------
125 fscanf(fd, "%d", &N);
127 elog(WARN, "array_in: too many access pattern elements");
128 for (i = 0; i < N; i++)
129 for (j = 0; j < ndim+1; j++)
130 if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
131 elog (WARN, "array_in: bad access pattern input");
134 * estimate chunk size
136 for (i = 0; i < ndim; i++)
137 for (j = 0, dmax[i] = 1; j < N; j++)
138 if (dmax[i] < A[j][i])
140 csize = _PAGE_SIZE_/baseSize;
142 _FindBestChunk (csize, dmax, d, ndim, A, N);
147 /*-------------------------------------------------------------------------
149 * This routine does most of the number crunching to compute the
150 * optimal chunk shape.
151 * Called by GetChunkSize
152 *------------------------------------------------------------------------
155 _FindBestChunk(int size,
159 int A[MAXPAT][MAXDIM+1],
163 int tc, mintc = INFTY;
167 while (get_next(d,dim,size, dmax)) {
169 * compute the number of page fetches for a given
170 * chunk size (d[]) and access pattern (A[][])
172 register int i,j, nc;
173 for (i = 0, tc = 0; i < N; i++){
174 for (j = 0, nc = 1; j < dim; j++)
175 nc *= quot_ceil(A[i][j], d[j]);
180 * tc holds the total number of page fetches
184 for (j = 0; j < dim; dbest[j] = d[j], j++)
191 /*----------------------------------------------------------------------
193 * Called by _GetBestChunk to get the next tuple in the lexicographic order
194 *---------------------------------------------------------------------
197 get_next(int d[], int k, int C, int dmax[])
199 register int i,j, temp;
203 for (j = k-1; j >= 0; j--){
204 d[j] = min(temp, dmax[j]);
205 temp = max(1, temp/d[j]);
210 for (j = 0, temp = 1; j < k; j++)
213 for (i=k-1; i >= 0; i--){
215 if (((temp*(d[i]+1)) < C) && (d[i]+1 <= dmax[i]))
223 d[i] = min(dmax[i], j/(j/d[i]));
227 for (j = k-1; j > i; j--){
228 d[j] = min(temp, dmax[j]);
229 temp = max(1, temp/d[j]);
234 static char a_chunk[_PAGE_SIZE_ + 4]; /* 4 since a_chunk is in
238 initialize_info(CHUNK_INFO *A, int ndim, int dim[], int chunk[])
242 for ( i = 0; i < ndim; i++)
246 /*--------------------------------------------------------------------------
247 * Procedure reorganize_data():
248 * This procedure reads the input multidimensional array that is organised
249 * in the order specified by array "X" and breaks it up into chunks of
250 * dimensions specified in "C".
252 * This is a very slow process, since reading and writing of LARGE files
255 *-------------------------------------------------------------------------
258 _ConvertToChunkFile(int n,
265 int max_chunks[MAXDIM], chunk_no[MAXDIM];
266 int PX[MAXDIM], dist[MAXDIM];
267 int csize = 1, i, temp;
269 for (i = 0; i < n; chunk_no[i++] = 0) {
270 max_chunks[i] = dim[i]/C[i];
275 memmove(a_chunk, &temp, 4);
277 mda_get_prod(n, dim, PX);
278 mda_get_offset_values(n, dist, PX, C);
279 for (i = 0; i < n; dist[i] *= baseSize, i++)
282 read_chunk(chunk_no, C, &(a_chunk[4]), srcfd, n, baseSize, PX, dist);
283 write_chunk((struct varlena*)a_chunk, destfd);
284 } while (next_tuple(n, chunk_no, max_chunks) != -1);
287 /*--------------------------------------------------------------------------
289 * reads a chunk from the input files into a_chunk, the position of the
290 * chunk is specified by chunk_no
291 *--------------------------------------------------------------------------
294 read_chunk(int chunk_no[],
303 int i, j, cp, unit_transfer;
304 int start_pos, pos[MAXDIM];
308 for ( i = start_pos = 0; i < n; i++) {
309 pos[i] = chunk_no[i] * C[i];
310 start_pos += pos[i]*PX[i];
312 start_pos *= baseSize;
314 /* Read a block of dimesion C starting at co-ordinates pos */
315 unit_transfer = C[n-1] * baseSize;
317 for (i = 0; i < n; indx[i++] = 0)
320 seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
321 fpOff += unit_transfer;
324 while ((j = next_tuple(n-1, indx, C)) != -1) {
326 seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
328 fpOff += unit_transfer;
332 /*--------------------------------------------------------------------------
334 * writes a chunk of size csize into the output file
335 *--------------------------------------------------------------------------
338 write_chunk(struct varlena * a_chunk, int ofile)
342 got_n = LOwrite (ofile, a_chunk);
347 /*--------------------------------------------------------------------------
349 * seeks to the asked location in the input file and reads the
350 * appropriate number of blocks
351 * Called By: read_chunk()
352 *--------------------------------------------------------------------------
355 seek_and_read(int pos, int size, char buff[], int fp, int from)
359 /* Assuming only one file */
360 if ( lo_lseek(fp, pos, from ) < 0)
361 elog(WARN, "File seek error");
363 v = (struct varlena *) LOread(fp, size);
365 if (VARSIZE(v) - 4 < size)
366 elog(WARN, "File read error");
367 memmove(buff, VARDATA(v), size);
373 /*----------------------------------------------------------------------------
375 * returns the subarray specified bu the range indices "st" and "endp"
376 * from the chunked array stored in file "fp"
377 *---------------------------------------------------------------------------
380 _ReadChunkArray(int st[],
390 int n, temp, words_read;
391 int chunk_span[MAXDIM], chunk_off[MAXDIM];
392 int chunk_st[MAXDIM], chunk_end[MAXDIM];
395 int bptr, *C, csize, *dim, *lb;
396 int range_st[MAXDIM], range_end[MAXDIM],
397 range[MAXDIM], array_span[MAXDIM];
398 int PA[MAXDIM], PCHUNK[MAXDIM], PC[MAXDIM];
400 int cdist[MAXDIM], adist[MAXDIM];
401 int dist[MAXDIM], temp_seek;
403 int srcOff; /* Needed since LO don't understand SEEK_CUR*/
404 char *baseDestFp = (char *)destfp;
406 CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
408 dim = ARR_DIMS(array);
409 lb = ARR_LBOUND(array);
414 temp = dim[n - 1]/C[n-1];
415 for (i = n-2; i >= 0; i--){
416 PC[i] = PC[i+1] * temp;
417 temp = dim[i] / C[i];
421 for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
423 mda_get_prod(n, C, PCHUNK);
424 mda_get_range(n, array_span, st, endp);
425 mda_get_prod(n, array_span, PA);
427 array2chunk_coord(n, C, st, chunk_st);
428 array2chunk_coord(n, C, endp, chunk_end);
429 mda_get_range(n, chunk_span, chunk_st, chunk_end);
430 mda_get_offset_values(n, dist, PC, chunk_span);
432 for (i = 0; i < n; i++) {
434 range_end[i] = min(chunk_st[i]*C[i]+C[i]-1, endp[i]);
437 for (i = j = 0; i < n; i++)
438 j+= chunk_st[i]*PC[i];
439 temp_seek = srcOff = j * csize * bsize;
440 if (lo_lseek(fp, srcOff, SEEK_SET) < 0) RETURN_NULL;
443 for (i = 0; i < n; chunk_off[i++] = 0)
445 words_read = 0; temp_seek = 0;
447 /* Write chunk (chunk_st) to output buffer */
448 mda_get_range(n, array_span, range_st, range_end);
449 mda_get_offset_values(n, adist, PA, array_span);
450 mda_get_offset_values(n, cdist, PCHUNK, array_span);
451 for (i=0; i < n; range[i] = range_st[i]-st[i], i++);
452 bptr = tuple2linear(n, range, PA);
453 for (i = 0; i < n; range[i++] = 0);
454 j = n-1; bptr *= bsize;
456 if (lo_lseek(destfp, bptr, SEEK_SET) < 0)
460 destfp = baseDestFp + bptr;
461 for(i = 0, block_seek = 0; i < n; i++)
462 block_seek += (range_st[i]-(chunk_st[i] + chunk_off[i])
464 if (dist[jj] + block_seek + temp_seek) {
465 temp = (dist[jj]*csize+block_seek+temp_seek)*bsize;
467 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
470 for (i = n-1, to_read = bsize; i >= 0;
471 to_read *= min(C[i], array_span[i]), i--)
472 if (cdist[i] || adist[i])
476 srcOff += (cdist[j]*bsize);
477 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
480 block_seek += cdist[j];
481 bptr += adist[j]*bsize;
483 if (lo_lseek(destfp, bptr, SEEK_SET) < 0)
487 destfp = baseDestFp + bptr;
488 temp = _LOtransfer ((char**)&destfp, to_read, 1, (char**)&fp, 1, isDestLO);
494 block_seek += (to_read/bsize);
496 * compute next tuple in range[]
503 range[i] = (range[i]+1)%array_span[i];
504 for (x = i; x*(!range[x]); x--)
505 range[x-1] = (range[x-1]+1)%array_span[x-1];
517 * end of compute next tuple --
518 * j is set to -1 if tuple generation is over
522 block_seek = csize - block_seek;
523 temp_seek = block_seek;
524 jj = next_tuple(n, chunk_off, chunk_span);
527 range_st[jj] = (chunk_st[jj]+chunk_off[jj])*C[jj];
528 range_end[jj] = min(range_st[jj] + C[jj]-1, endp[jj]);
530 for (i = jj+1; i < n; i++) {
532 range_end[i] = min((chunk_st[i]+chunk_off[i])*C[i]+C[i]-1, endp[i]);
538 /*------------------------------------------------------------------------
539 * _ReadChunkArray1El --
540 * returns one element of the chunked array as specified by the index "st"
541 * the chunked file descriptor is "fp"
542 *-------------------------------------------------------------------------
545 _ReadChunkArray1El(int st[],
551 int i, j, n, temp, srcOff;
552 int chunk_st[MAXDIM];
554 int *C, csize, *dim, *lb;
555 int PCHUNK[MAXDIM], PC[MAXDIM];
557 CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
560 lb = ARR_LBOUND(array);
562 dim = ARR_DIMS(array);
566 temp = dim[n - 1]/C[n-1];
567 for (i = n-2; i >= 0; i--){
568 PC[i] = PC[i+1] * temp;
569 temp = dim[i] / C[i];
573 for (i = 0; i < n; st[i] -= lb[i], i++);
574 mda_get_prod(n, C, PCHUNK);
576 array2chunk_coord(n, C, st, chunk_st);
578 for (i = j = 0; i < n; i++)
579 j+= chunk_st[i]*PC[i];
582 for(i = 0; i < n; i++)
583 srcOff += (st[i]-chunk_st[i]*C[i])*PCHUNK[i];
586 if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
589 return (struct varlena *) LOread(fp, bsize);
591 return (struct varlena *) 0;