CSRCS= reclaim.c allochblk.c misc.c alloc.c mach_dep.c os_dep.c mark_roots.c headers.c mark.c obj_map.c pcr_interface.c black_list.c finalize.c new_hblk.c real_malloc.c dynamic_load.c debug_malloc.c malloc.c stubborn.c checksums.c
-SRCS= $(CSRCS) mips_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.s gc.h gc_headers.h gc_private.h config.h gc_inline.h gc.man if_mach.c if_not_there.c
+CORD_SRCS= cord/cord_basics.c cord/cord_extras.c cord/de.c cord/cord_test.c cord/cord.h cord/ec.h cord/cord_position.h
+
+CORD_OBJS= cord/cord_basics.o cord/cord_extras.o
+
+SRCS= $(CSRCS) mips_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.s sparc_mach_dep.s gc.h gc_headers.h gc_private.h config.h gc_inline.h gc.man if_mach.c if_not_there.c $(CORD_SRCS)
+
+INCLUDE_FILES= gc.h cord/cord.h cord/ec.h cord/cord_position.h
+
+# Libraries needed for curses applications. Only needed for de.
+CURSES= -lcurses -ltermlib
# The following is irrelevant on most systems. But a few
# versions of make otherwise fork the shell specified in
# the SHELL environment variable.
SHELL= /bin/sh
+AR= ar
+RANLIB= ranlib
CC= cc
CFLAGS= -O -DSILENT
# Setjmp_test may yield overly optimistic results when compiled
# not time-critical anyway.
# Set SPECIALCFLAGS to -q nodirect_code on Encore.
+ALPHACFLAGS = -non_shared
+# Extra flags for linking compilation on DEC Alpha
+
all: gc.a gctest
pcr: PCR-Makefile gc_private.h gc_headers.h gc.h config.h mach_dep.o $(SRCS)
$(OBJS) test.o: $(srcdir)/gc_private.h $(srcdir)/gc_headers.h $(srcdir)/gc.h $(srcdir)/config.h
gc.a: $(OBJS)
- ar ru gc.a $(OBJS)
- ranlib gc.a || cat /dev/null
+ $(AR) ru gc.a $(OBJS)
+ $(RANLIB) gc.a || cat /dev/null
# ignore ranlib failure; that usually means it doesn't exist, and isn't needed
+cords: $(CORD_OBJS) cord/cord_test
+ $(AR) ru gc.a $(CORD_OBJS)
+ $(RANLIB) gc.a || cat /dev/null
+ ln cord/cord.h include/cord.h
+ ln cord/ec.h include/ec.h
+ ln cord/cord_position.h include/cord_position.h
+
mach_dep.o: $(srcdir)/mach_dep.c $(srcdir)/mips_mach_dep.s $(srcdir)/rs6000_mach_dep.s if_mach if_not_there
rm -f mach_dep.o
./if_mach MIPS "" as -o mach_dep.o $(srcdir)/mips_mach_dep.s
./if_mach RS6000 "" as -o mach_dep.o $(srcdir)/rs6000_mach_dep.s
./if_mach ALPHA "" as -o mach_dep.o $(srcdir)/alpha_mach_dep.s
+ ./if_mach SPARC SUNOS5 as -o mach_dep.o $(srcdir)/sparc_mach_dep.s
./if_not_there mach_dep.o $(CC) -c $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
+mark_roots.o: $(srcdir)/mark_roots.c
+ rm -f mark_roots.o
+ ./if_mach ALPHA "" $(CC) -c $(CFLAGS) -Wo,-notail $(srcdir)/mark_roots.c
+ ./if_not_there mark_roots.o $(CC) -c $(CFLAGS) $(srcdir)/mark_roots.c
+# work-around for DEC optimizer tail recursion elimination bug
+
+cord/cord_basics.o: $(srcdir)/cord/cord_basics.c $(INCLUDE_FILES)
+ $(CC) $(CFLAGS) -c -o cord/cord_basics.o $(srcdir)/cord/cord_basics.c
+
+cord/cord_extras.o: $(srcdir)/cord/cord_extras.c $(INCLUDE_FILES)
+ $(CC) $(CFLAGS) -c -o cord/cord_extras.o $(srcdir)/cord/cord_extras.c
+
+cord/cord_test: $(srcdir)/cord/cord_test.c $(CORD_OBJS) gc.a
+ $(CC) $(CFLAGS) -o cord/cord_test $(srcdir)/cord/cord_test.c $(CORD_OBJS) gc.a
+
+cord/de: $(srcdir)/cord/de.c $(CORD_OBJS) gc.a
+ $(CC) $(CFLAGS) -o cord/de $(srcdir)/cord/de.c $(CORD_OBJS) gc.a $(CURSES)
+
if_mach: $(srcdir)/if_mach.c $(srcdir)/config.h
$(CC) $(CFLAGS) -o if_mach $(srcdir)/if_mach.c
clean:
rm -f gc.a test.o gctest output-local output-diff $(OBJS) \
- setjmp_test mon.out gmon.out a.out core if_not_there if_mach
+ setjmp_test mon.out gmon.out a.out core if_not_there if_mach \
+ $(CORD_OBJS) cord/cord_test cord/de
-rm -f *~
gctest: test.o gc.a if_mach if_not_there
rm -f gctest
- ./if_mach ALPHA "" $(CC) $(CFLAGS) -o gctest -non_shared test.o gc.a
+ ./if_mach ALPHA "" $(CC) $(CFLAGS) -o gctest $(ALPHACFLAGS) test.o gc.a
./if_not_there gctest $(CC) $(CFLAGS) -o gctest test.o gc.a
# If an optimized setjmp_test generates a segmentation fault,
# Try compiling setjmp_test unoptimized.
setjmp_test: $(srcdir)/setjmp_test.c $(srcdir)/gc.h if_mach if_not_there
rm -f setjmp_test
- ./if_mach ALPHA "" $(CC) $(CFLAGS) -o setjmp_test -non_shared $(srcdir)/setjmp_test.c
+ ./if_mach ALPHA "" $(CC) $(CFLAGS) -o setjmp_test $(ALPHACFLAGS) $(srcdir)/setjmp_test.c
./if_not_there setjmp_test $(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_test.c
test: setjmp_test gctest
./setjmp_test
./gctest
+ make cord/cord_test
+ cord/cord_test
tar:
tar cvf gc.tar $(SRCS) Makefile PCR-Makefile OS2_MAKEFILE README test.c setjmp_test.c \
- SMakefile.amiga SCoptions.amiga README.amiga
+ SMakefile.amiga SCoptions.amiga README.amiga cord/README include/gc.h
compress gc.tar
lint: $(CSRCS) test.c
provided the above notices are retained on all copies.
-This is version 3.3. Note that functions were renamed since version 1.9
+This is version 3.6. Note that functions were renamed since version 1.9
to make naming consistent with PCR collectors.
HISTORY -
Early versions of this collector were developed as a part of research
projects supported in part by the National Science Foundation
and the Defense Advance Research Projects Agency.
+Much of the code was rewritten by Hans-J. Boehm at Xerox PARC.
The SPARC specific code was contributed by Mark Weiser
(weiser@parc.xerox.com). The Encore Multimax modifications were supplied by
Kevin Kenny (kenny@m.cs.uiuc.edu). The adaptation to the RT is largely due
specific code. Manuel Serrano (serrano@cornas.inria.fr) supplied linux and
Sony News specific code. Al Dosser provided Alpha/OSF/1 code. He and
Dave Detlefs(detlefs@src.dec.com) also provided several generic bug fixes.
+Alistair G. Crooks (agc@uts.amdahl.com) supplied the NetBSD and 386BSD ports.
+Brent Benson (brent@jade.ssd.csd.harris.com) ported the collector to
+a Motorola 88K processor running CX/UX (Harris NightHawk).
+Ari Huttunen (Ari.Huttunen@hut.fi) generalized the OS/2 port to
+nonIBM development environments (a nontrivial task).
David Chase, then at Olivetti Research, suggested several improvements.
(Blame for misinstallation of these modifications goes to the first author,
however.)
- Much of the code was rewritten by Hans-J. Boehm at Xerox PARC.
-
- This is intended to be a general purpose, garbage collecting storage
+ This is intended to be a general purpose, garbage collecting storage
allocator. The algorithms used are described in:
Boehm, H., and M. Weiser, "Garbage Collection in an Uncooperative Environment",
version 8 UNIX (tm).) However none of this work appears to have been widely
disseminated.
- Rudimentary tools for use of the collector as a leak detector are included.
+ Rudimentary tools for use of the collector as a leak detector are included, as
+is a fairly sophisticated string package "cord" that makes use of the collector.
+(See cord/README.)
GENERAL DESCRIPTION
pointer to the beginning of every accessible object, in addition to any
interior pointers.) There are two facilities for altering this behavior.
The macro ALL_INTERIOR_POINTERS may be defined in gc_private.h to
-cause any pointer into an object to retain the object. A routine
-GC_register_displacement is provided to allow for more controlled
-interior pointer use in the heap. Defining ALL_INTERIOR_POINTERS
-is somewhat dangerous. See gc_private.h for details. The routine
+cause any pointer into an object (or one past the end) to retain the
+object. A routine GC_register_displacement is provided to allow for
+more controlled interior pointer use in the heap. Defining
+ALL_INTERIOR_POINTERS is somewhat dangerous, in that it can result
+in unnecessary memory retention. However this is much less of a
+problem than with older collector versions. The routine
GC_register_displacement is described in gc.h.
Note that pointers inside memory allocated by the standard "malloc" are not
region may be prematurely deallocated. It is thus suggested that the
standard "malloc" be used only for memory regions, such as I/O buffers, that
are guaranteed not to contain pointers. Pointers in C language automatic,
-static, or register variables, are correctly recognized.
+static, or register variables, are correctly recognized. (Note that
+GC_malloc_uncollectable has semantics similar to standard malloc,
+but allocates objects that are traced by the collector.)
The collector does not generally know how to find pointers in data
areas that are associated with dynamic libraries. This is easy to
remedy IF you know how to find those data areas on your operating
-system (see GC_add_roots). Code for doing this under SunOS4.X only is
-included (see dynamic_load.c). (Note that it includes a special version
-of dlopen, GC_dlopen, that should be called instead of the standard one.
-By default, this is not compiled in, since it requires the -ldl library.)
-Note that the garbage collector does not need to be informed of shared
+system (see GC_add_roots). Code for doing this under SunOS and IRIX 5.X is
+included (see dynamic_load.c).
+
+ Note that the garbage collector does not need to be informed of shared
read-only data. However if the shared library mechanism can introduce
discontiguous data areas that may contain pointers, then the collector does
need to be informed.
standard ANSI C mallocs, it is intended to be safe to invoke malloc
from a signal handler while another malloc is in progress, provided
the original malloc is not restarted. (Empirically, many UNIX
-applications already asssume this.) The allocator/collector can
-also be configured for thread-safe operation. (Full signal safety can
-also be acheived, but only at the cost of two system calls per malloc,
-which is usually unacceptable.)
+applications already assume this.) Even this modest level of signal-
+safety may be too expensive on some systems. If so, ENABLE_SIGNALS
+and DISABLE_SIGNALS may be redefined to the empty statement in gc_private.h.
+
+ The allocator/collector can also be configured for thread-safe operation.
+(Full signal safety can also be achieved, but only at the cost of two system
+calls per malloc, which is usually unacceptable.)
INSTALLATION AND PORTABILITY
a machine that's not already supported. Gctest is a somewhat superficial
test of collector functionality. Failure is indicated by a core dump or
a message to the effect that the collector is broken. Gctest takes about
-20 seconds to run on a SPARCstation 2. On a slower machine,
+35 seconds to run on a SPARCstation 2. On a slower machine,
expect it to take a while. It may use up to 8 MB of memory. (The
-multi-threaded version will use more.)
+multi-threaded version will use more.) "Make test" will also, as
+its last step, attempt to build and test the "cord" string library.
+This will fail without an ANSI C compiler.
The Makefile will generate a library gc.a which you should link against.
+Typing "make cords" will add the cord library to gc.a.
It is suggested that if you need to replace a piece of the collector
(e.g. GC_mark_roots.c) you simply list your version ahead of gc.a on the
ld command line, rather than replacing the one in gc.a. (This will
generate numerous warnings under some versions of AIX, but it still
works.)
+ All include files that need to be used by clients will be put in the
+include subdirectory. (Normally this is just gc.h. "Make cords" adds
+"cord.h", "ec.h", and "cord_position.h".)
+
The collector currently is designed to run essentially unmodified on
the following machines:
Sun 3
Sun 4 under SunOS 4.X or Solaris2.X
Vax under 4.3BSD, Ultrix
- Intel 386 or 486 under OS/2 (single threaded) or linux.
+ Intel 386 or 486 under many operating systems, but not MSDOS.
Sequent Symmetry (single threaded)
Encore Multimax (single threaded)
MIPS M/120 (and presumably M/2000) (RISC/os 4.0 with BSD libraries)
On some machines, it is difficult to obtain such a value that is
valid across a variety of MMUs, OS releases, etc. A number of
alternatives exist for using the collector in spite of this. See the
- discussion in gc_private.h immediately preceding the various
+ discussion in config.h immediately preceding the various
definitions of STACKBOTTOM.
2. mach_dep.c.
per MB of accessible memory that needs to be scanned. Your mileage
may vary.) The incremental/generational collection facility helps,
but is portable only if "stubborn" allocation is used.
+ Please address bug reports to boehm@xerox.com. If you are contemplating
+a major addition, you might also send mail to ask whether it's already
+been done.
RECENT VERSIONS:
Version 1.5 and earlier did not ensure 8 byte alignment for objects
allocated on a sparc based machine.
- Please address bug reports to boehm@xerox.com. If you are contemplating
-a major addition, you might also send mail to ask whether it's already
-been done.
-
Version 1.8 added ULTRIX support in gc_private.h.
Version 1.9 fixed a major bug in gc_realloc.
GC_unregister_disappearing_link.
All of the above were pointed out by Neil Sharman
(neil@cs.mu.oz.au).
-- Common symbols allocated by the SunOS4.X dynamic loader were not included in the root set.
+- Common symbols allocated by the SunOS4.X dynamic loader
+ were not included in the root set.
- Bug in GC_finalize (reported by Brian Beuning and Al Dosser)
- Merged Amiga port from Jesper Peterson (untested)
-- Merged NeXT port from Thomas Funke (significantly modified and untested)
-
\ No newline at end of file
+- Merged NeXT port from Thomas Funke (significantly
+ modified and untested)
+
+ Version 3.4:
+- Fixed a performance bug in GC_realloc.
+- Updated the amiga port.
+- Added NetBSD and 386BSD ports.
+- Added cord library.
+- Added trivial performance enhancement for
+ ALL_INTERIOR_POINTERS. (Don't scan last word.)
+
+ Version 3.5:
+- Minor collections now mark from roots only once, if that
+ doesn't cause an excessive pause.
+- The stack clearing heuristic was refined to prevent anomalies
+ with very heavily recursive programs and sparse stacks.
+- Fixed a bug that prevented mark stack growth in some cases.
+ GC_objects_are_marked should be set to TRUE after a call
+ to GC_push_roots and as part of GC_push_marked, since
+ both can now set mark bits. I think this is only a performance
+ bug, but I wouldn't bet on it. It's certainly very hard to argue
+ that the old version was correct.
+- Fixed an incremental collection bug that prevented it from
+ working at all when HBLKSIZE != getpagesize()
+- Changed dynamic_load.c to include gc_private.h before testing
+ DYNAMIC_LOADING. SunOS dynamic library scanning
+ must have been broken in 3.4.
+- Object size rounding now adapts to program behavior.
+- Added a workaround (provided by Manuel Serrano and
+ colleagues) to a long-standing SunOS 4.X (and 3.X?) ld bug
+ that I had incorrectly assumed to have been squished.
+ The collector was broken if the text segment size was within
+ 32 bytes of a multiple of 8K bytes, and if the beginning of
+ the data segment contained interesting roots. The workaround
+ assumes a demand-loadable executable. The original may have
+ "worked" in some other cases.
+- Added dynamic library support under IRIX5.
+- Added support for EMX under OS/2 (thanks to Ari Huttunen).
+
+Version 3.6:
+- fixed a bug in the mark stack growth code that was introduced
+ in 3.4.
+- fixed Makefile to work around DEC AXP compiler tail recursion
+ bug.
reclaim.o : reclaim.c $(INC)
allochblk.o : allochblk.c $(INC)
misc.o : misc.c $(INC)
-mach_dep.o : mach_dep.c $(INC)
os_dep.o : os_dep.c $(INC)
mark_roots.o : mark_roots.c $(INC)
headers.o : headers.c $(INC)
checksums.o : checksums.c $(INC)
test.o : test.c $(INC)
+mach_dep.o : mach_dep.c $(INC)
+ sc noopt mach_dep.c # optimizer mangles reg save hack
+
gc.lib: $(OBJS)
oml gc.lib r $(OBJS)
* provided the above notices are retained on all copies.
*
*/
+/* Boehm, November 18, 1993 12:30 pm PST */
# include <stdio.h>
int GC_incremental = 0; /* By default, stop the world. */
-int GC_full_freq = 3; /* Every 4th collection is a full */
+int GC_full_freq = 4; /* Every 5th collection is a full */
/* collection. */
char * GC_copyright[] =
}
-/* Clear up a few frames worth og garbage left at the top of the stack. */
+/* Clear up a few frames worth of garbage left at the top of the stack. */
/* This is used to prevent us from accidentally treating garbade left */
/* on the stack by other parts of the collector as roots. This */
/* differs from the code in misc.c, which actually tries to keep the */
GC_initiate_full();
n_partial_gcs = 0;
} else {
- GC_initiate_partial(GC_gc_no+1);
+ /* We try to mark with the world stopped. */
+ /* If we run out of time, this turns into */
+ /* incremental marking. */
+ if (GC_stopped_mark(FALSE)) GC_finish_collection();
n_partial_gcs++;
}
}
GC_promote_black_lists();
/* GC_reclaim_or_delete_all(); -- not needed: no intervening allocation */
GC_clear_marks();
- STOP_WORLD();
- GC_stopped_mark();
- START_WORLD();
+ (void) GC_stopped_mark(TRUE);
GC_finish_collection();
}
* roughly a GC_RATE pages. Every once in a while, we do more than that.
*/
# define GC_RATE 8
+
+int GC_deficit = 0; /* The number of extra calls to GC_mark_some */
+ /* that we have made. */
+ /* Negative values are equivalent to 0. */
void GC_collect_a_little(n)
int n;
{
register int i;
if (GC_collection_in_progress()) {
- for (i = 0; i < GC_RATE*n; i++) {
+ for (i = GC_deficit; i < GC_RATE*n; i++) {
if (GC_mark_some()) {
/* Need to finish a collection */
- STOP_WORLD();
- GC_stopped_mark();
- START_WORLD();
+ (void) GC_stopped_mark(TRUE);
GC_finish_collection();
break;
}
}
+ if (GC_deficit > 0) GC_deficit -= GC_RATE*n;
} else {
GC_maybe_gc();
}
}
/*
- * World-stopped mark phase. Assumes lock is held, signals are disabled,
- * and the world is stopped.
+ * Assumes lock is held, signals are disabled.
+ * We stop the world.
+ * If final is TRUE, then we finish the collection, no matter how long
+ * it takes.
+ * Otherwise we may fail and return FALSE if this takes too long.
+ * Increment GC_gc_no if we succeed.
*/
-void GC_stopped_mark()
+bool GC_stopped_mark(final)
+bool final;
{
-# ifdef PRINTTIMES
- CLOCK_TYPE start_time;
- CLOCK_TYPE done_time;
+ CLOCK_TYPE start_time;
+ CLOCK_TYPE current_time;
+ unsigned long time_diff;
+ register int i;
- GET_TIME(start_time);
-# endif
+ GET_TIME(start_time);
+ STOP_WORLD();
# ifdef PRINTSTATS
- GC_printf2("Collection %lu reclaimed %ld bytes\n",
- (unsigned long) GC_gc_no,
- (long)WORDS_TO_BYTES(GC_mem_found));
+ GC_printf1("--> Marking for collection %lu ",
+ (unsigned long) GC_gc_no + 1);
+ GC_printf2("after %lu allocd bytes + %lu wasted bytes\n",
+ (unsigned long) WORDS_TO_BYTES(GC_words_allocd),
+ (unsigned long) WORDS_TO_BYTES(GC_words_wasted));
# endif
+
+ /* Mark from all roots. */
+ /* Minimize junk left in my registers and on the stack */
+ GC_clear_a_few_frames();
+ GC_noop(0,0,0,0,0,0);
+ GC_initiate_partial();
+ for(i = 0;;i++) {
+ if (GC_mark_some()) break;
+ if (final) continue;
+ if ((i & 3) == 0) {
+ GET_TIME(current_time);
+ time_diff = MS_TIME_DIFF(current_time,start_time);
+ if (time_diff >= TIME_LIMIT) {
+ START_WORLD();
+# ifdef PRINTSTATS
+ GC_printf0("Abandoning stopped marking after ");
+ GC_printf2("%lu iterations and %lu msecs\n",
+ (unsigned long)i,
+ (unsigned long)time_diff);
+# endif
+ GC_deficit = i; /* Give the mutator a chance. */
+ return(FALSE);
+ }
+ }
+ }
+
GC_gc_no++;
# ifdef PRINTSTATS
- GC_printf3(
- "--> Collection number %lu after %lu allocated + %lu wasted bytes\n",
- (unsigned long) GC_gc_no,
- (unsigned long) WORDS_TO_BYTES(GC_words_allocd),
- (unsigned long) WORDS_TO_BYTES(GC_words_wasted));
- GC_printf1("---> heapsize = %lu bytes\n",
+ GC_printf2("Collection %lu reclaimed %ld bytes",
+ (unsigned long) GC_gc_no - 1,
+ (long)WORDS_TO_BYTES(GC_mem_found));
+ GC_printf1(" ---> heapsize = %lu bytes\n",
(unsigned long) GC_heapsize);
/* Printf arguments may be pushed in funny places. Clear the */
/* space. */
GC_printf0("");
-# endif
-
- /* Mark from all roots. */
- /* Minimize junk left in my registers and on the stack */
- GC_clear_a_few_frames();
- GC_noop(0,0,0,0,0,0);
- GC_initiate_partial(GC_gc_no);
- while(!GC_mark_some());
+# endif
/* Check all debugged objects for consistency */
if (GC_debugging_started) {
}
# ifdef PRINTTIMES
- GET_TIME(done_time);
+ GET_TIME(current_time);
GC_printf1("World-stopped marking took %lu msecs\n",
- MS_TIME_DIFF(done_time,start_time));
+ MS_TIME_DIFF(current_time,start_time));
# endif
-
+ START_WORLD();
+ return(TRUE);
}
word words;
if (GC_n_heap_sects >= MAX_HEAP_SECTS) {
- GC_err_printf0(
- "Too many heap sections: Increase MAXHINCR or MAX_HEAP_SECTS");
ABORT("Too many heap sections: Increase MAXHINCR or MAX_HEAP_SECTS");
}
if (!GC_install_header(p)) {
# define NEXT
# define mach_type_known
# endif
+# if defined(__NetBSD__) && defined(i386)
+# define I386
+# define NETBSD
+# define mach_type_known
+# endif
+# if !defined(mach_type_known) && defined(__386BSD__)
+# define I386
+# define THREE86BSD
+# define mach_type_known
+# endif
+# if defined(_CX_UX) && defined(_M88K)
+# define M88K
+# define CX_UX
+# define mach_type_known
+# endif
/* Feel free to add more clauses here */
/* (SUNOS4,HP,NEXT, and SYSV (A/UX), */
/* and AMIGA variants) */
/* I386 ==> Intel 386 */
- /* (SEQUENT, OS2, SCO, LINUX variants) */
- /* SCO is incomplete. */
+ /* (SEQUENT, OS2, SCO, LINUX, NETBSD, */
+ /* THREE86BSD variants, */
+ /* some are incomplete or untested) */
/* NS32K ==> Encore Multimax */
/* MIPS ==> R2000 or R3000 */
/* (RISCOS, ULTRIX variants) */
/* SPARC ==> SPARC under SunOS */
/* (SUNOS4, SUNOS5 variants) */
/* ALPHA ==> DEC Alpha OSF/1 */
+ /* M88K ==> Motorola 88XX0 */
+ /* (CX/UX so far) */
/*
# ifdef SUNOS5
# define OS_TYPE "SUNOS5"
# define DATASTART ((ptr_t)((((word) (&etext)) + 0x10003) & ~0x3))
- /* Experimentally determined. */
- /* Inconsistent with man a.out, which appears */
- /* to be wrong. */
# define PROC_VDB
# endif
# ifdef SUNOS4
# define OS_TYPE "SUNOS4"
-# define DATASTART ((ptr_t)((((word) (&etext)) + 0xfff) & ~0xfff))
- /* On very old SPARCs this is too conservative. */
+ /* [If you have a weak stomach, don't read this.] */
+ /* We would like to use: */
+/* # define DATASTART ((ptr_t)((((word) (&etext)) + 0x1fff) & ~0x1fff)) */
+ /* This fails occasionally, due to an ancient, but very */
+ /* persistent ld bug. &etext is set 32 bytes too high. */
+ /* We instead read the text segment size from the a.out */
+ /* header, which happens to be mapped into our address space */
+ /* at the start of the text segment. The detective work here */
+ /* was done by Robert Ehrlich, Manuel Serrano, and Bernard */
+ /* Serpette of INRIA. */
+ /* This assumes ZMAGIC, i.e. demand-loadable executables. */
+# define DATASTART ((ptr_t)(*(int *)0x2004+0x2000))
# define MPROTECT_VDB
# endif
# define HEURISTIC1
# ifdef I386
# define MACH_TYPE "I386"
-# define ALIGNMENT 4 /* 32-bit compilers align pointers */
+# define ALIGNMENT 4 /* Appears to hold for all "32 bit" compilers */
# ifdef SEQUENT
# define OS_TYPE "SEQUENT"
extern int etext;
/* STACKBOTTOM is handled specially in GC_init_inner. */
/* OS2 actually has the right system call! */
# endif
+# ifdef NETBSD
+# define OS_TYPE "NETBSD"
+# define HEURISTIC2
+ extern char etext;
+# define DATASTART ((ptr_t)(&etext))
+# endif
+# ifdef THREE86BSD
+# define OS_TYPE "THREE86BSD"
+# define ALIGNMENT 4
+ extern char etext;
+# define DATASTART ((ptr_t)(&etext))
+# endif
# endif
# ifdef NS32K
# define MPROTECT_VDB
# endif
+# ifdef M88K
+# define MACH_TYPE "M88K"
+# define ALIGNMENT 4
+# define DATASTART ((((word)&etext + 0x3fffff) & ~0x3fffff) + 0x10000)
+# define STACKBOTTOM ((char*)0xf0000000) /* determined empirically */
+# endif
+
# ifndef STACK_GROWS_UP
# define STACK_GROWS_DOWN
# endif
--- /dev/null
+Copyright (c) 1993 by Xerox Corporation. All rights reserved.
+
+THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+Permission is hereby granted to copy this garbage collector for any purpose,
+provided the above notices are retained on all copies.
+
+Please send bug reports to Hans-J. Boehm (boehm@parc.xerox.com).
+
+This is a string package that uses a tree-based representation.
+See cord.h for a description of the functions provided. Ec.h describes
+"extensible cords", which are essentially output streams that write
+to a cord. These allow for efficient construction of cords without
+requiring a bound on the size of a cord.
+
+de.c is a very dumb text editor that illustrates the use of cords.
+It maintains a list of file versions. Each version is simply a
+cord representing the file contents. Nonetheless, standard
+editing operations are efficient, even on very large files.
+(Its 3 line "user manual" can be obtained by invoking it without
+arguments. Note that ^R^N and ^R^P move the cursor by
+about half a screen. It does not understand tabs, which will show
+up as highlighted "I"s. Use the UNIX "expand" program first.)
+To build the editor, type "make cord/de" in the gc directory.
+
+This package assumes an ANSI C compiler such as gcc. It will
+not compile with an old-style K&R compiler.
--- /dev/null
+/*
+ * Copyright (c) 1993 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to copy this garbage collector for any purpose,
+ * provided the above notices are retained on all copies.
+ *
+ * Author: Hans-J. Boehm (boehm@parc.xerox.com)
+ */
+
+/*
+ * Cords are immutable character strings. A number of operations
+ * on long cords are much more efficient than their strings.h counterpart.
+ * In particular, concatenation takes constant time independent of the length
+ * of the arguments. (Cords are represented as trees, with internal
+ * nodes representing concatenation and leaves consisting of either C
+ * strings or a functional description of the string.)
+ *
+ * The following are reasonable applications of cords. They would perform
+ * unacceptably if C strings were used:
+ * - A compiler that produces assembly language output by repeatedly
+ * concatenating instructions onto a cord representing the output file.
+ * - A text editor that converts the input file to a cord, and then
+ * performs editing operations by producing a new cord representing
+ * the file after each character change (and keeping the old ones in an
+ * edit history)
+ *
+ * For optimal performance, cords should be built by
+ * concatenating short sections.
+ * This interface is designed for maximum compatibility with C strings.
+ * ASCII NUL characters may be embedded in cords using CORD_from_fn.
+ * This is handled correctly, but CORD_to_char_star will produce a string
+ * with embedded NULs when given such a cord.
+ */
+# ifndef CORD_H
+
+# define CORD_H
+# include <stddef.h>
+# include <stdio.h>
+/* Cords have type const char *. This is cheating quite a bit, and not */
+/* 100% portable. But it means that nonempty character string */
+/* constants may be used as cords directly, provided the string is */
+/* never modified in place. The empty cord is represented by, and */
+/* can be written as, 0. */
+
+typedef const char * CORD;
+
+/* An empty cord is always represented as nil */
+# define CORD_EMPTY 0
+
+/* Is a nonempty cord represented as a C string? */
+#define IS_STRING(s) (*(s) != '\0')
+
+/* Concatenate two cords. If the arguments are C strings, they may */
+/* not be subsequently altered. */
+CORD CORD_cat(CORD x, CORD y);
+
+/* Concatenate a cord and a C string with known length. Except for the */
+/* empty string case, this is a special case of CORD_cat. Since the */
+/* length is known, it can be faster. */
+CORD CORD_cat_char_star(CORD x, const char * y, size_t leny);
+
+/* Compute the length of a cord */
+size_t CORD_len(CORD x);
+
+/* Cords may be represented by functions defining the ith character */
+typedef char (* CORD_fn)(size_t i, void * client_data);
+
+/* Turn a functional description into a cord. */
+CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len);
+
+/* Return the substring (subcord really) of x with length at most n, */
+/* starting at position i. (The initial character has position 0.) */
+CORD CORD_substr(CORD x, size_t i, size_t n);
+
+/* Return the argument, but rebalanced to allow more efficient */
+/* character retrieval, substring operations, and comparisons. */
+/* This is useful only for cords that were built using repeated */
+/* concatenation. Guarantees log time access to the result, unless */
+/* x was obtained through a large number of repeated substring ops */
+/* or the embedded functional descriptions take longer to evaluate. */
+/* May reallocate significant parts of the cord. The argument is not */
+/* modified; only the result is balanced. */
+CORD CORD_balance(CORD x);
+
+/* The following traverse a cord by applying a function to each */
+/* character. This is occasionally appropriate, especially where */
+/* speed is crucial. But, since C doesn't have nested functions, */
+/* clients of this sort of traversal are clumsy to write. Consider */
+/* the functions that operate on cord positions instead. */
+
+/* Function to iteratively apply to individual characters in cord. */
+typedef int (* CORD_iter_fn)(char c, void * client_data);
+
+/* Function to apply to substrings of a cord. Each substring is a */
+/* C character string, not a general cord. */
+typedef int (* CORD_batched_iter_fn)(const char * s, void * client_data);
+# define CORD_NO_FN ((CORD_batched_iter_fn)0)
+
+/* Apply f1 to each character in the cord, in ascending order, */
+/* starting at position i. If */
+/* f2 is not CORD_NO_FN, then multiple calls to f1 may be replaced by */
+/* a single call to f2. The parameter f2 is provided only to allow */
+/* some optimization by the client. This terminates when the right */
+/* end of this string is reached, or when f1 or f2 return != 0. In the */
+/* latter case CORD_iter returns != 0. Otherwise it returns 0. */
+/* The specified value of i must be < CORD_len(x). */
+int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
+ CORD_batched_iter_fn f2, void * client_data);
+
+/* A simpler version that starts at 0, and without f2: */
+int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data);
+# define CORD_iter(x, f1, cd) CORD_iter5(x, 0, f1, CORD_NO_FN, cd)
+
+/* Similar to CORD_iter5, but end-to-beginning. No provisions for */
+/* CORD_batched_iter_fn. */
+int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data);
+
+/* A simpler version that starts at the end: */
+int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data);
+
+/* Functions that operate on cord positions. The easy way to traverse */
+/* cords. A cord position is logically a pair consisting of a cord */
+/* and an index into that cord. But it is much faster to retrieve a */
+/* character based on a position than on an index. Unfortunately, */
+/* positions are big (order of a few 100 bytes), so allocate them with */
+/* caution. */
+/* Things in cord_position.h should be treated as opaque, except as */
+/* described below. Also note that */
+/* CORD_pos_fetch, CORD_next and CORD_prev have both macro and function */
+/* definitions. The former may evaluate their argument more than once. */
+# include "cord_position.h"
+
+/*
+ Visible definitions from above:
+
+ typedef <OPAQUE but fairly big> CORD_pos[1];
+
+ /* Extract the cord from a position:
+ CORD CORD_pos_to_cord(CORD_pos p);
+
+ /* Extract the current index from a position:
+ size_t CORD_pos_to_index(CORD_pos p);
+
+ /* Fetch the character located at the given position:
+ char CORD_pos_fetch(register CORD_pos p);
+
+ /* Initialize the position to refer to the given cord and index.
+ /* Note that this is the most expensive function on positions:
+ void CORD_set_pos(CORD_pos p, CORD x, size_t i);
+
+ /* Advance the position to the next character.
+ /* P must be initialized and valid.
+ /* Invalidates p if past end:
+ void CORD_next(CORD_pos p);
+
+ /* Move the position to the preceding character.
+ /* P must be initialized and valid.
+ /* Invalidates p if past beginning:
+ void CORD_prev(CORD_pos p);
+
+ /* Is the position valid, i.e. inside the cord?
+ int CORD_pos_valid(CORD_pos p);
+*/
+# define CORD_FOR(pos, cord) \
+ for (CORD_set_pos(pos, cord, 0); CORD_pos_valid(pos); CORD_next(pos))
+
+
+/* An out of memory handler to call. May be supplied by client. */
+/* Must not return. */
+extern void (* CORD_oom_fn)(void);
+
+/* Dump the representation of x to stdout in an implementation defined */
+/* manner. Intended for debugging only. */
+void CORD_dump(CORD x);
+
+/* The following could easily be implemented by the client. They are */
+/* provided in cord_extras.c for convenience. */
+
+/* Return the character in CORD_substr(x, i, 1) */
+char CORD_fetch(CORD x, size_t i);
+
+/* Return < 0, 0, or > 0, depending on whether x < y, x = y, x > y */
+int CORD_cmp(CORD x, CORD y);
+
+/* Return a cord consisting of i ASCII NULs. Dangerous in */
+/* conjunction with CORD_to_char_star. */
+CORD CORD_nul(size_t i);
+
+/* Turn a file into cord. The file must be seekable. Its contents */
+/* must remain constant. The file may be accessed as an immediate */
+/* result of this call and/or as a result of subsequent accesses to */
+/* the cord. Short files are likely to be immediately read, but */
+/* long files are likely to be read on demand, possibly relying on */
+/* stdio for buffering. */
+/* We must have exclusive access to the descriptor f, i.e. we may */
+/* read it at any time, and expect the file pointer to be */
+/* where we left it. Normally this should be invoked as */
+/* CORD_from_file(fopen(...)) */
+/* CORD_from_file arranges to close the file descriptor when it is no */
+/* longer needed (e.g. when the result becomes inaccessible). */
+CORD CORD_from_file(FILE * f);
+
+/* Equivalent to the above, except that the entire file will be read */
+/* and the file pointer will be closed immediately. */
+CORD CORD_from_file_eager(FILE * f);
+
+/* Equivalent to the above, except that the file will be read on demand.*/
+CORD CORD_from_file_lazy(FILE * f);
+
+/* Turn a cord into a C string. The result shares no structure with */
+/* x, and is thus modifiable. */
+char * CORD_to_char_star(CORD x);
+
+/* Write a cord to a file, starting at the current position. No */
+/* trailing NULs or newlines are added. */
+/* Returns EOF if a write error occurs, 1 otherwise. */
+int CORD_put(CORD x, FILE * f);
+
+/* "Not found" result for the following two functions. */
+# define CORD_NOT_FOUND ((size_t)(-1))
+
+/* A vague analog of strchr. Returns the position (an integer, not */
+/* a pointer) of the first occurrence of (char) c inside x at position */
+/* i or later. The value i must be < CORD_len(x). */
+size_t CORD_chr(CORD x, size_t i, int c);
+
+/* A vague analog of strrchr. Returns index of the last occurrence */
+/* of (char) c inside x at position i or earlier. The value i */
+/* must be < CORD_len(x). */
+size_t CORD_rchr(CORD x, size_t i, int c);
+# endif /* CORD_H */
--- /dev/null
+/*
+ * Copyright (c) 1993 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to copy this code for any purpose,
+ * provided the above notices are retained on all copies.
+ *
+ * Author: Hans-J. Boehm (boehm@parc.xerox.com)
+ */
+# include "../gc.h"
+# include "cord.h"
+# include <stdio.h>
+# include <string.h>
+
+/* An implementation of the cord primitives. These are the only */
+/* Functions that understand the representation. We perform only */
+/* minimal checks on arguments to these functions. Out of bounds */
+/* arguments to the iteration functions may result in client functions */
+/* invoked on garbage data. In most cases, client functions should be */
+/* programmed defensively enough that this does not result in memory */
+/* smashes. */
+
+/* Type of the client-replaceable out-of-memory handler. */
+typedef void (* oom_fn)(void);
+
+/* Handler called when an allocation fails. A null value means that */
+/* no handler is installed. */
+oom_fn CORD_oom_fn = (oom_fn) 0;
+
+/* Print msg (followed by a newline) on stderr and terminate. */
+# define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
+
+/* Call the client handler, if one is installed. If it returns, or */
+/* if there is none, report the failure and terminate. abort() */
+/* takes no argument, so the message is routed through ABORT */
+/* instead of being passed to abort() and silently lost. */
+# define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
+    ABORT("Out of memory"); }
+
+/* Unsigned integer type used for the length stored in a cord node. */
+typedef unsigned long word;
+
+/* Representation of a cord node that is not a plain C string. */
+/* The three struct variants all begin with the same five fields */
+/* (null, header, depth, left_len, len), so a node can be inspected */
+/* through the "generic" member before its kind is known. */
+/* NOTE(review): the "null" byte is presumably always 0 so that a */
+/* node can be told apart from a nonempty C string by its first */
+/* byte; IS_STRING is defined elsewhere -- confirm there. */
+typedef union {
+ /* Interior node: the cord is left followed by right. */
+ struct Concatenation {
+ char null;
+ char header;
+ char depth; /* concatenation nesting depth. */
+ unsigned char left_len;
+ /* Length of left child if it is sufficiently */
+ /* short; 0 otherwise. */
+# define MAX_LEFT_LEN 255
+ word len;
+ CORD left; /* length(left) > 0 */
+ CORD right; /* length(right) > 0 */
+ } concatenation;
+ /* Leaf whose i-th character is computed as fn(i, client_data). */
+ struct Function {
+ char null;
+ char header;
+ char depth; /* always 0 */
+ char left_len; /* always 0 */
+ word len;
+ CORD_fn fn;
+ void * client_data;
+ } function;
+ /* Common prefix shared by the two variants above. */
+ struct Generic {
+ char null;
+ char header;
+ char depth;
+ char left_len;
+ word len;
+ } generic;
+ /* View of the same storage as characters. */
+ char string[1];
+} CordRep;
+
+/* Values stored in the header field of a node. */
+# define CONCAT_HDR 1
+
+# define FN_HDR 4
+# define SUBSTR_HDR 6
+ /* Substring nodes are a special case of function nodes. */
+ /* The client_data field is known to point to a substr_args */
+ /* structure, and the function is either CORD_apply_access_fn */
+ /* or CORD_index_access_fn. */
+ /* Note that SUBSTR_HDR has the FN_HDR bit set, so IS_FUNCTION */
+ /* is also true for substring nodes. */
+
+/* The following may be applied only to function and concatenation nodes: */
+#define IS_CONCATENATION(s) (((CordRep *)s)->generic.header == CONCAT_HDR)
+
+#define IS_FUNCTION(s) ((((CordRep *)s)->generic.header & FN_HDR) != 0)
+
+#define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)
+
+/* Length and depth as stored in a (non-string) node. */
+#define LEN(s) (((CordRep *)s) -> generic.len)
+#define DEPTH(s) (((CordRep *)s) -> generic.depth)
+/* Length of any nonempty cord: strlen for a flat string, the stored */
+/* len field otherwise. Evaluates s more than once. */
+#define GEN_LEN(s) (IS_STRING(s) ? strlen(s) : LEN(s))
+
+/* Length of the left child of concatenation node c. Uses the cached */
+/* left_len when it is nonzero. Otherwise, if the left child is a */
+/* flat string, the length is derived from the total length minus */
+/* the length of the right child; else it is read from the left */
+/* child's own len field. Evaluates c more than once. */
+#define LEFT_LEN(c) ((c) -> left_len != 0? \
+ (c) -> left_len \
+ : (IS_STRING((c) -> left) ? \
+ (c) -> len - GEN_LEN((c) -> right) \
+ : LEN((c) -> left)))
+
+#define SHORT_LIMIT (sizeof(CordRep) - 1)
+ /* Cords shorter than this are C strings */
+ /* Cords shorter than this are C strings */
+
+
+/* Dump the internal representation of x to stdout, with initial */
+/* indentation level n. */
+/* Write a description of the node x (and, recursively, of its */
+/* children) to stdout. Each line is preceded by n indentation */
+/* units; children are printed one level deeper. */
+void CORD_dump_inner(CORD x, unsigned n)
+{
+    register int k;
+
+    /* Indentation for this node. */
+    k = 0;
+    while (k < n) {
+        fputs(" ", stdout);
+        k++;
+    }
+
+    if (x == 0) {
+        fputs("NIL\n", stdout);
+        return;
+    }
+
+    if (IS_STRING(x)) {
+        /* Flat string: print a bounded prefix, then "..." if there */
+        /* is more. */
+        k = 0;
+        while (k <= SHORT_LIMIT && x[k] != '\0') {
+            putchar(x[k]);
+            k++;
+        }
+        if (x[k] != '\0') fputs("...", stdout);
+        putchar('\n');
+        return;
+    }
+
+    if (IS_CONCATENATION(x)) {
+        register struct Concatenation * node =
+                        &(((CordRep *)x) -> concatenation);
+
+        printf("Concatenation: %p (len: %d, depth: %d)\n",
+               x, (int)(node -> len), (int)(node -> depth));
+        CORD_dump_inner(node -> left, n+1);
+        CORD_dump_inner(node -> right, n+1);
+        return;
+    }
+
+    /* Function node (possibly a substring node): show up to the */
+    /* first 20 characters it produces. */
+    {
+        register struct Function * node =
+                        &(((CordRep *)x) -> function);
+
+        if (IS_SUBSTR(x)) printf("(Substring) ");
+        printf("Function: %p (len: %d): ", x, (int)(node -> len));
+        k = 0;
+        while (k < 20 && k < node -> len) {
+            putchar((*(node -> fn))(k, node -> client_data));
+            k++;
+        }
+        if (k < node -> len) fputs("...", stdout);
+        putchar('\n');
+    }
+}
+
+/* Dump the internal representation of x to stdout */
+/* Debugging aid: describe the representation of x on stdout, */
+/* starting with no indentation, and flush the stream. */
+void CORD_dump(CORD x)
+{
+    unsigned top_level = 0;
+
+    CORD_dump_inner(x, top_level);
+    fflush(stdout);
+}
+
+/* Return the concatenation of the cord x and the C string y, where */
+/* leny is the number of characters of y to append. */
+/* - If x is empty, y itself is returned; if leny is 0, x is. */
+/* - If the result is short enough (<= SHORT_LIMIT) and x is a flat */
+/* string, a fresh flat string is built. */
+/* - If x is a concatenation whose right child is a flat string and */
+/* y is short, y is merged into a copy of that right child so */
+/* that repeated small appends do not deepen the tree. */
+/* - Otherwise a new concatenation node referring to x and y is */
+/* allocated; y is not copied in that case. */
+/* The result is rebalanced if its depth exceeds MAX_DEPTH. */
+/* Invokes OUT_OF_MEMORY if any allocation fails. */
+CORD CORD_cat_char_star(CORD x, const char * y, size_t leny)
+{
+    register size_t result_len;
+    register size_t lenx;
+    register int depth;
+
+    if (x == CORD_EMPTY) return(y);
+    if (leny == 0) return(x);
+    if (IS_STRING(x)) {
+        lenx = strlen(x);
+        result_len = lenx + leny;
+        if (result_len <= SHORT_LIMIT) {
+            register char * result = GC_MALLOC_ATOMIC(result_len+1);
+
+            if (result == 0) OUT_OF_MEMORY;
+            memcpy(result, x, lenx);
+            memcpy(result + lenx, y, leny);
+            result[result_len] = '\0';
+            return((CORD) result);
+        } else {
+            depth = 1;
+        }
+    } else {
+        register CORD right;
+        register CORD left;
+        register char * new_right;
+        register size_t right_len;
+
+        lenx = LEN(x);
+
+        if (leny <= SHORT_LIMIT/2
+            && IS_CONCATENATION(x)
+            && IS_STRING(right = ((CordRep *)x) -> concatenation.right)) {
+            /* Merge y into right part of x. */
+            if (!IS_STRING(left = ((CordRep *)x) -> concatenation.left)) {
+                right_len = lenx - LEN(left);
+            } else if (((CordRep *)x) -> concatenation.left_len != 0) {
+                right_len = lenx - ((CordRep *)x) -> concatenation.left_len;
+            } else {
+                right_len = strlen(right);
+            }
+            result_len = right_len + leny;  /* length of new_right */
+            if (result_len <= SHORT_LIMIT) {
+                new_right = GC_MALLOC_ATOMIC(result_len + 1);
+                /* This allocation must be checked like the others: */
+                /* memcpy into a null pointer would crash. */
+                if (new_right == 0) OUT_OF_MEMORY;
+                memcpy(new_right, right, right_len);
+                memcpy(new_right + right_len, y, leny);
+                new_right[result_len] = '\0';
+                y = new_right;
+                leny = result_len;
+                x = left;
+                lenx -= right_len;
+                /* Now fall through to concatenate the two pieces: */
+            }
+            if (IS_STRING(x)) {
+                depth = 1;
+            } else {
+                depth = DEPTH(x) + 1;
+            }
+        } else {
+            depth = DEPTH(x) + 1;
+        }
+        result_len = lenx + leny;
+    }
+    {
+        /* The general case; lenx, result_len is known: */
+        register struct Concatenation * result;
+
+        result = GC_NEW(struct Concatenation);
+        if (result == 0) OUT_OF_MEMORY;
+        result->header = CONCAT_HDR;
+        result->depth = depth;
+        /* left_len is only a cache for short left children; it is */
+        /* left at its initial value otherwise. */
+        if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
+        result->len = result_len;
+        result->left = x;
+        result->right = y;
+        if (depth > MAX_DEPTH) {
+            return(CORD_balance((CORD)result));
+        } else {
+            return((CORD) result);
+        }
+    }
+}
+
+
+/* Return the concatenation of the cords x and y. If either is */
+/* empty the other is returned unchanged. A flat-string y is */
+/* handled by CORD_cat_char_star; otherwise a new concatenation */
+/* node referring to x and y is allocated. As in */
+/* CORD_cat_char_star, the result is rebalanced when its depth */
+/* exceeds MAX_DEPTH, so that repeated concatenation cannot build */
+/* an arbitrarily deep tree. */
+/* Invokes OUT_OF_MEMORY if the node cannot be allocated. */
+CORD CORD_cat(CORD x, CORD y)
+{
+    register size_t result_len;
+    register int depth;
+    register size_t lenx;
+
+    if (x == CORD_EMPTY) return(y);
+    if (y == CORD_EMPTY) return(x);
+    if (IS_STRING(y)) {
+        return(CORD_cat_char_star(x, y, strlen(y)));
+    } else if (IS_STRING(x)) {
+        lenx = strlen(x);
+        depth = DEPTH(y) + 1;
+    } else {
+        register int depthy = DEPTH(y);
+
+        /* New depth is one more than the deeper of the two children. */
+        lenx = LEN(x);
+        depth = DEPTH(x) + 1;
+        if (depthy >= depth) depth = depthy + 1;
+    }
+    result_len = lenx + LEN(y);
+    {
+        register struct Concatenation * result;
+
+        result = GC_NEW(struct Concatenation);
+        if (result == 0) OUT_OF_MEMORY;
+        result->header = CONCAT_HDR;
+        result->depth = depth;
+        if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
+        result->len = result_len;
+        result->left = x;
+        result->right = y;
+        if (depth > MAX_DEPTH) {
+            return(CORD_balance((CORD)result));
+        } else {
+            return((CORD) result);
+        }
+    }
+}
+
+
+
+/* Build a cord of length len whose i-th character is */
+/* fn(i, client_data). Returns the empty cord when len is 0. */
+/* A short cord that contains no NUL characters is evaluated */
+/* immediately and returned as a flat string; anything else becomes */
+/* a function node that calls fn on demand. */
+/* Invokes OUT_OF_MEMORY if an allocation fails. */
+CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len)
+{
+    if (len <= 0) return(0);
+
+    if (len <= SHORT_LIMIT) {
+        char scratch[SHORT_LIMIT+1];
+        register int filled = 0;
+        register char ch;
+
+        /* Evaluate eagerly, giving up at the first NUL: a flat */
+        /* string cannot represent an embedded NUL. */
+        while (filled < len) {
+            ch = (*fn)(filled, client_data);
+            if (ch == '\0') break;
+            scratch[filled] = ch;
+            filled++;
+        }
+        if (!(filled < len)) {
+            register char * flat;
+
+            scratch[filled] = '\0';
+            flat = GC_MALLOC_ATOMIC(len+1);
+            if (flat == 0) OUT_OF_MEMORY;
+            strcpy(flat, scratch);
+            flat[len] = '\0';
+            return((CORD) flat);
+        }
+    }
+
+    /* General case: keep the function and call it lazily. */
+    {
+        register struct Function * node = GC_NEW(struct Function);
+
+        if (node == 0) OUT_OF_MEMORY;
+        node->header = FN_HDR;
+        /* depth is already 0 */
+        node->len = len;
+        node->fn = fn;
+        node->client_data = client_data;
+        return((CORD) node);
+    }
+}
+
+/* Number of characters in x; 0 for the empty (null) cord. */
+size_t CORD_len(CORD x)
+{
+    if (x != 0) return(GEN_LEN(x));
+    return(0);
+}
+
+/* Client data of a substring function node: the cord the substring */
+/* is taken from and the index in that cord at which it starts. */
+struct substr_args {
+ CordRep * sa_cord;
+ size_t sa_index;
+};
+
+/* Access function for a substring of a flat string: client_data is */
+/* a struct substr_args whose sa_cord is treated as a character */
+/* array; returns the character at offset i from sa_index. */
+char CORD_index_access_fn(size_t i, void * client_data)
+{
+    register struct substr_args * args = (struct substr_args *)client_data;
+    register char * base = (char *)(args->sa_cord);
+
+    return(base[i + args->sa_index]);
+}
+
+/* Access function for a substring of a function node: client_data */
+/* is a struct substr_args whose sa_cord is a function node; the */
+/* request is forwarded to that node's function with the index */
+/* shifted by sa_index. */
+char CORD_apply_access_fn(size_t i, void * client_data)
+{
+    register struct substr_args * args = (struct substr_args *)client_data;
+    register struct Function * inner = &(args->sa_cord->function);
+    register size_t shifted = i + args->sa_index;
+
+    return((*(inner->fn))(shifted, inner->client_data));
+}
+
+/* A version of CORD_substr that simply returns a function node, thus */
+/* postponing its work. The fourth argument is a function that may */
+/* be used for efficient access to the ith character. */
+/* Assumes i >= 0 and i + n < length(x). */
+/* Build a substring node for n characters of x starting at i. */
+/* No characters are copied: a substr_args record is allocated and */
+/* handed to CORD_from_fn together with the access function f, and */
+/* the header of the resulting node is then set to SUBSTR_HDR. */
+/* Invokes OUT_OF_MEMORY if the record cannot be allocated. */
+CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
+{
+    register struct substr_args * args;
+    CORD node;
+
+    args = GC_NEW(struct substr_args);
+    if (args == 0) OUT_OF_MEMORY;
+    args->sa_index = i;
+    args->sa_cord = (CordRep *)x;
+    node = CORD_from_fn(f, (void *)args, n);
+    ((CordRep *)node) -> function.header = SUBSTR_HDR;
+    return(node);
+}
+
+# define SUBSTR_LIMIT (10 * SHORT_LIMIT)
+ /* Substrings of function nodes and flat strings shorter than */
+ /* this are flat strings. Otherwise we use a functional */
+ /* representation, which is significantly slower to access. */
+
+/* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/
+/* Return the n characters of x starting at index i. The arguments */
+/* are not validated here (CORD_substr does that). Three cases: */
+/* flat string, concatenation (recursing into one or both children) */
+/* and function node. */
+CORD CORD_substr_checked(CORD x, size_t i, size_t n)
+{
+ if (IS_STRING(x)) {
+ /* Long substrings of a flat string are represented lazily; */
+ /* short ones are copied. */
+ if (n > SUBSTR_LIMIT) {
+ return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
+ } else {
+ register char * result = GC_MALLOC_ATOMIC(n+1);
+ register char * p = result;
+
+ if (result == 0) OUT_OF_MEMORY;
+ strncpy(result, x+i, n);
+ result[n] = '\0';
+ return(result);
+ }
+ } else if (IS_CONCATENATION(x)) {
+ register struct Concatenation * conc
+ = &(((CordRep *)x) -> concatenation);
+ register size_t left_len;
+ register size_t right_len;
+
+ left_len = LEFT_LEN(conc);
+ right_len = conc -> len - left_len;
+ if (i >= left_len) {
+ /* Entirely inside the right child; share it if it is wanted whole. */
+ if (n == right_len) return(conc -> right);
+ return(CORD_substr_checked(conc -> right, i - left_len, n));
+ } else if (i+n <= left_len) {
+ /* Entirely inside the left child; share it if it is wanted whole. */
+ if (n == left_len) return(conc -> left);
+ return(CORD_substr_checked(conc -> left, i, n));
+ } else {
+ /* Need at least one character from each side. */
+ register CORD left_part;
+ register CORD right_part;
+ register size_t left_part_len = left_len - i;
+
+ /* A child that is needed in full is shared rather than copied. */
+ if (i == 0) {
+ left_part = conc -> left;
+ } else {
+ left_part = CORD_substr_checked(conc -> left, i, left_part_len);
+ }
+ if (i + n == right_len + left_len) {
+ right_part = conc -> right;
+ } else {
+ right_part = CORD_substr_checked(conc -> right, 0,
+ n - left_part_len);
+ }
+ return(CORD_cat(left_part, right_part));
+ }
+ } else /* function */ {
+ if (n > SUBSTR_LIMIT) {
+ if (IS_SUBSTR(x)) {
+ /* Avoid nesting substring nodes. */
+ register struct Function * f = &(((CordRep *)x) -> function);
+ register struct substr_args *descr =
+ (struct substr_args *)(f -> client_data);
+
+ return(CORD_substr_closure((CORD)descr->sa_cord,
+ i + descr->sa_index,
+ n, f -> fn));
+ } else {
+ return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
+ }
+ } else {
+ /* Short substring: evaluate the function eagerly into buf. */
+ char * result;
+ register struct Function * f = &(((CordRep *)x) -> function);
+ char buf[SUBSTR_LIMIT+1];
+ register char * p = buf;
+ register char c;
+ register int j;
+ register int lim = i + n;
+
+ for (j = i; j < lim; j++) {
+ c = (*(f -> fn))(j, f -> client_data);
+ if (c == '\0') {
+ /* A NUL cannot be stored in a flat string, so fall */
+ /* back to a lazy substring node. */
+ return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
+ }
+ *p++ = c;
+ }
+ *p = '\0';
+ result = GC_MALLOC_ATOMIC(n+1);
+ if (result == 0) OUT_OF_MEMORY;
+ strcpy(result, buf);
+ return(result);
+ }
+ }
+}
+
+/* Return the substring of x that starts at index i and has at most */
+/* n characters. Returns the empty cord if i is not inside x or if */
+/* n is 0. If fewer than n characters follow index i, n is clamped */
+/* to the number that remain. */
+CORD CORD_substr(CORD x, size_t i, size_t n)
+{
+    /* Keep the length in a size_t: an int would truncate the length */
+    /* of a very long cord. */
+    register size_t len = CORD_len(x);
+
+    if (i >= len || n <= 0) return(0);
+        /* n < 0 is impossible in a correct C implementation, but */
+        /* quite possible under SunOS 4.X. */
+    if (i < 0) ABORT("CORD_substr: second arg. negative");
+        /* Possible only if both client and C implementation are buggy. */
+        /* But empirically this happens frequently. */
+    /* Clamp n. Written as a comparison against len - i (which cannot */
+    /* wrap, since i < len here) rather than as i + n > len, because */
+    /* i + n overflows for very large n and would skip the clamp. */
+    if (n > len - i) n = len - i;
+    return(CORD_substr_checked(x, i, n));
+}
+
+/* See cord.h for definition. We assume i is in range. */
+/* Traverse x from index i to its end. f1 is applied to single */
+/* characters. For flat-string leaves f2, if it is not CORD_NO_FN, */
+/* is applied once to the rest of the leaf instead. The traversal */
+/* stops, returning 1, as soon as a callback returns nonzero; */
+/* otherwise 0 is returned. An empty (null) cord yields 0. */
+int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
+ CORD_batched_iter_fn f2, void * client_data)
+{
+ if (x == 0) return(0);
+ if (IS_STRING(x)) {
+ register const char *p = x+i;
+
+ if (*p == '\0') ABORT("2nd arg to CORD_iter5 too big");
+ if (f2 != CORD_NO_FN) {
+ /* Hand the whole remainder of the leaf to the batched callback. */
+ return((*f2)(p, client_data));
+ } else {
+ while (*p) {
+ if ((*f1)(*p, client_data)) return(1);
+ p++;
+ }
+ return(0);
+ }
+ } else if (IS_CONCATENATION(x)) {
+ register struct Concatenation * conc
+ = &(((CordRep *)x) -> concatenation);
+
+
+ /* The left length is only needed when we do not start at 0. */
+ if (i > 0) {
+ register size_t left_len = LEFT_LEN(conc);
+
+ if (i >= left_len) {
+ /* Start lies in the right child: skip the left one entirely. */
+ return(CORD_iter5(conc -> right, i - left_len, f1, f2,
+ client_data));
+ }
+ }
+ /* Start lies in the left child: finish it, then do all of the right. */
+ if (CORD_iter5(conc -> left, i, f1, f2, client_data)) {
+ return(1);
+ }
+ return(CORD_iter5(conc -> right, 0, f1, f2, client_data));
+ } else /* function */ {
+ register struct Function * f = &(((CordRep *)x) -> function);
+ register size_t j;
+ register size_t lim = f -> len;
+
+ /* Function leaves are always traversed one character at a */
+ /* time through f1; f2 is not used here. */
+ for (j = i; j < lim; j++) {
+ if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
+ return(1);
+ }
+ }
+ return(0);
+ }
+}
+
+#undef CORD_iter
+/* Function form of CORD_iter: apply f1 to every character of x from */
+/* the beginning, with no batched callback. Returns what CORD_iter5 */
+/* returns. */
+int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data)
+{
+    int stopped;
+
+    stopped = CORD_iter5(x, 0, f1, CORD_NO_FN, client_data);
+    return(stopped);
+}
+
+/* Traverse x backwards, from index i down to index 0, applying f1 */
+/* to each character. Stops and returns 1 as soon as f1 returns */
+/* nonzero; returns 0 if the beginning is reached (or x is empty). */
+int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data)
+{
+    if (x == 0) return(0);
+    if (IS_STRING(x)) {
+        register const char *p = x + i;
+        register char c;
+
+        while (p >= x) {
+            c = *p;
+            if (c == '\0') ABORT("2nd arg to CORD_riter4 too big");
+            if ((*f1)(c, client_data)) return(1);
+            p--;
+        }
+        return(0);
+    } else if (IS_CONCATENATION(x)) {
+        register struct Concatenation * conc
+                        = &(((CordRep *)x) -> concatenation);
+        register CORD left_part = conc -> left;
+        register size_t left_len;
+
+        left_len = LEFT_LEN(conc);
+        if (i >= left_len) {
+            /* Start in the right child, then do the whole left child */
+            /* from its last character. */
+            if (CORD_riter4(conc -> right, i - left_len, f1, client_data)) {
+                return(1);
+            }
+            return(CORD_riter4(left_part, left_len - 1, f1, client_data));
+        } else {
+            return(CORD_riter4(left_part, i, f1, client_data));
+        }
+    } else /* function */ {
+        register struct Function * f = &(((CordRep *)x) -> function);
+        register size_t j;
+
+        /* j is unsigned, so a "j >= 0" loop condition would always */
+        /* hold and the loop would run past index 0. Test for 0 */
+        /* explicitly before decrementing instead. */
+        for (j = i; ; j--) {
+            if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
+                return(1);
+            }
+            if (j == 0) break;
+        }
+        return(0);
+    }
+}
+
+/* Traverse all of x backwards, starting at its last character, */
+/* applying f1 to each character. Returns 1 if f1 stopped the */
+/* traversal, 0 otherwise. A cord of length 0 yields 0 without any */
+/* call: computing "length - 1" for it would wrap around to a huge */
+/* unsigned index. */
+int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data)
+{
+    register size_t len = CORD_len(x);
+
+    if (len == 0) return(0);
+    return(CORD_riter4(x, len - 1, f1, client_data));
+}
+
+/*
+ * The following functions are concerned with balancing cords.
+ * Strategy:
+ * Scan the cord from left to right, keeping the cord scanned so far
+ * as a forest of balanced trees of exponentially decreasing length.
+ * When a new subtree needs to be added to the forest, we concatenate all
+ * shorter ones to the new tree in the appropriate order, and then insert
+ * the result into the forest.
+ * Crucial invariants:
+ * 1. The concatenation of the forest (in decreasing order) with the
+ * unscanned part of the rope is equal to the rope being balanced.
+ * 2. All trees in the forest are balanced.
+ * 3. forest[i] has depth at most i.
+ */
+
+/* One slot of the balancing forest: a cord (0 if the slot is empty) */
+/* together with its length. */
+typedef struct {
+ CORD c;
+ size_t len; /* Actual length of c */
+} ForestElement;
+
+/* Minimum length table, filled in by CORD_init_min_len. */
+static size_t min_len [ MAX_DEPTH ];
+
+/* Nonzero once min_len has been initialized. */
+static int min_len_init = 0;
+
+/* Set by CORD_init_min_len to the last min_len entry minus 1. */
+int CORD_max_len;
+
+typedef ForestElement Forest [ MAX_DEPTH ];
+ /* forest[i].min_length = fib(i+1) */
+ /* The string is the concatenation */
+ /* of the forest in order of DECREASING */
+ /* indices. */
+
+/* Fill in the min_len table: min_len[0] = 1, min_len[1] = 2, and */
+/* each later entry is the sum of the two before it, except that an */
+/* entry whose sum would overflow repeats the previous entry. Also */
+/* sets CORD_max_len and marks the table as initialized. */
+void CORD_init_min_len()
+{
+    register int slot;
+    register size_t two_back = 1;    /* min_len[slot - 2] */
+    register size_t one_back = 2;    /* min_len[slot - 1] */
+    register size_t next;
+
+    min_len[0] = two_back;
+    min_len[1] = one_back;
+    slot = 2;
+    while (slot < MAX_DEPTH) {
+        next = one_back + two_back;
+        if (next < one_back) {
+            /* overflow */
+            next = one_back;
+        }
+        min_len[slot] = next;
+        two_back = one_back;
+        one_back = next;
+        slot++;
+    }
+    CORD_max_len = one_back - 1;
+    min_len_init = 1;
+}
+
+
+/* Empty the slots of forest that can be needed for a cord of */
+/* length max_len: slots are cleared from index 0 up to and */
+/* including the first one whose minimum length exceeds max_len. */
+/* Aborts if no slot has a large enough minimum length. */
+void CORD_init_forest(ForestElement * forest, size_t max_len)
+{
+    register int slot = 0;
+
+    while (slot < MAX_DEPTH) {
+        forest[slot].c = 0;
+        if (min_len[slot] > max_len) return;
+        slot++;
+    }
+    ABORT("Cord too long");
+}
+
+/* Add a leaf to the appropriate level in the forest, cleaning */
+/* out lower levels as necessary. */
+/* Also works if x is a balanced tree of concatenations; however */
+/* in this case an extra concatenation node may be inserted above x; */
+/* This node should not be counted in the statement of the invariants. */
+/* Insert x, of length len, into forest. All occupied slots below */
+/* the one x belongs in are first concatenated (in order) in front */
+/* of x and emptied; the result then keeps absorbing occupied slots */
+/* while it is long enough for the next level, and is finally */
+/* stored in the highest slot whose minimum length it reaches. */
+void CORD_add_forest(ForestElement * forest, CORD x, size_t len)
+{
+ register int i = 0;
+ register CORD sum = CORD_EMPTY;
+ register size_t sum_len = 0;
+
+ /* Collect everything in the slots that are too small for x. */
+ while (len > min_len[i + 1]) {
+ if (forest[i].c != 0) {
+ sum = CORD_cat(forest[i].c, sum);
+ sum_len += forest[i].len;
+ forest[i].c = 0;
+ }
+ i++;
+ }
+ /* Sum has depth at most 1 greater than what would be required */
+ /* for balance. */
+ sum = CORD_cat(sum, x);
+ sum_len += len;
+ /* If x was a leaf, then sum is now balanced. To see this */
+ /* consider the two cases in which forest[i-1] either is or is */
+ /* not empty. */
+ while (sum_len >= min_len[i]) {
+ if (forest[i].c != 0) {
+ sum = CORD_cat(forest[i].c, sum);
+ sum_len += forest[i].len;
+ /* This is again balanced, since sum was balanced, and has */
+ /* allowable depth that differs from i by at most 1. */
+ forest[i].c = 0;
+ }
+ i++;
+ }
+ /* The loop above stopped one slot too far. */
+ i--;
+ forest[i].c = sum;
+ forest[i].len = sum_len;
+}
+
+/* Concatenate the occupied slots of forest, each higher slot going */
+/* in front of what has been gathered so far, until the gathered */
+/* length equals expected_len. Returns the resulting cord. */
+CORD CORD_concat_forest(ForestElement * forest, size_t expected_len)
+{
+    register ForestElement * slot;
+    CORD gathered = 0;
+    size_t gathered_len = 0;
+
+    for (slot = forest; gathered_len != expected_len; slot++) {
+        if (slot -> c == 0) continue;
+        gathered = CORD_cat(slot -> c, gathered);
+        gathered_len += slot -> len;
+    }
+    return(gathered);
+}
+
+/* Insert the frontier of x into forest. Balanced subtrees are */
+/* treated as leaves. This potentially adds one to the depth */
+/* of the final tree. */
+/* Add the cord x, of length len, to forest. A concatenation node */
+/* that is too deep (depth >= MAX_DEPTH) or too short for its depth */
+/* (len < min_len[depth]) is split and its two children are */
+/* inserted recursively, left one first. Flat strings, function */
+/* nodes and all other concatenations are inserted as a unit. */
+void CORD_balance_insert(CORD x, size_t len, ForestElement * forest)
+{
+ register int depth;
+
+ if (IS_STRING(x)) {
+ CORD_add_forest(forest, x, len);
+ } else if (IS_CONCATENATION(x)
+ && ((depth = DEPTH(x)) >= MAX_DEPTH
+ || len < min_len[depth])) {
+ register struct Concatenation * conc
+ = &(((CordRep *)x) -> concatenation);
+ size_t left_len = LEFT_LEN(conc);
+
+ CORD_balance_insert(conc -> left, left_len, forest);
+ CORD_balance_insert(conc -> right, len - left_len, forest);
+ } else /* function or balanced */ {
+ CORD_add_forest(forest, x, len);
+ }
+}
+
+
+/* Return a balanced cord with the same contents as x. The empty */
+/* cord and flat strings are returned unchanged. Otherwise x is */
+/* taken apart into a forest, which is then concatenated again. */
+/* The min_len table is initialized on first use. */
+CORD CORD_balance(CORD x)
+{
+    Forest forest;
+    register size_t total;
+
+    if (x == 0) return(0);
+    if (IS_STRING(x)) return(x);
+
+    if (min_len_init == 0) CORD_init_min_len();
+    total = LEN(x);
+    CORD_init_forest(forest, total);
+    CORD_balance_insert(x, total, forest);
+    return(CORD_concat_forest(forest, total));
+}
+
+
+/* Position primitives */
+
+/* Private routines to deal with the hard cases only: */
+
+/* P contains a prefix of the path to cur_pos. Extend it to a full */
+/* path and set up leaf info. */
+/* Nothing is returned. If cur_pos lies at or beyond the end of the */
+/* leaf that is reached, p is marked invalid by setting path_len to */
+/* CORD_POS_INVALID. */
+void CORD__extend_path(register CORD_pos p)
+{
+ register struct CORD_pe * current_pe = &(p[0].path[p[0].path_len]);
+ register CORD top = current_pe -> pe_cord;
+ register size_t pos = p[0].cur_pos;
+ register size_t top_pos = current_pe -> pe_start_pos;
+ register size_t top_len = GEN_LEN(top);
+
+ /* Fill in the rest of the path. */
+ /* Descend through concatenation nodes, choosing at each one the */
+ /* child that contains pos, and track that child's start and length. */
+ while(!IS_STRING(top) && IS_CONCATENATION(top)) {
+ register struct Concatenation * conc =
+ &(((CordRep *)top) -> concatenation);
+ register size_t left_len;
+
+ left_len = LEFT_LEN(conc);
+ current_pe++;
+ if (pos >= top_pos + left_len) {
+ current_pe -> pe_cord = top = conc -> right;
+ current_pe -> pe_start_pos = top_pos = top_pos + left_len;
+ top_len -= left_len;
+ } else {
+ current_pe -> pe_cord = top = conc -> left;
+ current_pe -> pe_start_pos = top_pos;
+ top_len = left_len;
+ }
+ p[0].path_len++;
+ }
+ /* Fill in leaf description for fast access. */
+ if (IS_STRING(top)) {
+ p[0].cur_leaf = top;
+ p[0].cur_start = top_pos;
+ p[0].cur_end = top_pos + top_len;
+ } else {
+ /* cur_end == 0 marks a function leaf (no fast access). */
+ p[0].cur_end = 0;
+ }
+ if (pos >= top_pos + top_len) p[0].path_len = CORD_POS_INVALID;
+}
+
+/* Slow path of CORD_pos_fetch: return the character at position p */
+/* when the current leaf is a function node, by calling the leaf's */
+/* function with the offset of cur_pos inside the leaf. Aborts if */
+/* the leaf is not a function node. */
+char CORD__pos_fetch(register CORD_pos p)
+{
+ /* Leaf must be a function node (checked below). */
+ struct CORD_pe * pe = &((p)[0].path[(p)[0].path_len]);
+ CORD leaf = pe -> pe_cord;
+ register struct Function * f = &(((CordRep *)leaf) -> function);
+
+ if (!IS_FUNCTION(leaf)) ABORT("CORD_pos_fetch: bad leaf");
+ return ((*(f -> fn))(p[0].cur_pos - pe -> pe_start_pos, f -> client_data));
+}
+
+/* Slow path of CORD_next: advance p by one character when that */
+/* cannot be done by simply incrementing cur_pos inside a string */
+/* leaf. Marks p invalid when the end of the cord is passed. */
+void CORD__next(register CORD_pos p)
+{
+ /* Leaf is not a string or we're at end of leaf */
+ p[0].cur_pos++;
+ if (p[0].cur_end == 0) {
+ /* Function leaf */
+ struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
+ CORD leaf = pe -> pe_cord;
+ register struct Function * f = &(((CordRep *)leaf) -> function);
+
+ /* Still inside the same function leaf: nothing else to update. */
+ if (p[0].cur_pos < pe -> pe_start_pos + f -> len) return;
+ }
+ /* End of leaf */
+ /* Pop the stack until we find two concatenation nodes with the */
+ /* same start position: this implies we were in left part. */
+ {
+ register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
+
+ while (p[0].path_len > 0
+ && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
+ p[0].path_len--;
+ current_pe--;
+ }
+ /* Popped all the way to the root: we were in the rightmost leaf. */
+ if (p[0].path_len == 0) {
+ p[0].path_len = CORD_POS_INVALID;
+ return;
+ }
+ }
+ /* Step up to the parent and descend again towards the new cur_pos. */
+ p[0].path_len--;
+ CORD__extend_path(p);
+}
+
+/* Slow path of CORD_prev: move p back by one character. Marks p */
+/* invalid if it was already at index 0. */
+void CORD__prev(register CORD_pos p)
+{
+ register struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
+
+ if (p[0].cur_pos == 0) {
+ p[0].path_len = CORD_POS_INVALID;
+ return;
+ }
+ p[0].cur_pos--;
+ /* Still inside the current leaf: nothing else to update. */
+ if (p[0].cur_pos >= pe -> pe_start_pos) return;
+
+ /* Beginning of leaf */
+
+ /* Pop the stack until we find two concatenation nodes with the */
+ /* different start position: this implies we were in right part. */
+ {
+ register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
+
+ while (p[0].path_len > 0
+ && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
+ p[0].path_len--;
+ current_pe--;
+ }
+ }
+ /* Step up to the parent and descend again towards the new cur_pos. */
+ p[0].path_len--;
+ CORD__extend_path(p);
+}
+
+/* Remove any macro definitions of the following names, so that the */
+/* function versions with the same names can be defined below. */
+#undef CORD_pos_fetch
+#undef CORD_next
+#undef CORD_prev
+#undef CORD_pos_to_index
+#undef CORD_pos_to_cord
+#undef CORD_pos_valid
+
+/* Return the character at position p. When cur_pos lies inside */
+/* the cached leaf range [cur_start, cur_end) the character is read */
+/* directly from cur_leaf; otherwise CORD__pos_fetch is used. */
+char CORD_pos_fetch(register CORD_pos p)
+{
+    if (p[0].cur_pos < p[0].cur_start || p[0].cur_pos >= p[0].cur_end) {
+        return(CORD__pos_fetch(p));
+    }
+    return(p[0].cur_leaf[p[0].cur_pos - p[0].cur_start]);
+}
+
+/* Advance p to the next character. If the next index is still */
+/* inside the cached string leaf, only cur_pos is incremented; */
+/* otherwise CORD__next does the work. */
+void CORD_next(CORD_pos p)
+{
+    /* Compare cur_pos + 1 with cur_end rather than cur_pos with */
+    /* cur_end - 1: CORD__extend_path sets cur_end to 0 for function */
+    /* leaves, and 0 - 1 would wrap to a huge value if cur_end is */
+    /* unsigned, wrongly selecting the fast path. */
+    /* NOTE(review): cur_end is declared in cord_position.h, which is */
+    /* not visible here; this form is correct for either signedness. */
+    if (p[0].cur_pos + 1 < p[0].cur_end) {
+        p[0].cur_pos++;
+    } else {
+        CORD__next(p);
+    }
+}
+
+/* Move p to the preceding character. If there is a cached string */
+/* leaf (cur_end != 0) and cur_pos is not at its start, only */
+/* cur_pos is decremented; otherwise CORD__prev does the work. */
+void CORD_prev(CORD_pos p)
+{
+    if (p[0].cur_end == 0 || p[0].cur_pos <= p[0].cur_start) {
+        CORD__prev(p);
+        return;
+    }
+    p[0].cur_pos--;
+}
+
+/* Return the index (cur_pos) that position p currently refers to. */
+size_t CORD_pos_to_index(CORD_pos p)
+{
+    return(p -> cur_pos);
+}
+
+/* Return the cord that position p was set up for, i.e. the cord */
+/* stored in the first entry of its path. */
+CORD CORD_pos_to_cord(CORD_pos p)
+{
+    return(p -> path[0].pe_cord);
+}
+
+/* Return nonzero iff p has not been marked invalid, i.e. its */
+/* path_len is not CORD_POS_INVALID. */
+int CORD_pos_valid(CORD_pos p)
+{
+    if (p -> path_len == CORD_POS_INVALID) return(0);
+    return(1);
+}
+
+/* Make p refer to index i of the cord x. For the empty cord p is */
+/* simply marked invalid. Otherwise the path is started at x */
+/* (start position 0) and CORD__extend_path completes it. */
+void CORD_set_pos(CORD_pos p, CORD x, size_t i)
+{
+    if (x != CORD_EMPTY) {
+        p -> path[0].pe_cord = x;
+        p -> path[0].pe_start_pos = 0;
+        p -> path_len = 0;
+        p -> cur_pos = i;
+        CORD__extend_path(p);
+    } else {
+        p -> path_len = CORD_POS_INVALID;
+    }
+}
--- /dev/null
+/*
+ * Copyright (c) 1993 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to copy this code for any purpose,
+ * provided the above notices are retained on all copies.
+ *
+ * Author: Hans-J. Boehm (boehm@parc.xerox.com)
+ */
+/*
+ * These are functions on cords that do not need to understand their
+ * implementation. They also serve as example client code for
+ * cord_basics.
+ */
+# include <stdio.h>
+# include <string.h>
+# include "cord.h"
+# include "ec.h"
+# define I_HIDE_POINTERS /* So we get access to allocation lock. */
+ /* We use this for lazy file reading, */
+ /* so that we remain independent */
+ /* of the threads primitives. */
+# include "../gc.h"
+
+/* The standard says these are in stdio.h, but they aren't always: */
+# ifndef SEEK_SET
+# define SEEK_SET 0
+# endif
+# ifndef SEEK_END
+# define SEEK_END 2
+# endif
+
+# define BUFSZ 2048 /* Size of stack allocated buffers when */
+ /* we want large buffers. */
+
+/* Type of the out-of-memory handler stored in CORD_oom_fn. */
+typedef void (* oom_fn)(void);
+
+/* Call the client handler, if one is installed. If it returns, or */
+/* if there is none, report the failure on stderr and terminate. */
+/* abort() takes no argument, so the message is printed explicitly */
+/* rather than being passed to abort() and silently lost. */
+# define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
+    fprintf(stderr, "Out of memory\n"); abort(); }
+
+/* State shared by CORD_fill_proc and CORD_batched_fill_proc while */
+/* characters are copied into a buffer. */
+typedef struct {
+ size_t min; /* Copying stops once count reaches this value. */
+ size_t max; /* The batched procedure never copies past this. */
+ size_t count; /* Number of characters stored in buf so far. */
+ char * buf; /* Destination buffer. */
+} CORD_fill_data;
+
+/* Per-character callback: store c at the next free position of the */
+/* buffer described by client_data (a CORD_fill_data) and return 1 */
+/* once at least min characters are stored, 0 otherwise. */
+int CORD_fill_proc(char c, void * client_data)
+{
+    register CORD_fill_data * fill = (CORD_fill_data *)client_data;
+    register size_t stored = fill -> count;
+
+    fill -> buf[stored] = c;
+    stored++;
+    fill -> count = stored;
+    return(stored >= fill -> min ? 1 : 0);
+}
+
+/* Batched callback: copy characters of the C string s into the */
+/* buffer described by client_data (a CORD_fill_data) until s ends */
+/* or max characters are stored. The NUL that ends s is written to */
+/* the buffer but is not counted. Returns 1 once at least min */
+/* characters are stored, 0 otherwise. */
+int CORD_batched_fill_proc(const char * s, void * client_data)
+{
+    register CORD_fill_data * fill = (CORD_fill_data *)client_data;
+    register size_t stored = fill -> count;
+    register size_t limit = fill -> max;
+    register const char * src = s;
+    register char ch;
+
+    for (;;) {
+        ch = *src++;
+        (fill -> buf)[stored] = ch;
+        if (ch == '\0') break;
+        stored++;
+        if (stored >= limit) break;
+    }
+    fill -> count = stored;
+    return(stored >= fill -> min ? 1 : 0);
+}
+
+/* Fill buf with between min and max characters starting at i. Returns */
+/* the number of characters actually put in buf. Assumes min characters */
+/* are available. */
+/* Copy characters of x, starting at index i, into buf. Copying */
+/* stops once at least min characters are stored; the batched */
+/* procedure stores at most max. Returns the number of characters */
+/* stored. The result of the traversal itself is ignored. */
+size_t CORD_fill_buf(CORD x, size_t i, size_t min,
+                     size_t max, char * buf)
+{
+    CORD_fill_data state;
+
+    state.count = 0;
+    state.buf = buf;
+    state.max = max;
+    state.min = min;
+    (void)CORD_iter5(x, i, CORD_fill_proc, CORD_batched_fill_proc, &state);
+    return(state.count);
+}
+
+
+/* Compare two nonempty strings the hard way. */
+/* Compare x (of length xlen) with y (of length ylen) chunk by */
+/* chunk, at most BUFSZ characters at a time. Returns the strncmp */
+/* result of the first chunk that differs. If one cord runs out */
+/* first without a difference, the shorter cord is the smaller one */
+/* (-1 or 1); 0 is returned if both end together. */
+int CORD_cmp_general_case(CORD x, size_t xlen, CORD y, size_t ylen)
+{
+ char xbuf [BUFSZ];
+ char ybuf [BUFSZ];
+ register size_t pos = 0; /* First position not yet transferred to xbuf */
+ register size_t n_to_get;
+ register int result;
+ for (;;) {
+ /* Chunk size: the smallest of BUFSZ and the two remaining lengths. */
+ n_to_get = BUFSZ;
+ if (xlen < BUFSZ) n_to_get = xlen;
+ if (ylen < n_to_get) n_to_get = ylen;
+ (void) CORD_fill_buf(x, pos, n_to_get, n_to_get, xbuf);
+ (void) CORD_fill_buf(y, pos, n_to_get, n_to_get, ybuf);
+ result = strncmp(xbuf,ybuf,n_to_get);
+ if (result != 0) return(result);
+ /* xlen and ylen now count the characters not yet compared. */
+ pos += n_to_get; xlen -= n_to_get; ylen -= n_to_get;
+ if (xlen == 0) {
+ if (ylen == 0) {
+ return(0);
+ } else {
+ return(-1);
+ }
+ }
+ if (ylen == 0) {
+ return(1);
+ }
+ }
+}
+
+
+/* Compare the cords x and y. Returns a value < 0, 0 or > 0 */
+/* depending on whether x < y, x = y or x > y. The empty cord is */
+/* smaller than any nonempty cord. Two flat strings are compared */
+/* with strcmp. Otherwise a short prefix is compared first, and */
+/* CORD_cmp_general_case is used only if that is inconclusive. */
+int CORD_cmp(CORD x, CORD y)
+{
+    if (x == 0) {
+        if (y == 0) {
+            return (0);
+        } else {
+            return(-1);
+        }
+    }
+    if (y == 0) return(1);
+    if(IS_STRING(x) && IS_STRING(y)) {
+        return(strcmp(x, y));
+    }
+    {
+# define SBUFLEN 30
+# define MINCMPLEN 5
+        char xbuf[SBUFLEN];
+        char ybuf[SBUFLEN];
+        register size_t xlen = CORD_len(x);
+        register size_t ylen = CORD_len(y);
+        register size_t req_len = 0;
+        register int result;
+
+        /* req_len becomes the length of the shorter cord, provided */
+        /* that cord fits into the small buffers; else it stays 0. */
+        if (xlen <= SBUFLEN) req_len = xlen;
+        if (ylen <= SBUFLEN && ylen < xlen) req_len = ylen;
+        if (req_len != 0) {
+            (void) CORD_fill_buf(x, 0, req_len, req_len, xbuf);
+            /* The second buffer must be filled from y, not from x; */
+            /* otherwise x would be compared with itself. */
+            (void) CORD_fill_buf(y, 0, req_len, req_len, ybuf);
+            result = strncmp(xbuf, ybuf, req_len);
+            if (result != 0) return(result);
+            /* Equal up to the length of the shorter cord: the shorter */
+            /* one is smaller. Compare instead of subtracting, since */
+            /* an unsigned difference converted to int may have the */
+            /* wrong sign. */
+            if (xlen == ylen) return(0);
+            return(xlen < ylen ? -1 : 1);
+        } else {
+            /* Both have length > SBUFLEN */
+            register size_t xchars;
+            register size_t ychars;
+
+            xchars = CORD_fill_buf(x, 0, MINCMPLEN, SBUFLEN, xbuf);
+            ychars = CORD_fill_buf(y, 0, MINCMPLEN, SBUFLEN, ybuf);
+            result = strncmp(xbuf, ybuf, xchars < ychars? xchars : ychars);
+            if (result != 0) return(result);
+            return(CORD_cmp_general_case(x, xlen, y, ylen));
+        }
+    }
+}
+
+/* Return a freshly allocated, NUL-terminated C string holding the */
+/* characters of x. The result shares no storage with x and may be */
+/* modified by the caller; this also holds for a cord of length 0, */
+/* for which a fresh empty string is returned. */
+/* Invokes OUT_OF_MEMORY if the allocation fails; aborts if the */
+/* number of characters copied differs from the length of x. */
+char * CORD_to_char_star(CORD x)
+{
+    register size_t len = CORD_len(x);
+    char * result;
+
+    result = (char *)GC_MALLOC_ATOMIC(len + 1);
+    if (result == 0) OUT_OF_MEMORY;
+    /* There is nothing to copy for length 0, so skip the traversal. */
+    if (len != 0 && CORD_fill_buf(x, 0, len, len, result) != len) {
+        /* abort() takes no argument; print the message explicitly. */
+        fprintf(stderr, "Goofed\n");
+        abort();
+    }
+    result[len] = '\0';
+    return(result);
+}
+
+/* Client data for CORD_fetch_proc: receives the fetched character. */
+/* NOTE(review): new_cache is not referenced by any code visible in */
+/* this part of the file -- confirm whether it is still needed. */
+typedef struct FetchDataRep {
+ struct FetchCacheRep * new_cache;
+ char character;
+} * fetch_data;
+
+/* Per-character callback used by CORD_fetch: record c in the */
+/* FetchDataRep that client_data points to, and return 1 so that */
+/* the traversal stops after this first character. */
+int CORD_fetch_proc(char c, void * client_data)
+{
+    ((fetch_data)client_data) -> character = c;
+    return(1);
+}
+
+/* Return the character of x at index i. This is done by starting */
+/* a traversal at i with a callback that records the first */
+/* character and stops. Aborts with a message if the traversal */
+/* delivers no character. */
+char CORD_fetch(CORD x, size_t i)
+{
+    struct FetchDataRep result;
+
+    if (!CORD_iter5(x, i, CORD_fetch_proc, CORD_NO_FN, &result)) {
+        /* abort() takes no argument; print the message explicitly. */
+        fprintf(stderr, "bad index?\n");
+        abort();
+    }
+    return (result.character);
+}
+
+
+/* Per-character callback used by CORD_put: write c to the stream */
+/* passed as client_data. Returns 1 if putc reports EOF, else 0. */
+int CORD_put_proc(char c, void * client_data)
+{
+    register FILE * out = (FILE *)client_data;
+
+    if (putc(c, out) == EOF) return(1);
+    return(0);
+}
+
+/* Batched callback used by CORD_put: write the C string s to the */
+/* stream passed as client_data. Returns 1 if fputs reports EOF, */
+/* else 0. */
+int CORD_batched_put_proc(const char * s, void * client_data)
+{
+    register FILE * out = (FILE *)client_data;
+
+    if (fputs(s, out) == EOF) return(1);
+    return(0);
+}
+
+
+/* Write the characters of x to the stream f. Returns EOF if one */
+/* of the write callbacks reported an error, 1 otherwise. */
+int CORD_put(CORD x, FILE * f)
+{
+    int failed = CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f);
+
+    return(failed ? EOF : 1);
+}
+
+/* Search state shared by the CORD_chr and CORD_rchr callbacks. */
+typedef struct {
+ size_t pos; /* Current position in the cord */
+ char target; /* Character we're looking for */
+} chr_data;
+
+/* Per-character callback for a forward search: return 1 if c is */
+/* the target; otherwise advance the recorded position by one and */
+/* return 0. client_data points to a chr_data. */
+int CORD_chr_proc(char c, void * client_data)
+{
+    register chr_data * search = (chr_data *)client_data;
+
+    if (c != search -> target) {
+        search -> pos += 1;
+        return(0);
+    }
+    return(1);
+}
+
+/* Backward-search character callback: returns 1 when c is the target, */
+/* otherwise moves the recorded position back by one and returns 0.    */
+int CORD_rchr_proc(char c, void * client_data)
+{
+    chr_data * search = (chr_data *)client_data;
+
+    if (c != search -> target) {
+        search -> pos -= 1;
+        return(0);
+    }
+    return(1);
+}
+
+/* Forward-search string callback: look for the target in the C string */
+/* s.  On a hit, advance pos by the offset of the hit and return 1;     */
+/* otherwise advance pos by strlen(s) and return 0.                     */
+int CORD_batched_chr_proc(const char *s, void * client_data)
+{
+    chr_data * search = (chr_data *)client_data;
+    char * hit = strchr(s, search -> target);
+
+    if (hit != 0) {
+        search -> pos += hit - s;
+        return(1);
+    }
+    search -> pos += strlen(s);
+    return(0);
+}
+
+/* Return the index of the first occurrence of c in x at or after      */
+/* index i, or CORD_NOT_FOUND if the iteration finds none.              */
+size_t CORD_chr(CORD x, size_t i, int c)
+{
+    chr_data search;
+
+    search.target = c;
+    search.pos = i;
+    if (!CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &search)) {
+        return(CORD_NOT_FOUND);
+    }
+    return(search.pos);
+}
+
+/* Search backwards (via CORD_riter4) from index i for c.  Returns the  */
+/* index of the occurrence found, or CORD_NOT_FOUND if there is none.   */
+size_t CORD_rchr(CORD x, size_t i, int c)
+{
+    chr_data search;
+
+    search.target = c;
+    search.pos = i;
+    if (!CORD_riter4(x, i, CORD_rchr_proc, &search)) {
+        return(CORD_NOT_FOUND);
+    }
+    return(search.pos);
+}
+
+/* Move the characters buffered in the extensible cord x into          */
+/* x[0].ec_cord and reset the buffer pointer.  Does nothing when the    */
+/* buffer is empty.                                                     */
+void CORD_ec_flush_buf(CORD_ec x)
+{
+    register size_t len = x[0].ec_bufptr - x[0].ec_buf;
+    char * s;
+
+    if (len == 0) return;
+    s = GC_MALLOC_ATOMIC(len+1);
+    /* Check the allocation before copying into it, as the other        */
+    /* allocation sites in this file do.                                */
+    if (s == 0) OUT_OF_MEMORY;
+    memcpy(s, x[0].ec_buf, len);
+    s[len] = '\0';
+    x[0].ec_cord = CORD_cat_char_star(x[0].ec_cord, s, len);
+    x[0].ec_bufptr = x[0].ec_buf;
+}
+
+/* Character function used by CORD_nul: returns NUL for every index.   */
+/* Both arguments are ignored.                                          */
+/*ARGSUSED*/
+char CORD_nul_func(size_t i, void * client_data)
+{
+ return('\0');
+}
+
+
+/* Build a function cord via CORD_from_fn, using CORD_nul_func as the  */
+/* character function, no client data, and i as the third argument     */
+/* (used as the length by the other CORD_from_fn call in this file).    */
+CORD CORD_nul(size_t i)
+{
+ return(CORD_from_fn(CORD_nul_func, 0, i));
+}
+
+/* Read all of f, character by character, into a cord built through    */
+/* an extensible cord.  Runs of NUL characters are appended as a        */
+/* CORD_nul cord rather than stored in the buffer.  Closes f and        */
+/* returns the balanced result.                                         */
+CORD CORD_from_file_eager(FILE * f)
+{
+ register int c;
+ CORD_ec ecord;
+
+ CORD_ec_init(ecord);
+ for(;;) {
+ c = getc(f);
+ if (c == 0) {
+ /* Append the right number of NULs */
+ /* Note that any string of NULs is represented in 4 words, */
+ /* independent of its length. */
+ register size_t count = 1;
+
+ CORD_ec_flush_buf(ecord);
+ while ((c = getc(f)) == 0) count++;
+ ecord[0].ec_cord = CORD_cat(ecord[0].ec_cord, CORD_nul(count));
+ /* c now holds the first non-NUL character (or EOF) after the run. */
+ }
+ if (c == EOF) break;
+ CORD_ec_append(ecord, c);
+ }
+ (void) fclose(f);
+ return(CORD_balance(CORD_ec_to_cord(ecord)));
+}
+
+/* The state maintained for a lazily read file consists primarily */
+/* of a large direct-mapped cache of previously read values. */
+/* We could rely more on stdio buffering. That would have 2 */
+/* disadvantages: */
+/* 1) Empirically, not all fseek implementations preserve the */
+/* buffer whenever they could. */
+/* 2) It would fail if 2 different sections of a long cord */
+/* were being read alternately. */
+/* We do use the stdio buffer for read ahead. */
+/* To guarantee thread safety in the presence of atomic pointer */
+/* writes, cache lines are always replaced, and never modified in */
+/* place. */
+
+/* Cache geometry: CACHE_SZ bytes in total, split into lines of        */
+/* LINE_SZ bytes; both are powers of two so the helpers below can use   */
+/* masks and shifts.                                                    */
+# define LOG_CACHE_SZ 14
+# define CACHE_SZ (1 << LOG_CACHE_SZ)
+# define LOG_LINE_SZ 7
+# define LINE_SZ (1 << LOG_LINE_SZ)
+
+/* One cache line: LINE_SZ consecutive file characters plus a tag. */
+typedef struct {
+ size_t tag;
+ char data[LINE_SZ];
+ /* data[i%LINE_SZ] = ith char in file if tag = i/LINE_SZ */
+} cache_line;
+
+/* State of a lazily read file: the stream, the file position after    */
+/* the most recent refill, and one cache_line pointer per cache slot.   */
+typedef struct {
+ FILE * lf_file;
+ size_t lf_current; /* Current file pointer value */
+ cache_line * volatile lf_cache[CACHE_SZ/LINE_SZ];
+} lf_state;
+
+/* Arithmetic helpers on file positions; n is evaluated once in each. */
+# define MOD_CACHE_SZ(n) ((n) & (CACHE_SZ - 1))
+# define DIV_CACHE_SZ(n) ((n) >> LOG_CACHE_SZ)
+# define MOD_LINE_SZ(n) ((n) & (LINE_SZ - 1))
+# define DIV_LINE_SZ(n) ((n) >> LOG_LINE_SZ)
+# define LINE_START(n) ((n) & ~(LINE_SZ - 1))
+
+/* Argument block passed by CORD_lf_func to refill_cache. */
+typedef struct {
+ lf_state * state;
+ size_t file_pos; /* Position of needed character. */
+ cache_line * new_cache; /* Line allocated by the caller, to be filled and installed. */
+} refill_data;
+
+/* Executed with allocation lock. */
+/* Fill client_data -> new_cache with the file line containing         */
+/* client_data -> file_pos, install it in the cache slot for that       */
+/* position, and return the character at file_pos.  Aborts if the seek  */
+/* fails or the read does not reach file_pos.                           */
+static char refill_cache(client_data)
+refill_data * client_data;
+{
+ register lf_state * state = client_data -> state;
+ register size_t file_pos = client_data -> file_pos;
+ FILE *f = state -> lf_file;
+ size_t line_start = LINE_START(file_pos);
+ size_t line_no = DIV_LINE_SZ(MOD_CACHE_SZ(file_pos));
+ cache_line * new_cache = client_data -> new_cache;
+
+ /* Seek only when the stream is not already at the start of the line. */
+ if (line_start != state -> lf_current
+ && fseek(f, line_start, SEEK_SET) != 0) {
+ abort("fseek failed");
+ }
+ /* A short read is accepted as long as it covers the wanted character. */
+ if (fread(new_cache -> data, sizeof(char), LINE_SZ, f)
+ <= file_pos - line_start) {
+ abort("fread failed");
+ }
+ new_cache -> tag = DIV_LINE_SZ(file_pos);
+ /* Store barrier goes here. */
+ state -> lf_cache[line_no] = new_cache;
+ state -> lf_current = line_start + LINE_SZ;
+ return(new_cache->data[MOD_LINE_SZ(file_pos)]);
+}
+
+/* Character function for lazily read files: return character i of    */
+/* the file described by the lf_state in client_data.  A matching       */
+/* cache line is used directly; otherwise a new line is allocated and   */
+/* refill_cache is run through GC_call_with_alloc_lock.                 */
+char CORD_lf_func(size_t i, void * client_data)
+{
+    lf_state * state = (lf_state *)client_data;
+    cache_line * line = state -> lf_cache[DIV_LINE_SZ(MOD_CACHE_SZ(i))];
+    refill_data request;
+
+    if (line != 0 && line -> tag == DIV_LINE_SZ(i)) {
+        /* Cache hit */
+        return(line -> data[MOD_LINE_SZ(i)]);
+    }
+    /* Cache miss */
+    request.state = state;
+    request.file_pos = i;
+    request.new_cache = GC_NEW_ATOMIC(cache_line);
+    if (request.new_cache == 0) OUT_OF_MEMORY;
+    return((char)(GC_word)
+           GC_call_with_alloc_lock((GC_fn_type) refill_cache, &request));
+}
+
+/* Finalizer registered for lf_state objects: close the underlying     */
+/* stream, aborting if fclose fails.  client_data is ignored.           */
+/*ARGSUSED*/
+void CORD_lf_close_proc(void * obj, void * client_data)
+{
+    lf_state * state = (lf_state *)obj;
+
+    if (fclose(state -> lf_file) == 0) return;
+    abort("CORD_lf_close_proc: fclose failed");
+}
+
+/* Create a lazily read cord of length len backed by the stream f.     */
+/* Allocates the lf_state with an empty cache, registers               */
+/* CORD_lf_close_proc as its finalizer, and returns a function cord     */
+/* that uses CORD_lf_func.                                              */
+CORD CORD_from_file_lazy_inner(FILE * f, size_t len)
+{
+    register lf_state * state = GC_NEW(lf_state);
+    register int i;
+
+    if (state == 0) OUT_OF_MEMORY;
+    state -> lf_file = f;
+    for (i = 0; i < CACHE_SZ/LINE_SZ; i++) {
+        state -> lf_cache[i] = 0;
+    }
+    state -> lf_current = 0;
+    GC_register_finalizer(state, CORD_lf_close_proc, 0, 0, 0);
+    return(CORD_from_fn(CORD_lf_func, state, len));
+}
+
+/* Return a lazily read cord for the whole of f.  The length is found  */
+/* by seeking to the end; aborts if fseek or ftell fails.               */
+CORD CORD_from_file_lazy(FILE * f)
+{
+    /* ftell returns a long and signals failure with a negative value,  */
+    /* so the result must be held in a signed variable for the check    */
+    /* below to be meaningful.                                          */
+    register long len;
+
+    if (fseek(f, 0l, SEEK_END) != 0) {
+        abort("Bad fd argument - fseek failed");
+    }
+    if ((len = ftell(f)) < 0) {
+        abort("Bad fd argument - ftell failed");
+    }
+    rewind(f);
+    return(CORD_from_file_lazy_inner(f, (size_t)len));
+}
+
+/* Files shorter than this many characters are read eagerly. */
+# define LAZY_THRESHOLD (16*1024 + 1)
+
+/* Return a cord holding the contents of f: read eagerly when the file */
+/* is shorter than LAZY_THRESHOLD, lazily otherwise.  Aborts if fseek   */
+/* or ftell fails.                                                      */
+CORD CORD_from_file(FILE * f)
+{
+    /* Signed, so that the ftell failure value (negative) is detectable. */
+    register long len;
+
+    if (fseek(f, 0l, SEEK_END) != 0) {
+        abort("Bad fd argument - fseek failed");
+    }
+    if ((len = ftell(f)) < 0) {
+        abort("Bad fd argument - ftell failed");
+    }
+    rewind(f);
+    if (len < LAZY_THRESHOLD) {
+        return(CORD_from_file_eager(f));
+    } else {
+        return(CORD_from_file_lazy_inner(f, (size_t)len));
+    }
+}
--- /dev/null
+# ifndef CORD_POSITION_H
+# define CORD_POSITION_H
+
+/* The representation of CORD_position. This is private to the */
+/* implementation, but the size is known to clients. Also */
+/* the implementation of some exported macros relies on it. */
+/* Don't use anything defined here and not in cord.h. */
+
+# define MAX_DEPTH 48
+ /* The maximum depth of a balanced cord + 1. */
+ /* We don't let cords get deeper than MAX_DEPTH. */
+
+/* One entry on the path from the root to the current position. */
+struct CORD_pe {
+ CORD pe_cord;
+ size_t pe_start_pos;
+};
+
+/* A position within a cord: the current index plus the path of */
+/* CORD_pe entries from the root to the leaf containing it. */
+typedef struct CORD_pos {
+ size_t cur_pos;
+ int path_len;
+# define CORD_POS_INVALID (0x55555555)
+ /* path_len == INVALID <==> position invalid */
+ struct CORD_pe path[MAX_DEPTH + 1];
+ /* path[path_len] is the leaf corresponding to cur_pos */
+ /* path[0].pe_cord is the cord we point to. */
+ const char *cur_leaf; /* Current leaf, if it is a string. */
+ size_t cur_start; /* Start position of cur_leaf */
+ size_t cur_end; /* Ending position of cur_leaf */
+ /* 0 if leaf is not string. */
+} CORD_pos[1];
+
+/* Extract the cord from a position: */
+CORD CORD_pos_to_cord(CORD_pos p);
+
+/* Extract the current index from a position: */
+size_t CORD_pos_to_index(CORD_pos p);
+
+/* Fetch the character located at the given position: */
+char CORD_pos_fetch(register CORD_pos p);
+
+/* Initialize the position to refer to the given cord and index. */
+/* Note that this is the most expensive function on positions: */
+void CORD_set_pos(CORD_pos p, CORD x, size_t i);
+
+/* Advance the position to the next character. */
+/* P must be initialized and valid. */
+/* Invalidates p if past end: */
+void CORD_next(CORD_pos p);
+
+/* Move the position to the preceding character. */
+/* P must be initialized and valid. */
+/* Invalidates p if past beginning: */
+void CORD_prev(CORD_pos p);
+
+/* Is the position valid, i.e. inside the cord? */
+int CORD_pos_valid(CORD_pos p);
+
+
+/* Fast path: index the current string leaf directly when cur_pos lies */
+/* inside [cur_start, cur_end); otherwise call CORD__pos_fetch.         */
+#define CORD_pos_fetch(p) \
+ (((p)[0].cur_start <= (p)[0].cur_pos && (p)[0].cur_pos < (p)[0].cur_end)? \
+ (p)[0].cur_leaf[(p)[0].cur_pos - (p)[0].cur_start] \
+ : CORD__pos_fetch(p))
+
+/* Fast path: stay within the current string leaf.  The test is        */
+/* written as cur_pos + 1 < cur_end so that cur_end == 0 (leaf is not   */
+/* a string) selects CORD__next instead of wrapping around in           */
+/* unsigned arithmetic.                                                 */
+#define CORD_next(p) \
+ (((p)[0].cur_pos + 1 < (p)[0].cur_end)? \
+ ((p)[0].cur_pos++, 1) \
+ : CORD__next(p))
+
+/* Fast path: step back within the current string leaf; otherwise      */
+/* call the general backward step CORD__prev.                           */
+#define CORD_prev(p) \
+ (((p)[0].cur_end != 0 && (p)[0].cur_pos > (p)[0].cur_start)? \
+ ((p)[0].cur_pos--, 1) \
+ : CORD__prev(p))
+
+#define CORD_pos_to_index(p) ((p)[0].cur_pos)
+
+#define CORD_pos_to_cord(p) ((p)[0].path[0].pe_cord)
+
+#define CORD_pos_valid(p) ((p)[0].path_len != CORD_POS_INVALID)
+
+#endif
--- /dev/null
+# include "cord.h"
+# include <stdio.h>
+/* This is a very incomplete test of the cord package. It knows about */
+/* a few internals of the package (e.g. when C strings are returned) */
+/* that real clients shouldn't rely on. */
+
+/* Report a test failure on stderr, then deliberately divide by zero   */
+/* to stop the program.                                                 */
+# define ABORT(string) \
+{ int x = 0; fprintf(stderr, "FAILED: %s\n", string); x = 1 / x; }
+
+/* Number of characters seen so far by test_fn; reset by the tests. */
+int count;
+
+/* Iteration callback used by test_basics.  Checks the client data,    */
+/* then expects alternating 'b'/'a' for the first 64K+1 calls (by       */
+/* count) and a 'c' afterwards.  Returns 1 on the 'c' call and 0        */
+/* before that; count is incremented on every call.                     */
+int test_fn(char c, void * client_data)
+{
+    if (client_data != (void *)13) ABORT("bad client data");
+    if (count >= 64*1024+1) {
+        if (c != 'c') ABORT("bad char");
+        count++;
+        return(1);
+    }
+    if ((count & 1) != 0) {
+        if (c != 'a') ABORT("bad char");
+    } else {
+        if (c != 'b') ABORT("bad char");
+    }
+    count++;
+    return(0);
+}
+
+
+/* Exercise CORD_cat, CORD_len, CORD_iter5, position-based iteration,  */
+/* CORD_substr and CORD_balance on a cord of 128K+1 characters          */
+/* ("ab" repeated, followed by "c").                                    */
+test_basics()
+{
+ CORD x = "ab";
+ register int i;
+ CORD y;
+ CORD_pos p;
+
+ x = CORD_cat(x,x);
+ if (!IS_STRING(x)) ABORT("short cord should usually be a string");
+ if (strcmp(x, "abab") != 0) ABORT("bad CORD_cat result");
+
+ /* Double the 4-character cord 15 times: 4 * 2^15 = 128K characters. */
+ for (i = 1; i < 16; i++) {
+ x = CORD_cat(x,x);
+ }
+ x = CORD_cat(x,"c");
+ if (CORD_len(x) != 128*1024+1) ABORT("bad length");
+
+ /* Iterate over the second half; test_fn validates each character. */
+ count = 0;
+ if (CORD_iter5(x, 64*1024-1, test_fn, CORD_NO_FN, (void *)13) == 0) {
+ ABORT("CORD_iter5 failed");
+ }
+ if (count != 64*1024 + 2) ABORT("CORD_iter5 failed");
+
+ /* Same traversal, driven through the CORD_pos interface. */
+ count = 0;
+ CORD_set_pos(p, x, 64*1024-1);
+ while(CORD_pos_valid(p)) {
+ (void) test_fn(CORD_pos_fetch(p), (void *)13);
+ CORD_next(p);
+ }
+ if (count != 64*1024 + 2) ABORT("Position based iteration failed");
+
+ y = CORD_substr(x, 1023, 5);
+ if (!IS_STRING(y)) ABORT("short cord should usually be a string");
+ if (strcmp(y, "babab") != 0) ABORT("bad CORD_substr result");
+
+ y = CORD_substr(x, 1024, 8);
+ if (!IS_STRING(y)) ABORT("short cord should usually be a string");
+ if (strcmp(y, "abababab") != 0) ABORT("bad CORD_substr result");
+
+ /* A request running past the end is expected to yield only "bc". */
+ y = CORD_substr(x, 128*1024-1, 8);
+ if (!IS_STRING(y)) ABORT("short cord should usually be a string");
+ if (strcmp(y, "bc") != 0) ABORT("bad CORD_substr result");
+
+ /* Repeat the length, iteration and substring checks after balancing. */
+ x = CORD_balance(x);
+ if (CORD_len(x) != 128*1024+1) ABORT("bad length");
+
+ count = 0;
+ if (CORD_iter5(x, 64*1024-1, test_fn, CORD_NO_FN, (void *)13) == 0) {
+ ABORT("CORD_iter5 failed");
+ }
+ if (count != 64*1024 + 2) ABORT("CORD_iter5 failed");
+
+ y = CORD_substr(x, 1023, 5);
+ if (!IS_STRING(y)) ABORT("short cord should usually be a string");
+ if (strcmp(y, "babab") != 0) ABORT("bad CORD_substr result");
+}
+
+/* Exercise CORD_cmp, CORD_nul, CORD_put, the CORD_from_file variants, */
+/* CORD_chr, CORD_rchr and CORD_to_char_star, using the scratch file    */
+/* /tmp/cord_test.                                                      */
+/* NOTE(review): the fopen(..., "r") results are passed on unchecked.   */
+test_extras()
+{
+ register int i;
+ CORD y = "abcdefghijklmnopqrstuvwxyz0123456789";
+ CORD x = "{}";
+ CORD w, z;
+ FILE *f;
+
+ /* x = "{}" followed by 99 copies of the 36-character string y. */
+ for (i = 1; i < 100; i++) {
+ x = CORD_cat(x, y);
+ }
+ z = CORD_balance(x);
+ if (CORD_cmp(x,z) != 0) ABORT("balanced string comparison wrong");
+ if (CORD_cmp(x,CORD_cat(z, CORD_nul(13))) >= 0) ABORT("comparison 2");
+ if (CORD_cmp(CORD_cat(x, CORD_nul(13)), z) <= 0) ABORT("comparison 3");
+ if (CORD_cmp(x,CORD_cat(z, "13")) >= 0) ABORT("comparison 4");
+ /* Round-trip the cord through a file and compare. */
+ if ((f = fopen("/tmp/cord_test", "w")) == 0) ABORT("open failed");
+ if (CORD_put(z,f) == EOF) ABORT("CORD_put failed");
+ if (fclose(f) == EOF) ABORT("fclose failed");
+ w = CORD_from_file(fopen("/tmp/cord_test", "r"));
+ if (CORD_len(w) != CORD_len(z)) ABORT("file length wrong");
+ if (CORD_cmp(w,z) != 0) ABORT("file comparison wrong");
+ if (CORD_cmp(CORD_substr(w, 50*36+2, 36), y) != 0)
+ ABORT("file substr wrong");
+ z = CORD_from_file_lazy(fopen("/tmp/cord_test", "r"));
+ if (CORD_cmp(w,z) != 0) ABORT("File conversions differ");
+ if (CORD_chr(w, 0, '9') != 37) ABORT("CORD_chr failed 1");
+ if (CORD_chr(w, 3, 'a') != 38) ABORT("CORD_chr failed 2");
+ if (CORD_rchr(w, CORD_len(w) - 1, '}') != 1) ABORT("CORD_rchr failed");
+ /* Second round with a larger cord: y doubled 13 times. */
+ x = y;
+ for (i = 1; i < 14; i++) {
+ x = CORD_cat(x,x);
+ }
+ if ((f = fopen("/tmp/cord_test", "w")) == 0) ABORT("2nd open failed");
+ if (CORD_put(x,f) == EOF) ABORT("CORD_put failed");
+ if (fclose(f) == EOF) ABORT("fclose failed");
+ w = CORD_from_file(fopen("/tmp/cord_test", "r"));
+ if (CORD_len(w) != CORD_len(x)) ABORT("file length wrong");
+ if (CORD_cmp(w,x) != 0) ABORT("file comparison wrong");
+ if (CORD_cmp(CORD_substr(w, 1000*36, 36), y) != 0)
+ ABORT("file substr wrong");
+ if (strcmp(CORD_to_char_star(CORD_substr(w, 1000*36, 36)), y) != 0)
+ ABORT("char * file substr wrong");
+ if (strcmp(CORD_substr(w, 1000*36, 2), "ab") != 0)
+ ABORT("short file substr wrong");
+ if (remove("/tmp/cord_test") != 0) ABORT("remove failed");
+}
+
+/* Run both test groups; any failure stops the program through ABORT,  */
+/* so reaching the end means every check passed.                        */
+main()
+{
+ test_basics();
+ test_extras();
+ fprintf(stderr, "SUCCEEDED\n");
+ return(0);
+}
--- /dev/null
+/*
+ * Copyright (c) 1993 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to copy this code for any purpose,
+ * provided the above notices are retained on all copies.
+ *
+ * Author: Hans-J. Boehm (boehm@parc.xerox.com)
+ */
+/*
+ * A really dumb text editor based on cords.
+ * Things it does right:
+ * No size bounds.
+ * Infinite undo.
+ * Shouldn't crash no matter what file you invoke it on (e.g. /vmunix)
+ * (Make sure /vmunix is not writable before you try this.)
+ * Scrolls horizontally.
+ * Things it does wrong:
+ * It doesn't handle tabs reasonably (use "expand" first).
+ * The command set is MUCH too small.
+ * The redisplay algorithm doesn't let curses do the scrolling.
+ * The rule for moving the window over the file is suboptimal.
+ */
+#include <stdio.h>
+#include <curses.h>
+#include "../gc.h"
+#include "cord.h"
+
+/* List of line number to position mappings, in descending order. */
+typedef struct LineMapRep {
+ int line; /* Line number */
+ size_t pos; /* File position recorded for that line (see add_map callers) */
+ struct LineMapRep * previous; /* Next entry in the list */
+} * line_map;
+
+/* List of file versions, one per edit operation */
+typedef struct HistoryRep {
+ CORD file_contents; /* Complete file text for this version */
+ struct HistoryRep * previous; /* Version restored by del_hist */
+ line_map map; /* Invalid for first record "now" */
+} * history;
+
+history now = 0;
+CORD current; /* == now -> file_contents. */
+size_t current_len; /* Current file length. */
+line_map current_map = 0; /* Current line no. to pos. map */
+
+/* Current display position */
+int dis_line = 0;
+int dis_col = 0;
+
+# define ALL -1
+# define NONE - 2
+int need_redisplay = 0; /* Line that needs to be redisplayed. */
+
+
+/* Current cursor position. Always within file. */
+int line = 0;
+int col = 0;
+size_t file_pos = 0; /* Character position corresponding to cursor. */
+
+/* Drop every line-map entry whose line number is greater than i. */
+void invalidate_map(int i)
+{
+    for (; current_map -> line > i; current_map = current_map -> previous) {
+        /* nothing else to do: advancing current_map discards the entry */
+    }
+}
+
+/* Push a new (line, pos) entry onto the front of the line map. */
+void add_map(int line, size_t pos)
+{
+    line_map entry = GC_NEW(struct LineMapRep);
+
+    entry -> previous = current_map;
+    entry -> pos = pos;
+    entry -> line = line;
+    current_map = entry;
+}
+
+/* Return position of column *c of ith line in */
+/* current file. Adjust c to be within the line. */
+/* A 0 pointer is taken as 0 column. */
+/* Returns CORD_NOT_FOUND if i is too big. */
+/* Assumes i > dis_line. */
+size_t line_pos(int i, int *c)
+{
+ int j;
+ size_t cur;
+ size_t next;
+ line_map map = current_map;
+
+ /* Start from the closest map entry at or before line i. */
+ while (map -> line > i) map = map -> previous;
+ /* Walk forward one newline at a time, extending the map as we go. */
+ for (j = map -> line, cur = map -> pos; j < i;) {
+ cur = CORD_chr(current, cur, '\n');
+ /* The newline found is the last character of the file: no line i. */
+ if (cur == current_len-1) return(CORD_NOT_FOUND);
+ cur++;
+ if (++j > current_map -> line) add_map(j, cur);
+ }
+ if (c != 0) {
+ /* Clamp the requested column to the end of the line. */
+ next = CORD_chr(current, cur, '\n');
+ if (next == CORD_NOT_FOUND) next = current_len - 1;
+ if (next < cur + *c) {
+ *c = next - cur;
+ }
+ cur += *c;
+ }
+ return(cur);
+}
+
+/* Make s the current file contents and push it onto the history       */
+/* list.  The outgoing version, if any, records the current line map.   */
+void add_hist(CORD s)
+{
+    history entry = GC_NEW(struct HistoryRep);
+
+    current = s;
+    entry -> file_contents = s;
+    current_len = CORD_len(s);
+    entry -> previous = now;
+    if (now != 0) {
+        now -> map = current_map;
+    }
+    now = entry;
+}
+
+/* Step back to the previous history entry, restoring its contents,    */
+/* line map and length as the current ones.                             */
+void del_hist(void)
+{
+    history prior = now -> previous;
+
+    now = prior;
+    current = prior -> file_contents;
+    current_map = prior -> map;
+    current_len = CORD_len(current);
+}
+
+/* Current screen_contents; a dynamically allocated array of CORDs */
+CORD * screen = 0;
+int screen_size = 0;
+
+/* Replace a line in the curses stdscr. All control characters are */
+/* displayed as upper case characters in standout mode. This isn't */
+/* terribly appropriate for tabs. */
+/* Line i is redrawn only when s differs from the cached screen[i]. */
+void replace_line(int i, CORD s)
+{
+ register int c;
+ CORD_pos p;
+
+ /* (Re)allocate the cache of displayed lines when it is missing or */
+ /* smaller than LINES; the previous contents are not copied over. */
+ if (screen == 0 || LINES > screen_size) {
+ screen_size = LINES;
+ screen = (CORD *)GC_MALLOC(screen_size * sizeof(CORD));
+ }
+ if (CORD_cmp(screen[i], s) != 0) {
+ move(i,0); clrtoeol();
+ CORD_FOR (p, s) {
+ /* Only the low 7 bits of each character are displayed. */
+ c = CORD_pos_fetch(p) & 0x7f;
+ if (iscntrl(c)) {
+ standout(); addch(c + 0x40); standend();
+ } else {
+ addch(c);
+ }
+ }
+ screen[i] = s;
+ }
+}
+
+/* Return up to COLS characters of the line of s starting at pos, */
+/* returning only characters after the given column. */
+CORD retrieve_line(CORD s, size_t pos, unsigned column)
+{
+    CORD candidate = CORD_substr(s, pos, column + COLS);
+                        /* avoids scanning very long lines */
+    /* Keep the search result in a size_t: CORD_chr returns size_t and  */
+    /* CORD_NOT_FOUND is compared against it, so an int would only      */
+    /* work by accident of the conversion rules.                        */
+    size_t eol = CORD_chr(candidate, 0, '\n');
+    int len;
+
+    if (eol == CORD_NOT_FOUND) eol = CORD_len(candidate);
+    len = (int)eol - (int)column;
+    if (len < 0) len = 0;
+    return(CORD_substr(s, pos + column, len));
+}
+
+/* Display the visible section of the current file */
+/* need_redisplay selects what is redrawn: ALL, or a single screen */
+/* line number. It is reset to NONE before returning. */
+void redisplay(void)
+{
+ register int i;
+
+ invalidate_map(dis_line + LINES); /* Prune search */
+ for (i = 0; i < LINES; i++) {
+ if (need_redisplay == ALL || need_redisplay == i) {
+ register size_t pos = line_pos(dis_line + i, 0);
+
+ /* Past the end of the file: blank the remaining lines below. */
+ if (pos == CORD_NOT_FOUND) break;
+ replace_line(i, retrieve_line(current, pos, dis_col));
+ /* Single-line request satisfied; skip the blanking loop. */
+ if (need_redisplay == i) goto done;
+ }
+ }
+ for (; i < LINES; i++) replace_line(i, CORD_EMPTY);
+done:
+ refresh();
+ need_redisplay = NONE;
+}
+
+/* Update dis_line and dis_col, in steps of 10, so that the cursor     */
+/* (line, col) lies inside the displayed window.  Requests a full       */
+/* redisplay if either value changed.                                   */
+/* Assumes line, col, dis_line and dis_col are in bounds.               */
+void normalize_display()
+{
+    int old_line = dis_line;
+    int old_col = dis_col;
+
+    while (dis_line > line) dis_line -= 10;
+    while (dis_col > col) dis_col -= 10;
+    while (line >= dis_line + LINES) dis_line += 10;
+    while (col >= dis_col + COLS) dis_col += 10;
+    if (old_line != dis_line || old_col != dis_col) {
+        need_redisplay = ALL;
+    }
+}
+
+/* Adjust display so that cursor is visible; move cursor into position */
+/* Update screen if necessary. */
+void fix_cursor(void)
+{
+ normalize_display();
+ if (need_redisplay != NONE) redisplay();
+ /* Screen coordinates are relative to the top-left displayed cell. */
+ move(line - dis_line, col - dis_col);
+ refresh();
+}
+
+/* Make sure line, col, and dis_pos are somewhere inside file. */
+/* Recompute file_pos. Assumes dis_pos is accurate or past eof */
+/* NOTE(review): no dis_pos variable is visible in this file; the */
+/* comment above presumably refers to dis_line/dis_col - confirm. */
+void fix_pos()
+{
+ int my_col = col;
+
+ if (line > current_len) line = current_len;
+ file_pos = line_pos(line, &my_col);
+ if (file_pos == CORD_NOT_FOUND) {
+ /* line is past the end of the file: count lines forward from the */
+ /* last mapped line until the position reaches current_len, then */
+ /* step back one line and recompute the position using col. */
+ for (line = current_map -> line, file_pos = current_map -> pos;
+ file_pos < current_len;
+ line++, file_pos = CORD_chr(current, file_pos, '\n') + 1);
+ line--;
+ file_pos = line_pos(line, &col);
+ } else {
+ col = my_col;
+ }
+}
+
+# define UP '\020' /* ^P */
+# define DOWN '\016' /* ^N */
+# define LEFT '\002' /* ^B */
+# define RIGHT '\006' /* ^F */
+# define DEL '\177' /* ^? */
+# define BS '\010' /* ^H */
+# define UNDO '\025' /* ^U */
+# define WRITE '\027' /* ^W */
+# define QUIT '\004' /* ^D */
+# define REPEAT '\022' /* ^R */
+
+/* Editor entry point.  Loads argv[1] (or starts with a single newline */
+/* if it cannot be opened), then processes single-character commands    */
+/* read from stdin until QUIT.  ^R optionally followed by digits sets   */
+/* a repeat count for the next command.                                 */
+main(argc, argv)
+int argc;
+char ** argv;
+{
+    FILE * f, * out;
+    int c;
+    CORD initial;
+#   define NO_PREFIX -1
+#   define BARE_PREFIX -2
+    int repeat_count = NO_PREFIX;
+    int i, file_len;
+    int need_fix_pos;
+
+
+    if (argc != 2) goto usage;
+    if ((f = fopen(argv[1], "r")) == NULL) {
+        initial = "\n";
+    } else {
+        initial = CORD_from_file(f);
+        /* The rest of the editor relies on a trailing newline. */
+        if (initial == CORD_EMPTY
+            || CORD_fetch(initial, CORD_len(initial)-1) != '\n') {
+            initial = CORD_cat(initial, "\n");
+        }
+    }
+    add_map(0,0);
+    add_hist(initial);
+    now -> map = current_map;
+    now -> previous = now;  /* Can't back up further: beginning of the world */
+    GC_enable_incremental();
+    setvbuf(stdout, GC_MALLOC_ATOMIC(8192), _IOFBF, 8192);
+    initscr();
+    noecho(); nonl(); cbreak();
+    need_redisplay = ALL;
+    fix_cursor();
+
+    while ((c = getchar()) != QUIT) {
+        if ( c == '\r') c = '\n';
+        if ( c == REPEAT ) {
+            repeat_count = BARE_PREFIX; continue;
+        } else if (isdigit(c)){
+            /* Digits extend a pending repeat count; with no pending     */
+            /* prefix they fall through and are inserted as text.        */
+            if (repeat_count == BARE_PREFIX) {
+                repeat_count = c - '0'; continue;
+            } else if (repeat_count != NO_PREFIX) {
+                repeat_count = 10 * repeat_count + c - '0'; continue;
+            }
+        }
+        if (repeat_count == NO_PREFIX) repeat_count = 1;
+        if (repeat_count == BARE_PREFIX && (c == UP || c == DOWN)) {
+            repeat_count = LINES/2;
+        }
+        if (repeat_count == BARE_PREFIX) repeat_count = 8;
+        need_fix_pos = 0;
+        for (i = 0; i < repeat_count; i++) {
+            switch(c) {
+              case UP:
+                if (line != 0) {
+                    line--;
+                    need_fix_pos = 1;
+                }
+                break;
+              case DOWN:
+                line++;
+                need_fix_pos = 1;
+                break;
+              case LEFT:
+                if (col != 0) {
+                    col--; file_pos--;
+                }
+                break;
+              case RIGHT:
+                if (CORD_fetch(current, file_pos) == '\n') break;
+                col++; file_pos++;
+                break;
+              case UNDO:
+                del_hist();
+                need_redisplay = ALL; need_fix_pos = 1;
+                break;
+              case BS:
+                if (col == 0) break;
+                col--; file_pos--;
+                /* fall through: */
+              case DEL:
+                if (file_pos == current_len-1) break;
+                        /* Can't delete trailing newline */
+                if (CORD_fetch(current, file_pos) == '\n') {
+                    need_redisplay = ALL; need_fix_pos = 1;
+                } else {
+                    need_redisplay = line - dis_line;
+                }
+                add_hist(CORD_cat(
+                            CORD_substr(current, 0, file_pos),
+                            CORD_substr(current, file_pos+1, current_len)));
+                invalidate_map(line);
+                break;
+              case WRITE:
+                {
+                    int write_failed;
+
+                    out = fopen(argv[1], "w");
+                    write_failed = (out == NULL);
+                    if (!write_failed && CORD_put(current, out) == EOF) {
+                        write_failed = 1;
+                    }
+                    /* Always close the stream once it is open, and      */
+                    /* treat a failed fclose as a failed write: buffered */
+                    /* data may only reach the file at this point.       */
+                    if (out != NULL && fclose(out) == EOF) {
+                        write_failed = 1;
+                    }
+                    if (write_failed) {
+                        fprintf(stderr, "Write failed\n"); sleep(2);
+                        need_redisplay = ALL;
+                    }
+                }
+                break;
+              default:
+                {
+                    char * new_char = GC_MALLOC_ATOMIC(2);
+                    CORD left_part = CORD_substr(current, 0, file_pos);
+                    CORD right_part = CORD_substr(current, file_pos, current_len);
+
+                    new_char[0] = c; new_char[1] = '\0';
+                    add_hist(CORD_cat(CORD_cat(left_part, new_char), right_part));
+                    invalidate_map(line);
+                    if (c == '\n') {
+                        col = 0; line++; file_pos++;
+                        need_redisplay = ALL;
+                    } else {
+                        col++; file_pos++;
+                        need_redisplay = line - dis_line;
+                    }
+                    break;
+                }
+            }
+        }
+        if (need_fix_pos) fix_pos();
+        fix_cursor();
+        repeat_count = NO_PREFIX;
+    }
+done:
+    endwin();
+    exit(0);
+usage:
+    fprintf(stderr, "Usage: %s file\n", argv[0]);
+    fprintf(stderr, "Cursor keys: ^B(left) ^F(right) ^P(up) ^N(down)\n");
+    fprintf(stderr, "Undo: ^U Write: ^W Quit:^D Repeat count: ^R[n]\n");
+    exit(1);
+}
--- /dev/null
+# ifndef EC_H
+# define EC_H
+
+# ifndef CORD_H
+# include "cord.h"
+# endif
+
+/* Extensible cords are strings that may be destructively appended to. */
+/* They allow fast construction of cords from characters that are */
+/* being read from a stream. */
+/*
+ * A client might look like:
+ *
+ * {
+ * CORD_ec x;
+ * CORD result;
+ * char c;
+ * FILE *f;
+ *
+ * ...
+ * CORD_ec_init(x);
+ * while(...) {
+ * c = getc(f);
+ * ...
+ * CORD_ec_append(x, c);
+ * }
+ * result = CORD_balance(CORD_ec_to_cord(x));
+ *
+ * If a C string is desired as the final result, the call to CORD_balance
+ * may be replaced by a call to CORD_to_char_star.
+ */
+
+# ifndef CORD_BUFSZ
+# define CORD_BUFSZ 128
+# endif
+
+/* An extensible cord: a cord plus a small buffer of characters that   */
+/* have been appended but not yet flushed into the cord.                */
+typedef struct CORD_ec_struct {
+ CORD ec_cord; /* Characters flushed so far */
+ char * ec_bufptr; /* Next free slot in ec_buf */
+ char ec_buf[CORD_BUFSZ+1]; /* Pending characters */
+} CORD_ec[1];
+
+/* This structure represents the concatenation of ec_cord with */
+/* ec_buf[0 ... (ec_bufptr-ec_buf-1)] */
+
+/* Flush the buffer part of the extensible cord into ec_cord. */
+/* Note that this is the only real function, and it is */
+/* implemented in 6 lines in cord_extras.c */
+void CORD_ec_flush_buf(CORD_ec x);
+
+/* Convert an extensible cord to a cord. */
+/* Flushes the buffer first, then yields ec_cord. */
+# define CORD_ec_to_cord(x) (CORD_ec_flush_buf(x), (x)[0].ec_cord)
+
+/* Initialize an extensible cord. */
+/* Sets ec_cord to 0 and points ec_bufptr at the start of ec_buf. */
+# define CORD_ec_init(x) ((x)[0].ec_cord = 0, (x)[0].ec_bufptr = (x)[0].ec_buf)
+
+/* Append a character to an extensible cord. */
+/* Flushes first when CORD_BUFSZ characters are already buffered. */
+/* Expands to a brace-enclosed block, not an expression, and evaluates */
+/* x more than once. */
+# define CORD_ec_append(x, c) \
+ { \
+ if ((x)[0].ec_bufptr == (x)[0].ec_buf + CORD_BUFSZ) { \
+ CORD_ec_flush_buf(x); \
+ } \
+ *((x)[0].ec_bufptr)++ = (c); \
+ }
+
+# endif /* EC_H */
* Author: Bill Janssen
* Modified by: Hans Boehm
*/
+/* Boehm, December 17, 1993 4:46 pm PST */
/*
* This is incredibly OS specific code for tracking down data sections in
* that this is a bug in the design of the dlopen interface. THIS CODE
* MAY BREAK IN FUTURE OS RELEASES. If this matters to you, don't hesitate
* to let your vendor know ...
+ *
+ * None of this is safe with dlclose and incremental collection.
+ * But then not much of anything is safe in the presence of dlclose.
*/
+#include <sys/types.h>
#include "gc_private.h"
+
#ifdef DYNAMIC_LOADING
-#if !(defined(M68K) && defined(SUNOS)) && !defined(SPARC)
- --> We only know how to find data segments of dynamic libraries under SunOS 4.X
+#if !defined(SUNOS4) && !defined(SUNOS5) && !defined(IRIX5)
+ --> We only know how to find data segments of dynamic libraries under SunOS
+ --> and under IRIX5. Other SVR4 variants might not be too hard to add.
#endif
#include <stdio.h>
-#if defined SUNOS5
+#ifdef SUNOS5
# include <sys/elf.h>
# include <dlfcn.h>
# include <link.h>
-#else
+#endif
+#ifdef SUNOS4
# include <dlfcn.h>
# include <link.h>
# include <a.out.h>
# define l_next lm_next
# define l_addr lm_addr
# define l_name lm_name
-# endif
+#endif
#ifdef SUNOS5
return cachedResult;
}
-# endif
+#endif
-# ifdef SUNOS4
+#ifdef SUNOS4
#ifdef LINT
struct link_dynamic _DYNAMIC;
return(result);
}
-# endif
+#endif
+# if defined(SUNOS4) || defined(SUNOS5)
/* Add dynamic library data sections to the root set. */
# if !defined(PCR) && defined(THREADS)
# ifndef SRC_M3
# endif
}
-#else
+# endif /* SUNOS */
+
+#ifdef IRIX5
+
+#include <sys/procfs.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <elf.h>
+
+extern void * GC_roots_present();
+
+/* We use /proc to track down all parts of the address space that are */
+/* mapped by the process, and throw out regions we know we shouldn't */
+/* worry about. This may also work under other SVR4 variants. */
+/* Every remaining mapping is passed to GC_add_roots_inner. The /proc */
+/* descriptor and the map buffer are static and reused across calls. */
+void GC_register_dynamic_libraries()
+{
+ static int fd = -1;
+ char buf[30];
+ static prmap_t * addr_map = 0;
+ static int current_sz = 0; /* Number of records currently in addr_map */
+ static int needed_sz; /* Required size of addr_map */
+ register int i;
+ register long flags;
+ register ptr_t start;
+ register ptr_t limit;
+
+ /* Open /proc/<pid> once; the descriptor is kept for later calls. */
+ if (fd < 0) {
+ sprintf(buf, "/proc/%d", getpid());
+ fd = open(buf, O_RDONLY);
+ if (fd < 0) {
+ ABORT("/proc open failed");
+ }
+ }
+ if (ioctl(fd, PIOCNMAP, &needed_sz) < 0) {
+ ABORT("/proc PIOCNMAP ioctl failed");
+ }
+ /* Grow the map buffer when it cannot hold needed_sz entries plus one. */
+ if (needed_sz >= current_sz) {
+ current_sz = needed_sz * 2 + 1;
+ /* Expansion, plus room for 0 record */
+ addr_map = (prmap_t *)GC_scratch_alloc(current_sz * sizeof(prmap_t));
+ }
+ if (ioctl(fd, PIOCMAP, addr_map) < 0) {
+ ABORT("/proc PIOCMAP ioctl failed");
+ };
+ for (i = 0; i < needed_sz; i++) {
+ flags = addr_map[i].pr_mflags;
+ /* Skip break, stack and physical mappings. */
+ if ((flags & (MA_BREAK | MA_STACK | MA_PHYS)) != 0) goto irrelevant;
+ /* Skip anything that is not both readable and writable. */
+ if ((flags & (MA_READ | MA_WRITE)) != (MA_READ | MA_WRITE))
+ goto irrelevant;
+ /* The latter test is empirically useless. Other than the */
+ /* main data and stack segments, everything appears to be */
+ /* mapped readable, writable, executable, and shared(!!). */
+ /* This makes no sense to me. - HB */
+ start = (ptr_t)(addr_map[i].pr_vaddr);
+ if (GC_roots_present(start)) goto irrelevant;
+ limit = start + addr_map[i].pr_size;
+ if (addr_map[i].pr_off == 0 && strncmp(start, ELFMAG, 4) == 0) {
+ /* Discard text segments, i.e. 0-offset mappings against */
+ /* executable files which appear to have ELF headers. */
+ caddr_t arg;
+ int obj;
+# define MAP_IRR_SZ 10
+ static ptr_t map_irr[MAP_IRR_SZ];
+ /* Known irrelevant map entries */
+ static int n_irr = 0;
+ /* Note: this buf and i shadow the outer declarations. */
+ struct stat buf;
+ register int i;
+
+ for (i = 0; i < n_irr; i++) {
+ if (map_irr[i] == start) goto irrelevant;
+ }
+ arg = (caddr_t)start;
+ obj = ioctl(fd, PIOCOPENM, &arg);
+ if (obj >= 0) {
+
+ /* NOTE(review): the fstat result is not checked. */
+ fstat(obj, &buf);
+ close(obj);
+ /* Any execute permission bit marks the file as executable. */
+ if ((buf.st_mode & 0111) != 0) {
+ if (n_irr < MAP_IRR_SZ) {
+ map_irr[n_irr++] = start;
+ }
+ goto irrelevant;
+ }
+ }
+ }
+ GC_add_roots_inner(start, limit);
+ irrelevant: ;
+ }
+}
+
+#endif /* IRIX5 */
+
+#else /* !DYNAMIC_LOADING */
+
void GC_register_dynamic_libraries(){}
int GC_no_dynamic_loading;
-#endif
+
+#endif /* !DYNAMIC_LOADING */
void_star client_data;
# endif
{
+ void_star result;
DCL_LOCK_STATE;
DISABLE_SIGNALS();
LOCK();
- (*fn)(client_data);
+ result = (*fn)(client_data);
UNLOCK();
ENABLE_SIGNALS();
+ return(result);
}
* Permission is hereby granted to copy this garbage collector for any purpose,
* provided the above notices are retained on all copies.
*/
+/* Boehm, December 20, 1993 3:05 pm PST */
#ifndef GC_H
/* available or most heap objects are */
/* pointerfree(atomic) or immutable. */
/* Don't use in leak finding mode. */
+/* Ignored if GC_dont_gc is true. */
void GC_enable_incremental();
/* Debugging (annotated) allocation. GC_gcollect will check */
# define GC_CHANGE_STUBBORN(p) GC_change_stubborn(p)
# define GC_END_STUBBORN_CHANGE(p) GC_end_stubborn_change(p)
# endif
+/* The following are included because they are often convenient, and */
+/* reduce the chance for a misspecified size argument. But calls may */
+/* expand to something syntactically incorrect if t is a complicated */
+/* type expression. */
+# define GC_NEW(t) (t *)GC_MALLOC(sizeof (t))
+# define GC_NEW_ATOMIC(t) (t *)GC_MALLOC_ATOMIC(sizeof (t))
+# define GC_NEW_STUBBORN(t) (t *)GC_MALLOC_STUBBORN(sizeof (t))
+/* Must expand to the allocation macro, not to itself: a macro is not */
+/* re-expanded inside its own expansion, so a self-reference would be */
+/* left as a call to an undeclared function. */
+# define GC_NEW_UNCOLLECTABLE(t) (t *)GC_MALLOC_UNCOLLECTABLE(sizeof (t))
/* Finalization. Some of these primitives are grossly unsafe. */
/* The idea is to make them both cheap, and sufficient to build */
/* Converting a hidden pointer to a real pointer requires verifying */
/* that the object still exists. This involves acquiring the */
/* allocator lock to avoid a race with the collector. */
- typedef char * (*GC_fn_type)();
+
# if defined(__STDC__) || defined(__cplusplus)
+ typedef void * (*GC_fn_type)();
void * GC_call_with_alloc_lock(GC_fn_type fn, void * client_data);
# else
+ typedef char * (*GC_fn_type)();
char * GC_call_with_alloc_lock(/* GC_fn_type fn, char * client_data */);
# endif
# endif
* Permission is hereby granted to copy this garbage collector for any purpose,
* provided the above notices are retained on all copies.
*/
+/* Boehm, November 15, 1993 1:37 pm PST */
# ifndef GC_HEADERS_H
# define GC_HEADERS_H
typedef struct hblkhdr hdr;
(hhdr) = *_ha; }
# define SET_HDR(p, hhdr) { register hdr ** _ha; GET_HDR_ADDR(p, _ha); \
*_ha = (hhdr); }
-# define HDR(p) GC_find_header(p)
+# define HDR(p) GC_find_header((ptr_t)(p))
# endif
/* Is the result a forwarding address to someplace closer to the */
* Permission is hereby granted to copy this garbage collector for any purpose,
* provided the above notices are retained on all copies.
*/
+/* Boehm, December 16, 1993 4:52 pm PST */
# ifndef GC_PRIVATE_H
typedef char * ptr_t; /* A generic pointer to which we can add */
/* byte displacments. */
+ /* Preferably identical to caddr_t, if it */
+ /* exists. */
#ifdef __STDC__
+# include <stdlib.h>
# if !(defined( sony_news ) )
# include <stddef.h>
# endif
# define FAR
#endif
-# ifndef OS2
-# include <sys/types.h>
-# endif
-
/*********************************/
/* */
/* Definitions for conservative */
# define GATHERSTATS
#endif
+# if defined(PCR) || defined(SRC_M3)
+# define THREADS
+# endif
#ifdef SPARC
# define ALIGN_DOUBLE /* Align objects of size > 1 word on 2 word */
/* boundaries. Wasteful of memory, but */
/* apparently required by SPARC architecture. */
+# define ASM_CLEAR_CODE /* Stack clearing is crucial, and we */
+ /* include assembly code to do it well. */
#endif
-#if defined(SPARC) || defined(M68K) && defined(SUNOS4)
+#if defined(SPARC) || defined(M68K) && defined(SUNOS4) || defined(IRIX5)
# if !defined(PCR)
# define DYNAMIC_LOADING /* Search dynamic libraries for roots. */
# else
# define MINHINCR 16 /* Minimum heap increment, in blocks of HBLKSIZE */
# define MAXHINCR 512 /* Maximum heap increment, in blocks */
+# define TIME_LIMIT 50 /* We try to keep pause times from exceeding */
+ /* this by much. In milliseconds. */
/*********************************/
/* */
# define bzero(x,n) memset(x, 0, n)
# endif
-# if defined(PCR) || defined(SRC_M3)
-# define THREADS
-# endif
-
/* HBLKSIZE aligned allocation. 0 is taken to mean failure */
/* space is assumed to be cleared. */
# ifdef PCR
void PCR_Base_Panic(const char *fmt, ...);
# define ABORT(s) PCR_Base_Panic(s)
# else
-# define ABORT(s) abort(s)
+# define ABORT(msg) { GC_err_printf1("%s\n", msg); (void) abort(); }
# endif
/* Exit abnormally, but without making a mess (e.g. out of memory) */
/* Ditto, but also mark from clean pages. */
struct hblk * GC_push_next_marked_uncollectable(/* h */);
/* Ditto, but mark only from uncollectable pages. */
-void GC_stopped_mark(); /* Mark from all roots and rescuers */
- /* with the world stopped. */
+bool GC_stopped_mark(); /* Stop world and mark from all roots */
+ /* and rescuers. */
void GC_clear_hdr_marks(/* hhdr */); /* Clear the mark bits in a header */
void GC_add_roots_inner();
void GC_register_dynamic_libraries();
--- /dev/null
+/*
+ * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
+ * Copyright (c) 1991 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to copy this garbage collector for any purpose,
+ * provided the above notices are retained on all copies.
+ */
+
+#ifndef GC_H
+
+# define GC_H
+
+# include <stddef.h>
+
+/* Define GC_word and GC_signed_word to be unsigned and signed types of */
+/* the same size as char * or void *. There seems to be no way to do */
+/* this even semi-portably. The following is probably no better/worse */
+/* than almost anything else. */
+/* The ANSI standard suggests that size_t and ptrdiff_t might be */
+/* better choices. But those appear to have incorrect definitions */
+/* on many systems. Notably "typedef int size_t" seems to be both */
+/* frequent and WRONG. */
+typedef unsigned long GC_word;
+typedef long GC_signed_word;
+
+/* Public read-only variables */
+
+extern GC_word GC_heapsize; /* Heap size in bytes */
+
+extern GC_word GC_gc_no;/* Counter incremented per collection. */
+ /* Includes empty GCs at startup. */
+
+extern int GC_incremental; /* Using incremental/generational collection. */
+
+
+/* Public R/W variables */
+
+extern int GC_quiet; /* Disable statistics output. Only matters if */
+ /* collector has been compiled with statistics */
+ /* enabled. This involves a performance cost, */
+ /* and is thus not the default. */
+
+extern int GC_dont_gc; /* Don't collect unless explicitly requested, e.g. */
+ /* because it's not safe. */
+
+extern int GC_dont_expand;
+ /* Don't expand heap unless explicitly requested */
+ /* or forced to. */
+
+extern int GC_full_freq; /* Number of partial collections between */
+ /* full collections. Matters only if */
+ /* GC_incremental is set. */
+
+extern GC_word GC_non_gc_bytes;
+ /* Bytes not considered candidates for collection. */
+ /* Used only to control scheduling of collections. */
+
+extern GC_word GC_free_space_divisor;
+ /* We try to make sure that we allocate at */
+ /* least N/GC_free_space_divisor bytes between */
+ /* collections, where N is the heap size plus */
+ /* a rough estimate of the root set size. */
+ /* Initially, GC_free_space_divisor = 4. */
+ /* Increasing its value will use less space */
+ /* but more collection time. Decreasing it */
+ /* will appreciably decrease collection time */
+ /* at the expense of space. */
+ /* GC_free_space_divisor = 1 will effectively */
+ /* disable collections. */
+
+/* Public procedures */
+/*
+ * general purpose allocation routines, with roughly malloc calling conv.
+ * The atomic versions promise that no relevant pointers are contained
+ * in the object. The nonatomic versions guarantee that the new object
+ * is cleared. GC_malloc_stubborn promises that no changes to the object
+ * will occur after GC_end_stubborn_change has been called on the
+ * result of GC_malloc_stubborn. GC_malloc_uncollectable allocates an object
+ * that is scanned for pointers to collectable objects, but is not itself
+ * collectable. GC_malloc_uncollectable and GC_free called on the resulting
+ * object implicitly update GC_non_gc_bytes appropriately.
+ */
+#if defined(__STDC__) || defined(__cplusplus)
+ extern void * GC_malloc(size_t size_in_bytes);
+ extern void * GC_malloc_atomic(size_t size_in_bytes);
+ extern void * GC_malloc_uncollectable(size_t size_in_bytes);
+ extern void * GC_malloc_stubborn(size_t size_in_bytes);
+# else
+ extern char * GC_malloc(/* size_in_bytes */);
+ extern char * GC_malloc_atomic(/* size_in_bytes */);
+ extern char * GC_malloc_uncollectable(/* size_in_bytes */);
+ extern char * GC_malloc_stubborn(/* size_in_bytes */);
+# endif
+
+/* Explicitly deallocate an object. Dangerous if used incorrectly. */
+/* Requires a pointer to the base of an object. */
+/* If the argument is stubborn, it should not be changeable when freed. */
+/* An object should not be enabled for finalization when it is */
+/* explicitly deallocated. */
+#if defined(__STDC__) || defined(__cplusplus)
+ extern void GC_free(void * object_addr);
+# else
+ extern void GC_free(/* object_addr */);
+# endif
+
+/*
+ * Stubborn objects may be changed only if the collector is explicitly informed.
+ * The collector is implicitly informed of coming change when such
+ * an object is first allocated. The following routines inform the
+ * collector that an object will no longer be changed, or that it will
+ * once again be changed. Only nonNIL pointer stores into the object
+ * are considered to be changes. The argument to GC_end_stubborn_change
+ * must be exactly the value returned by GC_malloc_stubborn or passed to
+ * GC_change_stubborn. (In the second case it may be an interior pointer
+ * within 512 bytes of the beginning of the objects.)
+ * There is a performance penalty for allowing more than
+ * one stubborn object to be changed at once, but it is acceptable to
+ * do so. The same applies to dropping stubborn objects that are still
+ * changeable.
+ */
+void GC_change_stubborn(/* p */);
+void GC_end_stubborn_change(/* p */);
+
+/* Return a pointer to the base (lowest address) of an object given */
+/* a pointer to a location within the object. */
+/* Return 0 if displaced_pointer doesn't point to within a valid */
+/* object. */
+# if defined(__STDC__) || defined(__cplusplus)
+ void * GC_base(void * displaced_pointer);
+# else
+ char * GC_base(/* char * displaced_pointer */);
+# endif
+
+/* Given a pointer to the base of an object, return its size in bytes. */
+/* The returned size may be slightly larger than what was originally */
+/* requested. */
+# if defined(__STDC__) || defined(__cplusplus)
+ size_t GC_size(void * object_addr);
+# else
+ size_t GC_size(/* char * object_addr */);
+# endif
+
+/* For compatibility with C library. This is occasionally faster than */
+/* a malloc followed by a bcopy. But if you rely on that, either here */
+/* or with the standard C library, your code is broken. In my */
+/* opinion, it shouldn't have been invented, but now we're stuck. -HB */
+/* The resulting object has the same kind as the original. */
+/* If the argument is stubborn, the result will have changes enabled. */
+/* It is an error to have changes enabled for the original object. */
+# if defined(__STDC__) || defined(__cplusplus)
+ extern void * GC_realloc(void * old_object, size_t new_size_in_bytes);
+# else
+ extern char * GC_realloc(/* old_object, new_size_in_bytes */);
+# endif
+
+
+/* Explicitly increase the heap size. */
+/* Returns 0 on failure, 1 on success. */
+extern int GC_expand_hp(/* number_of_4K_blocks */);
+
+/* Clear the set of root segments */
+extern void GC_clear_roots();
+
+/* Add a root segment */
+extern void GC_add_roots(/* low_address, high_address_plus_1 */);
+
+/* Add a displacement to the set of those considered valid by the */
+/* collector. GC_register_displacement(n) means that if p was returned */
+/* by GC_malloc, then (char *)p + n will be considered to be a valid */
+/* pointer to p. N must be small and less than the size of p. */
+/* (All pointers to the interior of objects from the stack are */
+/* considered valid in any case. This applies to heap objects and */
+/* static data.) */
+/* Preferably, this should be called before any other GC procedures. */
+/* Calling it later adds to the probability of excess memory */
+/* retention. */
+void GC_register_displacement(/* n */);
+
+/* Explicitly trigger a collection. */
+void GC_gcollect();
+
+/* Enable incremental/generational collection. */
+/* Not advisable unless dirty bits are */
+/* available or most heap objects are */
+/* pointerfree(atomic) or immutable. */
+/* Don't use in leak finding mode. */
+void GC_enable_incremental();
+
+/* Debugging (annotated) allocation. GC_gcollect will check */
+/* objects allocated in this way for overwrites, etc. */
+# if defined(__STDC__) || defined(__cplusplus)
+ extern void * GC_debug_malloc(size_t size_in_bytes,
+ char * descr_string, int descr_int);
+ extern void * GC_debug_malloc_atomic(size_t size_in_bytes,
+ char * descr_string, int descr_int);
+ extern void * GC_debug_malloc_uncollectable(size_t size_in_bytes,
+ char * descr_string, int descr_int);
+ extern void * GC_debug_malloc_stubborn(size_t size_in_bytes,
+ char * descr_string, int descr_int);
+ extern void GC_debug_free(void * object_addr);
+ extern void * GC_debug_realloc(void * old_object,
+ size_t new_size_in_bytes,
+ char * descr_string, int descr_int);
+# else
+ extern char * GC_debug_malloc(/* size_in_bytes, descr_string, descr_int */);
+ extern char * GC_debug_malloc_atomic(/* size_in_bytes, descr_string,
+ descr_int */);
+ extern char * GC_debug_malloc_uncollectable(/* size_in_bytes, descr_string,
+ descr_int */);
+ extern char * GC_debug_malloc_stubborn(/* size_in_bytes, descr_string,
+ descr_int */);
+ extern void GC_debug_free(/* object_addr */);
+ extern char * GC_debug_realloc(/* old_object, new_size_in_bytes,
+ descr_string, descr_int */);
+# endif
+void GC_debug_change_stubborn(/* p */);
+void GC_debug_end_stubborn_change(/* p */);
+# ifdef GC_DEBUG
+# define GC_MALLOC(sz) GC_debug_malloc(sz, __FILE__, __LINE__)
+# define GC_MALLOC_ATOMIC(sz) GC_debug_malloc_atomic(sz, __FILE__, __LINE__)
+# define GC_MALLOC_UNCOLLECTABLE(sz) GC_debug_malloc_uncollectable(sz, \
+ __FILE__, __LINE__)
+# define GC_REALLOC(old, sz) GC_debug_realloc(old, sz, __FILE__, \
+ __LINE__)
+# define GC_FREE(p) GC_debug_free(p)
+# define GC_REGISTER_FINALIZER(p, f, d, of, od) \
+ GC_register_finalizer(GC_base(p), GC_debug_invoke_finalizer, \
+ GC_make_closure(f,d), of, od)
+# define GC_MALLOC_STUBBORN(sz) GC_debug_malloc_stubborn(sz, __FILE__, \
+ __LINE__)
+# define GC_CHANGE_STUBBORN(p) GC_debug_change_stubborn(p)
+# define GC_END_STUBBORN_CHANGE(p) GC_debug_end_stubborn_change(p)
+# else
+# define GC_MALLOC(sz) GC_malloc(sz)
+# define GC_MALLOC_ATOMIC(sz) GC_malloc_atomic(sz)
+# define GC_MALLOC_UNCOLLECTABLE(sz) GC_malloc_uncollectable(sz)
+# define GC_REALLOC(old, sz) GC_realloc(old, sz)
+# define GC_FREE(p) GC_free(p)
+# define GC_REGISTER_FINALIZER(p, f, d, of, od) \
+ GC_register_finalizer(p, f, d, of, od)
+# define GC_MALLOC_STUBBORN(sz) GC_malloc_stubborn(sz)
+# define GC_CHANGE_STUBBORN(p) GC_change_stubborn(p)
+# define GC_END_STUBBORN_CHANGE(p) GC_end_stubborn_change(p)
+# endif
+/* The following are included because they are often convenient, and */
+/* reduce the chance for a misspecified size argument. But calls may */
+/* expand to something syntactically incorrect if t is a complicated */
+/* type expression. Each expands to a call of the corresponding */
+/* GC_MALLOC* macro on sizeof (t), cast to (t *). */
+# define GC_NEW(t) (t *)GC_MALLOC(sizeof (t))
+# define GC_NEW_ATOMIC(t) (t *)GC_MALLOC_ATOMIC(sizeof (t))
+# define GC_NEW_STUBBORN(t) (t *)GC_MALLOC_STUBBORN(sizeof (t))
+# define GC_NEW_UNCOLLECTABLE(t) (t *)GC_MALLOC_UNCOLLECTABLE(sizeof (t))
+
+/* Finalization. Some of these primitives are grossly unsafe. */
+/* The idea is to make them both cheap, and sufficient to build */
+/* a safer layer, closer to PCedar finalization. */
+/* The interface represents my conclusions from a long discussion */
+/* with Alan Demers, Dan Greene, Carl Hauser, Barry Hayes, */
+/* Christian Jacobi, and Russ Atkinson. It's not perfect, and */
+/* probably nobody else agrees with it. Hans-J. Boehm 3/13/92 */
+# if defined(__STDC__) || defined(__cplusplus)
+ typedef void (*GC_finalization_proc)(void * obj, void * client_data);
+# else
+ typedef void (*GC_finalization_proc)(/* void * obj, void * client_data */);
+# endif
+
+void GC_register_finalizer(/* void * obj,
+ GC_finalization_proc fn, void * cd,
+ GC_finalization_proc *ofn, void ** ocd */);
+ /* When obj is no longer accessible, invoke */
+ /* (*fn)(obj, cd). If a and b are inaccessible, and */
+ /* a points to b (after disappearing links have been */
+ /* made to disappear), then only a will be */
+ /* finalized. (If this does not create any new */
+ /* pointers to b, then b will be finalized after the */
+ /* next collection.) Any finalizable object that */
+ /* is reachable from itself by following one or more */
+ /* pointers will not be finalized (or collected). */
+ /* Thus cycles involving finalizable objects should */
+ /* be avoided, or broken by disappearing links. */
+ /* fn is invoked with the allocation lock held. It may */
+ /* not allocate. (Any storage it might need */
+ /* should be preallocated and passed as part of cd.) */
+ /* fn should terminate as quickly as possible, and */
+ /* defer extended computation. */
+ /* All but the last finalizer registered for an object */
+ /* are ignored. */
+ /* Finalization may be removed by passing 0 as fn. */
+ /* The old finalizer and client data are stored in */
+ /* *ofn and *ocd. */
+ /* Fn is never invoked on an accessible object, */
+ /* provided hidden pointers are converted to real */
+ /* pointers only if the allocation lock is held, and */
+ /* such conversions are not performed by finalization */
+ /* routines. */
+
+/* The following routine may be used to break cycles between */
+/* finalizable objects, thus causing cyclic finalizable */
+/* objects to be finalized in the correct order. Standard */
+/* use involves calling GC_register_disappearing_link(&p), */
+/* where p is a pointer that is not followed by finalization */
+/* code, and should not be considered in determining */
+/* finalization order. */
+int GC_register_disappearing_link(/* void ** link */);
+ /* Link should point to a field of a heap allocated */
+ /* object obj. *link will be cleared when obj is */
+ /* found to be inaccessible. This happens BEFORE any */
+ /* finalization code is invoked, and BEFORE any */
+ /* decisions about finalization order are made. */
+ /* This is useful in telling the finalizer that */
+ /* some pointers are not essential for proper */
+ /* finalization. This may avoid finalization cycles. */
+ /* Note that obj may be resurrected by another */
+ /* finalizer, and thus the clearing of *link may */
+ /* be visible to non-finalization code. */
+ /* There's an argument that an arbitrary action should */
+ /* be allowed here, instead of just clearing a pointer. */
+ /* But this causes problems if that action alters, or */
+ /* examines connectivity. */
+ /* Returns 1 if link was already registered, 0 */
+ /* otherwise. */
+ /* Only exists for backward compatibility. See below: */
+int GC_general_register_disappearing_link(/* void ** link, void * obj */);
+ /* A slight generalization of the above. *link is */
+ /* cleared when obj first becomes inaccessible. This */
+ /* can be used to implement weak pointers easily and */
+ /* safely. Typically link will point to a location */
+ /* holding a disguised pointer to obj. In this way */
+ /* soft pointers are broken before any objects */
+ /* reachable from them are finalized. Each link */
+ /* may be registered only once, i.e. with one obj */
+ /* value. This was added after a long email discussion */
+ /* with John Ellis. */
+int GC_unregister_disappearing_link(/* void ** link */);
+ /* Returns 0 if link was not actually registered. */
+ /* Undoes a registration by either of the above two */
+ /* routines. */
+
+/* Auxiliary fns to make finalization work correctly with displaced */
+/* pointers introduced by the debugging allocators. */
+# if defined(__STDC__) || defined(__cplusplus)
+ void * GC_make_closure(GC_finalization_proc fn, void * data);
+ void GC_debug_invoke_finalizer(void * obj, void * data);
+# else
+ char * GC_make_closure(/* GC_finalization_proc fn, char * data */);
+ void GC_debug_invoke_finalizer(/* void * obj, void * data */);
+# endif
+
+
+/* The following is intended to be used by a higher level */
+/* (e.g. cedar-like) finalization facility. It is expected */
+/* that finalization code will arrange for hidden pointers to */
+/* disappear. Otherwise objects can be accessed after they */
+/* have been collected. */
+# ifdef I_HIDE_POINTERS
+# if defined(__STDC__) || defined(__cplusplus)
+# define HIDE_POINTER(p) (~(size_t)(p))
+# define REVEAL_POINTER(p) ((void *)(HIDE_POINTER(p)))
+# else
+# define HIDE_POINTER(p) (~(unsigned long)(p))
+# define REVEAL_POINTER(p) ((char *)(HIDE_POINTER(p)))
+# endif
+ /* Converting a hidden pointer to a real pointer requires verifying */
+ /* that the object still exists. This involves acquiring the */
+ /* allocator lock to avoid a race with the collector. */
+
+# if defined(__STDC__) || defined(__cplusplus)
+ typedef void * (*GC_fn_type)();
+ void * GC_call_with_alloc_lock(GC_fn_type fn, void * client_data);
+# else
+ typedef char * (*GC_fn_type)();
+ char * GC_call_with_alloc_lock(/* GC_fn_type fn, char * client_data */);
+# endif
+# endif
+
+#endif
# include "gc_private.h"
# include <stdio.h>
# include <setjmp.h>
-# ifdef OS2
+# if defined(OS2) || defined(CX_UX)
# define _setjmp(b) setjmp(b)
# define _longjmp(b,v) longjmp(b,v)
# endif
/* version at the end, that is likely, but not guaranteed to work */
/* on your architecture. Run the test_setjmp program to see whether */
/* there is any chance it will work. */
-void GC_push_regs()
+
+#ifdef AMIGA
+__asm GC_push_regs(
+ register __a2 word a2,
+ register __a3 word a3,
+ register __a4 word a4,
+ register __a5 word a5,
+ register __a6 word a6,
+ register __d2 const word d2,
+ register __d3 const word d3,
+ register __d4 const word d4,
+ register __d5 const word d5,
+ register __d6 const word d6,
+ register __d7 const word d7)
+#else
+ void GC_push_regs()
+#endif
{
# ifdef RT
register long TMP_SP; /* must be bound to r11 */
# endif
# ifdef VAX
- /* VAX - generic code below does not work under 4.2 */
+ /* VAX - generic code below does not work under 4.2 */
/* r1 through r5 are caller save, and therefore */
/* on the stack or dead. */
asm("pushl r11"); asm("calls $1,_GC_push_one");
asm("addq.w &0x4,%sp"); /* put stack back where it was */
# endif /* M68K HP */
+# ifdef AMIGA
+ /* AMIGA - could be replaced by generic code */
+ /* SAS/C optimizer mangles this so compile with "noopt" */
+ /* a0, a1, d0 and d1 are caller save */
+ GC_push_one(a2);
+ GC_push_one(a3);
+ GC_push_one(a4);
+ GC_push_one(a5);
+ GC_push_one(a6);
+ /* Skip stack pointer */
+ GC_push_one(d2);
+ GC_push_one(d3);
+ GC_push_one(d4);
+ GC_push_one(d5);
+ GC_push_one(d6);
+ GC_push_one(d7);
+# endif
+
# if defined(I386) && !defined(OS2) && !defined(SUNOS5)
/* I386 code, generic code does not appear to work */
/* It does appear to work under OS2, and asms dont */
# endif
# if defined(I386) && defined(SUNOS5)
- /* I386 code, generic code does not appear to work */
- /* It does appear to work under OS2, and asms dont */
+ /* I386 code, SVR4 variant, generic code does not appear to work */
asm("pushl %eax"); asm("call GC_push_one"); asm("addl $4,%esp");
asm("pushl %ecx"); asm("call GC_push_one"); asm("addl $4,%esp");
asm("pushl %edx"); asm("call GC_push_one"); asm("addl $4,%esp");
# endif /* M68K/SYSV */
-# if defined(HP_PA) || (defined(I386) && defined(OS2))
+# if defined(HP_PA) || defined(M88K) || (defined(I386) && defined(OS2))
/* Generic code */
/* The idea is due to Parag Patel at HP. */
/* We're not sure whether he would like */
/* other machines... */
# if !(defined M68K) && !(defined VAX) && !(defined RT)
-# if !(defined SPARC) && !(defined I386) &&!(defined NS32K)
-# if !defined(HP_PA)
+# if !(defined SPARC) && !(defined I386) && !(defined NS32K)
+# if !defined(HP_PA) && !defined(M88K)
--> bad news <--
# endif
# endif
# endif
# endif
+
+/* GC_clear_stack_inner(arg, limit) zeroes the stack, 8 bytes at a time, */
+/* from sp-8 down to limit and returns arg unchanged. Stack clearing is */
+/* crucial on SPARC, so we supply an assembly version that's more */
+/* careful. Assumes limit is hotter than sp, and limit is 8 byte aligned. */
+#if defined(ASM_CLEAR_CODE) && !defined(THREADS)
+#ifndef SPARC
+ --> fix it
+#endif
+# ifdef SUNOS4
+ asm(".globl _GC_clear_stack_inner");
+ asm("_GC_clear_stack_inner:");
+# else
+ asm(".globl GC_clear_stack_inner");
+ asm("GC_clear_stack_inner:");
+# endif
+ asm("mov %sp,%o2"); /* Save sp in %o2 */
+ asm("add %sp,-8,%o3"); /* p = sp-8 */
+ asm("clr %g1"); /* [g0,g1] = 0 */
+ asm("add %o1,-0x60,%sp"); /* sp = limit - 0x60: move sp out of the */
+ /* way, so that traps still work. */
+ /* Includes some extra words */
+ /* so we can be sloppy below. */
+ asm("loop:");
+ asm("std %g0,[%o3]"); /* *(long long *)p = 0 */
+ asm("cmp %o3,%o1");
+ asm("bgu loop "); /* if (p > limit) goto loop */
+ asm("add %o3,-8,%o3"); /* p -= 8 (delay slot) */
+ asm("retl");
+ asm("mov %o2,%sp"); /* Restore saved sp (delay slot of retl) */
+ /* %o0 is never written, so arg comes back as the return value */
+
+# ifdef LINT
+ /*ARGSUSED*/
+ ptr_t GC_clear_stack_inner(arg, limit)
+ ptr_t arg; word limit;
+ { return(arg); }
+# endif
+#endif
* Permission is hereby granted to copy this garbage collector for any purpose,
* provided the above notices are retained on all copies.
*/
+/* Boehm, November 17, 1993 5:51 pm PST */
#include <stdio.h>
#include "gc_private.h"
-extern void GC_clear_stack(); /* in misc.c */
+extern ptr_t GC_clear_stack(); /* in misc.c, behaves like identity */
+void GC_extend_size_map(); /* in misc.c. */
# ifdef ALL_INTERIOR_POINTERS
# define SMALL_OBJ(bytes) ((bytes) < WORDS_TO_BYTES(MAXOBJSZ))
# endif
opp = &(GC_obj_kinds[k].ok_freelist[lw]);
if( (op = *opp) == 0 ) {
- if (!GC_is_initialized) {
- GC_init_inner();
- return(GC_generic_malloc_inner(lb, k));
- }
- GC_clear_stack();
+# ifdef MERGE_SIZES
+ if (GC_size_map[lb] == 0) {
+ if (!GC_is_initialized) GC_init_inner();
+ if (GC_size_map[lb] == 0) GC_extend_size_map(lb);
+ return(GC_generic_malloc_inner(lb, k));
+ }
+# else
+ if (!GC_is_initialized) {
+ GC_init_inner();
+ return(GC_generic_malloc_inner(lb, k));
+ }
+# endif
op = GC_allocobj(lw, k);
if (op == 0) goto out;
}
if (!GC_is_initialized) {
GC_init_inner();
}
- GC_clear_stack();
- op = GC_allocobj(lw, k);
+ op = GC_clear_stack(GC_allocobj(lw, k));
if (op == 0) goto out;
}
*opp = obj_link(op);
return((ptr_t)op);
}
+/* Allocate lb bytes of kind k with GC_generic_malloc and pass the */
+/* result through GC_clear_stack, cast to extern_ptr_t. Arguments are */
+/* parenthesized so that expression arguments are cast as a whole. */
+#define GENERAL_MALLOC(lb,k) \
+ (extern_ptr_t)GC_clear_stack(GC_generic_malloc((word)(lb), (k)))
+/* We make the GC_clear_stack call a tail call, hoping to get more of */
+/* the stack. */
+
/* Allocate lb bytes of atomic (pointerfree) data */
# ifdef __STDC__
extern_ptr_t GC_malloc_atomic(size_t lb)
FASTLOCK();
if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
FASTUNLOCK();
- return(GC_generic_malloc((word)lb, PTRFREE));
+ return(GENERAL_MALLOC((word)lb, PTRFREE));
}
/* See above comment on signals. */
*opp = obj_link(op);
FASTUNLOCK();
return((extern_ptr_t) op);
} else {
- return((extern_ptr_t)
- GC_generic_malloc((word)lb, PTRFREE));
+ return(GENERAL_MALLOC((word)lb, PTRFREE));
}
}
FASTLOCK();
if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
FASTUNLOCK();
- return(GC_generic_malloc((word)lb, NORMAL));
+ return(GENERAL_MALLOC((word)lb, NORMAL));
}
/* See above comment on signals. */
*opp = obj_link(op);
FASTUNLOCK();
return((extern_ptr_t) op);
} else {
- return((extern_ptr_t)
- GC_generic_malloc((word)lb, NORMAL));
+ return(GENERAL_MALLOC((word)lb, NORMAL));
}
}
case STUBBORN:
return(GC_malloc_stubborn((size_t)lb));
# endif
+ case PTRFREE:
+ return(GC_malloc_atomic((size_t)lb));
+ case NORMAL:
+ return(GC_malloc((size_t)lb));
case UNCOLLECTABLE:
return(GC_malloc_uncollectable((size_t)lb));
default:
void GC_initiate_full()
{
# ifdef PRINTSTATS
- GC_printf2("Full mark for collection %lu after %ld allocd bytes\n",
+ GC_printf2("***>Full mark for collection %lu after %ld allocd bytes\n",
(unsigned long) GC_gc_no+1,
(long)WORDS_TO_BYTES(GC_words_allocd));
# endif
/* Initiate partial marking. */
/*ARGSUSED*/
-void GC_initiate_partial(gc_no)
-word gc_no;
+void GC_initiate_partial()
{
-# ifdef PRINTSTATS
- if (gc_no > GC_gc_no) {
- GC_printf2("Partial mark for collection %lu after %ld allocd bytes\n",
- (unsigned long) gc_no,
- (long)WORDS_TO_BYTES(GC_words_allocd));
- } /* else the world is stopped, and we just printed this */
-# endif
if (GC_incremental) GC_read_dirty();
# ifdef STUBBORN_ALLOC
GC_read_changed();
(unsigned long)GC_n_rescuing_pages);
# endif
GC_push_roots(FALSE);
+ GC_objects_are_marked = TRUE;
if (GC_mark_state != MS_INVALID) {
GC_mark_state = MS_ROOTS_PUSHED;
}
scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
if (scan_ptr == 0) {
GC_push_roots(TRUE);
+ GC_objects_are_marked = TRUE;
if (GC_mark_state != MS_INVALID) {
GC_mark_state = MS_ROOTS_PUSHED;
}
scan_ptr = GC_push_next_marked(scan_ptr);
if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
GC_push_roots(TRUE);
+ GC_objects_are_marked = TRUE;
if (GC_mark_state != MS_INVALID) {
GC_mark_state = MS_ROOTS_PUSHED;
}
return(msp-INITIAL_MARK_STACK_SIZE/8);
}
msp -> mse_start = addr;
- msp -> mse_end = addr + sz;
+# ifdef ALL_INTERIOR_POINTERS
+ /* Last word can't possibly contain pointers, since we */
+ /* pad the size by a byte. */
+ msp -> mse_end = addr + sz - 1;
+# else
+ msp -> mse_end = addr + sz;
+# endif
# ifdef GATHERSTATS
GC_composite_in_use += sz;
# endif
if (GC_mark_stack_size != 0) {
if (new_stack != 0) {
+ word displ = HBLKDISPL(GC_mark_stack);
+ word size = GC_mark_stack_size * sizeof(struct ms_entry);
+
/* Recycle old space */
- GC_add_to_heap((struct hblk *)GC_mark_stack,
- GC_mark_stack_size * sizeof(struct ms_entry));
+ if (displ == 0) {
+ GC_add_to_heap((struct hblk *)GC_mark_stack, size);
+ } else {
+ GC_add_to_heap((struct hblk *)
+ ((word)GC_mark_stack - displ + HBLKSIZE),
+ size - HBLKSIZE);
+ }
GC_mark_stack = new_stack;
GC_mark_stack_size = n;
}
# ifdef GATHERSTATS
GC_n_rescuing_pages++;
# endif
+ GC_objects_are_marked = TRUE;
if (sz > MAXOBJSZ) {
lim = (word *)(h + 1);
} else {
/* Is a range starting at b already in the table? If so return a */
/* pointer to it, else NIL. */
-static struct roots * roots_present(b)
+struct roots * GC_roots_present(b)
char *b;
{
register int h = rt_hash(b);
} else if ((ptr_t)b < endGC_arrays && (ptr_t)e > endGC_arrays) {
b = (char *)endGC_arrays;
}
- old = roots_present(b);
+ old = GC_roots_present(b);
if (old != 0) {
if ((ptr_t)e <= old -> r_end) /* already there */ return;
/* else extend */
.globl GC_push_regs
.ent GC_push_regs
GC_push_regs:
- subu $sp,4 ## Need to save only return address
+ subu $sp,8 ## Need to save only return address
sw $31,4($sp)
- .mask 0x80000000,0
- .frame $sp,4,$31
+ .mask 0x80000000,-4
+ .frame $sp,8,$31
call_push($2)
call_push($3)
call_push($16)
call_push($23)
call_push($30)
lw $31,4($sp)
- addu $sp,4
+ addu $sp,8
j $31
.end GC_push_regs
* Permission is hereby granted to copy this garbage collector for any purpose,
* provided the above notices are retained on all copies.
*/
+/* Boehm, December 20, 1993 3:06 pm PST */
#define DEBUG /* Some run-time consistency checks */
#undef DEBUG
void GC_init_size_map()
{
register unsigned i;
- register unsigned sz_rounded_up = 0;
/* Map size 0 to 1. This avoids problems at lower levels. */
GC_size_map[0] = 1;
GC_size_map[i] = ROUNDED_UP_WORDS(i);
# endif
}
-
- for (i = 8*sizeof(word)+1; i <= WORDS_TO_BYTES(MAXOBJSZ); i++) {
- if (sz_rounded_up < ROUNDED_UP_WORDS(i)) {
- register int size = ROUNDED_UP_WORDS(i);
- register unsigned m = 0;
-
- while (size > 7) {
- m += 1;
- size += 1;
- size >>= 1;
- }
- sz_rounded_up = size << m;
- if (sz_rounded_up > MAXOBJSZ) {
- sz_rounded_up = MAXOBJSZ;
- }
- }
- GC_size_map[i] = sz_rounded_up;
+ /* We leave the rest of the array to be filled in on demand. */
+ }
+
+ /* Fill in additional entries in GC_size_map, including the ith one */
+ /* We assume the ith entry is currently 0. */
+ /* Note that a filled in section of the array ending at n always */
+ /* has length at least n/4. */
+ /* i is a request size in bytes. GC_size_map is indexed by byte */
+ /* size; every entry filled in here receives the same word size. */
+ void GC_extend_size_map(i)
+ word i;
+ {
+ word orig_word_sz = ROUNDED_UP_WORDS(i);
+ word word_sz = orig_word_sz;
+ register word byte_sz = WORDS_TO_BYTES(word_sz);
+ /* The size we try to preserve. */
+ /* Close to i, unless this would */
+ /* introduce too many distinct sizes. */
+ word smaller_than_i = byte_sz - (byte_sz >> 3);
+ word much_smaller_than_i = byte_sz - (byte_sz >> 2);
+ register word low_limit; /* The lowest indexed entry we */
+ /* initialize. */
+ register word j; /* Loop index. Declared as word, like the */
+ /* bounds low_limit and byte_sz, so the */
+ /* loop test compares like with like. */
+
+ if (GC_size_map[smaller_than_i] == 0) {
+ /* Nothing filled in close below i: start at 3/4 of the size */
+ /* and skip forward over any entries that are already set. */
+ low_limit = much_smaller_than_i;
+ while (GC_size_map[low_limit] != 0) low_limit++;
+ } else {
+ /* Entries just below i are taken: start right above them and */
+ /* enlarge the word size by 1/8 so the new run covers a range. */
+ low_limit = smaller_than_i + 1;
+ while (GC_size_map[low_limit] != 0) low_limit++;
+ word_sz = ROUNDED_UP_WORDS(low_limit);
+ word_sz += word_sz >> 3;
+ if (word_sz < orig_word_sz) word_sz = orig_word_sz;
+ }
+# ifdef ALIGN_DOUBLE
+ word_sz += 1;
+ word_sz &= ~1;
+# endif
+ if (word_sz > MAXOBJSZ) {
+ word_sz = MAXOBJSZ;
}
+ byte_sz = WORDS_TO_BYTES(word_sz);
+# ifdef ALL_INTERIOR_POINTERS
+ /* We need one extra byte; don't fill in GC_size_map[byte_sz] */
+ byte_sz--;
+# endif
+
+ for (j = low_limit; j <= byte_sz; j++) GC_size_map[j] = word_sz;
}
# endif
+
/*
* The following is a gross hack to deal with a problem that can occur
* on machines that are sloppy about stack frame sizes, notably SPARC.
* Bogus pointers may be written to the stack and not cleared for
* a LONG time, because they always fall into holes in stack frames
- * that are not written. We partially address this by randomly clearing
+ * that are not written. We partially address this by clearing
* sections of the stack whenever we get control.
*/
word GC_stack_last_cleared = 0; /* GC_no when we last did this */
# define CLEAR_SIZE 213
-# define CLEAR_THRESHOLD 10000
# define DEGRADE_RATE 50
-ptr_t GC_min_sp; /* Coolest stack pointer value from which we've */
+word GC_min_sp; /* Coolest stack pointer value from which we've */
/* already cleared the stack. */
# ifdef STACK_GROWS_DOWN
# define COOLER_THAN >
# define HOTTER_THAN <
# define MAKE_COOLER(x,y) if ((word)(x)+(y) > (word)(x)) {(x) += (y);} \
- else {(x) = (ptr_t)ONES;}
+ else {(x) = (word)ONES;}
# define MAKE_HOTTER(x,y) (x) -= (y)
# else
# define COOLER_THAN <
# define MAKE_HOTTER(x,y) (x) += (y)
# endif
-ptr_t GC_high_water;
+word GC_high_water;
/* "hottest" stack pointer value we have seen */
/* recently. Degrades over time. */
+
+/* Return the address of a local variable of this function, converted */
+/* to a word. GC_clear_stack uses the result as its estimate of the */
+/* current stack pointer. */
+word GC_stack_upper_bound()
+{
+ word dummy;
+
+ return((word)(&dummy));
+}
+
+word GC_words_allocd_at_reset;
+
+#if defined(ASM_CLEAR_CODE) && !defined(THREADS)
+ extern ptr_t GC_clear_stack_inner();
+#endif
+
+#if !defined(ASM_CLEAR_CODE) && !defined(THREADS)
+/* Clear the stack up to about limit. Return arg. */
+/* Each activation zeroes a CLEAR_SIZE-word local array and calls */
+/* itself again for as long as that array is COOLER_THAN limit. */
+/* arg is not examined; it is only handed back to the caller. */
/*ARGSUSED*/
-void GC_clear_stack_inner(d)
-word *d;
+ptr_t GC_clear_stack_inner(arg, limit)
+ptr_t arg;
+word limit;
{
word dummy[CLEAR_SIZE];
bzero((char *)dummy, (int)(CLEAR_SIZE*sizeof(word)));
-# ifdef THREADS
- GC_noop(dummy);
-# else
- if ((ptr_t)(dummy) COOLER_THAN GC_min_sp) {
- GC_clear_stack_inner(dummy);
- }
-# endif
+ if ((word)(dummy) COOLER_THAN limit) {
+ (void) GC_clear_stack_inner(arg, limit);
+ }
+ /* Make sure the recursive call is not a tail call, and the bzero */
+ /* call is not recognized as dead code. */
+ GC_noop(dummy);
+ return(arg);
}
-
-void GC_clear_stack()
-{
- word dummy;
+#endif
+/* Clear some of the inaccessible part of the stack. Returns its */
+/* argument, so it can be used in a tail call position, hence clearing */
+/* another frame. */
+/* arg is passed through untouched. In the non-THREADS case the */
+/* variables GC_min_sp, GC_high_water, GC_stack_last_cleared and */
+/* GC_words_allocd_at_reset are updated as a side effect. */
+ptr_t GC_clear_stack(arg)
+ptr_t arg;
+{
+ register word sp = GC_stack_upper_bound();
+ register word limit;
+# ifdef THREADS
+ word dummy[CLEAR_SIZE];
+# endif
+
+# define SLOP 200
+ /* Extra bytes we clear every time. This clears our own */
+ /* activation record, and should cause more frequent */
+ /* clearing near the cold end of the stack, a good thing. */
+# define CLEAR_THRESHOLD 100000
+ /* We restart the clearing process after this many bytes of */
+ /* allocation. Otherwise very heavily recursive programs */
+ /* with sparse stacks may result in heaps that grow almost */
+ /* without bounds. As the heap gets larger, collection */
+ /* frequency decreases, thus clearing frequency would decrease, */
+ /* thus more junk remains accessible, thus the heap gets */
+ /* larger ... */
# ifdef THREADS
- GC_clear_stack_inner(&dummy);
+ bzero((char *)dummy, (int)(CLEAR_SIZE*sizeof(word)));
# else
if (GC_gc_no > GC_stack_last_cleared) {
/* Start things over, so we clear the entire stack again */
- if (GC_stack_last_cleared == 0) GC_high_water = GC_stackbottom;
+ if (GC_stack_last_cleared == 0) GC_high_water = (word) GC_stackbottom;
GC_min_sp = GC_high_water;
GC_stack_last_cleared = GC_gc_no;
+ GC_words_allocd_at_reset = GC_words_allocd;
}
/* Adjust GC_high_water */
MAKE_COOLER(GC_high_water, WORDS_TO_BYTES(DEGRADE_RATE));
- if ((word)(&dummy) HOTTER_THAN (word)GC_high_water) {
- GC_high_water = (ptr_t)(&dummy);
+ if (sp HOTTER_THAN GC_high_water) {
+ GC_high_water = sp;
}
- if ((word)(&dummy) COOLER_THAN (word)GC_min_sp) {
- GC_clear_stack_inner(&dummy);
- GC_min_sp = (ptr_t)(&dummy);
- }
+ if (sp COOLER_THAN GC_min_sp) {
+ limit = GC_min_sp;
+ MAKE_HOTTER(limit, SLOP);
+ limit &= ~0xf; /* Make it sufficiently aligned for assembly */
+ /* implementations of GC_clear_stack_inner. */
+ GC_min_sp = sp;
+ return(GC_clear_stack_inner(arg, limit));
+ } else if (WORDS_TO_BYTES(GC_words_allocd - GC_words_allocd_at_reset)
+ > CLEAR_THRESHOLD) {
+ /* GC_words_allocd_at_reset is a snapshot of GC_words_allocd, so */
+ /* current minus snapshot is the allocation since the last */
+ /* reset. Subtracting the other way round would wrap to a huge */
+ /* value and make this test succeed after any allocation. */
+ /* Restart clearing process, but limit how much clearing we do. */
+ GC_min_sp = sp;
+ MAKE_HOTTER(GC_min_sp, CLEAR_THRESHOLD/4);
+ if (GC_min_sp HOTTER_THAN GC_high_water) GC_min_sp = GC_high_water;
+ GC_words_allocd_at_reset = GC_words_allocd;
+ }
# endif
+ return(arg);
}
}
# endif
if (sizeof (ptr_t) != sizeof(word)) {
- GC_err_printf0("sizeof (ptr_t) != sizeof(word)\n");
ABORT("sizeof (ptr_t) != sizeof(word)\n");
}
if (sizeof (signed_word) != sizeof(word)) {
- GC_err_printf0("sizeof (signed_word) != sizeof(word)\n");
ABORT("sizeof (signed_word) != sizeof(word)\n");
}
if (sizeof (struct hblk) != HBLKSIZE) {
- GC_err_printf0("sizeof (struct hblk) != HBLKSIZE\n");
ABORT("sizeof (struct hblk) != HBLKSIZE\n");
}
# ifndef THREADS
# if defined(STACK_GROWS_UP) && defined(STACK_GROWS_DOWN)
- GC_err_printf0(
+ ABORT(
"Only one of STACK_GROWS_UP and STACK_GROWS_DOWN should be defd\n");
- ABORT("stack direction 1\n");
# endif
# if !defined(STACK_GROWS_UP) && !defined(STACK_GROWS_DOWN)
- GC_err_printf0(
+ ABORT(
"One of STACK_GROWS_UP and STACK_GROWS_DOWN should be defd\n");
- ABORT("stack direction 2\n");
# endif
# ifdef STACK_GROWS_DOWN
if ((word)(&dummy) > (word)GC_stackbottom) {
}
GC_init_headers();
+ /* Add initial guess of root sets */
+ GC_register_data_segments();
GC_bl_init();
GC_mark_init();
if (!GC_expand_hp_inner((word)MINHINCR)) {
# ifdef MERGE_SIZES
GC_init_size_map();
# endif
- /* Add initial guess of root sets */
- GC_register_data_segments();
# ifdef PCR
PCR_IL_Lock(PCR_Bool_false, PCR_allSigsBlocked, PCR_waitForever);
PCR_IL_Unlock();
if (!GC_is_initialized) {
GC_init_inner();
}
+ if (GC_dont_gc) {
+ /* Can't easily do it. */
+ UNLOCK();
+ ENABLE_SIGNALS();
+ return;
+ }
if (GC_words_allocd > 0) {
- /* There may be unmarked reachable objects */
+ /* There may be unmarked reachable objects */
GC_gcollect_inner();
- } /* else we're OK in assumeing everything's */
- /* clean since nothing can point to an */
- /* unmarked object. */
+ } /* else we're OK in assuming everything's */
+ /* clean since nothing can point to an */
+ /* unmarked object. */
GC_dirty_init();
GC_read_dirty();
GC_incremental = TRUE;
* ptr_t GC_build_flXXX(h, old_fl)
* void GC_new_hblk(n)
*/
+/* Boehm, December 17, 1993 11:53 am PST */
# include <stdio.h>
# ifdef PRINTSTATS
if ((sizeof (struct hblk)) > HBLKSIZE) {
- abort("HBLK SZ inconsistency");
+ ABORT("HBLK SZ inconsistency");
}
# endif
* Permission is hereby granted to copy this garbage collector for any purpose,
* provided the above notices are retained on all copies.
*/
+/* Boehm, December 16, 1993 4:25 pm PST */
+# if !defined(OS2) && !defined(PCR) && !defined(AMIGA)
+# include <sys/types.h>
+# endif
# include "gc_private.h"
# include <stdio.h>
# include <signal.h>
# include <workbench/startup.h>
#endif
+#ifdef IRIX5
+# include <sys/uio.h>
+#endif
+
# ifdef OS2
+# ifndef __IBMC__ /* e.g. EMX */
+
+/* Minimal executable-header layout for compilers other than IBM's. */
+/* Only the two fields reached through E_MAGIC and E_LFANEW are named; */
+/* the 29 shorts between them are opaque padding. */
+/* NOTE(review): presumably mirrors the declaration that <newexe.h> */
+/* supplies in the IBM-compiler branch -- confirm against that header. */
+struct exe_hdr {
+ unsigned short magic_number;
+ unsigned short padding[29];
+ long new_exe_offset;
+};
+
+#define E_MAGIC(x) (x).magic_number
+#define EMAGIC 0x5A4D /* Bytes 'M','Z' when stored low byte first. */
+#define E_LFANEW(x) (x).new_exe_offset
+
+/* Minimal layout of the second executable header, for compilers other */
+/* than IBM's. Fields without an accessor macro below are kept only */
+/* as padding. */
+/* NOTE(review): presumably mirrors the declaration that <exe386.h> */
+/* supplies in the IBM-compiler branch -- confirm against that header. */
+struct e32_exe {
+ unsigned char magic_number[2];
+ unsigned char byte_order;
+ unsigned char word_order;
+ unsigned long exe_format_level;
+ unsigned short cpu;
+ unsigned short os;
+ unsigned long padding1[13];
+ unsigned long object_table_offset;
+ unsigned long object_count;
+ unsigned long padding2[31];
+};
+
+/* Field accessors, each followed by the constant it is paired with. */
+#define E32_MAGIC1(x) (x).magic_number[0]
+#define E32MAGIC1 'L'
+#define E32_MAGIC2(x) (x).magic_number[1]
+#define E32MAGIC2 'X'
+#define E32_BORDER(x) (x).byte_order
+#define E32LEBO 0
+#define E32_WORDER(x) (x).word_order
+#define E32LEWO 0
+#define E32_CPU(x) (x).cpu
+#define E32CPU286 1
+#define E32_OBJTAB(x) (x).object_table_offset
+#define E32_OBJCNT(x) (x).object_count
+
+/* Six-longword record describing one object of the executable, for */
+/* compilers other than IBM's. Only size, base and flags have */
+/* accessor macros below. */
+/* NOTE(review): presumably one entry of the table located through */
+/* E32_OBJTAB / E32_OBJCNT -- confirm against the code that reads it. */
+struct o32_obj {
+ unsigned long size;
+ unsigned long base;
+ unsigned long flags;
+ unsigned long pagemap;
+ unsigned long mapsize;
+ unsigned long reserved;
+};
+
+#define O32_FLAGS(x) (x).flags
+/* Bit masks to be tested against O32_FLAGS. */
+#define OBJREAD 0x0001L
+#define OBJWRITE 0x0002L
+#define OBJINVALID 0x0080L
+#define O32_SIZE(x) (x).size
+#define O32_BASE(x) (x).base
+
+# else /* IBM's compiler */
+
# define INCL_DOSEXCEPTIONS
# define INCL_DOSPROCESS
# define INCL_DOSERRORS
# include <newexe.h>
# include <exe386.h>
+# endif /* __IBMC__ */
+
/* Disable and enable signals during nontrivial allocations */
void GC_disable_signals(void)
extern int end;
# endif
-# if !defined(PCR) && !defined(SRC_M3) && !defined(NEXT)
+# if !defined(PCR) && !defined(SRC_M3) && !defined(NEXT)
GC_add_roots_inner(DATASTART, (char *)(&end));
# endif
# if !defined(PCR) && defined(NEXT)
word GC_page_size;
+/* Return TRUE if addr lies in the partial page just below the start, */
+/* or just above the end, of some entry in GC_heap_sects; FALSE */
+/* otherwise. Page boundaries are computed by masking with */
+/* GC_page_size-1, which assumes GC_page_size is a power of two. */
+bool GC_just_outside_heap(addr)
+word addr;
+{
+ register int sect;
+ word page_mask = GC_page_size-1;
+
+ for (sect = 0; sect < GC_n_heap_sects; sect++) {
+ register word lo = (word) GC_heap_sects[sect].hs_start;
+ register word hi = lo + (word)GC_heap_sects[sect].hs_bytes;
+
+ /* Below the section, but not below the page lo sits in? */
+ if (addr < lo && addr >= (lo & ~page_mask)) {
+ return(TRUE);
+ }
+ /* At or past the end, but before hi rounded up to a page? */
+ if (addr >= hi && addr < ((hi + page_mask) & ~page_mask)) {
+ return(TRUE);
+ }
+ }
+ return(FALSE);
+}
+
/*ARGSUSED*/
# ifdef SUNOS4
void GC_write_fault_handler(sig, code, scp, addr)
register struct hblk * h =
(struct hblk *)((word)addr & ~(GC_page_size-1));
- for (i = 0; i < GC_page_size/HBLKSIZE; i++) {
+ if (HDR(addr) == 0 && !GC_just_outside_heap((word)addr)) {
+ ABORT("Unexpected bus error or segmentation fault");
+ }
+ for (i = 0; i < divHBLKSZ(GC_page_size); i++) {
register int index = PHT_HASH(h+i);
- if (HDR(h+i) == 0) {
- ABORT("Unexpected bus error or segmentation fault");
- }
set_pht_entry_from_index(GC_dirty_pages, index);
}
if (mprotect((caddr_t)h, (int)GC_page_size,
GC_begin_syscall();
GC_unprotect_range(buf, (word)nbyte);
- result = syscall(SYS_read, fd, buf, nbyte);
+# ifdef IRIX5
+ /* Indirect system call exists, but is undocumented, and */
+ /* always seems to return EINVAL. There seems to be no */
+ /* general way to wrap system calls, since the system call */
+ /* convention appears to require an immediate argument for */
+ /* the system call number, and building the required code */
+ /* in the data segment also seems dangerous. We can fake it */
+ /* for read; anything else is up to the client. */
+ {
+ struct iovec iov;
+
+ iov.iov_base = buf;
+ iov.iov_len = nbyte;
+ result = readv(fd, &iov, 1);
+ }
+# else
+ result = syscall(SYS_read, fd, buf, nbyte);
+# endif
GC_end_syscall();
return(result);
}
void GC_dirty_init()
{
int fd;
- char buf[20];
+ char buf[30];
sprintf(buf, "/proc/%d", getpid());
fd = open(buf, O_RDONLY);
--- /dev/null
+! SPARCompiler 3.0 and later apparently no longer handles
+! asm outside functions. So we need a separate .s file
+! This is only set up for SunOS 5, not SunOS 4.
+! Assumes this is called before the stack contents are
+! examined.
+
+ .seg "text"
+! GC_save_regs_in_stack and GC_push_regs label the same code:
+! issue trap 3 (ST_FLUSH_WINDOWS), then return with the current
+! %sp copied into %o0.
+ .globl GC_save_regs_in_stack
+ .globl GC_push_regs
+GC_save_regs_in_stack:
+GC_push_regs:
+ ta 0x3 ! ST_FLUSH_WINDOWS
+ mov %sp,%o0
+ retl
+ nop ! Nothing to do in the slot after retl
+
+! GC_clear_stack_inner(arg, limit), assembly version.
+! NOTE(review): assumes the usual SPARC convention of arg in %o0 and
+! limit in %o1 -- confirm against the C caller.
+! Stores zero doublewords at descending addresses, starting 8 bytes
+! below the incoming %sp, until the store address is no longer above
+! %o1. %o0 is never written here; %sp is parked below limit during
+! the loop and put back on return.
+ .globl GC_clear_stack_inner
+GC_clear_stack_inner:
+ mov %sp,%o2 ! Save sp
+ add %sp,-8,%o3 ! p = sp-8
+ clr %g1 ! [g0,g1] = 0
+ add %o1,-0x60,%sp ! Move sp out of the way,
+ ! so that traps still work.
+ ! Includes some extra words
+ ! so we can be sloppy below.
+loop:
+ std %g0,[%o3] ! *(long long *)p = 0
+ cmp %o3,%o1
+ bgu loop ! if (p > limit) goto loop
+ add %o3,-8,%o3 ! p -= 8 (delay slot)
+ retl
+ mov %o2,%sp ! Restore sp., delay slot
+
+
+
+
+
+
\ No newline at end of file
# define SMALL_OBJ(bytes) ((bytes) <= WORDS_TO_BYTES(MAXOBJSZ))
# endif
+extern ptr_t GC_clear_stack(); /* in misc.c, behaves like identity */
+
+#define GENERAL_MALLOC(lb,k) \
+ (extern_ptr_t)GC_clear_stack(GC_generic_malloc((word)lb, k))
+
/* Data structure representing immutable objects that */
/* are still being initialized. */
/* This is a bit baroque in order to avoid acquiring */
register ptr_t op;
register ptr_t *opp;
register word lw;
-extern_ptr_t result;
+ptr_t result;
DCL_LOCK_STATE;
if( SMALL_OBJ(lb) ) {
result = (extern_ptr_t) op;
ADD_CHANGING(result);
FASTUNLOCK();
- return(result);
+ return((extern_ptr_t)result);
} else {
result = (extern_ptr_t)
GC_generic_malloc((word)lb, STUBBORN);
ADD_CHANGING(result);
UNLOCK();
ENABLE_SIGNALS();
- return(result);
+ return((extern_ptr_t)GC_clear_stack(result));
}
/* An incomplete test for the garbage collector. */
/* Some more obscure entry points are not tested at all. */
+/* Boehm, November 24, 1993 5:14 pm PST */
# include <stdlib.h>
# include <stdio.h>
# include "gc.h"
# define FAR
# endif
-# define FAIL abort()
+# define FAIL (void)abort()
/* AT_END may be defined to exercise the interior pointer test */
/* if the collector is configured with ALL_INTERIOR_POINTERS. */
}
for (i = 0; i < 60; i++) {
/* This maintains the invariant that a always points to a list of */
- /* 100 integers. Thus this is thread safe without locks. */
+ /* 49 integers. Thus this is thread safe without locks. */
a = reverse(reverse(a));
# if !defined(AT_END) && !defined(PCR)
/* This is not thread safe, since realloc explicitly deallocates */
{
DCL_LOCK_STATE;
+# ifndef GC_DEBUG
+ if (GC_size(GC_MALLOC(7)) != 8
+ || GC_size(GC_MALLOC(15)) != 16) {
+ (void)printf ("GC_size produced unexpected results\n");
+ FAIL;
+ }
+# endif
reverse_test();
tree_test();
LOCK();