From 5b0c9d3603b027e022b8a0ebd4c492469a5d1633 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 23 Jun 2005 00:06:37 +0000 Subject: [PATCH] Cleanup the contrib/lo module: there is no need anymore to implement a physically separate type. Defining 'lo' as a domain over OID works just fine and is more efficient. Improve documentation and fix up the test script. (Would like to turn test script into a proper regression test, but right now its output is not constant because of numeric OIDs; plus it makes Unix-specific assumptions about files it can import.) --- contrib/lo/README.lo | 67 ++++++++++------ contrib/lo/lo.c | 128 +++---------------------------- contrib/lo/lo.sql.in | 63 ++++----------- contrib/lo/lo_drop.sql | 7 +- contrib/lo/lo_test.sql | 23 +++--- contrib/vacuumlo/README.vacuumlo | 6 +- 6 files changed, 91 insertions(+), 203 deletions(-) diff --git a/contrib/lo/README.lo b/contrib/lo/README.lo index aa06adf084..a7b99940f2 100644 --- a/contrib/lo/README.lo +++ b/contrib/lo/README.lo @@ -8,57 +8,77 @@ also), is that the specification assumes that references to BLOBS (Binary Large OBjectS) are stored within a table, and if that entry is changed, the associated BLOB is deleted from the database. -As PostgreSQL stands, this doesn't occur. It allocates an OID for each object, -and it is up to the application to store, and ultimately delete the objects. - -Now this is fine for new postgresql specific applications, but existing ones -using JDBC or ODBC wont delete the objects, arising to orphaning - objects +As PostgreSQL stands, this doesn't occur. Large objects are treated as +objects in their own right; a table entry can reference a large object by +OID, but there can be multiple table entries referencing the same large +object OID, so the system doesn't delete the large object just because you +change or remove one such entry. + +Now this is fine for new PostgreSQL-specific applications, but existing ones +using JDBC or ODBC won't delete the objects, resulting in orphaning - objects that are not referenced by anything, and simply occupy disk space. + The Fix I've fixed this by creating a new data type 'lo', some support functions, and -a Trigger which handles the orphaning problem. +a Trigger which handles the orphaning problem. The trigger essentially just +does a 'lo_unlink' whenever you delete or modify a value referencing a large +object. When you use this trigger, you are assuming that there is only one +database reference to any large object that is referenced in a +trigger-controlled column! + +The 'lo' type was created because we needed to differentiate between plain +OIDs and Large Objects. Currently the JDBC driver handles this dilemma easily, +but (after talking to Byron), the ODBC driver needed a unique type. They had +created an 'lo' type, but not the solution to orphaning. + +You don't actually have to use the 'lo' type to use the trigger, but it may be +convenient to use it to keep track of which columns in your database represent +large objects that you are managing with the trigger. -The 'lo' type was created because we needed to differenciate between normal -Oid's and Large Objects. Currently the JDBC driver handles this dilema easily, -but (after talking to Byron), the ODBC driver needed a unique type. They had created an 'lo' type, but not the solution to orphaning. Install Ok, first build the shared library, and install. Typing 'make install' in the contrib/lo directory should do it. -Then, as the postgres super user, run the lo.sql script. This will install the -type, and define the support functions. +Then, as the postgres super user, run the lo.sql script in any database that +needs the features. This will install the type, and define the support +functions. You can run the script once in template1, and the objects will be +inherited by subsequently-created databases. + How to Use The easiest way is by an example: -> create table image (title text,raster lo); -> create trigger t_image before update or delete on image for each row execute procedure lo_manage(raster); +> create table image (title text, raster lo); +> create trigger t_raster before update or delete on image +> for each row execute procedure lo_manage(raster); -Here, a trigger is created for each column that contains a lo type. +Create a trigger for each column that contains a lo type, and give the column +name as the trigger procedure argument. You can have more than one trigger on +a table if you need multiple lo columns in the same table, but don't forget to +give a different name to each trigger. -Issues -* dropping a table will still orphan any objects it contains, as the trigger - is not actioned. +Issues - For now, precede the 'drop table' with 'delete from {table}'. However, this - could be fixed by having 'drop table' perform an additional +* Dropping a table will still orphan any objects it contains, as the trigger + is not executed. - 'select lo_unlink({colname}::oid) from {tablename}' + Avoid this by preceding the 'drop table' with 'delete from {table}'. - for each column, before actually dropping the table. + If you already have, or suspect you have, orphaned large objects, see + the contrib/vacuumlo module to help you clean them up. It's a good idea + to run contrib/vacuumlo occasionally as a back-stop to the lo_manage + trigger. * Some frontends may create their own tables, and will not create the associated trigger(s). Also, users may not remember (or know) to create the triggers. - This can be solved, but would involve changes to the parser. - As the ODBC driver needs a permanent lo type (& JDBC could be optimised to use it if it's Oid is fixed), and as the above issues can only be fixed by some internal changes, I feel it should become a permanent built-in type. @@ -66,4 +86,3 @@ some internal changes, I feel it should become a permanent built-in type. I'm releasing this into contrib, just to get it out, and tested. Peter Mount June 13 1998 - diff --git a/contrib/lo/lo.c b/contrib/lo/lo.c index bdc9307bc2..8d6ba169a2 100644 --- a/contrib/lo/lo.c +++ b/contrib/lo/lo.c @@ -1,7 +1,7 @@ /* - * PostgreSQL type definitions for managed LargeObjects. + * PostgreSQL definitions for managed Large Objects. * - * $PostgreSQL: pgsql/contrib/lo/lo.c,v 1.14 2003/11/29 19:51:35 pgsql Exp $ + * $PostgreSQL: pgsql/contrib/lo/lo.c,v 1.15 2005/06/23 00:06:37 tgl Exp $ * */ @@ -21,117 +21,12 @@ #define atooid(x) ((Oid) strtoul((x), NULL, 10)) -/* - * This is the internal storage format for managed large objects - * - */ - -typedef Oid Blob; - -/* - * Various forward declarations: - */ - -Blob *lo_in(char *str); /* Create from String */ -char *lo_out(Blob * addr); /* Output oid as String */ -Oid lo_oid(Blob * addr); /* Return oid as an oid */ -Blob *lo(Oid oid); /* Return Blob based on oid */ -Datum lo_manage(PG_FUNCTION_ARGS); /* Trigger handler */ - -/* - * This creates a large object, and sets its OID to the value in the - * supplied string. - * - * If the string is empty, then a new LargeObject is created, and its oid - * is placed in the resulting lo. - */ -Blob * -lo_in(char *str) -{ - Blob *result; - Oid oid; - int count; - - if (strlen(str) > 0) - { - count = sscanf(str, "%u", &oid); - - if (count < 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("error in parsing \"%s\"", str))); - - if (oid == InvalidOid) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("illegal oid: \"%s\"", str))); - } - else - { - /* - * There is no Oid passed, so create a new one - */ - oid = DatumGetObjectId(DirectFunctionCall1(lo_creat, - Int32GetDatum(INV_READ | INV_WRITE))); - if (oid == InvalidOid) - /* internal error */ - elog(ERROR, "InvalidOid returned from lo_creat"); - } - - result = (Blob *) palloc(sizeof(Blob)); - - *result = oid; - - return (result); -} - -/* - * This simply outputs the Oid of the Blob as a string. - */ -char * -lo_out(Blob * addr) -{ - char *result; - - if (addr == NULL) - return (NULL); - - result = (char *) palloc(32); - snprintf(result, 32, "%u", *addr); - return (result); -} - -/* - * This function converts Blob to oid. - * - * eg: select lo_export(raster::oid,'/path/file') from table; - * - */ -Oid -lo_oid(Blob * addr) -{ - if (addr == NULL) - return InvalidOid; - return (Oid) (*addr); -} - -/* - * This function is used so we can convert oid's to lo's - * - * ie: insert into table values(lo_import('/path/file')::lo); - * - */ -Blob * -lo(Oid oid) -{ - Blob *result = (Blob *) palloc(sizeof(Blob)); +/* forward declarations */ +Datum lo_manage(PG_FUNCTION_ARGS); - *result = oid; - return (result); -} /* - * This handles the trigger that protects us from orphaned large objects + * This is the trigger that protects us from orphaned large objects */ PG_FUNCTION_INFO_V1(lo_manage); @@ -144,11 +39,10 @@ lo_manage(PG_FUNCTION_ARGS) TupleDesc tupdesc; /* Tuple Descriptor */ HeapTuple rettuple; /* Tuple to be returned */ bool isdelete; /* are we deleting? */ - HeapTuple newtuple = NULL; /* The new value for tuple */ + HeapTuple newtuple; /* The new value for tuple */ HeapTuple trigtuple; /* The original value of tuple */ - if (!CALLED_AS_TRIGGER(fcinfo)) - /* internal error */ + if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */ elog(ERROR, "not fired by trigger manager"); /* @@ -168,9 +62,12 @@ lo_manage(PG_FUNCTION_ARGS) /* Are we deleting the row? */ isdelete = TRIGGER_FIRED_BY_DELETE(trigdata->tg_event); - /* Get the column were interested in */ + /* Get the column we're interested in */ attnum = SPI_fnumber(tupdesc, args[0]); + if (attnum <= 0) + elog(ERROR, "column \"%s\" does not exist", args[0]); + /* * Handle updates * @@ -182,7 +79,7 @@ lo_manage(PG_FUNCTION_ARGS) char *orig = SPI_getvalue(trigtuple, tupdesc, attnum); char *newv = SPI_getvalue(newtuple, tupdesc, attnum); - if (orig != NULL && (newv == NULL || strcmp(orig, newv))) + if (orig != NULL && (newv == NULL || strcmp(orig, newv) != 0)) DirectFunctionCall1(lo_unlink, ObjectIdGetDatum(atooid(orig))); @@ -196,7 +93,6 @@ lo_manage(PG_FUNCTION_ARGS) * Handle deleting of rows * * Here, we unlink the large object associated with the managed attribute - * */ if (isdelete) { diff --git a/contrib/lo/lo.sql.in b/contrib/lo/lo.sql.in index 7be68453f7..5bf1c7e649 100644 --- a/contrib/lo/lo.sql.in +++ b/contrib/lo/lo.sql.in @@ -1,62 +1,29 @@ -- --- PostgreSQL code for LargeObjects +-- PostgreSQL code for managed Large Objects -- --- $PostgreSQL: pgsql/contrib/lo/lo.sql.in,v 1.12 2005/01/29 22:35:02 tgl Exp $ --- --- --- Create the data type +-- $PostgreSQL: pgsql/contrib/lo/lo.sql.in,v 1.13 2005/06/23 00:06:37 tgl Exp $ -- --- used by the lo type, it takes an oid and returns an lo object - -- Adjust this setting to control where the objects get created. SET search_path = public; -CREATE FUNCTION lo_in(cstring) -RETURNS lo -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - --- used by the lo type, it returns the oid of the object -CREATE FUNCTION lo_out(lo) -RETURNS cstring -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - --- finally the type itself -CREATE TYPE lo ( - INTERNALLENGTH = 4, - EXTERNALLENGTH = variable, - INPUT = lo_in, - OUTPUT = lo_out -); - --- this returns the oid associated with a lo object -CREATE FUNCTION lo_oid(lo) -RETURNS oid -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; +-- +-- Create the data type ... now just a domain over OID +-- --- same function, named to allow it to be used as a type coercion, eg: --- CREATE TABLE a (image lo); --- SELECT image::oid FROM a; --- -CREATE FUNCTION oid(lo) -RETURNS oid -AS 'MODULE_PATHNAME', 'lo_oid' -LANGUAGE C IMMUTABLE STRICT; -CREATE CAST (lo as oid) WITH FUNCTION oid(lo) AS IMPLICIT; +CREATE DOMAIN lo AS pg_catalog.oid; --- this allows us to convert an oid to a managed lo object --- ie: insert into test values (lo_import('/fullpath/file')::lo); -CREATE FUNCTION lo(oid) -RETURNS lo -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; -CREATE CAST (oid as lo) WITH FUNCTION lo(oid) AS IMPLICIT; +-- +-- For backwards compatibility, define a function named lo_oid. +-- +-- The other functions that formerly existed are not needed because +-- the implicit casts between a domain and its underlying type handle them. +-- +CREATE FUNCTION lo_oid(lo) RETURNS pg_catalog.oid AS +'SELECT $1::pg_catalog.oid' LANGUAGE SQL STRICT IMMUTABLE; -- This is used in triggers CREATE FUNCTION lo_manage() -RETURNS trigger +RETURNS pg_catalog.trigger AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/contrib/lo/lo_drop.sql b/contrib/lo/lo_drop.sql index c6c491b2c1..de0281fabd 100644 --- a/contrib/lo/lo_drop.sql +++ b/contrib/lo/lo_drop.sql @@ -1,15 +1,12 @@ -- --- This removes the type (and a test table) +-- This removes the LO type -- It's used just for development -- -- Adjust this setting to control where the objects get created. SET search_path = public; --- remove our test table -DROP TABLE a; - --- now drop the type and associated C functions +-- drop the type and associated functions DROP TYPE lo CASCADE; -- the trigger function has no dependency on the type, so drop separately diff --git a/contrib/lo/lo_test.sql b/contrib/lo/lo_test.sql index 9acc47debd..ba4eca55b8 100644 --- a/contrib/lo/lo_test.sql +++ b/contrib/lo/lo_test.sql @@ -3,6 +3,11 @@ -- -- It's used just for development -- +-- XXX would be nice to turn this into a proper regression test +-- + +-- Check what is in pg_largeobject +SELECT count(DISTINCT loid) FROM pg_largeobject; -- ignore any errors here - simply drop the table if it already exists DROP TABLE a; @@ -11,18 +16,15 @@ DROP TABLE a; CREATE TABLE a (fname name,image lo); -- insert a null object -INSERT INTO a VALUES ('null'); - --- insert an empty large object -INSERT INTO a VALUES ('empty',''); +INSERT INTO a VALUES ('empty'); -- insert a large object based on a file -INSERT INTO a VALUES ('/etc/group',lo_import('/etc/group')::lo); +INSERT INTO a VALUES ('/etc/group', lo_import('/etc/group')::lo); -- now select the table SELECT * FROM a; --- this select also returns an oid based on the lo column +-- check that coercion to plain oid works SELECT *,image::oid from a; -- now test the trigger @@ -32,7 +34,7 @@ FOR EACH ROW EXECUTE PROCEDURE lo_manage(image); -- insert -INSERT INTO a VALUES ('aa',''); +INSERT INTO a VALUES ('aa', lo_import('/etc/hosts')); SELECT * FROM a WHERE fname LIKE 'aa%'; @@ -43,7 +45,7 @@ SELECT * FROM a WHERE fname LIKE 'aa%'; -- update the 'empty' row which should be null -UPDATE a SET image=lo_import('/etc/hosts')::lo +UPDATE a SET image=lo_import('/etc/hosts') WHERE fname='empty'; SELECT * FROM a WHERE fname LIKE 'empty%'; @@ -60,10 +62,13 @@ WHERE fname LIKE 'aa%'; -- This deletes the table contents. Note, if you comment this out, and -- expect the drop table to remove the objects, think again. The trigger --- doesn't get thrown by drop table. +-- doesn't get fired by drop table. DELETE FROM a; -- finally drop the table DROP TABLE a; +-- Check what is in pg_largeobject ... if different from original, trouble +SELECT count(DISTINCT loid) FROM pg_largeobject; + -- end of tests diff --git a/contrib/vacuumlo/README.vacuumlo b/contrib/vacuumlo/README.vacuumlo index b72ecf18e0..560649a654 100644 --- a/contrib/vacuumlo/README.vacuumlo +++ b/contrib/vacuumlo/README.vacuumlo @@ -1,9 +1,13 @@ -$PostgreSQL: pgsql/contrib/vacuumlo/README.vacuumlo,v 1.4 2003/11/29 19:51:36 pgsql Exp $ +$PostgreSQL: pgsql/contrib/vacuumlo/README.vacuumlo,v 1.5 2005/06/23 00:06:37 tgl Exp $ This is a simple utility that will remove any orphaned large objects out of a PostgreSQL database. An orphaned LO is considered to be any LO whose OID does not appear in any OID data column of the database. +If you use this, you may also be interested in the lo_manage trigger in +contrib/lo. lo_manage is useful to try to avoid creating orphaned LOs +in the first place. + Compiling -------- -- 2.40.0