granicus.if.org Git - transmission/commitdiff
(trunk libT) #2955 "verify pieces only when necessary, or when the user requests...
author Jordan Lee <jordan@transmissionbt.com>
Wed, 2 Feb 2011 21:17:16 +0000 (21:17 +0000)
committer Jordan Lee <jordan@transmissionbt.com>
Wed, 2 Feb 2011 21:17:16 +0000 (21:17 +0000)
As pointed out by longinus00 and ijuxda, storing per-piece timestamps in the .resume file can involve a lot of overhead. This commit reduces the overhead by adding a couple of optimizations: (1) in cases where *all* or *none* of a file's pieces were checked after the file's mtime, we can safely fold all the pieces' timestamps into a single per-file timestamp. (2) since unix time takes up a lot of space when rendered as a benc integer, find a common per-file "baseline" number, then store the pieces' timestamps as offsets from that number. Also add documentation explaining this new format, and better explain the pre-2.20 progress format.

libtransmission/resume.c
libtransmission/torrent.c
libtransmission/torrent.h

index ac46721b5f08eae69672240a0737eb69c40efa95..5d0f13675dd2940c3afd7be3d2f5411b0c3b65b8 100644 (file)
@@ -407,98 +407,172 @@ loadIdleLimits( tr_benc *    dict,
 ***/
 
 static void
-saveProgress( tr_benc *          dict,
-              const tr_torrent * tor )
+saveProgress( tr_benc * dict, const tr_torrent * tor )
 {
-    size_t              i, n;
-    tr_benc *           p;
-    tr_benc *           m;
-    const tr_bitfield * bitfield;
-
-    p = tr_bencDictAdd( dict, KEY_PROGRESS );
-    tr_bencInitDict( p, 2 );
+    tr_benc * l;
+    tr_benc * prog;
+    tr_file_index_t fi;
+    const struct tr_bitfield * bitfield;
+    const tr_info * inf = tr_torrentInfo( tor );
+    const time_t now = tr_time( );
+
+    prog = tr_bencDictAddDict( dict, KEY_PROGRESS, 3 );
+
+    /* add the file/piece check timestamps... */
+    l = tr_bencDictAddList( prog, KEY_PROGRESS_CHECKTIME, inf->fileCount );
+    for( fi=0; fi<inf->fileCount; ++fi )
+    {
+        const tr_piece * p;
+        const tr_piece * pend;
+        time_t oldest_nonzero = now;
+        time_t newest = 0;
+        tr_bool has_zero = FALSE;
+        const time_t mtime = tr_torrentGetFileMTime( tor, fi );
+        const tr_file * f = &inf->files[fi];
+
+        /* get the oldest and newest nonzero timestamps for pieces in this file */
+        for( p=&inf->pieces[f->firstPiece], pend=&inf->pieces[f->lastPiece]; p!=pend; ++p )
+        {
+            if( !p->timeChecked )
+                has_zero = TRUE;
+            else if( oldest_nonzero > p->timeChecked )
+                oldest_nonzero = p->timeChecked;
+            if( newest < p->timeChecked )
+                newest = p->timeChecked;
+        }
 
-    /* add each piece's timeChecked */
-    n = tor->info.pieceCount;
-    m = tr_bencDictAddList( p, KEY_PROGRESS_CHECKTIME, n );
-    for( i=0; i<n; ++i )
-        tr_bencListAddInt( m, tor->info.pieces[i].timeChecked );
+        /* If some of a file's pieces have been checked more recently than
+           the file's mtime, and some less recently, then that file will
+           have a list containing timestamps for each piece.
+           
+           However, the most common use case is that the file doesn't change
+           after it's downloaded. To reduce overhead in the .resume file,
+           only a single timestamp is saved for the file if *all* or *none*
+           of the pieces were tested more recently than the file's mtime. */
+
+        if( !has_zero && ( mtime <= oldest_nonzero ) ) /* all checked */
+            tr_bencListAddInt( l, oldest_nonzero );
+        else if( newest < mtime ) /* none checked */
+            tr_bencListAddInt( l, newest );
+        else { /* some are checked, some aren't... so list piece by piece */
+            const int offset = oldest_nonzero - 1;
+            tr_benc * ll = tr_bencListAddList( l, 2 + f->lastPiece - f->firstPiece );
+            tr_bencListAddInt( ll, offset );
+            for( p=&inf->pieces[f->firstPiece], pend=&inf->pieces[f->lastPiece]; p!=pend; ++p )
+                tr_bencListAddInt( ll, p->timeChecked ? p->timeChecked - offset : 0 );
+        }
+    }
 
     /* add the progress */
     if( tor->completeness == TR_SEED )
-        tr_bencDictAddStr( p, KEY_PROGRESS_HAVE, "all" );
+        tr_bencDictAddStr( prog, KEY_PROGRESS_HAVE, "all" );
+
+    /* add the pieces bitfield */
     bitfield = tr_cpBlockBitfield( &tor->completion );
-    tr_bencDictAddRaw( p, KEY_PROGRESS_BITFIELD,
-                       bitfield->bits, bitfield->byteCount );
+    tr_bencDictAddRaw( prog, KEY_PROGRESS_BITFIELD, bitfield->bits,
+                                                    bitfield->byteCount );
 }
 
 static uint64_t
-loadProgress( tr_benc *    dict,
-              tr_torrent * tor )
+loadProgress( tr_benc * dict, tr_torrent * tor )
 {
-    size_t    i, n;
-    uint64_t  ret = 0;
-    tr_benc * p;
+    size_t i, n;
+    uint64_t ret = 0;
+    tr_benc * prog;
+    const tr_info * inf = tr_torrentInfo( tor );
 
-    for( i=0, n=tor->info.pieceCount; i<n; ++i )
-        tor->info.pieces[i].timeChecked = 0;
+    for( i=0, n=inf->pieceCount; i<n; ++i )
+        inf->pieces[i].timeChecked = 0;
 
-    if( tr_bencDictFindDict( dict, KEY_PROGRESS, &p ) )
+    if( tr_bencDictFindDict( dict, KEY_PROGRESS, &prog ) )
     {
         const char * err;
         const char * str;
         const uint8_t * raw;
-        size_t          rawlen;
-        tr_benc *       m;
-        int64_t  timeChecked;
+        size_t rawlen;
+        tr_benc * l;
 
-        if( tr_bencDictFindList( p, KEY_PROGRESS_CHECKTIME, &m ) )
-        {
-            /* This key was added in 2.20.
-               Load in the timestamp of when we last checked each piece */
-            for( i=0, n=tor->info.pieceCount; i<n; ++i )
-                if( tr_bencGetInt( tr_bencListChild( m, i ), &timeChecked ) )
-                    tor->info.pieces[i].timeChecked = (time_t)timeChecked;
-        }
-        else if( tr_bencDictFindList( p, KEY_PROGRESS_MTIMES, &m ) )
+        if( tr_bencDictFindList( prog, KEY_PROGRESS_CHECKTIME, &l ) )
         {
-            /* This is how it was done pre-2.20... per file. */
-            for( i=0, n=tr_bencListSize(m); i<n; ++i )
+            /* per-piece timestamps were added in 2.20.
+              
+               If some of a file's pieces have been checked more recently than
+               the file's mtime, and some less recently, then that file will
+               have a list containing timestamps for each piece.
+              
+               However, the most common use case is that the file doesn't change
+               after it's downloaded. To reduce overhead in the .resume file,
+               only a single timestamp is saved for the file if *all* or *none*
+               of the pieces were tested more recently than the file's mtime. */
+
+            tr_file_index_t fi;
+
+            for( fi=0; fi<inf->fileCount; ++fi )
             {
-                /* get the timestamp of file #i */
-                if( tr_bencGetInt( tr_bencListChild( m, i ), &timeChecked ) )
+                tr_benc * b = tr_bencListChild( l, fi );
+                const tr_file * f = &inf->files[fi];
+                tr_piece * p = &inf->pieces[f->firstPiece];
+                const tr_piece * pend = &inf->pieces[f->lastPiece];
+
+                if( tr_bencIsInt( b ) )
                 {
-                    /* walk through all the pieces that are in that file... */
-                    tr_piece_index_t j;
-                    tr_file * file = &tor->info.files[i];
-                    for( j=file->firstPiece; j<=file->lastPiece; ++j )
+                    int64_t t;
+                    tr_bencGetInt( b, &t );
+                    for( ; p!=pend; ++p )
+                        p->timeChecked = (time_t)t;
+                }
+                else if( tr_bencIsList( b ) )
+                {
+                    int i = 0;
+                    int64_t offset = 0;
+                    const int pieces = f->lastPiece + 1 - f->firstPiece;
+
+                    tr_bencGetInt( tr_bencListChild( b, 0 ), &offset );
+
+                    for( i=0; i<pieces; ++i )
                     {
-                        tr_piece * piece = &tor->info.pieces[j];
-
-                        /* If the piece's timestamp is unset from earlier,
-                         * set it here. */
-                        if( piece->timeChecked == 0 ) 
-                            piece->timeChecked = timeChecked;
-
-                        /* If the piece's timestamp is *newer* timeChecked,
-                         * the piece probably spans more than one file.
-                         * To be safe, let's use the older timestamp. */
-                        if( piece->timeChecked > timeChecked )
-                            piece->timeChecked = timeChecked;
+                        int64_t t = 0;
+                        tr_bencGetInt( tr_bencListChild( b, i+1 ), &t );
+                        inf->pieces[f->firstPiece+i].timeChecked = (time_t)(t + offset);
                     }
                 }
             }
         }
+        else if( tr_bencDictFindList( prog, KEY_PROGRESS_MTIMES, &l ) )
+        {
+            tr_file_index_t fi;
+
+            /* Before 2.20, we stored the files' mtimes in the .resume file.
+               When loading the .resume file, a torrent's file would be flagged
+               as untested if its stored mtime didn't match its real mtime. */
+
+            for( fi=0; fi<inf->fileCount; ++fi )
+            {
+                int64_t t;
+
+                if( tr_bencGetInt( tr_bencListChild( l, fi ), &t ) )
+                {
+                    const tr_file * f = &inf->files[fi];
+                    tr_piece * p = &inf->pieces[f->firstPiece];
+                    const tr_piece * pend = &inf->pieces[f->lastPiece];
+                    const time_t mtime = tr_torrentGetFileMTime( tor, fi );
+                    const time_t timeChecked = mtime==t ? mtime : 0;
+
+                    for( ; p!=pend; ++p )
+                        p->timeChecked = timeChecked;
+                }
+            }
+        }
 
         err = NULL;
-        if( tr_bencDictFindStr( p, KEY_PROGRESS_HAVE, &str ) )
+        if( tr_bencDictFindStr( prog, KEY_PROGRESS_HAVE, &str ) )
         {
             if( !strcmp( str, "all" ) )
                 tr_cpSetHaveAll( &tor->completion );
             else
                 err = "Invalid value for HAVE";
         }
-        else if( tr_bencDictFindRaw( p, KEY_PROGRESS_BITFIELD, &raw, &rawlen ) )
+        else if( tr_bencDictFindRaw( prog, KEY_PROGRESS_BITFIELD, &raw, &rawlen ) )
         {
             tr_bitfield tmp;
             tmp.byteCount = rawlen;
index b9aba8abd68ac23e5f339d1f2c721cbcee474b0a..8a5293fd768c7e945181f9243ea092f1d9f387bb 100644 (file)
@@ -2388,8 +2388,8 @@ tr_torrentCheckPiece( tr_torrent * tor, tr_piece_index_t pieceIndex )
     return pass;
 }
 
-static time_t
-getFileMTime( const tr_torrent * tor, tr_file_index_t i )
+time_t
+tr_torrentGetFileMTime( const tr_torrent * tor, tr_file_index_t i )
 {
     struct stat sb;
     time_t mtime = 0;
@@ -2425,7 +2425,7 @@ tr_torrentPieceNeedsCheck( const tr_torrent * tor, tr_piece_index_t p )
     tr_ioFindFileLocation( tor, p, 0, &f, &unused );
     for( ; f < inf->fileCount && pieceHasFile( p, &inf->files[f] ); ++f )
         if( tr_cpFileIsComplete( &tor->completion, f ) )
-            if( getFileMTime( tor, f ) > inf->pieces[p].timeChecked )
+            if( tr_torrentGetFileMTime( tor, f ) > inf->pieces[p].timeChecked )
                 return TRUE;
 
     return FALSE;
index c49b574a4eed298b2a8336591f56bec026223f8b..9c8bcce5364fdc49daab249d234698885af40b9f 100644 (file)
@@ -429,6 +429,8 @@ tr_bool tr_torrentPieceNeedsCheck( const tr_torrent * tor, tr_piece_index_t piec
  */
 tr_bool tr_torrentCheckPiece( tr_torrent * tor, tr_piece_index_t pieceIndex );
 
+time_t tr_torrentGetFileMTime( const tr_torrent * tor, tr_file_index_t i );
+
 uint64_t tr_torrentGetCurrentSizeOnDisk( const tr_torrent * tor );