contrib: update stats/mailmap script

author Jeff King <peff@peff.net>

Wed, 12 Dec 2012 11:41:41 +0000 (06:41 -0500)

committer Junio C Hamano <gitster@pobox.com>

Wed, 12 Dec 2012 19:09:11 +0000 (11:09 -0800)
author Jeff King <peff@peff.net>
Wed, 12 Dec 2012 11:41:41 +0000 (06:41 -0500)
committer Junio C Hamano <gitster@pobox.com>
Wed, 12 Dec 2012 19:09:11 +0000 (11:09 -0800)
diff --git a/contrib/stats/mailmap.pl b/contrib/stats/mailmap.pl

index 4b852e2455bab324e3bd16e02ec712fbacbf34b0..9513f5e35b443c13cead1f73a27b5e76f94e8d66 100755 (executable)
--- a/contrib/stats/mailmap.pl
+++ b/contrib/stats/mailmap.pl
@@ -1,38 +1,70 @@
-#!/usr/bin/perl -w
-my %mailmap = ();
-open I, "<", ".mailmap";
-while (<I>) {
-       chomp;
-       next if /^#/;
-       if (my ($author, $mail) = /^(.*?)\s+<(.+)>$/) {
-               $mailmap{$mail} = $author;
-       }
+#!/usr/bin/perl
+
+use warnings 'all';
+use strict;
+use Getopt::Long;
+
+# Report selection: which kind of duplicate to list, and which per-identity
+# field ('count' or 'stamp') is used for ordering and printed by show().
+my $match_emails;
+my $match_names;
+my $order_by = 'count';
+Getopt::Long::Configure(qw(bundling));
+GetOptions(
+       'emails|e!' => \$match_emails,
+       'names|n!'  => \$match_names,
+       'count|c'   => sub { $order_by = 'count' },
+       'time|t'    => sub { $order_by = 'stamp' },
+) or exit 1;
+# When the names report was not requested, produce the email report.
+$match_emails = 1 unless $match_names;
+
+# $email->{address}->{name} and $name->{name}->{address} each hold a
+# { count, stamp } record maintained by mark().
+my $email = {};
+my $name = {};
+
+# Read one "<timestamp> <email> name" line per commit from git log.  If the
+# pipe cannot be started, stop here rather than report an empty history.
+open(my $fh, '-|', "git log --format='%at <%aE> %aN'")
+       or die "cannot run git log: $!\n";
+while(<$fh>) {
+       # A line the pattern cannot parse (for instance an empty email, or
+       # one containing whitespace) would hand undef to mark(); skip it.
+       my ($t, $e, $n) = /(\S+) <(\S+)> (.*)/
+               or next;
+       mark($email, $e, $n, $t);
+       mark($name, $n, $e, $t);
  }
-close I;
-
-my %mail2author = ();
-open I, "git log --pretty='format:%ae  %an' |";
-while (<I>) {
-       chomp;
-       my ($mail, $author) = split(/\t/, $_);
-       next if exists $mailmap{$mail};
-       $mail2author{$mail} ||= {};
-       $mail2author{$mail}{$author} ||= 0;
-       $mail2author{$mail}{$author}++;
+close($fh);
+
+if ($match_emails) {
+       foreach my $e (dups($email)) {
+               foreach my $n (vals($email->{$e})) {
+                       show($n, $e, $email->{$e}->{$n});
+               }
+               print "\n";
+       }
  }
-close I;
-
-while (my ($mail, $authorcount) = each %mail2author) {
-       # %$authorcount is ($author => $count);
-       # sort and show the names from the most frequent ones.
-       my @names = (map { $_->[0] }
-               sort { $b->[1] <=> $a->[1] }
-               map { [$_, $authorcount->{$_}] }
-               keys %$authorcount);
-       if (1 < @names) {
-               for (@names) {
-                       print "$_ <$mail>\n";
+if ($match_names) {
+       foreach my $n (dups($name)) {
+               foreach my $e (vals($name->{$n})) {
+                       show($n, $e, $name->{$n}->{$e});
                 }
+               print "\n";
         }
  }
+exit 0;
  
+# Count one occurrence of the pair ($k, $v) in the nested hash $h and keep
+# the largest timestamp $t seen so far for that pair.
+sub mark {
+       my ($h, $k, $v, $t) = @_;
+       # The first sighting of a pair starts from a zeroed record.
+       my $rec = ($h->{$k}->{$v} ||= { count => 0, stamp => 0 });
+       $rec->{count} += 1;
+       if (!($t < $rec->{stamp})) {
+               $rec->{stamp} = $t;
+       }
+}
+
+# Return the keys of the two-level hash $h whose inner hash holds more than
+# one entry, i.e. the identities that were seen with several spellings.
+sub dups {
+       my $h = shift;
+       # Dereference explicitly: keys() on a hash reference was an
+       # experimental feature that perl 5.24 removed.
+       return grep { keys(%{$h->{$_}}) > 1 } keys(%$h);
+}
+
+# Return the keys of hash $h, ordered so that the entry with the largest
+# $order_by field comes first.
+sub vals {
+       my $h = shift;
+       # keys() needs a real hash; handing it the reference is a compile
+       # error on perl 5.24 and later.
+       return sort {
+               $h->{$b}->{$order_by} <=> $h->{$a}->{$order_by}
+       } keys(%$h);
+}
+
+# Print one "name <email> (value)" line, where value is the field of the
+# record $stats selected by $order_by.
+sub show {
+       my ($who, $addr, $stats) = @_;
+       my $value = $stats->{$order_by};
+       print "$who <$addr> ($value)\n";
+}
author	Jeff King <peff@peff.net>
	Wed, 12 Dec 2012 11:41:41 +0000 (06:41 -0500)
committer	Junio C Hamano <gitster@pobox.com>
	Wed, 12 Dec 2012 19:09:11 +0000 (11:09 -0800)