#!/usr/bin/perl # Aleksander Adamowski (s1869) # czw paź 28 22:11:16 CEST 2004 # Znajduje duplikaty obrazkow i zastepuje je symlinkami do oryginalow use strict; use Getopt::Std; use Fcntl ':mode'; use File::Basename; use Data::Dumper; use Digest::MD5; use File::Spec; my $debug_recurse = 0; my $debug = 0; my %fotos_origs; my %fotos_dupes; our($opt_n); getopts('n'); my ($path_origs, $path_dupes); if ($#ARGV >= 1) { ($path_origs, $path_dupes) = @ARGV; } else { print STDERR "Uzycie: $0 [-n] sciezka_do_oryginalow sciezka_do_duplikatow\n"; exit(1); } foreach my $dirname ($path_origs, $path_dupes) { my $mode = (stat($dirname))[2]; if (! S_ISDIR($mode)) { print STDERR "BLAD: $dirname to nie katalog\n"; die("Nieprawidlowy argument. Koniec pracy"); } } descend($path_origs, \&add_to_origs); descend($path_dupes, \&add_to_dupes); #print "======= ORIGS: ========\n"; #print Dumper(\%fotos_origs); #print "======= DUPES: ========\n"; #print Dumper(\%fotos_dupes); sub descend { my $dirname = shift; my $func = shift; #if ($debug) { print "zejscie z funkcja $func do $dirname\n"; } opendir DH, $dirname; my @descend_list; my @process_list; my $entry; my $pathname; while ($entry = readdir(DH)) { if ($entry !~ '^\.$' && $entry !~ '^\.\.$') { $pathname = $dirname.'/'.$entry; if ($debug_recurse) { print "wpis: $pathname\n"; } my $mode = (stat($pathname))[2]; if (S_ISDIR($mode)) { if ($debug_recurse) { print "$pathname to katalog.\n"; } push @descend_list, $pathname; } elsif (-f $pathname && (! readlink($pathname))) { push @process_list, $pathname; if ($debug_recurse) { print "$pathname to plik.\n"; } } } } closedir DH; foreach $pathname (@process_list) { &$func($pathname); } foreach my $subdir (@descend_list) { descend($subdir, $func); } } sub add_to_origs { my $pathname = shift; my $size = (stat($pathname))[7]; my $basename = basename($pathname, ''); if (! defined($fotos_origs{$basename})) { $fotos_origs{$basename} = {}; } $fotos_origs{$basename}{$pathname} = $size; #if ($debug) { print "add_to_origs $pathname.\n"; } } sub add_to_dupes { my $pathname = shift; my $size = (stat($pathname))[7]; my $basename = basename($pathname, ''); my $orig_pathname; my $orig_size; if (defined($fotos_origs{$basename})) { foreach $orig_pathname (keys(%{$fotos_origs{$basename}})) { $orig_size = $fotos_origs{$basename}{$orig_pathname}; if ($debug) { print "$pathname : possible dupe of $orig_pathname\n"; print " $pathname has size $size; \n"; print " $orig_pathname has size $orig_size.\n"; } # Jesli rozmiary rozne, przerwanie petli: if ($size != $orig_size) { next; } open(ORIG, $orig_pathname) or die "Can't open '$orig_pathname': $!"; binmode(ORIG); my $md5_orig = Digest::MD5->new->addfile(*ORIG)->hexdigest; close(ORIG); open(DUPE, $pathname) or die "Can't open '$pathname': $!"; binmode(DUPE); my $md5_dupe = Digest::MD5->new->addfile(*DUPE)->hexdigest; close(ORIG); if ($debug) { print " $pathname has md5 $md5_dupe; \n"; print " $orig_pathname has $md5_orig.\n"; } # Jesli sumy kontrolne MD5 rozne, przerwanie petli: if (($md5_dupe cmp $md5_orig) != 0) { next; } my $abs_pathname = File::Spec->rel2abs($pathname); my $abs_orig_pathname = File::Spec->rel2abs($orig_pathname); if (($abs_pathname cmp $abs_orig_pathname) != 0) { print "$abs_pathname is a dupe of $abs_orig_pathname.\n"; if (!$opt_n) { print "Unlinking $abs_pathname and substituting with a symlink to $abs_orig_pathname.\n"; unlink $abs_pathname; symlink $abs_orig_pathname, $abs_pathname; } } } } #if ($debug) { print "add_to_dupes $pathname.\n"; } }