# Find identical files

#!/usr/bin/perl

# Find duplicate files (identical content, compared by MD5 digest)

use strict;
use Digest::MD5;
use Benchmark;
# Start the timer; the elapsed time is printed at the very end of the run.
my $TT0 = Benchmark->new;

# Output files, created relative to the current working directory.
my $error_file = "error.txt";
my $clone_report_file = "clone_report.txt";

my @error;        # problems collected during the run, written to $error_file later
my @path_list;    # validated directories that will be scanned

# Show the usage text when called without arguments or with a help switch.
# @ARGV is tested first so that $ARGV[0] is never matched while undefined.
if (!@ARGV or $ARGV[0] =~ /^(-h|-\?|\/\?|-help)$/i) {
        print "\n Usage: find_clone [path1] [path2] [path3] ... \n\n Press Enter to close me ... \7";
        <STDIN>;
        exit;
}
else {
        foreach my $arg (@ARGV) {
                my $path = $arg;

                # Normalise Windows separators to forward slashes.
                $path =~ s/\\/\//g;

                # Strip trailing slashes, but leave a bare root ("/" or "C:/")
                # untouched: removing its slash would yield "" or "C:", which
                # either fails the -d test or names a different directory.
                $path =~ s{(?<=[^/:])/+\z}{};

                if (! -d $path) {
                        print " The path ($path) does not exist! Please check!\n\7";
                        push (@error,"The path ($path) does not exist! Please check!");
                }
                else {
                        push(@path_list,$path);
                }
        }

        # Nothing usable was given: tell the user and stop.
        if (!@path_list) {
                print "\n The path(s) you input are invalid, try again please!\n\n Press Enter to close me ... \7";
                <STDIN>;
                exit;
        }
}

# Unbuffered STDOUT so the "\r" progress line is redrawn immediately.
$| = 1;
print "\n Scanning the path(s) ... ";
my @file_list;    # filled by readsub() with every candidate file
foreach (@path_list) { readsub($_); }
print "Done!\n\n Analyzing ... \n\n";

# Spinner frames; each frame is repeated three times to slow the animation.
my @progress = ('-','-','-','\\','\\','\\','|','|','|','/','/','/');
my $s = 0;        # index of the current spinner frame
my $n = 1;        # 1-based number of the file being processed
my %clone;        # MD5 hex digest => array ref of files with that digest
my $total = scalar @file_list;

foreach my $file (@file_list) {
        my $percent = int($n/$total*100);
        if ($percent >= 99.5) { print  "\r ",$total," file(s) analyzed! 100 % Done!"; }
        else { print  "\r $progress[$s]  $percent %"; }

        # Three-argument open with a lexical handle: the file name is never
        # interpreted as a mode or a command, whatever characters it contains.
        if (open(my $fh, '<', $file)) {
                binmode($fh);

                # addfile croaks on read errors; catch that so one bad file
                # does not abort the whole run, and record why it failed.
                my $md5 = eval { Digest::MD5->new->addfile($fh)->hexdigest() };
                if (defined $md5) {
                        push(@{$clone{$md5}},$file);
                }
                else {
                        (my $why = $@) =~ s/\s+\z//;
                        push(@error,"$file\t$why");
                }
                close($fh);
        }
        else {
                push(@error,"$file\t$!");
        }

        # Advance spinner and counter for every file, including failed ones,
        # so the progress display always reaches 100 %.
        $s = ($s>=$#progress)? 0:$s+1;
        $n++;
}

# Write the duplicate report and print a one-line summary on the console.
my $clone = 0;    # number of clone groups found

# A digest shared by more than one file forms a clone group. Groups are
# ordered by their first file name so the report is the same on every run
# (plain hash order is not stable).
my @clone_groups = sort { $clone{$a}[0] cmp $clone{$b}[0] }
                   grep { @{$clone{$_}} > 1 } keys %clone;
$clone = scalar @clone_groups;

# If the report cannot be created, remember the reason in @error and still
# show the summary on screen instead of printing to a closed handle.
my $report_fh;
my $report_ok = open($report_fh, '>', $clone_report_file);
push(@error,"$clone_report_file\t$!") if !$report_ok;

if ($report_ok) {
        foreach my $md5 (@clone_groups) {
                foreach my $path (@{$clone{$md5}}) {
                        # Element 9 of stat() is the last-modification time.
                        my @stat = stat($path);
                        print {$report_fh} time_transfer($stat[9]),"\t$path\n";
                }
                # A blank line separates one group from the next.
                print {$report_fh} "\n";
        }
}

if ($clone) {
        if ($report_ok) {
                print qq~\n\n $clone group(s) clone files found, details see "$clone_report_file"!\n~;
        }
        else {
                print qq~\n\n $clone group(s) clone files found, but "$clone_report_file" could not be written!\n~;
        }
}
else {
        print qq~\n\n No clone files found!\n~;
        print {$report_fh} qq~No clone files found!~ if $report_ok;
}

# Buffered write errors only surface at close, so check it as well.
if ($report_ok) {
        close($report_fh) or push(@error,"$clone_report_file\t$!");
}

# Save the collected errors to $error_file and tell the user how many there
# were. If the file cannot be written, list the errors on the console so
# they are not lost.
if (@error) {
        my $error_count = scalar @error;
        if (open(my $error_fh, '>', $error_file)) {
                foreach my $message (@error) { print {$error_fh} "$message\n"; }
                close($error_fh);
                print "\n ",$error_count," error(s) ocurred, details please see \"$error_file\"!";
        }
        else {
                my $why = $!;
                print "\n ",$error_count," error(s) ocurred, but \"$error_file\" could not be written ($why):\n";
                foreach my $message (@error) { print "   $message\n"; }
        }
}
else { print qq~\n No error ocurred!~; }

# Report the time elapsed since $TT0 was taken at start-up.
my $TT1 = Benchmark->new;
my $td = Benchmark::timediff($TT1, $TT0);
$td = Benchmark::timestr($td);
print "\n\n Time expend: $td\n\n Press Enter to close me ... \7";

# Keep the console window open until the user presses Enter.
<STDIN>;

# readsub($path)
#
# Walks $path recursively and appends every regular file whose name ends in
# ".jar" to the file-level @file_list. Directory entries are visited in
# sorted order. Entries whose name starts or ends with a dot (including "."
# and "..") are skipped. Returns nothing.
sub readsub
{
        my $file_t = shift;

        # The extension test must look at the path that was passed in.
        if (-f $file_t && $file_t =~ /\.jar$/)
        {
                push(@file_list,$file_t);
        }

        if (-d $file_t) {
                # Lexical directory handle: each recursion level gets its own.
                my $dir_handle;
                if (!opendir($dir_handle,$file_t)) {
                        # Unreadable directories are skipped, but not silently.
                        warn "Cannot read directory $file_t: $!\n";
                        return;
                }
                my @list = readdir($dir_handle);
                closedir($dir_handle);

                foreach my $file_to_act (sort @list)
                {
                        next if ($file_to_act =~ /^\.|\.$/);
                        readsub("$file_t/$file_to_act");
                }
        }
        return;
}

# time_transfer($epoch)
#
# Formats an epoch timestamp as local time in the form "YYYY-MM-DD HH:MM"
# (seconds are not included) and returns that string.
sub time_transfer {
        my $time_str = shift;

        # Prefix values below ten with a zero so they take two characters.
        my $pad = sub {
                my $value = shift;
                return ($value < 10) ? "0$value" : $value;
        };

        # localtime elements 1..5: minute, hour, day of month,
        # month (0-based) and years since 1900.
        my ($min, $hour, $day, $mon, $year) = (localtime($time_str))[1 .. 5];

        my $date  = join('-', $year + 1900, $pad->($mon + 1), $pad->($day));
        my $clock = join(':', $pad->($hour), $pad->($min));
        return("$date $clock");
}

 

 

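# (Blog page footer, not part of the script:)
# Post a comment
# All comments
# No one has commented yet. Want to be the first? Type in the comment box above and click Publish.
# Related articles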