#!/usr/bin/perl
# Find duplicate files: report groups of .jar files whose MD5 digests are identical.
use strict;
use Digest::MD5;
use Benchmark;
# ---------------------------------------------------------------------------
# Main script.
#
# 1. Validate the directories given on the command line.
# 2. Collect candidate files from each directory via readsub().
# 3. Compute the MD5 digest of every collected file and group files by digest.
# 4. Write every group with more than one member to $clone_report_file and
#    every recorded problem to $error_file.
# ---------------------------------------------------------------------------
my $TT0               = Benchmark->new;
my $error_file        = "error.txt";
my $clone_report_file = "clone_report.txt";
my @error;        # problem descriptions, written to $error_file at the end
my @path_list;    # validated directories to scan

# Check for "no arguments" first so $ARGV[0] is never matched while undefined.
if (!@ARGV or $ARGV[0] =~ /^(-h|-\?|\/\?|-help)$/i) {
    print "\n Usage: find_clone [path1] [path2] [path3] ... \n\n Press Enter to close me ... \7";
    <STDIN>;
    exit;
}

foreach my $arg (@ARGV) {
    # Work on a copy so @ARGV itself is not modified through aliasing.
    my $path = $arg;
    $path =~ s/\\/\//g;                        # normalize backslashes to slashes
    # Drop one trailing slash, but keep a bare "/" (root) intact.
    $path =~ s{/\z}{} if length($path) > 1;
    if (! -d $path) {
        print " The path ($path) does not exist! Please check!\n\7";
        push(@error, "The path ($path) does not exist! Please check!");
    }
    else {
        push(@path_list, $path);
    }
}
if (!@path_list) {
    print "\n The path(s) you input are invalid, try again please!\n\n Press Enter to close me ... \7";
    <STDIN>;
    exit;
}

$| = 1;    # unbuffered STDOUT so the "\r" progress line updates immediately
print "\n Scanning the path(s) ... ";
my @file_list;    # filled by readsub()
foreach my $path (@path_list) { readsub($path); }
print "Done!\n\n Analyzing ... \n\n";

my @progress = ('-','-','-','\\','\\','\\','|','|','|','/','/','/');
my $s = 0;        # index of the current spinner frame
my $n = 1;        # 1-based number of the file being processed
my %clone;        # MD5 hex digest => list of files with that digest
my $total = scalar @file_list;

foreach my $file (@file_list) {
    my $percent = int($n / $total * 100);
    if ($n == $total) { print "\r ", $total, " file(s) analyzed! 100 % Done!"; }
    else              { print "\r $progress[$s] $percent %"; }

    # Advance the progress state up front so a failing file cannot stall it.
    $s = ($s >= $#progress) ? 0 : $s + 1;
    $n++;

    # Three-argument open: the file name is never interpreted as a mode/pipe.
    my $fh;
    if (!open($fh, '<', $file)) {
        push(@error, "$file\t$!");
        next;
    }
    binmode($fh);

    # addfile() croaks on read errors; capture the failure instead of losing it.
    my $md5 = eval { Digest::MD5->new->addfile($fh)->hexdigest() };
    my $read_error = $@;
    close $fh;
    if (!defined $md5) {
        chomp $read_error;
        push(@error, "$file\t$read_error");
        next;
    }
    push(@{ $clone{$md5} }, $file);
}

my $group_count = 0;    # number of digest groups with more than one file
my $report_fh;
my $report_ok = open($report_fh, '>', $clone_report_file);
push(@error, "$clone_report_file\t$!") if !$report_ok;

# Order groups by their first member so the report is stable between runs.
foreach my $digest (sort { $clone{$a}[0] cmp $clone{$b}[0] } keys %clone) {
    my @members = @{ $clone{$digest} };
    next if @members <= 1;
    if ($report_ok) {
        foreach my $member (@members) {
            my @stat = stat($member);
            # $stat[9] is the last-modification time.
            print {$report_fh} time_transfer($stat[9]), "\t$member\n";
        }
        print {$report_fh} "\n";
    }
    $group_count++;
}
if ($group_count) {
    print qq~\n\n $group_count group(s) clone files found, details see "$clone_report_file"!\n~;
}
else {
    print qq~\n\n No clone files found!\n~;
    print {$report_fh} qq~No clone files found!~ if $report_ok;
}
close $report_fh if $report_ok;

if (@error) {
    if (open(my $error_fh, '>', $error_file)) {
        foreach my $message (@error) { print {$error_fh} "$message\n"; }
        close $error_fh;
    }
    else {
        warn "\n Cannot write \"$error_file\": $!\n";
    }
    print "\n ", scalar(@error), " error(s) ocurred, details please see \"$error_file\"!";
}
else {
    print qq~\n No error ocurred!~;
}

my $TT1 = Benchmark->new;
my $td  = Benchmark::timediff($TT1, $TT0);
$td = Benchmark::timestr($td);
print "\n\n Time expend: $td\n\n Press Enter to close me ... \7";
<STDIN>;
# readsub($path)
#
# Recursively walks $path. Every plain file whose name ends in ".jar" is
# appended to the file-level @file_list; every directory is descended into.
# Directory entries are visited in sorted order. A directory that cannot be
# opened is recorded in the file-level @error list and skipped.
sub readsub
{
    my $file_t = shift;

    # Test the path that was passed in ($file_t).
    if (-f $file_t && $file_t =~ /\.jar$/)
    {
        push(@file_list, $file_t);
    }

    if (-d $file_t) {
        my $dir_handle;
        if (!opendir($dir_handle, $file_t)) {
            push(@error, "$file_t\t$!");
            return;
        }
        # Read the whole listing and release the handle before recursing.
        my @list = readdir($dir_handle);
        closedir($dir_handle);

        foreach my $file_to_act (sort @list)
        {
            # Skips "." and "..", and also any entry whose name starts or
            # ends with a dot.
            next if $file_to_act =~ /^\.|\.$/;
            readsub("$file_t/$file_to_act");
        }
    }
    return;
}
# time_transfer($epoch)
#
# Formats an epoch timestamp in local time as "YYYY-MM-DD HH:MM"
# (seconds are not included). Month, day, hour and minute are padded
# to two digits with a leading zero.
sub time_transfer {
    my ($epoch) = @_;

    # localtime() list: sec, min, hour, mday, mon (0-based), year (since 1900), ...
    my (undef, $minute, $hour, $mday, $month_index, $year_offset) = localtime($epoch);

    # Prefix a "0" to values below ten; larger values are returned unchanged.
    my $two_digits = sub {
        my ($value) = @_;
        return $value < 10 ? "0$value" : $value;
    };

    my ($mm, $dd, $hh, $mi) =
        map { $two_digits->($_) } ($month_index + 1, $mday, $hour, $minute);

    return join('-', $year_offset + 1900, $mm, $dd) . " $hh:$mi";
}