Current File : //bin/duck
#!/usr/bin/perl -w

# duck - the Debian Url Checker
# Copyright (C) 2017 Simon Kainz <skainz@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# he Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# On Debian GNU/Linux systems, the complete text of the GNU General
# Public License can be found in `/usr/share/common-licenses/GPL-2'.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

use strict;

use lib '/usr/share/duck';
use lib '/usr/share/duck/lib';

use DUCK;
use Getopt::Std;
use Getopt::Long qw(:config pass_through );
use Data::Dumper;
use File::Basename;
use File::Temp qw(tempdir);
use File::Path qw(remove_tree);
use Cwd;
use IPC::Open3;
use POSIX ":sys_wait_h";
#require lib;
use Parallel::ForkManager;

sub HELP_MESSAGE();
sub setColor($;$);
sub display_result($;$;$);
sub display_blob($;$);
sub missingHelpers();
sub handle_UrlFixDisableOptions($;$);
sub handle_UrlFixEnableOptions($;$);						   
sub handle_dsc($);
sub cleanup();
sub get_duck_overrides();	       
sub is_overridden($;$);

our $color_r="\x1b[31m";
our $color_g="\x1b[32m";
our $color_y="\x1b[33m";
our $color_n="\x1b[0m"; 
our $abort=0;
my $origcwd=getcwd;

#$SIG{INT} = sub { $abort=1;cleanup();};
$SIG{INT} = sub { print $color_r."Aborting, please wait!\n".$color_n;$abort=1;cleanup();};


my $checksdir='/usr/share/duck/lib/checks';

my $try_https=0;
my $no_check_cert=0;
my $nocolor=0;
our $enforceColor="auto";
my @allowedColorVals=qw/always auto never/;

my $termcolors=0;
$termcolors=`/usr/bin/tput colors 2>/dev/null`;
if ($? eq 0 )
{
    if ($termcolors < 4)
    {
	$enforceColor="never";
    }
} else {
    $enforceColor="never";
}  

my $showMissingHelpers;
my $urlFixEnableOptions;
my $urlFixDisableOptions;
my $tempdir;
my @global_options=("n","v","q");
$Getopt::Std::STANDARD_HELP_VERSION=1;

my $exitcode=0;

our %opt;
our @overrides=get_duck_overrides();
my $dh;

my $parallel_tasks=24;

my $retval=GetOptions(
    "modules-dir=s" => \$checksdir,
    "no-https" => \$try_https,
    "no-check-certificate" => \$no_check_cert,
    "no-color" => \$nocolor,
    "missing-helpers" => \$showMissingHelpers,
    "disable-urlfix=s" => \$urlFixDisableOptions,
    "enable-urlfix=s" => \$urlFixEnableOptions,
    "color=s" => \$enforceColor,
    "colour=s" => \$enforceColor,
    "tasks=i" => \$parallel_tasks
    );

die("Number of parallel tasks must be >0") unless (int($parallel_tasks)>0);

my $DUCK= DUCK->new();
my $funcref= $DUCK->cb();

if ($showMissingHelpers)
{
    print "\n";
    my $mh=missingHelpers();
    if ($mh) {print $mh; exit(1)}
    else { print "All helpers installed.\n"; exit(0);}
    
}


if ($urlFixEnableOptions && $urlFixDisableOptions)
{
    print "Conflicting options: Either use --disable-urlfix or --enable-urlfix option.\n";
    exit 1;

}


if (!(   grep( /^$enforceColor$/, @allowedColorVals ) ))
{
    print STDERR "Invalid option \"".$enforceColor." \"for --color: Valid options are:".join(",",@allowedColorVals)."\n";
    exit 1;
    
}


if ($urlFixDisableOptions) { handle_UrlFixDisableOptions($DUCK,$urlFixDisableOptions);}
if ($urlFixEnableOptions) {handle_UrlFixEnableOptions($DUCK,$urlFixEnableOptions);}






if ($enforceColor eq "never" )
{
    $color_r="";
    $color_g="";
    $color_y="";
    $color_n="";
}



import lib $checksdir;

if (!opendir($dh,$checksdir))
{
 print STDERR "Modules directory $checksdir not found!, please use --modules-dir=<path> !\n";
 if ($opt{h})
 {
     HELP_MESSAGE();
   
 }
  exit(1);
}



my @modules;
my @module_options;

my $descriptions;

while (readdir $dh)
{
    my($filename, $directories, $suffix) = fileparse($_);
    if (/^\./) { next};
    if (/.pm$/)
    {
	require $_;
	my $modulename=fileparse($filename,qr/\.pm/);
	my $n="DUCK::".$modulename;
	if ($n->can("run"))
	{
	    push (@modules,$modulename);
	}
    }
}

if (!scalar @modules)
{
    print STDERR "No check modules found! Please check path: ".$checksdir."\n";
    exit 1;
}

#get all options modules are providing

foreach my $m (@modules)
{

    my $n="DUCK::".$m;
    if ( ($n->can("opts")) && ($n->can("run")) )
    {
	foreach ($n->opts())
	{
	    push(@module_options,$_);
	}
    }
    if ($n->can("desc"))
    {
       $descriptions.=$n->desc();
     }
}
push(@module_options,@global_options);

#print Dumper @module_options;

GetOptions(
      "help" => sub {HELP_MESSAGE() }
    );

getopts(join("",@module_options),\%opt);

if ( $opt{v} && $opt{q} ) 
{
    print STDERR " Please specify either -q or -v\n";
    exit(1);
}

if ( $opt{l} )
{
    $opt{S}=1;
    $opt{P}=1;
    $opt{F}=1;
    $opt{U}=1;
    $opt{C}=1;
    $opt{A}=1;
}

if (@ARGV) { 
    $checksdir=getcwd."/lib/checks";
    handle_dsc(\$tempdir) unless $opt{l};
}

my @entries;

my @resultarray;

my $finished_tasks=0;
#run all modules, create the list of checks to run.
foreach my $m (@modules)
{
    my $n="DUCK::".$m;
    $n->run(\%opt,\@entries);
}

# inject all options to check modules
$DUCK->setOptions("no-https",$try_https);
$DUCK->setOptions("no-check-certificate",$no_check_cert);


# iterate over all urls, run checks.

my $manager= new Parallel::ForkManager($parallel_tasks);


#$manager->run_on_finish( sub {
#    my ($pid)=@_;
#    print "** finished, pid: $pid\n";
#});
 


$manager->run_on_finish( sub {
       my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;

        if (defined($data_structure_reference)) {  # children are not forced to send anything
      my $string = ${$data_structure_reference};  # child passed a string reference
#      print Dumper $string;
      push(@resultarray,$string);
      $finished_tasks++;
    }
    else {  # problems occurring during storage or retrieval will throw a warning
      print qq|No message received from child process $pid!\n|;
    }
 #   my ($pid, $exit_code) = @_;
#    print "** just got out of the pool ".
#      "with PID $pid and exit code: \n";
});
 
#my $entry;
 CHECKS:
foreach my $entry (@entries)
{
    my $res;
    
    $manager->start and next CHECKS unless $opt{n};
    {
    if ($abort) {cleanup(); exit};
    my $type=@$entry[0];
    my $k=@$entry[1];
    my $url=@$entry[2];
    my $origline=@$entry[3];
    my $extra=@$entry[4];

    chomp $origline unless !$origline;
  
    if ($funcref->{$k})
    {

	if (is_overridden(\@overrides,$url)) {
	    next;
	}
    

  if ($opt{n})
    {
         print STDOUT $type.": ".$k.": ".$url.": ";
	 print STDOUT " DRY RUN\n";
	 next;
    }

	$res=&{$funcref->{$k}}($url);
	

    }
}    
 $manager->finish(0,\{res=>$res,entry=>$entry}) unless $opt{n};
}

#print scalar(@resultarray)."\n";
#while ($abort == 0)
#{
#    print Dumper $manager;
#    sleep(1);
#}

$manager->wait_all_children unless $opt{n};

#print "Sizeof reuslts:".scalar(@resultarray)."\n";


foreach my $_res (@resultarray) {

    my $res=$_res->{"res"};

my $entry=$_res->{'entry'};

if (!defined($entry)) {next;}
if (!defined($res)) {next;}
    my $type=@$entry[0];
    my $k=@$entry[1];
    my $url=@$entry[2];
    my $origline=@$entry[3];
    my $extra=@$entry[4];



#    my $res=@{$_res}[0];

#print "DEPUG: entry";
#print Dumper $entry;
#print "DEBUG: res";
#print Dumper $res;
#print "-------------------\n";
#next;    
	if (!defined $res)
	{
	    if (!$opt{q})
	    {
		setColor(\*STDERR,$color_y);
		my $k;
		print STDERR "I: Skipping field ".$k." (Reason: Missing helper!)\n";
	        setColor(\*STDERR,$color_n);
	    }
	}
	else
	{ #print Dumper $res;
#	  my $extra;
	    if (ref($res) eq "ARRAY")
	    {#print "ARRAY!!!\n";
		foreach my $b (@$res)
		{
#		    print "mainloop:";
#		    print Dumper $extra;
		    display_blob($b,$extra);
		}
	    }
	    else
	    {
	#	print "BLOB\n";
		display_blob($res,$extra);
	    } 
	}

}

cleanup();
exit($exitcode);

##############################################################################
# Helper functions

sub display_blob($;$) {
    (my $res,my $extra)=@_;
#print "Display_blob, extra:";
#print Dumper $extra;    

    if ($res->{retval}>0)
    {
	
	if ($res->{'certainty'}) { $extra->{'certainty'}=$res->{'certainty'};}
	
	if ($res->{retval}==2)
	{
	    
	    if (!$opt{q})
	    {
		setColor(\*STDERR,$color_y);
		print STDERR display_result($res,$extra);
		setColor(\*STDERR,$color_n);
	    }
	    
	} else
	{
	    if (!$opt{q})
	    {
		setColor(\*STDERR,$color_r);
		print STDERR display_result($res,$extra);
		setColor(\*STDERR,$color_n);
	    }
	    $exitcode=1;
	    
	}
	
    }
    else
    {
	if ($opt{v})
	{
	    setColor(\*STDOUT,$color_g);
	    print STDOUT display_result($res,$extra);
	    setColor(\*STDOUT,$color_n);
	}
    }
    
}


sub setColor($;$)
{
    my ($fh,$c)=@_;
    
    if ($enforceColor eq "never")
    {
	return;
	
    }
    
    if ($enforceColor eq "always")
    {
	print $fh $c;
	return;
    }    

    if ($enforceColor eq "auto")
    {
	if (-t $fh)
	{
	    print $fh $c;
	    return;
	}
    }
    
}

sub display_result($;$;$)
{
    
    my $out="";
    my ($res,$data)=@_;
#    print Dumper $res;
#    print "DATA:";
#    print Dumper $data;
    my $prefixes={ 0 => "O: ",
		   1 => "E: ",
		   2 => "I: "
    };

    my $states={ 0 => "OK",
		 1 => "ERROR",
		 2 => "INFORMATION"
    };
    my $P;
    if ($prefixes->{$res->{retval}}) 
    {
	$P=$prefixes->{$res->{retval}};
    }
    else
    {
	$P=$prefixes->{1}; # default to Error if return value > 0 and out of bounds
    }
    $out.=$P;
    my $indent=$P;
    $indent =~ s/./ /g;
    
    
# try to print data supplied by check
    if ($data->{verbose})
    {
	$out.=$data->{verbose};
	if ($states->{$res->{retval}})
	{
	    $out.=': '.$states->{$res->{retval}};
        } else
    	{
	    $out.=': '.$states->{1};
    	}
	$out.=' (Certainty:'.$data->{certainty}.')';
	$out.="\n";
    } else

    {


	if ($data->{filename})
	{
	    $out.=$data->{filename}.":";
	}
	if ($data->{linenumber})
	{
	    $out.= $data->{linenumber}.": ";
	}
	
	if ($data->{checkmethod})
	{
	    $out.=$data->{checkmethod}.": ";
	}
	if ($data->{url})
	{
	    $out.=$data->{url}.": ";
	}

	$res->{retval}=2  unless $res->{retval}<2;
    
	$out.=$states->{$res->{retval}};
	$out.=' (Certainty:'.$data->{certainty}.')';
	$out.="\n";
    }
    
    if ( $res->{response} && ($res->{retval}>0) )
    {
	my $ts=$res->{response};
	$ts =~ s/\n*$//g;
	$ts =~ s/^/$indent/g;
	$ts =~ s/\n/\n$indent/g;

	$out.=$ts;
	
    }


    $out =~ s/\n*$//g;
    $out.="\n\n";
    

    return $out ;
    
}

sub handle_UrlFixDisableOptions($;$) {
    
    my ($duck,$paramlist)=(@_);
    
    my @fixes=split(/,/,$paramlist);

    foreach ($duck->getUrlFixes())
    {
	$duck->setUrlFixOptions($_,1);
    }
    
    foreach (@fixes) {
	$duck->setUrlFixOptions($_,0);
    }

}

sub handle_UrlFixEnableOptions($;$)
{
    
    my ($duck,$paramlist)=(@_);
    
    my @fixes=split(/,/,$paramlist);

    foreach ($duck->getUrlFixes())
    {
	$duck->setUrlFixOptions($_,0);
    }
    
    foreach (@fixes) {
	$duck->setUrlFixOptions($_,1);
    }

}

sub handle_dsc($) {

    my ($tmpdirref)=@_;
        
    my $tempdir = File::Temp->newdir(TEMPLATE=>"duckXXXXXX",TMPDIR => 1,CLEANUP => 0)->dirname;
    $$tmpdirref=$tempdir;
    my $filename=fileparse($ARGV[0]);

    print "Downloading to $tempdir"."\n";
    
    chdir($tempdir);

    my ($html)=@_;
    my $pid=open3(0,\*READ,\*ERR,'dget -d '.$ARGV[0]) or die("dget error!");
    my @std_output=<READ>;
    my @std_err=<ERR>;
    my $kid;
    do {
	$kid=waitpid($pid,WNOHANG);
	sleep(1);
    } while ($kid >0 && $abort == 0);
    if ($abort) {kill -9,$pid;cleanup(); exit}
    close(READ);
    my $prefix="dget: ";

    if ($opt{'v'})
    {
	chomp @std_output;
	setColor(\*STDOUT,$color_y);
	print join("\n$prefix",@std_output);
	print "\n\n";
    }
 
    $pid=open3(0,\*READ,\*ERR,"dpkg-source -x $filename extract") or die("dpkg-source error!");

    @std_output=<READ>;
    @std_err=<ERR>;
    do {
	$kid=waitpid($pid,WNOHANG);
	    my $dpkgsource_errcode=$?;
    print "errcode: $dpkgsource_errcode\n";

	sleep(1);
    } while ($kid >0 && $abort==0);
    if ($abort) {kill -9,$pid;cleanup(); exit}

    
    close(READ);
    $prefix="dpkg-source: ";
    if ($opt{'v'})
    {
	chomp @std_output;
	setColor(\*STDOUT,$color_y);;
	print join("\n$prefix",@std_output);
	print "\n";

	chomp @std_err;
	setColor(\*STDOUT,$color_r);
	print join("\n$prefix",@std_err);
	print "\n\n";

	
    }
 
   
    chdir($tempdir."/extract");
    if ($opt{'v'})
    {
	setColor(\*STDOUT,$color_n);
    }

    return $tempdir;
}



sub cleanup()
{
    chdir($origcwd);
    #restore terminal color
    print $color_n;
    if ($tempdir && -d $tempdir ) {
	
	if ($opt{v}) {
	    print "removing tempdir $tempdir\n";
	}
	remove_tree($tempdir);
}
}

sub get_duck_overrides()
{
    my $overrides_path='debian/duck-overrides';
    my @overrides;
    
    if ( -e $overrides_path)
    {
	open (my $overrides_fh,"<",$overrides_path);
	@overrides=<$overrides_fh>;
	close($overrides_fh);
	
    }
    chomp(@overrides);

	my @results;

    foreach (@overrides)
    {
	if ( ! /^\s*#/)
	{
	    if (length($_)>1) {push (@results,$_);}
	}
	
    }
    return @results;
}

sub is_overridden($;$)
		  {
		      my ($overrides_ref,$url)=(@_);
		      
		      my @overrides=@{$overrides_ref};
		      
		      foreach my $ov (@overrides)
		      {
			  if ( $url =~ m/$ov/i)
			  {
			      return 1;
			  }
		      }
		      return 0;
		  }


sub get_domain()
{
    my $url=$_[0];

    $url =~ /:\/\/([^\/].*)/;
    if (!$1) {return 0};
    my @dom=split(/\//,$1);
    
    return $dom[0];
}


sub HELP_MESSAGE()
{

if (!$descriptions)
{
    $descriptions="  No modules, found, no further options available.\n";
}
print STDOUT <<EOF;

Usage: duck [options] 

  -h\t--help\t\t\tdisplay this usage information and exit
  -q\t\t\t\tquiet mode, suppress all output
  -v\t\t\t\tverbose mode
  -n\t\t\t\tdry run, don't run any checks, just show what would be checked
  --modules-dir=dir\t\tpath to check modules
  --no-https\t\t\tdo not try to find matching https URLs to http URLs
  --no-check-certificate\tdo not check SSL certificates
  --color=[auto,always,never]\tauto (default): color on terminal, no color when piping output
\t\t\t\tor dumb terminal
\t\t\t\tnever: no color output
\t\t\t\talways: show colored output always

  --missing-helpers\t\tdisplay list of missing external helper tools

  --disable-urlfix=<fix1,...>\tdisable specified url fix function(s). Use --disable-urlfix=list or 
  \t\t\t\tsee duck(1) for available options and further information.
  --enable-urlfix=<fix1,...>\tenable specified url fix function(s).  Use --enable-urlfix=list or 
  \t\t\t\tsee duck(1) for available options and further information.

  --tasks=[number]\t\tSpecify the number of checks allowed to run in parallel. Default value is 24. 
\t\t\t\tThis value must be an integer value >0.

  --version\t\t\tdisplay copyright and version information
  Available module options:

EOF
print $descriptions;
print "\n";
    exit(0);
}

sub VERSION_MESSAGE()
{
    my $DUCK=DUCK->new();
    print "duck ".$DUCK->version()."\n";
    my $copyright_year=$DUCK->copyright_year();

print <<EOF;
This code is copyright $copyright_year by Simon Kainz <skainz\@debian.org>
all rights reserved.
This program comes with ABSOLUTELY NO WARRANTY.
You are free to redistribute this code under the terms of the
GNU General Public License, version 2 or later.

EOF
}

sub missingHelpers()
{
    my $DUCK=DUCK->new();
    my $out;
    my $h=$DUCK->getHelpers();
    
    if (!$h->{git}) { $out.= "git missing. Please install package git.\n"; }
    if (!$h->{bzr}) { $out.= "bzr missing. Please install package bzr.\n"; }
    if (!$h->{svn}) { $out.= "svn missing. Please install package subversion.\n"; }
    if (!$h->{hg}) { $out.= "hg missing. Please install package mercurial.\n"; }
    
    return $out;
    

}