Current File : //sbin/sharedindexsplit
#! /usr/bin/perl
#
# Copyright 2004 Double Precision, Inc.
#
# See COPYING for distribution information.
#
# Split the shared index into multiple files.  The entire shared index is
# piped on stdin. There are two modes of operation:
# - if nletters is specified and is greater than 0, then
#   split based on the first n characters of the username
# - if nletters is omitted or zero, then split based on the 'sharedgroup'
#   account option. This requires the options to be given as column 6 of
#   the input.
#
# Assume that account names use only the Latin alphabet.

use IO::File;

my $hasEncode=0;

eval 'use Encode; $hasEncode=1;';

if ($hasEncode)
{
    $hasEncode=0;

    grep {$hasEncode=1 if $_ eq "UTF-32BE"; } Encode->encodings(":all");

}

my $mult=1;

$mult=4 if $hasEncode;

my $outputdir=shift @ARGV;
my $nletters=shift @ARGV;

die "Usage: $0 outputdir [ letters ]\n" unless -d $outputdir;

print "*** WARNING - Encode not found, you should upgrade to Perl 5.8.0\n"
    unless $hasEncode;

$nletters=0 unless defined($nletters);

my %FILES;  # All opened files
my @MRU;    # Recycle using most-recently-used mechanism.

sub indexfile {
    my $filename=shift @_;

    Encode::from_to($filename, "UTF-32BE", "UTF-8") if $hasEncode;

    return "$filename";
}

while (defined($_=<STDIN>))
{
    chomp;
    s/\#.*//;

    my @fields=split /\t/;


    next unless $#fields>2;   # Comments, etc...

    my $key;

    if ($nletters > 0)
    {
	$key=$fields[0];
	Encode::from_to($key, "UTF-8", "UTF-32BE") if $hasEncode;
	$key=substr($key, 0, $nletters * $mult);
    }
    elsif ($fields[5] =~ /(^|,)sharedgroup=([^,]+)/)
    {
        $key = $2;
	Encode::from_to($key, "UTF-8", "UTF-32BE") if $hasEncode;
    }
    else
    {
	$key = "";
    }

    while (length($key) < $nletters * $mult)
    {
	my $u="_";

	$u=Encode::encode("UTF-32BE", $u) if $hasEncode;

	$key .= $u;
    }

    if (defined $FILES{$key})
    {
	@MRU=grep {$_ ne $key} @MRU;
	push @MRU, $key;

    }
    else
    {
	unless ($#MRU < 3)
	{
	    my $oldest=shift @MRU;

	    close($FILES{$oldest});
	    $FILES{$oldest}=undef;
	}

	push @MRU, $key;

	open( ($FILES{$key}=new IO::File), ">>$outputdir/index"
	      . indexfile($key) . "\n")
	    || die "$outputdir/index" . indexfile($key) . ": $!\n";
    }

    my $fh=$FILES{$key};
    splice(@fields,5,1);  # hide options
    (print $fh (join("\t", @fields) . "\n")) || exit 1;
}

grep { ( close($FILES{$_}) || exit(1)) if defined $FILES{$_}} keys %FILES;

while ($nletters > 0)
{
    my %NEWKEYS;
    my %NEWFILES;

    --$nletters;

    foreach (keys %FILES)
    {
	$NEWKEYS{substr($_, 0, $nletters * $mult)}=1;

	push @{$NEWFILES{substr($_, 0, $nletters * $mult)}}, $_;
    }

    foreach (keys %NEWFILES)
    {
	my $fn=indexfile($_);

	open(FH, ">$outputdir/index$fn") || die "$outputdir/index$fn: $!\n";

	grep { my $x=indexfile($_);print FH "$x\t*\tindex$x\n"
		   || exit 1; } @{$NEWFILES{$_}};
	close(FH) || exit 1;
    }

    %FILES=%NEWKEYS;
}