#!/usr/bin/perl

# Copyright © 2011 Jakub Wilk <jwilk@debian.org>
#           © 2014-2020 Dmitry Shachnev <mitya57@debian.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=head1 NAME

dh_sphinxdoc - helps with packaging Sphinx-generated documentation

=head1 SYNOPSIS

dh_sphinxdoc [S<I<debhelper options>>] [B<-X>I<item>] [I<directory>...]

=head1 DESCRIPTION

B<dh_sphinxdoc> is a debhelper program that prepares Sphinx-generated
documentation for inclusion in a Debian package. More specifically:

=over 4

=item *

It checks if all the files referenced by F<searchindex.js> exist.

=item *

It replaces known F<*.js> files with symlinks to F</usr/share/javascript/sphinxdoc/>
and generates B<${sphinxdoc:Depends}> substitution variable.

=item *

If the Sphinx RTD theme is used, it replaces known files from this theme with
symlinks to F</usr/share/sphinx_rtd_theme/>, and adds B<sphinx-rtd-theme-common>
to B<${sphinxdoc:Depends}>.

=item *

It provides a B<${sphinxdoc:Built-Using}> substitution variable, for tracking
files which could not be symlinked. Examples of such files are F<*.js> and F<*.css>
files that are generated from corresponding F<*.js_t> and F<*.css_t> templates,
and can vary depending on the used theme options (for instance, F<basic.css> file
is generated from F<basic.css_t> and is included in almost every Sphinx-generated
documentation). Currently, this variable will contain B<sphinx> and, if the default
theme is used, B<alabaster>, with their versions (other themes are not supported).

=item *

It removes the F<.doctrees> directory.

=item *

It removes the F<.buildinfo> file.

=item *

It removes the F<websupport.js> file.

=back

Note that B<dh_sphinxdoc> does not build the documentation; it only performs
the above operations once the documentation is already installed into the
package build path. To build the docs, please use the L<sphinx-build(1)> command
or the B<python3 -m sphinx> syntax.

You can pass B<--with sphinxdoc> to L<dh(1)> to make it automatically call
B<dh_sphinxdoc> after B<dh_installdocs>.

=head1 OPTIONS

=over 4

=item I<directory>

By default, B<dh_sphinxdoc> scans your package for directories that look
like they contain Sphinx-generated documentation. However, if you explicitly
provide one or more directories, only they will be processed. If documentation
is not found at I<directory>, an error is raised.

=item B<-X>I<item>, B<--exclude=>I<item>

Exclude files that contain I<item> anywhere in their filename from
being symlinked, removed or checked for existence.

=back

=head1 BUGS

Symlinking translations.js is not supported.

=cut

use strict;
use warnings;

use Digest::MD5;
use File::Find;
use Debian::Debhelper::Dh_Lib;

my %packaged_js = ();
my @cruft_js = qw(websupport.js);

# Compute the MD5 digest of a file's contents.
#
# Takes the path of the file to hash and returns the raw binary digest
# (not hex-encoded).  Aborts via error() if the file cannot be opened.
sub md5($)
{
    my ($filename) = @_;
    # A lexical handle cannot collide with handles opened by callers, which
    # a shared bareword handle could.
    open(my $fh, '<', $filename) or error("cannot open $filename");
    # Hash the raw bytes, without any I/O layer translation.
    binmode($fh);
    my $digest = Digest::MD5->new->addfile($fh)->digest;
    close($fh);
    return $digest;
}

# Populate %packaged_js with the JavaScript files shipped by libjs-sphinxdoc.
#
# Reads the "index" file under usr/share/javascript/sphinxdoc (below
# debian/libjs-sphinxdoc when that directory exists, otherwise below the
# root filesystem).  Each non-comment line names a file and, optionally, the
# minimum libjs-sphinxdoc version; when no version is given, the leading
# numeric path component of the file name is used instead.  Every matching
# *.js file found on disk is then registered twice in %packaged_js, keyed by
# its MD5 digest and by its name, with the value
# [path with the root prefix stripped, dependency string].
#
# Takes no arguments.  Aborts via error() if the index cannot be opened or
# parsed, if a file on disk is not listed in the index, or if a listed file
# is not found on disk.
sub load_packaged_js()
{
    my %versions = ();
    my $root = 'debian/libjs-sphinxdoc'; # It's tempting to use
    # tmpdir('libjs-sphinxdoc') here, but it would break if the user passed
    # --tmpdir to the command.
    $root = '' unless -d $root;
    my $path = "$root/usr/share/javascript/sphinxdoc";
    open(my $index_fh, '<', "$path/index") or error("cannot open $path/index");
    while (my $line = <$index_fh>)
    {
        chomp $line;
        # Skip comments and blank lines.
        next if $line =~ /^(#|\s*$)/;
        my ($js, $minver) = split(/\s+/, $line, 2);
        unless (defined($minver))
        {
            # No explicit version: fall back to the versioned directory name.
            $js =~ m{^([0-9.]+)/} or error("syntax error in $path/index");
            $minver = $1;
        }
        $versions{$js} = $minver;
    }
    close($index_fh);
    find({
        wanted => sub {
            my $js = $_;
            my ($jsbase, $jsname) = $js =~ m{([0-9.]+/(\S+[.]js))$} or return;
            my $version = $versions{$jsbase};
            defined($version) or error("$jsbase is not in the index; is it up-to-date?");
            # Whatever is left in %versions afterwards was indexed but not found.
            delete $versions{$jsbase};
            my $md5;
            if (-l $js)
            {
                # Follow the symlink, but only if points *outside* our own directory.
                my $js_target = readlink($js);
                $js_target =~ m{^(/|\Q../../\E)} or return;
                unless ($js_target =~ m{^/})
                {
                    # Resolve the relative target against the link location
                    # by collapsing "dir/../" pairs.
                    $js_target = "$js/../$js_target";
                    while ($js_target =~ s{[^./][^/]+/[.][.]/}{}) {};
                }
                $md5 = md5($js_target);
            }
            else
            {
                $js =~ s{^\Q$root\E}{} unless -f $js;
                $md5 = md5($js);
            }
            $js =~ s{^\Q$root\E}{};
            my $data = [$js, "libjs-sphinxdoc (>= $version)"];
            $packaged_js{$md5} = $data;
            $packaged_js{$jsname} = $data;
        },
        no_chdir => 1
    }, $path);
    # Sorted so that the file reported first does not depend on hash order.
    for my $missing (sort keys(%versions))
    {
        error("$path/$missing is missing");
    }
}

# Tell whether a directory holds a regular (multi-page) Sphinx HTML build.
#
# Returns 1 when both searchindex.js and search.html exist as plain files
# directly inside the given directory, and 0 otherwise.
sub looks_like_sphinx_doc($)
{
    my $dir = shift;
    for my $required (qw(searchindex.js search.html))
    {
        -f "$dir/$required" or return 0;
    }
    return 1;
}

# Tell whether a directory holds a Sphinx "singlehtml" build.
#
# A directory qualifies when it has a _static subdirectory, has no
# searchindex.js, and exactly one of its *.html files loads
# _static/doctools.js.  Returns the path of that HTML file on success and 0
# otherwise.  Aborts via error() if one of the HTML files cannot be opened.
sub looks_like_sphinx_singlehtml_doc($)
{
    my ($path) = @_;
    return 0 unless -d "$path/_static";
    return 0 if -f "$path/searchindex.js";

    # List the directory by hand instead of calling glob(): glob() splits
    # its argument on whitespace and expands wildcard characters, so a $path
    # containing either would be mis-expanded.  As with glob("*.html"),
    # hidden files are skipped; names are sorted for a deterministic order.
    opendir(my $dh, $path) or return 0;
    my @html_files =
        map { "$path/$_" }
        sort
        grep { /[.]html\z/ && !/\A[.]/ }
        readdir($dh);
    closedir($dh);

    # There should be exactly one HTML file in singlehtml build.
    my @sphinx_html_files;
    foreach my $html_file (@html_files)
    {
        open(my $fh, '<', $html_file) or error("cannot open $html_file");
        while (my $line = <$fh>)
        {
            if ($line =~ /<script(?: type="text\/javascript")? src="_static\/doctools.js">/)
            {
                push @sphinx_html_files, $html_file;
                last;
            }
        }
        close($fh);
    }
    return 0 if @sphinx_html_files != 1;
    return $sphinx_html_files[0];
}

# Validate a Sphinx HTML build and collect the JavaScript files it uses.
#
# Arguments:
#   $path            - directory containing the generated documentation
#   $singlehtml_file - path of the main HTML file of a singlehtml build, or
#                      a false value for a regular build
#
# For a regular build, searchindex.js is parsed for the list of pages and
# search.html is inspected; for a singlehtml build the main HTML file is
# inspected instead and no page list is checked.  The function verifies
# that the inspected page references at least one script, loads
# searchindex.js (regular builds only), and that DOCUMENTATION_OPTIONS
# defines HAS_SOURCE and a relative URL_ROOT.  It then checks that every
# referenced script and every indexed page (plus its source file when
# HAS_SOURCE is true) exists, unless excluded through excludefile().
#
# Returns the list of JavaScript paths, relative to $path: those referenced
# by the inspected page plus every non-excluded *.js entry of _static/.
# Aborts via error() on the first failed check.
sub sanity_check($$)
{
    local $/; # slurp mode: each readline below returns a whole file
    my ($path, $singlehtml_file) = @_;
    my $searchfn;
    my $index;
    if ($singlehtml_file)
    {
        # There is no search.html in singlehtml build, so we take the main HTML
        # file for sanity checking and retrieving JS files.
        $searchfn = $singlehtml_file;
    }
    else
    {
        my $indexfn = "$path/searchindex.js";
        open(my $index_fh, '<', $indexfn) or error("cannot open $indexfn");
        $index = <$index_fh>;
        close($index_fh);
        $index =~ m{^Search[.]setIndex[(].*?filenames:\["(.*?)"\].*[)]$} or error("$indexfn doesn't look like a Sphinx search index");
        # Keep only the inside of the filenames array; it is split on '","' below.
        $index = $1;
        $searchfn = "$path/search.html";
    }
    open(my $search_fh, '<', $searchfn) or error("cannot open $searchfn");
    my $search = <$search_fh>;
    close($search_fh);
    $search =~ s/<!--.*?-->//g; # strip comments

    # Collect the local scripts referenced by the page.
    my %js = ();
    for my $match ($search =~ m{<script(?: type="text/javascript")? src="([^"]++)"></script>}g)
    {
        my $src = $match;
        $src =~ s/[?#].*//;  # drop query string and fragment
        $src =~ s/\s+$//;
        next if $src =~ m/^[a-z][a-z0-9.+-]*:/i;  # has a URL scheme: not a local file
        next if excludefile("$path/$src");
        $js{$src} = 1;
    }

    # DOCUMENTATION_OPTIONS is either defined inline in the page or in a
    # separate _static/documentation_options.js file.
    my $documentation_options;
    for my $line (split /^/, $search)
    {
        if ($line =~ "_static/documentation_options.js")
        {
            my $documentation_options_fn = "$path/_static/documentation_options.js";
            open(my $fh, '<', $documentation_options_fn) or error("cannot open $documentation_options_fn");
            $documentation_options = <$fh>;
            close $fh;
        }
        if ($line =~ "var DOCUMENTATION_OPTIONS =")
        {
            $documentation_options = $search;
        }
    }
    defined $documentation_options or error("DOCUMENTATION_OPTIONS not found");
    my $loads_searchindex = $search =~ m{<script(?: type="text/javascript")? src="[^"]*searchindex.js\s?"(?: defer)?>};
    unless ($loads_searchindex)
    {
        # old style, used before Sphinx 2.0
        $loads_searchindex = $search =~ m/\QjQuery(function() { Search.loadIndex("\E/;
    }
    my ($has_source) = $documentation_options =~ m{HAS_SOURCE:\s*(true|false)};
    my ($sourcelink_suffix) = $documentation_options =~ m{SOURCELINK_SUFFIX:\s*'([^']*)'};
    $sourcelink_suffix = ".txt" unless defined $sourcelink_suffix;
    my $url_root;
    if ($documentation_options =~ /\QURL_ROOT: document.getElementById("documentation_options")\E/)
    {
        # URL_ROOT is read at run time from a data attribute of the page.
        ($url_root) = $search =~ m{data-url_root="([^"]*)"};
    }
    else
    {
        ($url_root) = $documentation_options =~ m{URL_ROOT:\s*'([^']*)'};
    }
    %js or error("$searchfn does not include any JavaScript code");
    $singlehtml_file or $loads_searchindex or error("$searchfn does not load searchindex.js");
    defined $has_source or error("DOCUMENTATION_OPTIONS does not define HAS_SOURCE");
    defined $url_root or error("DOCUMENTATION_OPTIONS does not define URL_ROOT");
    $has_source = $has_source eq 'true';
    $url_root =~ m{^([a-z]+:/)?/} and error("URL_ROOT in $searchfn is not relative");
    for my $js (keys(%js))
    {
        -f "$path/$js" or -l "$path/$js" or error("$path/$js is missing");
    }
    unless ($singlehtml_file)
    {
        for my $page (split(/","/, $index))
        {
            if ($has_source)
            {
                # Append sourcelink_suffix if the page name does not already end with it.
                # The suffix is quoted with \Q...\E so that it is compared
                # literally: unquoted, the "." of ".txt" would match any
                # character and e.g. "pagetxt" would wrongly be left alone.
                (my $sourcepage = $page) =~ s/(?<!\Q$sourcelink_suffix\E)$/$sourcelink_suffix/;
                -f "$path/_sources/$sourcepage"
                    or excludefile("$path/_sources/$sourcepage")
                    or error("$path/_sources/$sourcepage is missing");
            }
            # Get the page basename before appending .html.
            $page =~ s/\.[a-z]+$//;
            -f "$path/$page.html"
                or excludefile("$path/$page.html")
                or error("$path/$page.html is missing");
        }
    }
    # Also report every script shipped in _static/, referenced or not.
    if (opendir(my $static_dh, "$path/_static/"))
    {
        for my $entry (readdir($static_dh))
        {
            next unless $entry =~ /[.]js$/;
            next if excludefile("$path/_static/$entry");
            $js{"_static/$entry"} = 1;
        }
        closedir($static_dh);
    }
    return keys(%js);
}

# Report a JavaScript file that could not be matched to a packaged one.
#
# Takes the path of the file; only its base name decides the outcome:
#  - core Sphinx scripts (searchtools.js, doctools.js, jquery.js,
#    underscore.js) are reported through error();
#  - files listed in @cruft_js and a fixed set of generated scripts are
#    passed over silently;
#  - anything else only triggers a warning().
sub unknown_javascript($)
{
    my ($js) = @_;
    my $message = "unknown JavaScript code: $js";
    (my $basename = $js) =~ s{.*/}{};

    my %is_basic = map { $_ => 1 }
        qw(searchtools.js doctools.js jquery.js underscore.js);
    my %is_silent = map { $_ => 1 }
        @cruft_js,
        qw(theme.js documentation_options.js language_data.js sidebar.js searchindex.js);

    if ($is_basic{$basename})
    {
        error("error: $message");
    }
    elsif (not $is_silent{$basename})
    {
        warning("ignoring $message");
    }
}

# Replace a file inside a package build directory with a symlink.
#
# Arguments:
#   first  - absolute path the link must point to
#   second - location of the link, given as debian/<package>/...
#
# When both paths share their top-level directory the link is written
# relative to the link location, otherwise the absolute target is kept.
# The link itself is created by running "ln -sf" through doit().  Dies if
# either argument does not have the expected form.
sub ln_sf($$)
{
    my ($link_target, $link_path) = @_;

    # Express both paths relative to the root of the package tree.
    (my $inside_pkg = $link_path) =~ s{^debian/[^/]++/+}{} or die;
    (my $rootless = $link_target) =~ s{^/++}{} or die;
    my @link_parts = split(m{/++}, $inside_pkg);
    my @dest_parts = split(m{/++}, $rootless);
    (@link_parts > 0 and @dest_parts > 0) or die;

    # Keep the symlink absolute, as per Policy 10.5, unless the top-level
    # directories match.
    my $dest = $link_target;
    if ($link_parts[0] eq $dest_parts[0])
    {
        # Make the symlink relative, as per Policy 10.5: drop the common
        # leading directories, then climb out of what is left of the link's
        # own directory.
        do
        {
            shift @link_parts;
            shift @dest_parts;
        } while (@link_parts > 0 and @dest_parts > 0 and $link_parts[0] eq $dest_parts[0]);
        my $climb = '../' x $#link_parts;
        $dest = $climb . join('/', @dest_parts);
    }
    doit('ln', '-sf', $dest, $link_path);
}

# Replace known JavaScript files with symlinks to the packaged copies.
#
# Arguments: the documentation directory, followed by the JavaScript paths
# relative to it.  A regular file is looked up in %packaged_js by its MD5
# digest; an existing symlink is looked up by the name derived from its
# target (a .min/.lite/.pack infix is ignored), and is left alone when it
# already points below a sphinxdoc directory.  Files that cannot be
# matched are handed to unknown_javascript().
#
# Returns the list of distinct dependency strings for the links created.
sub fix_symlinks($@)
{
    my ($path, @js) = @_;
    my %deps;
    JS: for my $js (@js)
    {
        my $file = "$path/$js";
        my $id = '';
        if (-l $file)
        {
            my $symlink_target = readlink($file);
            next JS if $symlink_target =~ m{/sphinxdoc/};
            if ($symlink_target =~ m{/javascript/\w+/(\w+)([.](min|lite|pack))?[.]js$})
            {
                $id = "$1.js";
            }
        }
        elsif (-f $file)
        {
            $id = md5($file);
        }

        unless (exists $packaged_js{$id})
        {
            unknown_javascript($file);
            next JS;
        }
        my ($target, $dependency) = @{$packaged_js{$id}};
        ln_sf($target, $file);
        $deps{$dependency} = 1;
    }
    return keys %deps;
}

# Remove build leftovers from a documentation directory.
#
# Takes the documentation directory and deletes, through doit(), the
# .doctrees directory, the .buildinfo file and every file of @cruft_js found
# under _static/.  Anything matched by excludefile() is kept.
sub drop_cruft($)
{
    my ($path) = @_;
    my $doctrees = "$path/.doctrees";
    my $buildinfo = "$path/.buildinfo";
    if (-d $doctrees and not excludefile($doctrees))
    {
        doit('rm', '-rf', $doctrees);
    }
    if (-f $buildinfo and not excludefile($buildinfo))
    {
        doit('rm', '-f', $buildinfo);
    }
    foreach my $name (@cruft_js)
    {
        # A distinct name is used for the full path so that the loop
        # variable is not shadowed.
        my $cruft = "$path/_static/$name";
        if (-f $cruft and not excludefile($cruft))
        {
            doit('rm', '-f', $cruft);
        }
    }
}

# Symlink the files of the Sphinx RTD theme to the system-wide copies.
#
# Takes the documentation directory.  The theme is considered in use when
# _static/js/theme.js contains the string "window.SphinxRtdTheme".  In that
# case, and if /usr/share/sphinx_rtd_theme/static exists, every file of that
# directory that also exists as a plain file under _static/ (and is not
# matched by excludefile()) is replaced with a symlink through ln_sf().
#
# Returns the list of dependencies to add: a single entry for
# sphinx-rtd-theme-common when links were processed, an empty list otherwise.
sub process_rtd($)
{
    my ($path) = @_;
    my $theme_is_rtd = 0;
    my $theme_js = "$path/_static/js/theme.js";
    # An unreadable theme.js is not an error: the theme is then simply
    # treated as not being RTD.
    if (-f $theme_js and open(my $theme_fh, '<', $theme_js))
    {
        while (my $line = <$theme_fh>)
        {
            if (index($line, "window.SphinxRtdTheme") != -1)
            {
                $theme_is_rtd = 1;
                last;
            }
        }
        close($theme_fh);
    }

    my @deps;
    my $target_dir = "/usr/share/sphinx_rtd_theme/static";
    if ($theme_is_rtd and -d $target_dir)
    {
        find({
            wanted => sub {
                return if -d;
                # Strip "$target_dir/" to get the path relative to the theme.
                my $filename = $_;
                substr($filename, 0, 1 + length $target_dir) = "";
                my $installed = "$path/_static/$filename";
                return unless -f $installed;
                # Honour -X/--exclude here as well, as documented for
                # files being symlinked.
                return if excludefile($installed);
                ln_sf($_, $installed);
            },
            no_chdir => 1
        }, $target_dir);

        my $rtd_theme_version = get_installed_package_version("sphinx-rtd-theme-common");
        $rtd_theme_version = '' unless defined $rtd_theme_version;
        $rtd_theme_version =~ s/-[^-]+$//;  # Remove the Debian version suffix
        # Without a version, emit an unversioned dependency rather than the
        # malformed "sphinx-rtd-theme-common (>= )".
        push @deps, length($rtd_theme_version)
            ? "sphinx-rtd-theme-common (>= $rtd_theme_version)"
            : "sphinx-rtd-theme-common";
    }
    return @deps;
}
 
# Query dpkg for the version of an installed package.
#
# Takes the package name and returns whatever "dpkg-query -W" prints for
# its ${Version} field; error output of the command is discarded.  The
# name is interpolated into a shell command line as is.
sub get_installed_package_version($)
{
    my $package_name = shift;
    my $query = "dpkg-query -W -f '\${Version}' $package_name 2>/dev/null";
    # Returned directly so that the command runs in the caller's context.
    return qx($query);
}

# Build the entries for the sphinxdoc:Built-Using substitution variable.
#
# Takes the documentation directory.  Returns a list containing
# "sphinx (= VERSION)" from the installed sphinx-common package and, when
# _static/alabaster.css exists, "alabaster (= VERSION)" from the installed
# python3-alabaster package.  An entry is left out when the version of its
# package cannot be determined.
sub list_built_using($)
{
    my ($path) = @_;
    my @built_using;
    my $sphinx_version = get_installed_package_version("sphinx-common");
    # Same guard as for alabaster below: an empty version would produce the
    # malformed relation "sphinx (= )".
    if ($sphinx_version)
    {
        push @built_using, "sphinx (= $sphinx_version)";
    }
    if (-f "$path/_static/alabaster.css")
    {
        my $alabaster_version = get_installed_package_version("python3-alabaster");
        if ($alabaster_version)
        {
            push @built_using, "alabaster (= $alabaster_version)";
        }
    }
    return @built_using;
}

# Process one candidate documentation directory of a package.
#
# Arguments: the binary package name and the directory to examine.
# Returns 0 without touching anything when the directory is neither a
# regular nor a singlehtml Sphinx build.  Otherwise it runs the sanity
# checks, replaces RTD theme files and known JavaScript with symlinks,
# removes build leftovers, records the sphinxdoc:Depends and
# sphinxdoc:Built-Using substitution variables, and returns 1.
sub fix_sphinx_doc($$)
{
    my ($package, $path) = @_;
    my $is_html = looks_like_sphinx_doc($path);
    my $singlehtml_file = looks_like_sphinx_singlehtml_doc($path);
    unless ($is_html or $singlehtml_file)
    {
        return 0;
    }

    my @js = sanity_check($path, $singlehtml_file);
    my @rtd_deps = process_rtd($path);
    my @deps = fix_symlinks($path, @js);
    my @built_using = list_built_using($path);
    drop_cruft($path);

    for my $dependency (@deps, @rtd_deps)
    {
        addsubstvar($package, "sphinxdoc:Depends", $dependency);
    }
    for my $source (@built_using)
    {
        addsubstvar($package, "sphinxdoc:Built-Using", $source);
    }
    return 1;
}

init();

load_packaged_js();

# With no directory on the command line, a single undef entry requests the
# automatic scan of usr/share/doc in every package.
my @paths = @ARGV ? @ARGV : (undef);

foreach my $path (@paths)
{
    my $done = 0;           # number of documentation trees processed
    my @matching_packages;  # packages that contain the requested directory
    foreach my $package (@{$dh{DOPACKAGES}})
    {
        my $pkgpath = tmpdir($package);
        unless (defined $path)
        {
            # Automatic mode: try every real (non-symlink, non-excluded)
            # directory below usr/share/doc.
            $pkgpath .= '/usr/share/doc/';
            next unless -d $pkgpath;
            find({
                no_chdir => 1,
                wanted => sub {
                    return if !-d $_ || -l $_ || excludefile($_);
                    $done += fix_sphinx_doc($package, $_);
                },
            }, $pkgpath);
            next;
        }

        # Explicit mode: the directory must exist inside this package.
        # NOTE(review): this tests $path relative to the current directory,
        # not inside the package tree -- confirm that this is intended.
        next if -l $path;
        $pkgpath .= "/$path";
        next unless -d $pkgpath;
        push @matching_packages, $package;
        $done += fix_sphinx_doc($package, $pkgpath);
    }
    next unless $done == 0;

    # Nothing was processed: fatal for an explicit directory, only a warning
    # in automatic mode.
    unless (defined $path)
    {
        warning("Sphinx documentation not found");
        next;
    }
    unless (@matching_packages)
    {
        error("Path $path not found in any built package\n(searched in: @{$dh{DOPACKAGES}})");
    }
    error("Sphinx documentation not found at $path\n(path found in packages: @matching_packages)");
}

=head1 SEE ALSO

L<debhelper(7)>, L<dh(1)>.

This program is meant to be used together with debhelper.

=head1 AUTHOR

Jakub Wilk <jwilk@debian.org>

=cut

# vim:ts=4 sw=4 et