Current File : //bin/X11/uscan
#!/usr/bin/perl
# -*- tab-width: 8; indent-tabs-mode: t; cperl-indent-level: 4 -*-
# vim: set ai shiftwidth=4 tabstop=4 expandtab:

# uscan: This program looks for watch files and checks upstream ftp sites
# for later versions of the software.
#
# Originally written by Christoph Lameter <clameter@debian.org> (I believe)
# Modified by Julian Gilbey <jdg@debian.org>
# HTTP support added by Piotr Roszatycki <dexter@debian.org>
# Rewritten in Perl, Copyright 2002-2006, Julian Gilbey
# Rewritten in Object Oriented Perl, copyright 2018, Xavier Guimard
# <yadd@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

#######################################################################
# {{{ code 0: POD for manpage
#######################################################################

=pod

=head1 NAME

uscan - scan/watch upstream sources for new releases of software

=head1 SYNOPSIS

B<uscan> [I<options>] [I<path>]

=head1 DESCRIPTION

For basic usage, B<uscan> is executed without any arguments from the root
of the Debianized source tree where you see the F<debian/> directory, or
a directory containing multiple source trees.

Unless --watchfile is given, B<uscan> looks recursively for valid source
trees starting from the current directory (see the below section
L<Directory name checking> for details).

For each valid source tree found, typically the following happens:

=over

=item * B<uscan> reads the first entry in F<debian/changelog> to determine the
source package name I<< <spkg> >> and the last upstream version.

=item * B<uscan> process the watch lines F<debian/watch> from the top to the
bottom in a single pass.

=over

=item * B<uscan> downloads a web page from the specified I<URL> in
F<debian/watch>.

=item * B<uscan> extracts hrefs pointing to the upstream tarball(s) from the
web page using the specified I<matching-pattern> in F<debian/watch>.

=item * B<uscan> downloads the upstream tarball with the highest version newer
than the last upstream version.

=item * B<uscan> saves the downloaded tarball to the parent B<../> directory:
I<< ../<upkg>-<uversion>.tar.gz >>

=item * B<uscan> invokes B<mk-origtargz> to create the source tarball: I<<
../<spkg>_<oversion>.orig.tar.gz >>

=over

=item * For a multiple upstream tarball (MUT) package, the secondary upstream
tarball will instead be named I<< ../<spkg>_<oversion>.orig-<component>.tar.gz >>.

=back

=item * Repeat until all lines in F<debian/watch> are processed.

=back

=item * B<uscan> invokes B<uupdate> to create the Debianized source tree: I<<
../<spkg>-<oversion>/* >>

=back

Please note the following.

=over

=item * For simplicity, the compression method used in examples is B<gzip> with
B<.gz> suffix.  Other methods such as B<xz>, B<bzip2>, and B<lzma> with
corresponding B<xz>, B<bz2> and B<lzma> suffixes may also be used.

=item * The new B<version=4> enables handling of multiple upstream tarball
(MUT) packages but this is a rare case for Debian packaging.  For a single
upstream tarball package, there is only one watch line and no I<<
../<spkg>_<oversion>.orig-<component>.tar.gz >> .

=item * B<uscan> with the B<--verbose> option produces a human readable report
of B<uscan>'s execution.

=item * B<uscan> with the B<--debug> option produces a human readable report of
B<uscan>'s execution including internal variable states.

=item * B<uscan> with the B<--extra-debug> option produces a human readable
report of B<uscan>'s execution including internal variable states and remote
content during "search" step.

=item * B<uscan> with the B<--dehs> option produces an upstream package status
report in XML format for other programs such as the Debian External Health
System.

=item * The primary objective of B<uscan> is to help identify if the latest
version upstream tarball is used or not; and to download the latest upstream
tarball.  The ordering of versions is decided by B<dpkg --compare-versions>.

=item * B<uscan> with the B<--safe> option limits the functionality of B<uscan>
to its primary objective.  Both the repacking of downloaded files and
updating of the source tree are skipped to avoid running unsafe scripts.
This also changes the default to B<--no-download> and B<--skip-signature>.

=back

=head1 FORMAT OF THE WATCH FILE

The current version 4 format of F<debian/watch> can be summarized as follows:

=over

=item * Leading spaces and tabs are dropped.

=item * Empty lines are dropped.

=item * A line started by B<#> (hash) is a comment line and dropped.

=item * A single B<\> (back slash) at the end of a line is dropped and the
next line is concatenated after removing leading spaces and tabs. The
concatenated line is parsed as a single line. (The existence or non-existence
of the space before the tailing single B<\> is significant.)

=item * The first non-comment line is:

=over

=item B<version=4>

=back

This is a required line and the recommended version number.

If you use "B<version=3>" instead here, some features may not work as
documented here.  See L<HISTORY AND UPGRADING>.

=item * The following non-comment lines (watch lines) specify the rules for the
selection of the candidate upstream tarball URLs and are in one of the
following three formats:

=over

=item * B<opts="> I<...> B<"> B<http://>I<URL> I<matching-pattern> [I<version> [I<script>]]

=item * B<http://>I<URL> I<matching-pattern> [I<version> [I<script>]]

=item * B<opts="> I<...> B<">

=back

Here,

=over

=item * B<opts="> I<...> B<"> specifies the behavior of B<uscan>.  See L<WATCH
FILE OPTIONS>.

=item * B<http://>I<URL> specifies the web page where upstream publishes
the link to the latest source archive.

=over

=item * B<https://>I<URL> may also be used, as may

=item * B<ftp://>I<URL>

=item * Some parts of I<URL> may be in the regex match pattern surrounded
between B<(> and B<)> such as B</foo/bar-([\.\d]+)/>.  (If multiple
directories match, the highest version is picked.) Otherwise, the I<URL>
is taken as verbatim.

=back

=item * I<matching-pattern> specifies the full string matching pattern for
hrefs in the web page.  See L<WATCH FILE EXAMPLES>.

=over

=item * All matching parts in B<(> and B<)> are concatenated with B<.> (period)
to form the upstream version.

=item * If the hrefs do not contain directories, you can combine this with the
previous entry. I.e., B<http://>I<URL>B</>I<matching-pattern> .

=back

=item * I<version> restricts the upstream tarball which may be downloaded.
The newest available version is chosen in each case.

=over

=item * B<debian> I<(default)> requires the downloading upstream tarball to be
newer than the version obtained from F<debian/changelog>.

=item * I<version-number> such as B<12.5> requires the upstream
tarball to be newer than the I<version-number>.

=item * B<same> requires the downloaded version of the secondary tarballs to be
exactly the same as the one for the first upstream tarball downloaded. (Useful
only for MUT)

=item * B<previous> restricts the version of the signature
file. (Used with pgpmode=previous)

=item * B<ignore> does not restrict the version of the secondary
tarballs. (Maybe useful for MUT)

=item * B<group> requires the downloading upstream tarball to be newer than
the version obtained from F<debian/changelog>. Package version is the
concatenation of all "group" upstream version.

=item * B<checksum> requires the downloading upstream tarball to be newer than
the version obtained from F<debian/changelog>. Package version is the
concatenation of the version of the main tarball, followed by a checksum of all
the tarballs using the "checksum" version system.
At least the main upstream source has to be declared as "group".

=back

=item * I<script> is executed at the end of B<uscan> execution with appropriate
arguments provided by B<uscan> I<(default: no action)>.

=over

=item * The typical Debian package is a non-native package made from one
upstream tarball.  Only a single line of the watch line in one of the first two
formats is usually used with its I<version> set to B<debian> and I<script>
set to B<uupdate>.

=item * A native package should not specify I<script>.

=item * A multiple upstream tarball (MUT) package should specify B<uupdate>
as I<script> in the last watch line and should skip specifying I<script> in the
rest of the watch lines.

=back

=item * The last format of the watch line is useful to set the persistent
parameters: B<user-agent>, B<compression>.  If this format is used, this must
be followed by the I<URL> defining watch line(s).

=item * [ and ] in the above format are there to mark the optional parts and
should not be typed.

=back

=back

There are a few special strings which are substituted by B<uscan> to make it easy
to write the watch file.

=over

=item B<@PACKAGE@>

This is substituted with the source package name found in the first line of the
F<debian/changelog> file.

=item B<@ANY_VERSION@>

This is substituted by the legal upstream version regex (capturing).

  [-_]?(\d[\-+\.:\~\da-zA-Z]*)

=item B<@ARCHIVE_EXT@>

This is substituted by the typical archive file extension regex (non-capturing).

  (?i)(?:\.(?:tar\.xz|tar\.bz2|tar\.gz|tar\.zstd?|zip|tgz|tbz|txz))

=item B<@SIGNATURE_EXT@>

This is substituted by the typical signature file extension regex (non-capturing).

  (?i)(?:\.(?:tar\.xz|tar\.bz2|tar\.gz|tar\.zstd?|zip|tgz|tbz|txz))'(?:\.(?:asc|pgp|gpg|sig|sign))'

=item B<@DEB_EXT@>

This is substituted by the typical Debian extension regexp (capturing).

  [\+~](debian|dfsg|ds|deb)(\.)?(\d+)?$

=back

Some file extensions are not included in the above intentionally to avoid false
positives.  You can still set such file extension patterns manually.

=head1 WATCH FILE OPTIONS

B<uscan> reads the watch options specified in B<opts="> I<...> B<"> to
customize its behavior. Multiple options I<option1>, I<option2>, I<option3>,
... can be set as B<opts=">I<option1>B<,> I<option2>B<,> I<option3>B<,> I< ...
>B<"> .  The double quotes are necessary if options contain any spaces.

Unless otherwise noted as persistent, most options are valid only within their
containing watch line.

The available watch options are:

=over

=item B<component=>I<component>

Set the name of the secondary source tarball as I<<
<spkg>_<oversion>.orig-<component>.tar.gz >> for a MUT package.

=item B<ctype=>I<component-type>

Set the type of component I<(only "nodejs" and "perl" are available for now)>.
This will help uscan to find current version if component version is ignored.

When using B<ctype=nodejs>, uscan tries to find a version in C<package.json>,
when using B<ctype=perl>, uscan tries to find a version in C<META.json>.
If a version is found, it is used as current version for this component,
regardless version found in Debian version string. This permits a better
change detection when using I<ignore> or I<checksum> as Debian version.

=item B<compression=>I<method>

Set the compression I<method> when the tarball is repacked (persistent).

Available I<method> values are what mk-origtargz supports, so B<xz>, B<gzip>
(alias B<gz>), B<bzip2> (alias B<bz2>), B<lzma>, B<default>. The default method
is currently B<xz>.
When uscan is launched in a debian source repository which format is "1.0" or
undefined, the method switches to B<gzip>.

Please note the repacking of the upstream tarballs by B<mk-origtargz> happens
only if one of the following conditions is satisfied:

=over

=item * B<USCAN_REPACK> is set in the devscript configuration.  See L<DEVSCRIPT
CONFIGURATION VARIABLES>.

=item * B<--repack> is set on the commandline.  See <COMMANDLINE OPTIONS>.

=item * B<repack> is set in the watch line as B<opts="repack,>I<...>B<">.

=item * The upstream archive is of B<zip> type including B<jar>, B<xpi>, ...

=item * The upstream archive is of B<zstd> (Zstandard) type.

=item * B<Files-Excluded> or B<Files-Excluded->I<component> stanzas are set in
F<debian/copyright> to make B<mk-origtargz> invoked from B<uscan> remove
files from the upstream tarball and repack it.  See L<COPYRIGHT FILE
EXAMPLES> and mk-origtargz(1).

=back

=item B<repack>

Force repacking of the upstream tarball using the compression I<method>.

=item B<repacksuffix=>I<suffix>

Add I<suffix> to the Debian package upstream version only when the
source tarball is repackaged.  This rule should be used only for a single
upstream tarball package.

=item B<mode=>I<mode>

Set the archive download I<mode>.

=over

=item B<LWP>

This mode is the default one which downloads the specified tarball from the
archive URL on the web.  Automatically internal B<mode> value is updated to
either B<http> or B<ftp> by URL.

=item B<git>

This mode accesses the upstream git archive directly with the B<git> command
and packs the source tree with the specified tag via I<matching-pattern> into
I<spkg-version>B<.tar.xz>.

If the upstream publishes the released tarball via its web interface, please
use it instead of using this mode. This mode is the last resort method.

For git mode, I<matching-pattern> specifies the full string matching pattern for
tags instead of hrefs. If I<matching-pattern> is set to
B<refs/tags/>I<tag-matching-pattern>, B<uscan> downloads source from the
B<refs/tags/>I<matched-tag> of the git repository.  The upstream version is
extracted from concatenating the matched parts in B<(> ... B<)> with B<.> .  See
L<WATCH FILE EXAMPLES>.

If I<matching-pattern> is set to B<HEAD>, B<uscan> downloads source from the
B<HEAD> of the git repository and the pertinent I<version> is automatically
generated with the date and hash of the B<HEAD> of the git repository.

If I<matching-pattern> is set to B<refs/heads/>I<branch>, B<uscan> downloads source
from the named I<branch> of the git repository.

The local repository is temporarily created as a bare git repository directory
under the destination directory where the downloaded archive is generated.  This
is normally erased after the B<uscan> execution.  This local repository is kept
if B<--debug> option is used.

If the current directory is a git repository and the searched repository is
listed among the registered "remotes", then uscan will use it instead of cloning
separately.  The only local change is that uscan will run a "fetch" command to
refresh the repository.

=item B<svn>

This mode accesses the upstream Subversion archive directly with the B<svn>
command and packs the source tree.

For svn mode, I<matching-pattern> specifies the full string matching pattern for
directories under Subversion repository directory, specified via URL.  The
upstream version is extracted from concatenating the matched parts in B<(> ...
B<)> with B<.> .

If I<matching-pattern> is set to B<HEAD>, B<uscan> downloads the latest source
tree of the URL.  The upstream version is then constructed by appending the last
revision of the URL to B<0.0~svn>.

As commit signing is not possible with Subversion, the default B<pgpmode> is set
to B<none> when B<mode=svn>. Settings of B<pgpmode> other than B<default> and
B<none> are reported as errors.

=back

=item B<pretty=>I<rule>

Set the upstream version string to an arbitrary format as an optional B<opts>
argument when the I<matching-pattern> is B<HEAD> or B<heads/>I<branch> for
B<git> mode.  For the exact syntax, see the B<git-log> manpage under B<tformat>.
The default is B<pretty=0.0~git%cd.%h>.  No B<uversionmangle> rules is
applicable for this case.

When B<pretty=describe> is used, the upstream version string is the output of
the "B<git describe --tags | sed s/-/./g>" command instead. For example, if the
commit is the B<5>-th after the last tag B<v2.17.12> and its short hash is
B<ged992511>, then the string is B<v2.17.12.5.ged992511> .  For this case, it is
good idea to add B<uversionmangle=s/^/0.0~/> or B<uversionmangle=s/^v//> to make
the upstream version string compatible with Debian.  B<uversionmangle=s/^v//>
may work as well.  Please note that in order for B<pretty=describe> to function
well, upstream need to avoid tagging with random alphabetic tags.

The B<pretty=describe> forces to set B<gitmode=full> to make a full local clone
of the repository automatically.

=item B<date=>I<rule>

Set the date string used by the B<pretty> option to an arbitrary format as an
optional B<opts> argument when the I<matching-pattern> is B<HEAD> or
B<heads/>I<branch> for B<git> mode.  For the exact syntax, see the
B<strftime> manpage.  The default is B<date=%Y%m%d>.

=item B<gitexport=>I<mode>

Set the git archive export operation I<mode>. The default is
B<gitexport=default>.  Set this to B<gitexport=all> to include all files in the
.orig.tar archive, ignoring any I<export-ignore> git attributes defined by the
upstream.

This option is valid only in git mode.

=item B<gitmode=>I<mode>

Set the git clone operation I<mode>. The default is B<gitmode=shallow>.  For
some dumb git server, you may need to manually set B<gitmode=full> to force full
clone operation.

If the current directory is a git repository and the searched repository is
listed among the registered "remotes", then uscan will use it instead of cloning
separately.

=item B<pgpmode=>I<mode>

Set the PGP/GPG signature verification I<mode>.

=over

=item B<auto>

B<uscan> checks possible URLs for the signature file and autogenerates a
B<pgpsigurlmangle> rule to use it.

=item B<default>

Use B<pgpsigurlmangle=>I<rules> to generate the candidate upstream signature
file URL string from the upstream tarball URL. (default)

If the specified B<pgpsigurlmangle> is missing, B<uscan> checks possible URLs
for the signature file and suggests adding a B<pgpsigurlmangle> rule.

=item B<mangle>

Use B<pgpsigurlmangle=>I<rules> to generate the candidate upstream signature
file URL string from the upstream tarball URL.

=item B<next>

Verify this downloaded tarball file with the signature file specified in the
next watch line.  The next watch line must be B<pgpmode=previous>.  Otherwise,
no verification occurs.

=item B<previous>

Verify the downloaded tarball file specified in the previous watch line with
this signature file.  The previous watch line must be B<pgpmode=next>.

=item B<self>

Verify the downloaded file I<foo.ext> with its self signature and extract its
content tarball file as I<foo>.

=item B<gittag>

Verify tag signature if B<mode=git>.

=item B<none>

No signature available. (No warning.)

=back

=item B<searchmode=>I<mode>

Set the parsing search mode.

=over

=item B<html> I<(default)>: search pattern in "href" parameter of E<lt>aE<gt>
HTML tags

=item B<plain>: search pattern in the full page

This is useful if page content is not HTML but JSON. Example with
npmjs.com:

  version=4
  opts="searchmode=plain" \
   https://registry.npmjs.org/aes-js \
   https://registry.npmjs.org/aes-js/-/aes-js-(\d[\d\.]*)@ARCHIVE_EXT@

=back

=item B<decompress>

Decompress compressed archive before the pgp/gpg signature verification.

=item B<bare>

Disable all site specific special case code such as URL redirector uses and
page content alterations. (persistent)

=item B<user-agent=>I<user-agent-string>

Set the user-agent string used to contact the HTTP(S) server as
I<user-agent-string>. (persistent)

B<user-agent> option should be specified by itself in the watch line without
I<URL>, to allow using semicolons and commas in it.

=item B<pasv>, B<passive>

Use PASV mode for the FTP connection.

If PASV mode is required due to the client side network environment, set
B<uscan> to use PASV mode via L<COMMANDLINE OPTIONS> or L<DEVSCRIPT
CONFIGURATION VARIABLES> instead.

=item B<active>, B<nopasv>

Don't use PASV mode for the FTP connection.

=item B<unzipopt=>I<options>

Add the extra options to use with the B<unzip> command, such as B<-a>, B<-aa>,
and B<-b>, when executed by B<mk-origtargz>.

=item B<dversionmangle=>I<rules>

Normalize the last upstream version string found in F<debian/changelog> to
compare it to the available upstream tarball version.  Removal of the Debian
specific suffix such as B<s/@DEB_EXT@//> is usually done here.

You can also use B<dversionmangle=auto>, this is exactly the same than
B<dversionmangle=s/@DEB_EXT@//>

=item B<dirversionmangle=>I<rules>

Normalize the directory path string matching the regex in a set of parentheses
of B<http://>I<URL> as the sortable version index string.  This is used as the
directory path sorting index only.

Substitution such as B<s/PRE/~pre/; s/RC/~rc/> may help.

=item B<pagemangle=>I<rules>

Normalize the downloaded web page string.  (Don't use this unless this is
absolutely needed.  Generally, B<g> flag is required for these I<rules>.)

This is handy if you wish to access Amazon AWS or Subversion repositories in
which <a href="..."> is not used.

=item B<uversionmangle=>I<rules>

Normalize the candidate upstream version strings extracted from hrefs in the
source of the web page.  This is used as the version sorting index when
selecting the latest upstream version.

Substitution such as B<s/PRE/~pre/; s/RC/~rc/> may help.

=item B<versionmangle=>I<rules>

Syntactic shorthand for B<uversionmangle=>I<rules>B<, dversionmangle=>I<rules>

=item B<hrefdecode=percent-encoding>

Convert the selected upstream tarball href string from the percent-encoded
hexadecimal string to the decoded normal URL string for obfuscated web sites.
Only B<percent-encoding> is available and it is decoded with
B<s/%([A-Fa-f\d]{2})/chr hex $1/eg>.

=item B<downloadurlmangle=>I<rules>

Convert the selected upstream tarball href string into the accessible URL for
obfuscated web sites.  This is run after B<hrefdecode>.

=item B<filenamemangle=>I<rules>

Generate the upstream tarball filename from the selected href string if
I<matching-pattern> can extract the latest upstream version I<< <uversion> >>
from the selected href string.  Otherwise, generate the upstream tarball
filename from its full URL string and set the missing I<< <uversion> >> from
the generated upstream tarball filename.

Without this option, the default upstream tarball filename is generated by
taking the last component of the URL and removing everything after any '?' or
'#'.

=item B<pgpsigurlmangle=>I<rules>

Generate the candidate upstream signature file URL string from the upstream
tarball URL.

=item B<oversionmangle=>I<rules>

Generate the version string I<< <oversion> >> of the source tarball I<<
<spkg>_<oversion>.orig.tar.gz >> from I<< <uversion> >>.  This should be used
to add a suffix such as B<+dfsg1> to a MUT package.

=back

Here, the mangling rules apply the I<rules> to the pertinent string.  Multiple
rules can be specified in a mangling rule string by making a concatenated
string of each mangling I<rule> separated by B<;> (semicolon).

Each mangling I<rule> cannot contain B<;> (semicolon), B<,> (comma), or B<">
(double quote).

Each mangling I<rule> behaves as if a Perl command "I<$string> B<=~> I<rule>"
is executed.  There are some notable details.

=over

=item * I<rule> may only use the B<s>, B<tr>, and B<y> operations.

=over

=item B<s/>I<regex>B</>I<replacement>B</>I<options>

Regex pattern match and replace the target string.  Only the B<g>, B<i> and
B<x> flags are available.  Use the B<$1> syntax for back references (No
B<\1> syntax).  Code execution is not allowed (i.e. no B<(?{})> or B<(??{})>
constructs).

=item B<y/>I<source>B</>I<dest>B</> or B<tr/>I<source>B</>I<dest>B</>

Transliterate the characters in the target string.

=back

=back

=head1 EXAMPLE OF EXECUTION

B<uscan> reads the first entry in F<debian/changelog> to determine the source
package name and the last upstream version.

For example, if the first entry of F<debian/changelog> is:

=over

=item * I<< bar >> (B<3:2.03+dfsg1-4>) unstable; urgency=low

=back

then, the source package name is I<< bar >> and the last Debian package version
is B<3:2.03+dfsg1-4>.

The last upstream version is normalized to B<2.03+dfsg1> by removing the epoch
and the Debian revision.

If the B<dversionmangle> rule exists, the last upstream version is further
normalized by applying this rule to it.  For example, if the last upstream
version is B<2.03+dfsg1> indicating the source tarball is repackaged, the
suffix B<+dfsg1> is removed by the string substitution B<s/\+dfsg\d*$//> to
make the (dversionmangled) last upstream version B<2.03> and it is compared to
the candidate upstream tarball versions such as B<2.03>, B<2.04>, ... found in
the remote site.  Thus, set this rule as:

=over

=item * B<opts="dversionmangle=s/\+dfsg\d*$//">

=back

B<uscan> downloads a web page from B<http://>I<URL> specified in
F<debian/watch>.

=over

=item * If the directory name part of I<URL> has no parentheses, B<(> and B<)>,
it is taken as verbatim.

=item * If the directory name part of I<URL> has parentheses, B<(> and B<)>,
then B<uscan> recursively searches all possible directories to find a page for
the newest version.  If the B<dirversionmangle> rule exists, the generated
sorting index is used to find the newest version.  If a specific version is
specified for the download, the matching version string has priority over the
newest version.

=back

For example, this B<http://>I<URL> may be specified as:

=over

=item * B<http://www.example.org/@ANY_VERSION@/>

=back

Please note the trailing B</> in the above to make B<@ANY_VERSION@> as the
directory.

If the B<pagemangle> rule exists, the whole downloaded web page as a string is
normalized by applying this rule to it.  This is very powerful tool and needs
to be used with caution.  If other mangling rules can be used to address your
objective, do not use this rule.

The downloaded web page is scanned for hrefs defined in the B<< <a href=" >>
I<...> B<< "> >> tag to locate the candidate upstream tarball hrefs.  These
candidate upstream tarball hrefs are matched by the Perl regex pattern
I<matching-pattern> such as B<< DL-(?:[\d\.]+?)/foo-(.+)\.tar\.gz >> to narrow
down the candidates.  This pattern match needs to be anchored at the beginning
and the end.  For example, candidate hrefs may be:

=over

=item * B<< DL-2.02/foo-2.02.tar.gz >>

=item * B<< DL-2.03/foo-2.03.tar.gz >>

=item * B<< DL-2.04/foo-2.04.tar.gz >>

=back

Here the matching string of B<(.+)> in I<matching-pattern> is considered as the
candidate upstream version.  If there are multiple matching strings of
capturing patterns in I<matching-pattern>, they are all concatenated with B<.>
(period) to form the candidate upstream version.  Make sure to use the
non-capturing regex such as B<(?:[\d\.]+?)> instead for the variable text
matching part unrelated to the version.

Then, the candidate upstream versions are:

=over

=item * B<2.02>

=item * B<2.03>

=item * B<2.04>

=back

The downloaded tarball filename is basically set to the same as the filename in
the remote URL of the selected href.

If the B<uversionmangle> rule exists, the candidate upstream versions are
normalized by applying this rule to them. (This rule may be useful if the
upstream version scheme doesn't sort correctly to identify the newest version.)

The upstream tarball href corresponding to the newest (uversionmangled)
candidate upstream version newer than the (dversionmangled) last upstream
version is selected.

If multiple upstream tarball hrefs corresponding to a single version with
different extensions exist, the highest compression one is chosen. (Priority:
B<< tar.xz > tar.lzma > tar.bz2 > tar.gz >>.)

If the selected upstream tarball href is the relative URL, it is converted to
the absolute URL using the base URL of the web page.  If the B<< <base href="
>> I< ... > B<< "> >> tag exists in the web page, the selected upstream tarball
href is converted to the absolute URL using the specified base URL in the base
tag, instead.

If the B<downloadurlmangle> rule exists, the selected upstream tarball href is
normalized by applying this rule to it. (This is useful for some sites with the
obfuscated download URL.)

If the B<filenamemangle> rule exists, the downloaded tarball filename is
generated by applying this rule to the selected href if I<matching-pattern> can
extract the latest upstream version I<< <uversion> >> from the selected href
string. Otherwise, generate the upstream tarball filename from its full URL
string and set the missing I<< <uversion> >> from the generated upstream
tarball filename.

Without the B<filenamemangle> rule, the default upstream tarball filename is
generated by taking the last component of the URL and removing everything after
any '?' or '#'.

B<uscan> downloads the selected upstream tarball to the parent B<../>
directory.  For example, the downloaded file may be:

=over

=item * F<../foo-2.04.tar.gz>

=back

Let's call this downloaded version B<2.04> in the above example generically as
I<< <uversion> >> in the following.

If the B<pgpsigurlmangle> rule exists, the upstream signature file URL is
generated by applying this rule to the (downloadurlmangled) selected upstream
tarball href and the signature file is tried to be downloaded from it.

If the B<pgpsigurlmangle> rule doesn't exist, B<uscan> warns user if the
matching upstream signature file is available from the same URL with their
filename being suffixed by the 5 common suffix B<asc>, B<gpg>, B<pgp>, B<sig>
and B<sign>. (You can avoid this warning by setting B<pgpmode=none>.)

If the signature file is downloaded, the downloaded upstream tarball is checked
for its authenticity against the downloaded signature file using the armored keyring
F<debian/upstream/signing-key.asc>  (see L<KEYRING FILE EXAMPLES>).  If its
signature is not valid, or not made by one of the listed keys, B<uscan> will
report an error.

If the B<oversionmangle> rule exists, the source tarball version I<oversion> is
generated from the downloaded upstream version I<uversion> by applying this
rule. This rule is useful to add suffix such as B<+dfsg1> to the version of all
the source packages of the MUT package for which the repacksuffix mechanism
doesn't work.

B<uscan> invokes B<mk-origtargz> to create the source tarball properly named
for the source package with B<.orig.> (or B<< .orig-<component>. >> for the
secondary tarballs) in its filename.

=over

=item case A: packaging of the upstream tarball as is

B<mk-origtargz> creates a symlink I<< ../bar_<oversion>.orig.tar.gz >>
linked to the downloaded local upstream tarball. Here, I<< bar >> is the source
package name found in F<debian/changelog>. The generated symlink may be:

=over

=item * F<../bar_2.04.orig.tar.gz> -> F<foo-2.04.tar.gz> (as is)

=back

Usually, there is no need to set up B<opts="dversionmangle=> I<...> B<"> for
this case.

=item case B: packaging of the upstream tarball after removing non-DFSG files

B<mk-origtargz> checks the filename glob of the B<Files-Excluded> stanza in the
first section of F<debian/copyright>, removes matching files to create a
repacked upstream tarball.  Normally, the repacked upstream tarball is renamed
with I<suffix> to I<< ../bar_<oversion><suffix>.orig.tar.gz >> using
the B<repacksuffix> option for the single upstream package.    Here I<< <oversion> >>
is updated to be I<< <oversion><suffix> >>.

The removal of files is required if files are not DFSG-compliant.  For such
case, B<+dfsg1> is used as I<suffix>.

So the combined options are set as
B<opts="dversionmangle=s/\+dfsg\d*$// ,repacksuffix=+dfsg1">, instead.

For example, the repacked upstream tarball may be:

=over

=item * F<../bar_2.04+dfsg1.orig.tar.gz> (repackaged)

=back

=back

B<uscan> normally invokes "B<uupdate> B<--find --upstream-version> I<oversion>
" for the version=4 watch file.

Please note that B<--find> option is used here since B<mk-origtargz> has been
invoked to make B<*.orig.tar.gz> file already.  B<uscan> picks I<< bar >> from
F<debian/changelog>.

It creates the new upstream source tree under the I<< ../bar-<oversion> >>
directory and Debianize it leveraging the last package contents.

=head1 WATCH FILE EXAMPLES

When writing the watch file, you should rely on the latest upstream source
announcement web page.  You should not try to second guess the upstream archive
structure if possible.  Here are the typical F<debian/watch> files.

Please note that executing B<uscan> with B<-v> or B<-vv> reveals what exactly
happens internally.

The existence and non-existence of a space the before tailing B<\> (back slash)
are significant.

Some undocumented shorter configuration strings are used in the below EXAMPLES
to help you with typing.  These are intentional ones.  B<uscan> is written to
accept such common sense abbreviations but don't push the limit.

=head2 HTTP site (basic)

Here is an example for the basic single upstream tarball.

  version=4
  http://example.com/~user/release/@PACKAGE@.html \
      files/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@

Or without using the substitution strings (not recommended):
  http://example.com/~user/release/foo.html \
      files/foo-([\d\.]+)\.tar\.gz

  version=4

For the upstream source package B<foo-2.0.tar.gz>, this watch file downloads
and creates the Debian B<orig.tar> file B<foo_2.0.orig.tar.gz>.

=head2 HTTP site (pgpsigurlmangle)

Here is an example for the basic single upstream tarball with the matching
signature file in the same file path.

  version=4
  opts="pgpsigurlmangle=s%$%.asc%" http://example.com/release/@PACKAGE@.html \
      files/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@

For the upstream source package B<foo-2.0.tar.gz> and the upstream signature
file B<foo-2.0.tar.gz.asc>, this watch file downloads these files, verifies the
authenticity using the keyring F<debian/upstream/signing-key.asc> and creates the
Debian B<orig.tar> file B<foo_2.0.orig.tar.gz>.

Here is another example for the basic single upstream tarball with the matching
signature file on decompressed tarball in the same file path.

  version=4
  opts="pgpsigurlmangle=s%@ARCHIVE_EXT@$%.asc%,decompress" \
      http://example.com/release/@PACKAGE@.html \
      files/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@

For the upstream source package B<foo-2.0.tar.gz> and the upstream signature
file B<foo-2.0.tar.asc>, this watch file downloads these files, verifies the
authenticity using the keyring F<debian/upstream/signing-key.asc> and creates the
Debian B<orig.tar> file B<foo_2.0.orig.tar.gz>.

=head2 HTTP site (pgpmode=next/previous)

Here is an example for the basic single upstream tarball with the matching
signature file in the unrelated file path.

  version=4
  opts="pgpmode=next" http://example.com/release/@PACKAGE@.html \
      files/(?:\d+)/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@ debian
  opts="pgpmode=previous" http://example.com/release/@PACKAGE@.html \
      files/(?:\d+)/@PACKAGE@@ANY_VERSION@@SIGNATURE_EXT@ previous

B<(?:\d+)> part can be any random value.  The tarball file can have B<53>,
while the signature file can have B<33>.

B<([\d\.]+)> part for the signature file has a strict requirement to match that
for the upstream tarball specified in the previous line by having B<previous>
as I<version> in the watch line.

=head2 HTTP site (flexible)

Here is an example for the maximum flexibility of upstream tarball and
signature file extensions.

  version=4
  opts="pgpmode=next" http://example.com/DL/ \
      files/(?:\d+)/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@ debian
  opts="pgpmode=previous" http://example.com/DL/ \
      files/(?:\d+)/@PACKAGE@@ANY_VERSION@@SIGNATURE_EXT@ \
      previous

=head2 HTTP site (basic MUT)

Here is an example for the basic multiple upstream tarballs.

  version=4
  opts="pgpsigurlmangle=s%$%.sig%" \
      http://example.com/release/foo.html \
      files/foo-@ANY_VERSION@@ARCHIVE_EXT@ debian
  opts="pgpsigurlmangle=s%$%.sig%, component=bar" \
      http://example.com/release/foo.html \
      files/foobar-@ANY_VERSION@@ARCHIVE_EXT@ same
  opts="pgpsigurlmangle=s%$%.sig%, component=baz" \
      http://example.com/release/foo.html \
      files/foobaz-@ANY_VERSION@@ARCHIVE_EXT@ same

For the main upstream source package B<foo-2.0.tar.gz> and the secondary
upstream source packages B<foobar-2.0.tar.gz> and B<foobaz-2.0.tar.gz> which
install under F<bar/> and F<baz/>, this watch file downloads and creates the
Debian B<orig.tar> file B<foo_2.0.orig.tar.gz>, B<foo_2.0.orig-bar.tar.gz> and
B<foo_2.0.orig-baz.tar.gz>.  Also, these upstream tarballs are verified by
their signature files.

=head2 HTTP site (recursive directory scanning)

Here is an example with the recursive directory scanning for the upstream tarball
and its signature files released in a directory named
after their version.

  version=4
  opts="pgpsigurlmangle=s%$%.sig%, dirversionmangle=s/-PRE/~pre/;s/-RC/~rc/" \
      http://tmrc.mit.edu/mirror/twisted/Twisted/@ANY_VERSION@/ \
      Twisted-@ANY_VERSION@@ARCHIVE_EXT@

Here, the web site should be accessible at the following URL:

  http://tmrc.mit.edu/mirror/twisted/Twisted/

Here, B<dirversionmangle> option is used to normalize the sorting order of the
directory names.

=head2 HTTP site (alternative shorthand)

For the bare HTTP site where you can directly see archive filenames, the normal
watch file:

  version=4
  opts="pgpsigurlmangle=s%$%.sig%" \
      http://www.cpan.org/modules/by-module/Text/ \
      Text-CSV_XS-@ANY_VERSION@@ARCHIVE_EXT@

can be rewritten in an alternative shorthand form only with a single string
covering URL and filename:

  version=4
  opts="pgpsigurlmangle=s%$%.sig%" \
      http://www.cpan.org/modules/by-module/Text/Text-CSV_XS-@ANY_VERSION@@ARCHIVE_EXT@

In version=4, initial white spaces are dropped.  Thus, this alternative
shorthand form can also be written as:

  version=4
  opts="pgpsigurlmangle=s%$%.sig%" \
      http://www.cpan.org/modules/by-module/Text/\
      Text-CSV_XS-@ANY_VERSION@@ARCHIVE_EXT@

Please note the subtle difference of a space before the tailing B<\>
between the first and the last examples.

=head2 HTTP site (funny version)

For a site which has funny version numbers, the parenthesized groups will be
joined with B<.> (period) to make a sanitized version number.

  version=4
  http://www.site.com/pub/foobar/foobar_v(\d+)_(\d+)@ARCHIVE_EXT@

=head2 HTTP site (DFSG)

The upstream part of the Debian version number can be mangled to indicate the
source package was repackaged to clean up non-DFSG files:

  version=4
  opts="dversionmangle=s/\+dfsg\d*$//,repacksuffix=+dfsg1" \
  http://some.site.org/some/path/foobar-@ANY_VERSION@@ARCHIVE_EXT@

See L<COPYRIGHT FILE EXAMPLES>.

=head2 HTTP site (filenamemangle)

The upstream tarball filename is found by taking the last component of the URL
and removing everything after any '?' or '#'.

If this does not fit to you, use B<filenamemangle>.  For example, F<< <A
href="http://foo.bar.org/dl/?path=&dl=foo-0.1.1.tar.gz"> >> could be handled
as:

  version=4
  opts=filenamemangle=s/.*=(.*)/$1/ \
  http://foo.bar.org/dl/\?path=&dl=foo-@ANY_VERSION@@ARCHIVE_EXT@

F<< <A href="http://foo.bar.org/dl/?path=&dl_version=0.1.1"> >>
could be handled as:

  version=4
  opts=filenamemangle=s/.*=(.*)/foo-$1\.tar\.gz/ \
  http://foo.bar.org/dl/\?path=&dl_version=@ANY_VERSION@

If the href string has no version using <I>matching-pattern>, the version can
be obtained from the full URL using B<filenamemangle>.

  version=4
  opts=filenamemangle=s&.*/dl/(.*)/foo\.tar\.gz&foo-$1\.tar\.gz& \
  http://foo.bar.org/dl/@ANY_VERSION@/ foo.tar.gz

=head2 HTTP site (downloadurlmangle)

The option B<downloadurlmangle> can be used to mangle the URL of the file
to download.  This can only be used with B<http://> URLs.  This may be
necessary if the link given on the web page needs to be transformed in
some way into one which will work automatically, for example:

  version=4
  opts=downloadurlmangle=s/prdownload/download/ \
  http://developer.berlios.de/project/showfiles.php?group_id=2051 \
  http://prdownload.berlios.de/softdevice/vdr-softdevice-@ANY_VERSION@@ARCHIVE_EXT@

=head2 HTTP site (oversionmangle, MUT)

The option B<oversionmangle> can be used to mangle the version of the source
tarball (B<.orig.tar.gz> and B<.orig-bar.tar.gz>).  For example, B<+dfsg1> can
be added to the upstream version as:

  version=4
  opts=oversionmangle=s/(.*)/$1+dfsg1/ \
  http://example.com/~user/release/foo.html \
  files/foo-@ANY_VERSION@@ARCHIVE_EXT@ debian
  opts="component=bar" \
  http://example.com/~user/release/foo.html \
  files/bar-@ANY_VERSION@@ARCHIVE_EXT@ same

See L<COPYRIGHT FILE EXAMPLES>.

=head2 HTTP site (pagemangle)

The option B<pagemangle> can be used to mangle the downloaded web page before
applying other rules.  The non-standard web page without proper B<< <a href="
>> << ... >> B<< "> >> entries can be converted.  For example, if F<foo.html>
uses B<< <a bogus=" >> I<< ... >> B<< "> >>, this can be converted to the
standard page format with:

  version=4
  opts=pagemangle="s/<a\s+bogus=/<a href=/g" \
  http://example.com/release/foo.html \
  files/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@

Please note the use of B<g> here to replace all occurrences.

If F<foo.html> uses B<< <Key> >> I<< ... >> B<< </Key> >>, this can be
converted to the standard page format with:

  version=4
  opts="pagemangle=s%<Key>([^<]*)</Key>%<Key><a href="$1">$1</a></Key>%g" \
  http://example.com/release/foo.html \
  (?:.*)/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@

=head2 FTP site (basic):

  version=4
  ftp://ftp.tex.ac.uk/tex-archive/web/c_cpp/cweb/cweb-@ANY_VERSION@@ARCHIVE_EXT@

=head2 FTP site (regex special characters):

  version=4
  ftp://ftp.worldforge.org/pub/worldforge/libs/\
  Atlas-C++/transitional/Atlas-C\+\+-@ANY_VERSION@@ARCHIVE_EXT@

Please note that this URL is connected to be I< ... >B<libs/Atlas-C++/>I< ... >
. For B<++>, the first one in the directory path is verbatim while the one in
the filename is escaped by B<\>.

=head2 FTP site (funny version)

This is another way of handling site with funny version numbers,
this time using mangling.  (Note that multiple groups will be
concatenated before mangling is performed, and that mangling will
only be performed on the basename version number, not any path
version numbers.)

  version=4
  opts="uversionmangle=s/^/0.0./" \
  ftp://ftp.ibiblio.org/pub/Linux/ALPHA/wine/\
  development/Wine-@ANY_VERSION@@ARCHIVE_EXT@

=head2 sf.net

For SourceForge based projects, qa.debian.org runs a redirector which allows a
simpler form of URL. The format below will automatically be rewritten to use
the redirector with the watch file:

  version=4
  https://sf.net/<project>/ <tar-name>-@ANY_VERSION@@ARCHIVE_EXT@

For B<audacity>, set the watch file as:

  version=4
  https://sf.net/audacity/ audacity-minsrc-@ANY_VERSION@@ARCHIVE_EXT@

Please note, you can still use normal functionalities of B<uscan> to set up a
watch file for this site without using the redirector.

  version=4
  opts="uversionmangle=s/-pre/~pre/, \
    filenamemangle=s%(?:.*)audacity-minsrc-(.+)\.tar\.xz/download%\
                         audacity-$1.tar.xz%" \
    http://sourceforge.net/projects/audacity/files/audacity/@ANY_VERSION@/ \
    (?:.*)audacity-minsrc-@ANY_VERSION@@ARCHIVE_EXT@/download

Here, B<%> is used as the separator instead of the standard B</>.

=head2 github.com

For GitHub based projects, you can use the tags or releases page.  The archive
URL uses only the version as the filename.  You can rename the downloaded
upstream tarball from into the standard F<< <project>-<version>.tar.gz >> using
B<filenamemangle>:

  version=4
  opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*@ARCHIVE_EXT@)%@PACKAGE@-$1%" \
      https://github.com/<user>/<project>/tags \
      (?:.*?/)?v?@ANY_VERSION@@ARCHIVE_EXT@

=head2 PyPI

For PyPI based projects, pypi.debian.net runs a redirector which allows a
simpler form of URL. The format below will automatically be rewritten to use
the redirector with the watch file:

  version=4
  https://pypi.python.org/packages/source/<initial>/<project>/ \
      <tar-name>-@@ANY_VERSION@@ARCHIVE_EXT@

For B<cfn-sphere>, set the watch file as:

  version=4
  https://pypi.python.org/packages/source/c/cfn-sphere/ \
      cfn-sphere-@ANY_VERSION@@ARCHIVE_EXT@

Please note, you can still use normal functionalities of B<uscan> to set up a
watch file for this site without using the redirector.

  version=4
  opts="pgpmode=none" \
      https://pypi.python.org/pypi/cfn-sphere/ \
      https://pypi.python.org/packages/.*/.*/.*/\
      cfn-sphere-@ANY_VERSION@@ARCHIVE_EXT@#.*

=head2 code.google.com

Sites which used to be hosted on the Google Code service should have migrated
to elsewhere (github?).  Please look for the newer upstream site if available.

=head2 npmjs.org (node modules)

npmjs.org modules are published in JSON files. Here is a way to read them:

  version=4
  opts="searchmode=plain" \
   https://registry.npmjs.org/aes-js \
   https://registry.npmjs.org/aes-js/-/aes-js-@ANY_VERSION@@ARCHIVE_EXT@

=head2 grouped package

Some node modules are split into multiple little upstream package. Here is
a way to group them:

  version=4
  opts="searchmode=plain,pgpmode=none" \
    https://registry.npmjs.org/mongodb \
    https://registry.npmjs.org/mongodb/-/mongodb-@ANY_VERSION@@ARCHIVE_EXT@ group
  opts="searchmode=plain,pgpmode=none,component=bson" \
    https://registry.npmjs.org/bson \
    https://registry.npmjs.org/bson/-/bson-@ANY_VERSION@@ARCHIVE_EXT@ group
  opts="searchmode=plain,pgpmode=none,component=mongodb-core" \
    https://registry.npmjs.org/mongodb-core \
    https://registry.npmjs.org/mongodb-core/-/mongodb-core-@ANY_VERSION@@ARCHIVE_EXT@ group
  opts="searchmode=plain,pgpmode=none,component=requireoptional" \
    https://registry.npmjs.org/require_optional \
    https://registry.npmjs.org/require_optional/-/require_optional-@ANY_VERSION@@ARCHIVE_EXT@ group

Package version is then the concatenation of upstream versions separated by
"+~".

To avoid having a too long version, the "checksum" method can be used.
In this case, the main source has to be declared as "group":

  version=4
  opts="searchmode=plain,pgpmode=none" \
    https://registry.npmjs.org/mongodb \
    https://registry.npmjs.org/mongodb/-/mongodb-@ANY_VERSION@@ARCHIVE_EXT@ group
  opts="searchmode=plain,pgpmode=none,component=bson" \
    https://registry.npmjs.org/bson \
    https://registry.npmjs.org/bson/-/bson-@ANY_VERSION@@ARCHIVE_EXT@ checksum
  opts="searchmode=plain,pgpmode=none,component=mongodb-core" \
    https://registry.npmjs.org/mongodb-core \
    https://registry.npmjs.org/mongodb-core/-/mongodb-core-@ANY_VERSION@@ARCHIVE_EXT@ checksum
  opts="searchmode=plain,pgpmode=none,component=requireoptional" \
    https://registry.npmjs.org/require_optional \
    https://registry.npmjs.org/require_optional/-/require_optional-@ANY_VERSION@@ARCHIVE_EXT@ checksum

The "checksum" is made up of the separate sum of each number composing the
component versions.  Following is an example with 3 components whose versions
are "1.2.4", "2.0.1" and "10.0", with the main tarball having version "2.0.6":

  Main: 2.0.6
  Comp1:         1 .     2 .     4
  Comp2:         2 .     0 .     1
  Comp3:        10 .     0
  ================================
  Result  : 1+2+10 . 2+0+0 .   4+1
  Checksum:     13 .     2 .     5
  ================================
  Final Version:   2.0.6+~cs13.2.5

uscan will also display the original version string before being encoded into
the checksum, which can for example be used in a debian/changelog entry to
easily follow the changes:

  2.0.6+~1.2.4+~2.0.1+~10.0

B<Note>: This feature currently accepts only versions composed of digits and
full stops (`.`).

=head2 direct access to the git repository (tags)

If the upstream only publishes its code via the git repository and its code has
no web interface to obtain the release tarball, you can use B<uscan> with the
tags of the git repository to track and package the new upstream release.

  version=4
  opts="mode=git, gitmode=full, pgpmode=none" \
  http://git.ao2.it/tweeper.git \
  refs/tags/v@ANY_VERSION@

Please note "B<git ls-remote>" is used to obtain references for tags.

If a tag B<v20.5> is the newest tag, the above example downloads
I<spkg>B<-20.5.tar.xz> after making a full clone of the git repository which is
needed for dumb git server.

If tags are signed, set B<pgpmode=gittag> to verify them.

=head2 direct access to the git repository (HEAD)

If the upstream only publishes its code via the git repository and its code has
no web interface nor the tags to obtain the released tarball, you can use
B<uscan> with the HEAD of the git repository to track and package the new
upstream release with an automatically generated version string.

  version=4
  opts="mode=git, pgpmode=none" \
  https://github.com/Debian/dh-make-golang \
  HEAD

Please note that a local shallow copy of the git repository is made with "B<git
clone --bare --depth=1> ..." normally in the target directory.  B<uscan>
generates the new upstream version with "B<git log --date=format:%Y%m%d
--pretty=0.0~git%cd.%h>" on this local copy of repository as its default
behavior.

The generation of the upstream version string may the adjusted to your taste by
adding B<pretty> and B<date> options to the B<opts> arguments.

=head2 direct access to the Subversion repository (tags)

If the upstream only publishes its code via the Subversion repository and its
code has no web interface to obtain the release tarball, you can use B<uscan>
with the tags of the Subversion repository to track and package the new upstream
release.

  version=4
  opts="mode=svn, pgpmode=none" \
  svn://svn.code.sf.net/p/jmol/code/tags/ \
  @ANY_VERSION@\/

=head2 direct access to the Subversion repository (HEAD)

If the upstream only publishes its code via the Subversion repository and its
code has no web interface to obtain the release tarball, you can use B<uscan>
to get the most recent source of a subtree in the repository with an
automatically generated version string.

  version=4
  opts="mode=svn, pgpmode=none" \
  svn://svn.code.sf.net/p/jmol/code/trunk/ \
  HEAD

By default, B<uscan> generates the new upstream version by appending the
revision number to "0.0~svn". This can later be changed using B<uversionmangle>.

=head1 COPYRIGHT FILE EXAMPLES

Here is an example for the F<debian/copyright> file which initiates automatic
repackaging of the upstream tarball into I<< <spkg>_<oversion>.orig.tar.gz >>
(In F<debian/copyright>, the B<Files-Excluded> and
B<Files-Excluded->I<component> stanzas are a part of the first paragraph and
there is a blank line before the following paragraphs which contain B<Files>
and other stanzas.):

  Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
  Files-Excluded: exclude-this
   exclude-dir
   */exclude-dir
   .*
   */js/jquery.js

   Files: *
   Copyright: ...
   ...

Here is another example for the F<debian/copyright> file which initiates
automatic repackaging of the multiple upstream tarballs into
I<< <spkg>_<oversion>.orig.tar.gz >> and
I<< <spkg>_<oversion>.orig-bar.tar.gz >>:

  Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
  Files-Excluded: exclude-this
   exclude-dir
   */exclude-dir
   .*
   */js/jquery.js
  Files-Excluded-bar: exclude-this
   exclude-dir
   */exclude-dir
   .*
   */js/jquery.js

   Files: *
   Copyright: ...
   ...

See mk-origtargz(1).

=head1 KEYRING FILE EXAMPLES

Let's assume that the upstream "B<< uscan test key (no secret)
<none@debian.org> >>" signs its package with a secret OpenPGP key and publishes
the corresponding public OpenPGP key.  This public OpenPGP key can be
identified in 3 ways using the hexadecimal form.

=over

=item * The fingerprint as the 20 byte data calculated from the public OpenPGP
key. E.  g., 'B<CF21 8F0E 7EAB F584 B7E2 0402 C77E 2D68 7254 3FAF>'

=item * The long keyid as the last 8 byte data of the fingerprint. E. g.,
'B<C77E2D6872543FAF>'

=item * The short keyid is the last 4 byte data of the fingerprint. E. g.,
'B<72543FAF>'

=back

Considering the existence of the collision attack on the short keyid, the use
of the long keyid is recommended for receiving keys from the public key
servers.  You must verify the downloaded OpenPGP key using its full fingerprint
value which you know is the trusted one.

The armored keyring file F<debian/upstream/signing-key.asc> can be created by
using the B<gpg> (or B<gpg2>) command as follows.

  $ gpg --recv-keys "C77E2D6872543FAF"
  ...
  $ gpg --finger "C77E2D6872543FAF"
  pub   4096R/72543FAF 2015-09-02
        Key fingerprint = CF21 8F0E 7EAB F584 B7E2  0402 C77E 2D68 7254 3FAF
  uid                  uscan test key (no secret) <none@debian.org>
  sub   4096R/52C6ED39 2015-09-02
  $ cd path/to/<upkg>-<uversion>
  $ mkdir -p debian/upstream
  $ gpg --export --export-options export-minimal --armor \
        'CF21 8F0E 7EAB F584 B7E2  0402 C77E 2D68 7254 3FAF' \
        >debian/upstream/signing-key.asc

The binary keyring files, F<debian/upstream/signing-key.pgp> and
F<debian/upstream-signing-key.pgp>, are still supported but deprecated.

If a group of developers sign the package, you need to list fingerprints of all
of them in the argument for B<gpg --export ...> to make the keyring to contain
all OpenPGP keys of them.

Sometimes you may wonder who made a signature file.  You can get the public
keyid used to create the detached signature file F<foo-2.0.tar.gz.asc> by
running B<gpg> as:

  $ gpg -vv foo-2.0.tar.gz.asc
  gpg: armor: BEGIN PGP SIGNATURE
  gpg: armor header: Version: GnuPG v1
  :signature packet: algo 1, keyid C77E2D6872543FAF
  	version 4, created 1445177469, md5len 0, sigclass 0x00
  	digest algo 2, begin of digest 7a c7
  	hashed subpkt 2 len 4 (sig created 2015-10-18)
  	subpkt 16 len 8 (issuer key ID C77E2D6872543FAF)
  	data: [4091 bits]
  gpg: assuming signed data in `foo-2.0.tar.gz'
  gpg: Signature made Sun 18 Oct 2015 11:11:09 PM JST using RSA key ID 72543FAF
  ...

=head1 COMMANDLINE OPTIONS

For the basic usage, B<uscan> does not require to set these options.

=over

=item B<--conffile>, B<--conf-file>

Add or replace default configuration files (C</etc/devscripts.conf> and
C<~/.devscripts>). This can only be used as the first option given on the
command-line.

=over

=item replace:

  uscan --conf-file test.conf --verbose

=item add:

  uscan --conf-file +test.conf --verbose

If one B<--conf-file> has no C<+>, default configuration files are ignored.

=back

=item B<--no-conf>, B<--noconf>

Don't read any configuration files. This can only be used as the first option
given on the command-line.

=item B<--no-verbose>

Don't report verbose information. (default)

=item B<--verbose>, B<-v>

Report verbose information.

=item B<--debug>, B<-vv>

Report verbose information and some internal state values.

=item B<--extra-debug>, B<-vvv>

Report verbose information including the downloaded
web pages as processed to STDERR for debugging.

=item B<--dehs>

Send DEHS style output (XML-type) to STDOUT, while
send all other uscan output to STDERR.

=item B<--no-dehs>

Use only traditional uscan output format. (default)

=item B<--download>, B<-d>

Download the new upstream release. (default)

=item B<--force-download>, B<-dd>

Download the new upstream release even if up-to-date. (may not overwrite the local file)

=item B<--overwrite-download>, B<-ddd>

Download the new upstream release even if up-to-date. (may overwrite the local file)

=item B<--no-download>, B<--nodownload>

Don't download and report information.

Previously downloaded tarballs may be used.

Change default to B<--skip-signature>.

=item B<--signature>

Download signature. (default)

=item B<--no-signature>

Don't download signature but verify if already downloaded.

=item B<--skip-signature>

Don't bother download signature nor verifying signature.

=item B<--safe>, B<--report>

Avoid running unsafe scripts by skipping both the repacking of the downloaded
package and the updating of the new source tree.

Change default to B<--no-download> and B<--skip-signature>.

When the objective of running B<uscan> is to gather the upstream package status
under the security conscious environment, please make sure to use this option.

=item B<--report-status>

This is equivalent of setting "B<--verbose --safe>".

=item B<--download-version> I<version>

Specify the I<version> which the upstream release must match in order to be
considered, rather than using the release with the highest version.
(a best effort feature)

=item B<--download-debversion> I<version>

Specify the Debian package version to download the corresponding upstream
release version.  The B<dversionmangle> and B<uversionmangle> rules are considered.
(a best effort feature)

=item B<--download-current-version>

Download the currently packaged version.
(a best effort feature)

=item B<--check-dirname-level> I<N>

See the below section L<Directory name checking> for an explanation of this option.

=item B<--check-dirname-regex> I<regex>

See the below section L<Directory name checking> for an explanation of this option.

=item B<--destdir> I<path>
Normally, B<uscan> changes its internal current directory to the package's
source directory where the F<debian/> is located.  Then the destination
directory for the downloaded tarball and other files is set to the parent
directory F<../> from this internal current directory.

This default destination directory can be overridden by setting B<--destdir>
option to a particular I<path>.  If this I<path> is a relative path, the
destination directory is determined in relative to the internal current
directory of B<uscan> execution. If this I<path> is a absolute path, the
destination directory is set to I<path> irrespective of the internal current
directory of B<uscan> execution.

The above is true not only for the simple B<uscan> run in the single source tree
but also for the advanced scanning B<uscan> run with subdirectories holding
multiple source trees.

One exception is when B<--watchfile> and B<--package> are used together.  For
this case, the internal current directory of B<uscan> execution and the default
destination directory are set to the current directory F<.> where B<uscan> is
started.  The default destination directory can be overridden by setting
B<--destdir> option as well.

=item B<--package> I<package>

Specify the name of the package to check for rather than examining
F<debian/changelog>; this requires the B<--upstream-version> (unless a version
is specified in the F<watch> file) and B<--watchfile> options as well.
Furthermore, no directory scanning will be done and nothing will be downloaded.
This option automatically sets B<--no-download> and B<--skip-signature>; and
probably most useful in conjunction with the DEHS system (and B<--dehs>).

=item B<--upstream-version> I<upstream-version>

Specify the current upstream version rather than examine F<debian/watch> or
F<debian/changelog> to determine it. This is ignored if a directory scan is being
performed and more than one F<debian/watch> file is found.

=item B<--watchfile> I<watchfile>

Specify the I<watchfile> rather than perform a directory scan to determine it.
If this option is used without B<--package>, then B<uscan> must be called from
within the Debian package source tree (so that F<debian/changelog> can be found
simply by stepping up through the tree).

One exception is when B<--watchfile> and B<--package> are used together.
B<uscan> can be called from anywhare and the internal current directory of
B<uscan> execution and the default destination directory are set to the current
directory F<.> where B<uscan> is started.

See more in the B<--destdir> explanation.

=item B<--bare>

Disable all site specific special case codes to perform URL redirections and
page content alterations.

=item B<--http-header>

Add specified header in HTTP requests for matching url. This option can be used
more than one time, values must be in the form "baseUrl@Name=value. Example:

  uscan --http-header https://example.org@My-Token=qwertyuiop

Security:

=over

=item The given I<baseUrl> must exactly match the base url before '/'.
Examples:

  |        --http-header value         |           Good for          | Never used |
  +------------------------------------+-----------------------------+------------+
  | https://example.org.com@Hdr=Value  | https://example.org.com/... |            |
  | https://example.org.com/@Hdr=Value |                             |     X      |
  | https://e.com:1879@Hdr=Value       | https://e.com:1879/...      |            |
  | https://e.com:1879/dir@Hdr=Value   | https://e.com:1879/dir/...  |            |
  | https://e.com:1879/dir/@Hdr=Value  |                             |     X      |

=item It is strongly recommended to not use this feature to pass a secret
token over unciphered connection I<(http://)>

=item You can use C<USCAN_HTTP_HEADER> variable (in C<~/.devscripts>) to hide
secret token from scripts

=back

=item B<--no-exclusion>

Don't automatically exclude files mentioned in F<debian/copyright> field B<Files-Excluded>.

=item B<--pasv>

Force PASV mode for FTP connections.

=item B<--no-pasv>

Don't use PASV mode for FTP connections.

=item B<--no-symlink>

Don't rename nor repack upstream tarball.

=item B<--timeout> I<N>

Set timeout to I<N> seconds (default 20 seconds).

=item B<--user-agent>, B<--useragent>

Override the default user agent header.

=item B<--help>

Give brief usage information.

=item B<--version>

Display version information.

=back

B<uscan> also accepts following options and passes them to B<mk-origtargz>:

=over

=item B<--symlink>

Make B<orig.tar.gz> (with the appropriate extension) symlink to the downloaded
files. (This is the default behavior.)

=item B<--copy>

Instead of symlinking as described above, copy the downloaded files.

=item B<--rename>

Instead of symlinking as described above, rename the downloaded files.

=item B<--repack>

After having downloaded an lzma tar, xz tar, bzip tar, gz tar, zip, jar, xpi,
zstd archive, repack it to the specified compression (see B<--compression>).

The unzip package must be installed in order to repack zip and jar archives,
the mozilla-devscripts package must be installed to repack xpi archives,
the xz-utils package must be installed to repack lzma or xz tar archives, and
zstd must be installed to repack zstd archives.

=item B<--compression> [ B<gzip> | B<bzip2> | B<lzma> | B<xz> ]

In the case where the upstream sources are repacked (either because B<--repack>
option is given or F<debian/copyright> contains the field B<Files-Excluded>),
it is possible to control the compression method via the parameter.  The
default is B<gzip> for normal tarballs, and B<xz> for tarballs generated
directly from the git repository.

=item B<--copyright-file> I<copyright-file>

Exclude files mentioned in B<Files-Excluded> in the given I<copyright-file>.
This is useful when running B<uscan> not within a source package directory.

=back

=head1 DEVSCRIPT CONFIGURATION VARIABLES

For the basic usage, B<uscan> does not require to set these configuration
variables.

The two configuration files F</etc/devscripts.conf> and F<~/.devscripts> are
sourced by a shell in that order to set configuration variables. These
may be overridden by command line options. Environment variable settings are
ignored for this purpose. If the first command line option given is
B<--noconf>, then these files will not be read. The currently recognized
variables are:

=over

=item B<USCAN_DOWNLOAD>

Download or report only:

=over

=item B<no>: equivalent to B<--no-download>, newer upstream files will
not be downloaded.

=item B<yes>: equivalent to B<--download>, newer upstream files will
be downloaded. This is the default behavior.

See also B<--force-download> and B<--overwrite-download>.

=back

=item B<USCAN_SAFE>

If this is set to B<yes>, then B<uscan> avoids running unsafe scripts by
skipping both the repacking of the downloaded package and the updating of the
new source tree; this is equivalent to the B<--safe> options; this also sets
the default to B<--no-download> and B<--skip-signature>.

=item B<USCAN_PASV>

If this is set to yes or no, this will force FTP connections to use PASV mode
or not to, respectively. If this is set to default, then B<Net::FTP(3)> makes
the choice (primarily based on the B<FTP_PASSIVE> environment variable).

=item B<USCAN_TIMEOUT>

If set to a number I<N>, then set the timeout to I<N> seconds. This is
equivalent to the B<--timeout> option.

=item B<USCAN_SYMLINK>

If this is set to no, then a I<pkg>_I<version>B<.orig.tar.{gz|bz2|lzma|xz}>
symlink will not be made (equivalent to the B<--no-symlink> option). If it is
set to B<yes> or B<symlink>, then the symlinks will be made. If it is set to
B<rename>, then the files are renamed (equivalent to the B<--rename> option).

=item B<USCAN_DEHS_OUTPUT>

If this is set to B<yes>, then DEHS-style output will be used. This is
equivalent to the B<--dehs> option.

=item B<USCAN_VERBOSE>

If this is set to B<yes>, then verbose output will be given.  This is
equivalent to the B<--verbose> option.

=item B<USCAN_USER_AGENT>

If set, the specified user agent string will be used in place of the default.
This is equivalent to the B<--user-agent> option.

=item B<USCAN_DESTDIR>

If set, the downloaded files will be placed in this  directory.  This is
equivalent to the B<--destdir> option.

=item B<USCAN_REPACK>

If this is set to yes, then after having downloaded a bzip tar, lzma tar, xz
tar, zip or zstd archive, uscan will repack it to the specified compression
(see B<--compression>). This is equivalent to the B<--repack> option.

=item B<USCAN_EXCLUSION>

If this is set to no, files mentioned in the field B<Files-Excluded> of
F<debian/copyright> will be ignored and no exclusion of files will be tried.
This is equivalent to the B<--no-exclusion> option.

=item B<USCAN_HTTP_HEADER>

If set, the specified http header will be used if URL match. This is equivalent
to B<--http-header> option.

=back

=head1 EXIT STATUS

The exit status gives some indication of whether a newer version was found or
not; one is advised to read the output to determine exactly what happened and
whether there were any warnings to be noted.

=over

=item B<0>

Either B<--help> or B<--version> was used, or for some F<watch> file which was
examined, a newer upstream version was located.

=item B<1>

No newer upstream versions were located for any of the F<watch> files examined.

=back

=head1 ADVANCED FEATURES

B<uscan> has many other enhanced features which are skipped in the above
section for the simplicity.  Let's check their highlights.

B<uscan> can be executed with I<path> as its argument to change the starting
directory of search from the current directory to I<path> .

If you are not sure what exactly is happening behind the scene, please enable
the B<--verbose> option.  If this is not enough, enable the B<--debug> option
too see all the internal activities.

See L<COMMANDLINE OPTIONS> and L<DEVSCRIPT CONFIGURATION VARIABLES> for other
variations.

=head2 Custom script

The optional I<script> parameter in F<debian/watch> means to execute I<script>
with options after processing this line if specified.

See L<HISTORY AND UPGRADING> for how B<uscan> invokes the custom I<script>.

For compatibility with other tools such as B<git-buildpackage>, it may not be
wise to create custom scripts with random behavior.  In general, B<uupdate> is
the best choice for the non-native package and custom scripts, if created,
should behave as if B<uupdate>.  For possible use case, see
L<http://bugs.debian.org/748474> as an example.

=head2 URL diversion

Some popular web sites changed their web page structure causing maintenance
problems to the watch file.  There are some redirection services created to
ease maintenance of the watch file.  Currently, B<uscan> makes automatic
diversion of URL requests to the following URLs to cope with this situation.

=over

=item * L<http://sf.net>

=item * L<http://pypi.python.org>

=back

=head2 Directory name checking

Similarly to several other scripts in the B<devscripts> package, B<uscan>
explores the requested directory trees looking for F<debian/changelog> and
F<debian/watch> files. As a safeguard against stray files causing potential
problems, and in order to promote efficiency, it will examine the name of the
parent directory once it finds the F<debian/changelog> file, and check that the
directory name corresponds to the package name. It will only attempt to
download newer versions of the package and then perform any requested action if
the directory name matches the package name. Precisely how it does this is
controlled by two configuration file variables
B<DEVSCRIPTS_CHECK_DIRNAME_LEVEL> and B<DEVSCRIPTS_CHECK_DIRNAME_REGEX>, and
their corresponding command-line options B<--check-dirname-level> and
B<--check-dirname-regex>.

B<DEVSCRIPTS_CHECK_DIRNAME_LEVEL> can take the following values:

=over

=item B<0>

Never check the directory name.

=item B<1>

Only check the directory name if we have had to change directory in
our search for F<debian/changelog>, that is, the directory containing
F<debian/changelog> is not the directory from which B<uscan> was invoked.
This is the default behavior.

=item B<2>

Always check the directory name.

=back

The directory name is checked by testing whether the current directory name (as
determined by pwd(1)) matches the regex given by the configuration file
option B<DEVSCRIPTS_CHECK_DIRNAME_REGEX> or by the command line option
B<--check-dirname-regex> I<regex>. Here regex is a Perl regex (see
perlre(3perl)), which will be anchored at the beginning and the end. If regex
contains a B</>, then it must match the full directory path. If not, then
it must match the full directory name. If regex contains the string I<package>,
this will be replaced by the source package name, as determined from the
F<debian/changelog>. The default value for the regex is: I<package>B<(-.+)?>, thus matching
directory names such as I<package> and I<package>-I<version>.

=head1 HISTORY AND UPGRADING

This section briefly describes the backwards-incompatible F<watch> file features
which have been added in each F<watch> file version, and the first version of the
B<devscripts> package which understood them.

=over

=item Pre-version 2

The F<watch> file syntax was significantly different in those days. Don't use it.
If you are upgrading from a pre-version 2 F<watch> file, you are advised to read
this manpage and to start from scratch.

=item Version 2

B<devscripts> version 2.6.90: The first incarnation of the current style of
F<watch> files. This version is also deprecated and will be rejected after
the Debian 11 release.

=item Version 3

B<devscripts> version 2.8.12: Introduced the following: correct handling of
regex special characters in the path part, directory/path pattern matching,
version number in several parts, version number mangling. Later versions
have also introduced URL mangling.

If you are upgrading from version 2, the key incompatibility is if you have
multiple groups in the pattern part; whereas only the first one would be used
in version 2, they will all be used in version 3. To avoid this behavior,
change the non-version-number groups to be B<(?:> I< ...> B<)> instead of a
plain B<(> I< ... > B<)> group.

=over

=item * B<uscan> invokes the custom I<script> as "I<script> B<--upstream-version>
I<version> B<../>I<spkg>B<_>I<version>B<.orig.tar.gz>".

=item * B<uscan> invokes the standard B<uupdate> as "B<uupdate> B<--no-symlink
--upstream-version> I<version> B<../>I<spkg>B<_>I<version>B<.orig.tar.gz>".

=back

=item Version 4

B<devscripts> version 2.15.10: The first incarnation of F<watch> files
supporting multiple upstream tarballs.

The syntax of the watch file is relaxed to allow more spaces for readability.

If you have a custom script in place of B<uupdate>, you may also encounter
problems updating from Version 3.

=over

=item * B<uscan> invokes the custom I<script> as "I<script> B<--upstream-version>
I<version>".

=item * B<uscan> invokes the standard B<uupdate> as "B<uupdate> B<--find>
B<--upstream-version> I<version>".

=back

Restriction for B<--dehs> is lifted by redirecting other output to STDERR when
it is activated.

=back

=head1 SEE ALSO

dpkg(1), mk-origtargz(1), perlre(1), uupdate(1), devscripts.conf(5)

=head1 AUTHOR

The original version of uscan was written by Christoph Lameter
<clameter@debian.org>. Significant improvements, changes and bugfixes were
made by Julian Gilbey <jdg@debian.org>. HTTP support was added by Piotr
Roszatycki <dexter@debian.org>. The program was rewritten in Perl by Julian
Gilbey. Xavier Guimard converted it in object-oriented Perl using L<Moo>.

=cut

#######################################################################
# }}} code 0: POD for manpage
#######################################################################
#######################################################################
# {{{ code 1: initializer, command parser, and loop over watchfiles
#######################################################################

# This code block is the start up of uscan.
# Actual processing is performed by process_watchfile in the next block
#
# This has 3 different modes to process watchfiles
#
#  * If $opt_watchfile and $opt_package are defined, test specified watchfile
#    without changelog (sanity check for $opt_uversion may be good idea)
#  * If $opt_watchfile is defined but $opt_package isn't defined, test specified
#    watchfile assuming you are in source tree and debian/changelogis used to
#    set variables
#  * If $opt_watchfile isn't defined, scan subdirectories of directories
#    specified as ARGS (if none specified, "." is scanned).
#    * Normal packaging has no ARGS and uses "."
#    * Archive status scanning tool uses many ARGS pointing to the expanded
#      source tree to be checked.
# Comments below focus on Normal packaging case and sometimes ignores first 2
# watch file testing setup.

use 5.010;    # defined-or (//)
use strict;
use warnings;
use Cwd qw/cwd/;
use Devscripts::Uscan::Config;
use Devscripts::Uscan::FindFiles;
use Devscripts::Uscan::Output;
use Devscripts::Uscan::WatchFile;

our $uscan_version = "2.22.1ubuntu1";

BEGIN {
    pop @INC if $INC[-1] eq '.';
}

my $config = Devscripts::Uscan::Config->new->parse;

uscan_verbose "$progname (version $uscan_version) See $progname(1) for help";
if ($dehs) {
    uscan_verbose "The --dehs option enabled.\n"
      . "        STDOUT = XML output for use by other programs\n"
      . "        STDERR = plain text output for human\n"
      . "        Use the redirection of STDOUT to a file to get the clean XML data";
}

# Did we find any new upstream versions on our wanderings?
my $res = 0;

my @wf = find_watch_files($config);
foreach (@wf) {
    my $tmp = process_watchfile(@$_);
    $res ||= $tmp;

    # Are there any warnings to give if we're using dehs?
    dehs_output if ($dehs);
}

uscan_verbose "Scan finished";

# Are there any warnings to give if we're using dehs?
$dehs_end_output = 1;
dehs_output if ($dehs);

exit($res ? $res : $found ? 0 : 1);

#######################################################################
# {{{ code 2: process watchfile by looping over watchline
#######################################################################

sub process_watchfile {
    my ($pkg_dir, $package, $version, $watchfile) = @_;
    my $opwd = cwd();
    chdir $pkg_dir;

    my $wf = Devscripts::Uscan::WatchFile->new({
        config      => $config,
        package     => $package,
        pkg_dir     => $pkg_dir,
        pkg_version => $version,
        watchfile   => $watchfile,
    });
    return $wf->status if ($wf->status);

    my $res = $wf->process_lines;
    chdir $opwd;
    return $res;
}
#######################################################################
# }}} code 2: process watchfile by looping over watchline
#######################################################################