#!/usr/bin/perl

# SPDX-License-Identifier: MPL-2.0
# SPDX-FileCopyrightText: 2020-2025 Collabora Ltd.
# SPDX-FileCopyrightText: 2020-2025 Walter Lozano <walter.lozano@collabora.com>
# SPDX-FileCopyrightText: 2022 Andre Moreira Magalhaes <andre.magalhaes@collabora.com>
# SPDX-FileCopyrightText: 2022-2024 Ryan Gonzalez <ryan.gonzalez@collabora.com>
# SPDX-FileCopyrightText: 2025 Dylan Aïssi <dylan.aissi@collabora.com>

=head1 NAME

dh_setup_copyright - extract source file names from DWARF debug information in ELF binaries via dwarf2sources

=cut

use strict;
use warnings;
use Digest::SHA;
use File::Basename;
use File::Copy;
use File::Find;
use File::Path;
use File::Spec;
use JSON::PP;
use Debian::Copyright;
use Debian::Debhelper::Dh_Lib;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use List::MoreUtils 'uniq';
use Text::Glob;

our $VERSION = DH_BUILTIN_VERSION;

=head1 SYNOPSIS

B<dh_setup_copyright> [S<I<debhelper options>>] [B<-X>I<item>] [B<--copyrightfile=>I<file>] [B<--blacklist=>I<packages>] [B<--buildsuffix=>I<regex>] [S<B<--> I<params>>]

=head1 DESCRIPTION

B<dh_setup_copyright> is a debhelper program that generates the list of source
file names used to build every binary and pulls the licenses from any sources
that are part of other packages. The source file list is extracted from DWARF
debug information by running L<dwarf2sources(1)> on every ELF binary in the
package and saving the output to
/usr/share/doc/<package>/<binary>_bin2sources_<arch>.json
and /usr/share/doc/<package>/<binary>_metadata_<arch>.json.

/usr/share/doc/<package>/<binary>_bin2sources_<arch>.json lists all source files
used to build a binary by extracting data from DWARF debug information.

/usr/share/doc/<package>/<binary>_metadata_<arch>.json lists all source files,
licenses and copyrights, as well as the other source packages used during the
build together with their versions.

=head1 OPTIONS

=over 4

=item B<-X>I<item>, B<--exclude=>I<item>

Exclude files that contain I<item> anywhere in their filename from being
extracted. You may use this option multiple times to build up a list of
things to exclude.

=item B<--copyrightfile=>I<file>

Path to the copyright file, usually debian/copyright.

=item B<--blacklist=>I<packages>

List of binary packages to ignore. These packages won't be processed.

=item B<--buildsuffix=>I<regex>

Build suffix to strip off of package versions.

=back

=head1 NOTES

If the B<DEB_BUILD_OPTIONS> environment variable contains B<nodwarf2sources>,
this procedure is skipped.

=cut

# Parse the command line. On top of the standard debhelper options, register
# the three options specific to this helper; each one takes a string value
# that is stored in %dh under the given key.
init(options => {
	"copyrightfile=s" => \$dh{COPYRIGHTFILE},
	"blacklist=s" => \$dh{BLACKLIST},
	"buildsuffix=s" => \$dh{BUILDSUFFIX},
});

# The `nodwarf2sources` build option turns off file name extraction: exit
# successfully without doing anything.
exit 0 if get_buildoption('nodwarf2sources');

# Placeholder package name used for sources that belong to the package being
# built; get_copyright_report_files() resolves it to $DEB_COPYRIGHT instead of
# an installed copyright report.
use constant CURRENT_PACKAGE => "<CURRENT_PACKAGE>";

# Drop mapping of external sources to packages
my $DROP_EXTERNAL_SOURCES_MAP = 1;
# Drop source list, we need to keep this for statically linked packages
# as this mapping will help us to find the copyright of the source files
my $DROP_SOURCES = 0;
# Force the run of the script if output is detected (development)
my $FORCE_PROCESS = 0;
# Skip copyright inclusion for documents
my $SKIP_DOC = 1;
# Skip dwarf2sources extraction (development)
my $SKIP_EXTRACT = 0;
# Rootfs used for testing (development)
my $TEST_ROOTFS = "";

# Marker stored as license/copyright when nothing could be determined.
my $NO_INFO_FOUND = "NoInfoFound";

# Names given via --blacklist. The option arrives as one string, so split it
# on commas and/or whitespace to support more than one name; a single name
# keeps working as before.
my @BLACKLIST = ();
if (defined($dh{BLACKLIST})) {
	@BLACKLIST = grep { length } split /[\s,]+/, $dh{BLACKLIST};
}

# Process-wide memoization tables, keyed respectively by file path, binary
# package name, metadata file name, source path and package name.
my %external_files_to_packages_cache = ();
my %packages_to_srcpackages_cache = ();
my %metadata_cache = ();
my %copyright_cache = ();
my %copyright_report_cache = ();

# Copyright file of the package being built, overridable with --copyrightfile.
my $DEB_COPYRIGHT = 'debian/copyright';
if (defined($dh{COPYRIGHTFILE})) {
	$DEB_COPYRIGHT = $dh{COPYRIGHTFILE};
}

# Regex removed from source package versions: --buildsuffix wins, otherwise
# fall back to the one offered by Dh_Lib when that function exists.
my $BUILD_SUFFIX_RE = "";
if (defined($dh{BUILDSUFFIX})) {
	$BUILD_SUFFIX_RE = $dh{BUILDSUFFIX};
} elsif (defined &Debian::Debhelper::Dh_Lib::BUILD_SUFFIX_RE) {
	$BUILD_SUFFIX_RE = Debian::Debhelper::Dh_Lib::BUILD_SUFFIX_RE();
}

# Canonical (sorted keys) UTF-8 encoder used to write the metadata files.
my $canonical_json = JSON::PP->new->utf8->canonical;

# State shared between find_local() and its File::Find callback find_test():
# the files collected so far and the "dev,ino" keys of excluded locations.
my @find_results = ();
my %find_exclude_stat = ();

# Tells whether $fn must be skipped by find_test().
#
# A file is excluded when debhelper's excludefile() says so (-X option) or
# when its device/inode pair was registered in %find_exclude_stat by
# find_local(). Returns 1 when excluded, 0 otherwise.
sub find_is_excluded {
	my ($fn) = @_;
	return 1 if excludefile($fn);

	my ($dev, $ino) = stat $fn;
	# stat can fail (e.g. the file vanished); such a file cannot be one of
	# the registered locations, and undef must not be used as a hash key.
	return 0 if !defined $dev;

	return defined $find_exclude_stat{$dev, $ino} ? 1 : 0;
}

# File::Find `wanted` callback registered by find_local(). Because find() is
# invoked with no_chdir, $_ holds the path including the directory. Every
# regular (non-symlink, non-directory, non-excluded) entry is appended to
# @find_results.
sub find_test {
	my $fn = $_;
	return if -l $fn; # Always skip symlinks.

	# See if we were asked to exclude this file.
	# Note that we have to test on the full filename, including directory.
	if (find_is_excluded($fn)) {
		# `_` reuses the stat buffer filled by the most recent file test or
		# stat on $fn; an excluded directory is pruned so that find() does
		# not descend into it.
		$File::Find::prune = 1 if -d _;
		return;
	}
	# Directories themselves are never reported, only the files below them.
	return if -d _;
	push(@find_results, $fn);
	return;
}

# Wrapper over `find` that avoids callers needing to interact with any globals
# and adds the ability to exclude extra locations.
#
# $dir     - directory to scan.
# $options - optional hash ref; `exclude` is an array ref of paths that must
#            be skipped (together with everything below them).
#
# Returns the list of files found, symlinks and directories left out.
sub find_local {
	my ($dir, $options) = @_;

	@find_results = ();
	%find_exclude_stat = ();
	if (defined $options) {
		my $exclude = $options->{'exclude'} || [];
		foreach my $path (@$exclude) {
			my ($dev, $ino) = stat $path;
			# A location that does not exist has nothing to exclude.
			# Registering it anyway would create an "undef,undef" key that
			# matches every file whose stat fails later on.
			next if !defined $dev;
			$find_exclude_stat{$dev, $ino} = 1;
		}
	}
	find({
		wanted => \&find_test,
		no_chdir => 1,
	}, $dir);

	return @find_results;
}

# Loads and decodes the JSON metadata file $fname_metadata.
#
# Results, including failures, are memoized in %metadata_cache so each file
# is read at most once. Returns the decoded data structure, or undef when the
# file cannot be opened or does not contain valid JSON (a message is printed
# to STDERR in both cases).
sub open_metadata {
	my ($fname_metadata) = @_;

	if (!exists $metadata_cache{$fname_metadata}) {
		my $metadata;

		if (open my $metadata_handle, "<", $fname_metadata) {
			my $metadata_data = do { local $/; <$metadata_handle> };
			close $metadata_handle;
			# A broken metadata file of another package is treated like a
			# missing one instead of aborting the whole helper.
			if (!eval { $metadata = decode_json($metadata_data); 1 }) {
				print STDERR "Failed to parse $fname_metadata: $@";
				$metadata = undef;
			}
		}
		else {
			print STDERR "Failed to open $fname_metadata: $!\n";
		}
		$metadata_cache{$fname_metadata} = $metadata;
	}

	return $metadata_cache{$fname_metadata};
}

# Runs dwarf2sources over @binaries and stores its output in $fname_list.
#
# All paths are relative to $tmpfolder and both commands are executed with
# that folder as working directory, so the file names end up exactly as they
# will be found in the rootfs.
sub extract_debug_info {
	my ($fname_list, $tmpfolder, @binaries) = @_;

	# Get rid of any list left over from a previous run.
	my @remove_old_list = ("rm", "-f", $fname_list);
	doit_noerror({chdir => $tmpfolder}, @remove_old_list);

	print "Extracting debug information to $fname_list from @binaries in tmp folder $tmpfolder\n";

	my @extract = ("dwarf2sources", "-o", $fname_list, @binaries);
	doit_noerror({chdir => $tmpfolder}, @extract);
}

# Indexes the files below the current directory (the `debian` folder left
# out) by their base name.
#
# Returns a hash ref mapping each base name to an array ref with the
# canonical relative paths of all the files carrying that name.
sub collect_sources_by_basename {
	my %sources_by_basename;

	for my $found (find_local('.', { 'exclude' => ['debian'] })) {
		# Clean up stuff like `./`.
		my $clean_path = File::Spec->canonpath($found);
		push @{ $sources_by_basename{basename($clean_path)} }, $clean_path;
	}

	return \%sources_by_basename;
}

# Counts how many trailing path components the two arguments have in common.
#
# common_path_suffix_length('a/b/c', 'b/c') # => 2 (for 'b/c')
sub common_path_suffix_length {
	my ($left, $right) = @_;
	my @left_parts = split '/', $left;
	my @right_parts = split '/', $right;

	# Compare from the tail, stopping at the first difference or as soon as
	# the shorter path is exhausted.
	my $shared = 0;
	while (@left_parts && @right_parts) {
		last if pop(@left_parts) ne pop(@right_parts);
		$shared++;
	}
	return $shared;
}

# Adjusts a path taken from a compilation unit to cope with the way
# different compilers generate them. Returns the adjusted path.
sub tweak_path {
	my ($original) = @_;
	my $tweaked = $original;

	# rust-coreutils keeps the cargo registry in a different place, which
	# makes it difficult to find the source package based on the path, so
	# map the file to the location where the registry is expected.
	$tweaked =~ s!^.*/debian/cargo_registry/!/usr/share/cargo/registry/!g;

	return $tweaked;
}

# Decides which path to record for a compilation unit.
#
# $comp_dir      - compilation directory of the unit.
# $comp_name     - name of the unit, absolute or relative to $comp_dir.
# $possible_path - candidate file from the local source tree, or "".
#
# When the candidate and the unit name contain one another, the candidate is
# returned. Otherwise the canonical absolute path of the unit, adjusted by
# tweak_path(), is returned.
sub check_possible_path {
	my ($comp_dir, $comp_name, $possible_path) = @_;

	# Plain substring checks: the paths are data, not patterns, so names
	# such as "c++" or "foo.c" must not be interpreted as regexes.
	if ($possible_path ne ""
		&& (index($possible_path, $comp_name) != -1
			|| index($comp_name, $possible_path) != -1)) {
		return $possible_path;
	}

	my $comp_path = $comp_name =~ m{^/}
		? File::Spec->canonpath($comp_name)
		: File::Spec->canonpath(File::Spec->catfile($comp_dir, $comp_name));

	return tweak_path($comp_path);
}

# Finds the source file that corresponds to a compilation unit.
#
# $comp_dir            - compilation directory of the unit.
# $comp_name           - name of the unit.
# $sources_by_basename - hash ref mapping a base name to an array ref of
#                        local files (see collect_sources_by_basename()).
#
# Among the local files sharing the unit's base name, the one with the
# longest common path suffix is handed to check_possible_path(); on a tie the
# first candidate wins. Without any candidate, the path is derived from the
# unit alone.
sub find_source_path {
	my ($comp_dir, $comp_name, $sources_by_basename) = @_;

	my $possible_paths = $sources_by_basename->{basename($comp_name)};
	if (!defined $possible_paths || !@$possible_paths) {
		return check_possible_path($comp_dir, $comp_name, "");
	}

	my $best_path = $possible_paths->[0];
	if (@$possible_paths > 1) {
		my $unit_path = File::Spec->catfile($comp_dir, $comp_name);
		my $best_score = -1;
		foreach my $candidate (@$possible_paths) {
			my $score = common_path_suffix_length($unit_path, $candidate);
			# Strictly greater, so that the first of several equally good
			# candidates is kept.
			if ($score > $best_score) {
				$best_score = $score;
				$best_path = $candidate;
			}
		}
	}

	return check_possible_path($comp_dir, $comp_name, $best_path);
}

# Replaces, in place, the contents of the array referenced by $list with its
# distinct elements in ascending string order.
sub sort_and_uniq {
	my ($list) = @_;

	my @distinct = uniq(@{$list});
	@{$list} = sort { $a cmp $b } @distinct;
}

# Function to parse the information generated from dwarf2sources
# and based on it generate the metadata information needed to generate SBOMs.
#
# $fname_list          - JSON file written by dwarf2sources.
# $sources_by_basename - index of the local sources by base name.
# $metadata            - hash ref; $metadata->{contents}{<binary>}{sources}
#                        receives the sorted, de-duplicated source list of
#                        every binary found in $fname_list.
#
# Prints a warning and returns without touching $metadata when $fname_list
# does not exist; dies when it cannot be opened.
sub process_debug_info {
	my ($fname_list, $sources_by_basename, $metadata) = @_;

	print "Processing debug information\n";

	if (! -e $fname_list) {
		print STDERR "WARNING: $fname_list is missing, dwarf2sources extraction failed\n";
		return;
	}

	open my $sources_handle, '<', $fname_list or die "Failed to open $fname_list: $!";
	my $sources_data = do { local $/; <$sources_handle> };
	close $sources_handle;

	my $sources = decode_json $sources_data;
	my $sources_changed = 0;

	# Resolved path of every compilation unit seen so far (undef when it
	# turned out to be a directory). This saves resolving a unit twice while
	# still listing it for every binary that contains it, so the result does
	# not depend on the order in which the binaries are visited.
	my %resolved_units;

	while (my ($file, $info) = each %$sources) {
		my @source_list = ();
		foreach my $unit (@{$info->{'units'}}) {
			# Only announce the stripping once.
			if (($unit->{'comp_name'} =~ s|/@/.*||) && !$sources_changed) {
				$sources_changed = 1;
				print "Stripping trailing '/@/...' from Rust compilation units\n";
			}

			my $comp_dir = $unit->{'comp_dir'};
			my $comp_name = $unit->{'comp_name'};
			# NUL cannot be part of a path, so the key is unambiguous.
			my $key = join "\0", $comp_dir, $comp_name;

			if (!exists $resolved_units{$key}) {
				my $resolved = find_source_path($comp_dir, $comp_name, $sources_by_basename);
				$resolved_units{$key} = (-d $resolved) ? undef : $resolved;
			}

			my $source_path = $resolved_units{$key};
			next if !defined $source_path;

			push(@source_list, $source_path);
		}
		foreach my $source_path (@{$info->{'external_files'}}) {
			$source_path = File::Spec->canonpath($source_path);
			next if -d $source_path;
			push @source_list, tweak_path($source_path);
		}
		sort_and_uniq(\@source_list);
		$metadata->{"contents"}->{$file} = {};
		$metadata->{"contents"}->{$file}{'sources'} = \@source_list;
	}

	return;
}

# Returns the name of the installed binary package that ships $path, or
# undef (after printing a warning) when there is none.
#
# Answers are memoized in %external_files_to_packages_cache. On a hit, all
# the files of the package found are added to the cache as well, since more
# lookups for the same package are likely to follow.
sub find_package_providing_path {
	my ($path) = @_;

	# Cache value meaning "already looked up, no package provides it".
	use constant PATH_HAS_NO_PACKAGE => '';

	my $package = $external_files_to_packages_cache{$path};
	if (defined $package) {
		return $package eq PATH_HAS_NO_PACKAGE ? undef : $package;
	}

	# Skip non-existent files, since dpkg-query only works on installed packages
	# anyway.
	if (-e $path) {
		# Single-quote the path for the shell so that characters such as
		# `$`, `"` or backticks in a file name are passed through verbatim.
		(my $quoted_path = $path) =~ s/'/'\\''/g;
		($package) = `dpkg-query -S '$quoted_path' 2>/dev/null` =~ /^([^\s:]+):(?:[^\s:]+:)? /;
	}
	if (!defined $package) {
		$external_files_to_packages_cache{$path} = PATH_HAS_NO_PACKAGE;
		print STDERR "WARNING: $path was not found in any package\n";
		return undef;
	}

	# Most likely there will be more files from this package, so cache its
	# contents for lookup later.
	foreach my $pkgfile (split "\n", `dpkg -L "$package"`) {
		# Exclude directories to avoid filling up the cache with directories
		# like '/' that will be in multiple packages and thus trip up the
		# checks below.
		next if -d $pkgfile;

		my $existing = $external_files_to_packages_cache{$pkgfile};
		if (!defined $existing) {
			$external_files_to_packages_cache{$pkgfile} = $package;
		} elsif ($existing ne $package && $existing ne PATH_HAS_NO_PACKAGE) {
			# Only a different, real package is a conflict; seeing the same
			# package again or a negative cache entry is not.
			print STDERR "WARNING: $pkgfile is in multiple packages:",
				" '$existing' and '$package'\n";
		}
	}

	# The queried path may be spelled differently from the package's file
	# list; remember it too so the next lookup does not run dpkg-query again.
	$external_files_to_packages_cache{$path} = $package;

	$package;
}

# Returns the list (source package name, source version) for the installed
# binary package $package, or an empty list when dpkg-query does not report
# a version for it. Successful answers are memoized in
# %packages_to_srcpackages_cache.
#
# The version has the build suffix ($BUILD_SUFFIX_RE) stripped when one is
# configured.
sub find_source_package_for_package {
	my ($package) = @_;

	my $cached = $packages_to_srcpackages_cache{$package};
	return @$cached if defined $cached;

	my ($version, $srcpackage, $srcversion) =
		`dpkg-query -f '\${Version} \${Source}' -W '$package'`
		# The Source field is empty when the source package has the same
		# name as the binary one, so everything after the version is
		# optional. Note that the source version *might* be in the Source
		# field, so make sure to extract it if so.
		=~ /^(\S+)(?: (\S+)(?: \((.*)\))?)?/;
	return if !defined $version;

	$srcpackage = $package if !defined $srcpackage || !length $srcpackage;
	$srcversion = $version if !defined $srcversion || !length $srcversion;

	if (length $BUILD_SUFFIX_RE) {
		# Strip off the build suffix.
		$srcversion =~ s/$BUILD_SUFFIX_RE//;
	}

	$packages_to_srcpackages_cache{$package} = [$srcpackage, $srcversion];
	return ($srcpackage, $srcversion);
}

# Records in $metadata->{referenced_source_packages} that source package
# $srcpackage (at $srcversion) is referenced because of $origin.
#
# A new package gets an entry with its version and $origin as sole origin;
# for a known package, $origin is appended to its origins. Returns 1 when
# something was added and 0 when that origin was already recorded.
sub add_referenced_srcpackage {
	my ($metadata, $srcpackage, $srcversion, $origin) = @_;

	my $referenced_source_packages = $metadata->{'referenced_source_packages'};
	my $dest = $referenced_source_packages->{$srcpackage};
	if (defined $dest) {
		# Exact string comparison: the origin is a label, not a pattern.
		return 0 if grep { $_ eq $origin } @{$dest->{'origins'}};
		push @{$dest->{'origins'}}, $origin;
	} else {
		$referenced_source_packages->{$srcpackage} = {
			'version' => $srcversion,
			'origins' => [$origin],
		};
	}

	return 1;
}

# Maps every external (absolute) source file listed in $metadata->{contents}
# to the binary package that ships it and records the matching source
# package.
#
# $metadata - hash ref; `external_sources_to_packages` and
#             `referenced_source_packages` are filled in.
# $arch     - unused, kept for interface compatibility.
sub process_external_sources {
	my ($metadata, $arch) = @_;

	print "Processing external sources\n";

	my $contents = $metadata->{'contents'};
	my $external_sources_to_packages = $metadata->{'external_sources_to_packages'};
	my $added_referenced_srcpackages = 0;

	while (my ($artifact, $data) = each %$contents) {
		foreach my $source_file (@{$data->{"sources"}}) {
			next if (is_local_file($source_file));
			# Already mapped while handling another artifact.
			next if defined $external_sources_to_packages->{$source_file};
			my $package = find_package_providing_path($source_file);
			unless (defined $package) {
				print STDERR "WARNING: unable to find package providing $source_file\n";
				next;
			}
			$external_sources_to_packages->{$source_file} = $package;
			my ($srcpackage, $srcversion) = find_source_package_for_package($package);
			unless (defined $srcpackage) {
				print STDERR "WARNING: unable to find source package for $package\n";
				next;
			}
			$added_referenced_srcpackages += add_referenced_srcpackage($metadata,
				$srcpackage, $srcversion, 'external_files');
		}
	}

	# `scalar keys`, because `scalar %hash` only yields the number of
	# entries since Perl 5.26 (before that it is a bucket usage string).
	my $count = scalar keys %$external_sources_to_packages;
	if ($count) {
		print "Adding metadata for $count external source file(s)\n";
	}
	if ($added_referenced_srcpackages) {
		print "Adding metadata for $added_referenced_srcpackages source package(s)",
			" from external_files\n";
	}
}

# Records the source packages of the shared libraries the binaries link to.
#
# $tmpdir   - package build directory holding the binaries.
# $binaries - array ref with binary paths relative to $tmpdir.
# $metadata - hash ref; `referenced_source_packages` is filled in with
#             origin 'shared_libraries'.
#
# Libraries are resolved with ldd; those shipped by the package itself are
# ignored.
sub scan_binary_shlibs {
	my ($tmpdir, $binaries, $metadata) = @_;

	print "Scanning shared libraries\n";

	my $added_referenced_srcpackages = 0;

	foreach my $binary (@$binaries) {
		# Single-quote the file name for the shell so that characters such
		# as `$`, `"` or backticks in it are passed through verbatim.
		(my $quoted_binary = "$tmpdir/$binary") =~ s/'/'\\''/g;
		foreach my $shlib (`LD_PRELOAD= ldd '$quoted_binary'` =~ /=> (\/\S+)/g) {
			# Ignore shlibs part of this package.
			next if -e "$tmpdir/$shlib";

			my $package = find_package_providing_path($shlib);
			next if !defined $package;

			my ($srcpackage, $srcversion) = find_source_package_for_package($package);
			next if !defined $srcpackage;

			$added_referenced_srcpackages += add_referenced_srcpackage($metadata,
				$srcpackage, $srcversion, 'shared_libraries');
		}
	}

	if ($added_referenced_srcpackages) {
		print "Adding metadata for $added_referenced_srcpackages source package(s)",
			" from shared library scanning\n";
	}
}

# Detects package files that are verbatim copies of files from the source
# tree (headers, scripts, ...) and records where they come from.
#
# $tmpdir              - package build directory.
# $others              - array ref with the non-ELF files, relative to
#                        $tmpdir.
# $sources_by_basename - index of the local sources by base name.
# $metadata            - hash ref; $metadata->{contents}{<file>}{sources}
#                        is set to the matching source for every copy found.
#
# A file matches a source with the same base name and the same SHA-256
# digest; among several, the one sharing the longest path suffix is used.
sub build_copied_sources_map {
	my ($tmpdir, $others, $sources_by_basename, $metadata) = @_;

	print "Building copied source map\n";

	my $contents = $metadata->{'contents'};
	my $count = 0;
	# Digest of every source hashed so far; the same source is a candidate
	# for each package file sharing its base name.
	my %source_sha256_for = ();

	foreach my $file (@$others) {
		my $unscored_matches = $sources_by_basename->{basename($file)};
		next if !defined $unscored_matches;

		my $file_sha256 = Digest::SHA->new('256')->addfile("$tmpdir/$file", 'b')->hexdigest;
		# Skip if...
		next if (
			# ...the file was empty...
			$file_sha256 eq "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
			# ...or a single blank line.
			|| $file_sha256 eq "01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b"
		);

		# Assign a "score" to each matching file based on the number of path
		# components in common, so that we'll be more likely to match e.g.
		# "/usr/include/test" as "src/include/test" than "examples/test".
		# b is on the left, so this will sort in the reverse / descending
		# direction, so higher-scored files will come first.
		my @matches =
			sort { $b->[0] <=> $a->[0] }
			map { [common_path_suffix_length($file, $_), $_] }
			@$unscored_matches;

		foreach my $match (@matches) {
			my $source = $match->[1];
			if (!exists $source_sha256_for{$source}) {
				$source_sha256_for{$source} =
					Digest::SHA->new('256')->addfile($source, 'b')->hexdigest;
			}
			if ($file_sha256 eq $source_sha256_for{$source}) {
				$contents->{$file} = {};
				$contents->{$file}{"sources"} = [$source];
				$count += 1;
				# Stop at the best-scored match; going on would let a
				# lower-scored duplicate overwrite it and count the file
				# more than once.
				last;
			}
		}
	}

	if ($count > 0) {
		print "Adding metadata for $count copied source(s)\n";
	}
}


# Tells whether $source is a local file, i.e. its path is not absolute.
# Returns 1 for a path that does not start with '/', 0 otherwise.
sub is_local_file {
	my ($source) = @_;
	return $source =~ m{^/} ? 0 : 1;
}

# Resolves an external source file to the package shipping it and to the
# file inside that package's source tree it was copied from.
#
# $source   - path of the source file.
# $metadata - metadata of the package being built; its
#             `external_sources_to_packages` map is consulted.
# $arch     - architecture used to locate the metadata of the dependency.
#
# Returns the list (package, source file):
#   (undef, $source)   for a local (non absolute) path,
#   (undef, undef)     when no package is known for $source,
#   ($package, undef)  when the metadata of the package is missing or does
#                      not list the file,
#   ($package, $file)  otherwise.
sub get_external_source {
	my ($source, $metadata, $arch) = @_;

	if ($source !~ "^/") {
		return (undef, $source);
	}

	my $package = $metadata->{"external_sources_to_packages"}->{$source};
	if (!defined($package)){
		print STDERR "WARNING: No package information found for external source $source\n";
		return (undef, undef);
	}

	my $folder = "${TEST_ROOTFS}/usr/share/doc/${package}";
	my $dependency_metadata = open_metadata("${folder}/${package}_metadata_${arch}.json");

	if (!defined($dependency_metadata)) {
		return ($package, undef);
	}

	# Look the file up (keyed without the leading '/') step by step rather
	# than with one chained dereference, so that the cached metadata is not
	# autovivified and an unexpected shape cannot raise an exception.
	my $contents = ref($dependency_metadata) eq 'HASH'
		? $dependency_metadata->{"contents"} : undef;
	my $entry = ref($contents) eq 'HASH'
		? $contents->{substr($source, 1)} : undef;

	my $pkg_source_file;
	if (ref($entry) eq 'HASH' && ref($entry->{"sources"}) eq 'ARRAY') {
		# Layout written by this helper: { sources => [...] }.
		$pkg_source_file = $entry->{"sources"}[0];
	}
	elsif (ref($entry) eq 'ARRAY') {
		# Bare list of sources, which is what this lookup read before.
		# NOTE(review): presumably an older metadata layout; confirm it
		# still needs to be supported.
		$pkg_source_file = $entry->[0];
	}

	if (!defined $pkg_source_file) {
		print STDERR "WARNING: No source file found for $source in package $package\n";
		return ($package, undef);
	}

	return ($package, $pkg_source_file);
}

# Returns the `files` collection of the parsed copyright information of
# $package, or undef (after a warning) when no report is available.
#
# For CURRENT_PACKAGE the file $DEB_COPYRIGHT is parsed; for any other
# package, its installed copyright_report, plain or gzip-compressed. Results
# are memoized in %copyright_report_cache.
#
# Dies when a compressed report cannot be uncompressed.
sub get_copyright_report_files {
	my ($package) = @_;

	if (!exists($copyright_report_cache{$package})) {
		my $folder = "${TEST_ROOTFS}/usr/share/doc/${package}";
		my $copyright_report = "${folder}/copyright_report";
		my $copyright_report_gz = "${copyright_report}.gz";
		my $copyright_fname;
		# Keeps the temporary file alive until it has been parsed; the file
		# is removed when this object goes out of scope.
		my $unpacked_report;

		if ($package eq CURRENT_PACKAGE) {
			$copyright_fname = $DEB_COPYRIGHT;
		}
		elsif (-e $copyright_report) {
			$copyright_fname = $copyright_report;
		}
		elsif (-e $copyright_report_gz) {
			# Uncompress into a private, uniquely named temporary file
			# instead of a predictable path in /tmp.
			require File::Temp;
			$unpacked_report = File::Temp->new(SUFFIX => '_copyright_report');
			$copyright_fname = $unpacked_report->filename;
			gunzip($copyright_report_gz, $copyright_fname)
				or die "gunzip failed: $GunzipError\n";
		}
		else {
			print STDERR "WARNING: No report was found for $package\n";
		}

		my $copyright_report_files;
		if (defined $copyright_fname) {
			my $copyright = Debian::Copyright->new();
			$copyright->read($copyright_fname);
			$copyright_report_files = $copyright->files;
		}
		$copyright_report_cache{$package} = $copyright_report_files;
	}

	return $copyright_report_cache{$package};
}

# Splits the (possibly multi-line) value of a Copyright field into its
# lines, each with leading and trailing whitespace removed.
sub parse_copyright_line {
	my ($copyright_line) = @_;

	return map {
		my $entry = $_;
		$entry =~ s/^\s+|\s+$//g;
		$entry;
	} split /\n/, $copyright_line;
}

# Looks up the license and copyright that apply to $source.
#
# $source                 - path of the file, as used in the Files fields.
# $copyright_report_files - collection of Files stanzas keyed by the value
#                           of their Files field (needs FETCH and Values).
#
# Returns two array refs: the license (one element) and the copyright lines.
# Both contain $NO_INFO_FOUND when no stanza matches.
sub find_copyright {
	my ($source, $copyright_report_files) = @_;

	# If there is an exact match in the hash use it
	# if not we need to traverse the whole hash in reverse
	# order since debian copyright files can have multiple
	# stanzas that matches, and the last one takes precedence
	my $exact = $copyright_report_files->FETCH($source);
	if (defined $exact) {
		my @license = ($exact->License);
		my @copyright = parse_copyright_line($exact->Copyright);
		return (\@license, \@copyright);
	}

	foreach my $stanza (reverse($copyright_report_files->Values)) {
		my $files = $stanza->Files;
		last if ($files eq "");
		# A stanza made of a single pattern (no whitespace) without any glob
		# character can only match exactly, and the lookup above would have
		# caught that. Skipping it is faster than splitting and matching.
		# Stanzas listing several files must still be examined one by one.
		next if $files !~ /\s/ && $files !~ /[*?\[{]/;

		# The Files field is a whitespace separated list that may span
		# several lines.
		foreach my $pattern (split ' ', $files) {
			if (Text::Glob::match_glob($pattern, $source)) {
				my @license = ($stanza->License);
				my @copyright = parse_copyright_line($stanza->Copyright);
				return (\@license, \@copyright);
			}
		}
	}

	return ([$NO_INFO_FOUND], [$NO_INFO_FOUND]);
}

# Returns the license and copyright of $source as two array refs.
#
# $source   - source file; a non absolute path is a file of the package
#             being built, an absolute one comes from another package.
# $metadata - metadata of the package being built.
# $arch     - architecture used to locate the metadata of other packages.
#
# Answers are memoized per source in %copyright_cache. Both array refs
# contain $NO_INFO_FOUND when the information cannot be determined.
sub get_copyright {
	my ($source, $metadata, $arch) = @_;

	if (!exists($copyright_cache{$source})) {
		my ($package, $real_source);

		# Default case is local files, it is easy since we already have the data
		if (is_local_file($source)) {
			$package = CURRENT_PACKAGE;
			$real_source = $source;
		}
		# If the file is not local it means that it came from a different package
		# we need to find the source file in the external package and its copyright
		else {
			($package, $real_source) = get_external_source($source, $metadata, $arch);
			if (!defined $package || !defined $real_source) {
				print STDERR "WARNING: Missing information for $source, skipping it\n";
				$copyright_cache{$source} = [[$NO_INFO_FOUND], [$NO_INFO_FOUND]];
				return ([$NO_INFO_FOUND], [$NO_INFO_FOUND]);
			}
		}

		my $copyright_report_files = get_copyright_report_files($package);
		if (!defined($copyright_report_files)) {
			print STDERR "WARNING: No copyright report for package $package, skipping it\n";
			$copyright_cache{$source} = [[$NO_INFO_FOUND], [$NO_INFO_FOUND]];
			return ([$NO_INFO_FOUND], [$NO_INFO_FOUND]);
		}

		my ($found_license, $found_copyright);
		{
			# In copyright files `*` also matches `/`. `local` puts the
			# previous setting back when the block is left, even if the
			# lookup dies.
			local $Text::Glob::strict_wildcard_slash = 0;
			($found_license, $found_copyright) =
				find_copyright($real_source, $copyright_report_files);
		}

		$copyright_cache{$source} = [$found_license, $found_copyright];
	}

	my ($license, $copyright) = @{$copyright_cache{$source}};
	return ($license, $copyright);
}

# Aggregates the licenses and copyrights of all the sources listed in
# $metadata->{contents} and stores them, sorted and without duplicates, in
# $metadata->{license} and $metadata->{copyright}.
#
# $metadata - metadata of the package being built.
# $arch     - architecture, passed on to get_copyright().
#
# Artifacts below usr/share/doc/ are left out when $SKIP_DOC is set.
sub generate_copyright {
	my ($metadata, $arch) = @_;

	print "Generating copyright report\n";

	my $contents = $metadata->{'contents'};

	my %artifact_license = ();
	my %artifact_copyright = ();
	while (my ($artifact, $data) = each %$contents) {
		# Test the flag; assigning to it here would overwrite the setting
		# and skip the documents no matter what it was.
		next if ($SKIP_DOC && $artifact =~ "^usr/share/doc/");
		foreach my $source_file (@{$data->{"sources"}}) {
			my ($license, $copyright) = get_copyright($source_file, $metadata, $arch);
			foreach my $l (@{$license}) {
				$artifact_license{$l} = 1;
			}
			foreach my $c (@{$copyright}) {
				$artifact_copyright{$c} = 1;
			}
		}
	}

	$metadata->{"license"} = [sort keys(%artifact_license)];
	$metadata->{"copyright"} = [sort keys(%artifact_copyright)];
}

# NOTE(review): --blacklist is documented as a list of binary packages, but
# this check compares against the source package name and skips the whole
# run; confirm which one is intended.
if (grep { $_ eq sourcepackage() } @BLACKLIST) {
	print "Source package blacklisted, skipping it\n";
	exit 0;
}

# Index of the source tree, shared by all the packages processed below.
my $sources_by_basename = collect_sources_by_basename;

for my $package (@{$dh{DOPACKAGES}}) {
	my $tmp = tmpdir($package);

	print "Processing package $package for metadata extraction\n";
	if (! -d $tmp) {
		print "Folder $tmp does not exist, skipping it\n";
		next;
	}

	# When the doc folder of the package is a symlink, write into the doc
	# folder named after the link target instead.
	my $fname_dir = "usr/share/doc/" . $package;
	if (-l "$tmp/$fname_dir") {
		my $target = readlink "$tmp/$fname_dir";
		my $package_doc = basename($target);
		$fname_dir = "usr/share/doc/$package_doc";
	}
	my $arch = package_binary_arch($package);
	my $fname_list = "$fname_dir/" . $package . "_bin2sources_" . $arch . ".json";
	my $fname_metadata = "$fname_dir/${package}_metadata_${arch}.json";

	if ($FORCE_PROCESS == 0){
		if (-e "$tmp/$fname_metadata") {
			print "File $tmp/$fname_metadata already exists, skipping\n";
			next;
		}
	}

	my @pkg_files = sort(find_local($tmp));

	# Split the package contents, relative to $tmp, into ELF binaries and
	# everything else.
	my @binaries = ();
	my @others = ();
	foreach my $file (@pkg_files) {
		my $relative_file = substr($file, length($tmp) + 1);
		if (is_so_or_exec_elf_file($file)) {
			push(@binaries, $relative_file);
		} else {
			push(@others, $relative_file);
		}
	}

	if (!@binaries && !@others) {
		print "Package $package has no files to check, skipping\n";
		next;
	}

	if (not -e "$tmp/$fname_dir") {
		print "Folder $tmp/$fname_dir does not exists, creating it\n";
		mkpath("$tmp/$fname_dir");
	}

	my %metadata = (
		contents => {},
		external_sources_to_packages => {},
		referenced_source_packages => {},
		license => {},
		copyright => {},
	);

	if (@binaries) {
		if (!$SKIP_EXTRACT) {
			extract_debug_info($fname_list, $tmp, @binaries);
		}
		process_debug_info("$tmp/$fname_list", $sources_by_basename, \%metadata);
		process_external_sources(\%metadata, $arch);
		scan_binary_shlibs($tmp, \@binaries, \%metadata);
	}
	if (@others) {
		build_copied_sources_map($tmp, \@others, $sources_by_basename, \%metadata);
	}

	generate_copyright(\%metadata, $arch);

	if ($DROP_EXTERNAL_SOURCES_MAP) {
		delete $metadata{"external_sources_to_packages"};
	}

	if ($DROP_SOURCES) {
		my $contents = $metadata{"contents"};
		print "Deleting source information\n";
		foreach my $artifact (keys %$contents) {
			delete $metadata{"contents"}->{$artifact}{"sources"};
		}
	}

	if (%metadata) {
		open my $meta_handle, '>', "$tmp/$fname_metadata" or die "Failed to open $fname_metadata: $!";
		print $meta_handle $canonical_json->encode(\%metadata);
		# Buffered write errors (e.g. disk full) only surface on close.
		close $meta_handle or die "Failed to write $fname_metadata: $!";
	}
}

=head1 SEE ALSO

L<debhelper(7)>

This program is a part of debhelper.

=head1 AUTHOR

Walter Lozano <walter.lozano@collabora.com>

=cut
