Coverage for britney2/inputs/suiteloader.py: 92%
258 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-04-18 20:48 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-04-18 20:48 +0000
from abc import ABC, abstractmethod
import logging
import os
import sys
from typing import Callable, Iterable, Optional

import apt_pkg

from britney2 import SuiteClass, Suite, TargetSuite, Suites, BinaryPackage, BinaryPackageId, SourcePackage
from britney2.utils import (
    read_release_file, possibly_compressed, read_sources_file, create_provides_map, parse_provides, parse_builtusing
)
class MissingRequiredConfigurationError(RuntimeError):
    """Raised when a mandatory configuration option is absent and cannot be auto-detected."""
class SuiteContentLoader(ABC):
    """Abstract base class for loading the suites Britney works on.

    The base class parses the architecture-related configuration options and
    keeps a registry of all binary packages seen across the loaded suites;
    concrete subclasses implement :meth:`load_suites`.

    Fix: derive from ``abc.ABC`` — previously the class derived from ``object``,
    so the ``@abstractmethod`` marker on ``load_suites`` was not enforced and an
    incomplete loader could be instantiated silently.
    """

    def __init__(self, base_config):
        self._base_config = base_config
        # ARCHITECTURES may be omitted; it can be auto-detected later from a
        # Release file (see subclasses), hence no default here (stays None).
        self._architectures = SuiteContentLoader.config_str_as_list(base_config.architectures)
        self._nobreakall_arches = SuiteContentLoader.config_str_as_list(base_config.nobreakall_arches, [])
        self._outofsync_arches = SuiteContentLoader.config_str_as_list(base_config.outofsync_arches, [])
        self._break_arches = SuiteContentLoader.config_str_as_list(base_config.break_arches, [])
        self._new_arches = SuiteContentLoader.config_str_as_list(base_config.new_arches, [])
        self._components = []
        # Maps BinaryPackageId -> BinaryPackage for every binary seen in any suite.
        self._all_binaries = {}
        logger_name = ".".join((self.__class__.__module__, self.__class__.__name__))
        self.logger = logging.getLogger(logger_name)

    @staticmethod
    def config_str_as_list(value, default_value=None):
        """Normalize a configuration value into a list.

        ``None`` becomes *default_value*, a string is split on whitespace, and
        any other value (e.g. an already-parsed list) is returned unchanged.
        """
        if value is None:
            return default_value
        if isinstance(value, str):
            return value.split()
        return value

    @property
    def architectures(self):
        # Full list of architectures Britney operates on (possibly re-ordered
        # and interned by the concrete loader).
        return self._architectures

    @property
    def nobreakall_arches(self):
        return self._nobreakall_arches

    @property
    def outofsync_arches(self):
        return self._outofsync_arches

    @property
    def break_arches(self):
        return self._break_arches

    @property
    def new_arches(self):
        return self._new_arches

    @property
    def components(self):
        return self._components

    # NOTE: kept as a plain method (not a property) for backwards
    # compatibility — existing callers invoke it as ``loader.all_binaries()``.
    def all_binaries(self):
        """Return the mapping of every binary package id seen so far."""
        return self._all_binaries

    @abstractmethod
    def load_suites(self):  # pragma: no cover
        """Load and return the suites (implemented by subclasses)."""
class DebMirrorLikeSuiteContentLoader(SuiteContentLoader):
    """Load suite content from a Debian-mirror-like directory layout.

    Depending on whether the target suite has a Release file listing
    components, data is read either per-component (``$component/source/Sources``,
    ``$component/binary-$arch/Packages``) or from flat ``Sources`` /
    ``Packages_${arch}`` files.
    """

    # Fields that must agree when the same (name, version, arch) binary
    # appears in more than one suite; see _merge_pkg_entries.
    CHECK_FIELDS = [
        'source',
        'source_version',
        'architecture',
        'multi_arch',
        'depends',
        'conflicts',
        'provides',
    ]

    def load_suites(self):
        """Load the target suite and all configured source suites.

        Returns a Suites object.  Raises MissingRequiredConfigurationError
        when a mandatory suite ('testing' or 'unstable') is not configured.
        """
        suites = []
        target_suite = None
        missing_config_msg = "Configuration %s is not set in the config (and cannot be auto-detected)"
        for suitename in ('testing', 'unstable', 'pu', 'tpu'):
            # 'pu'/'tpu' keep their short name as suffix; primary suites use ''.
            suffix = suitename if suitename in {'pu', 'tpu'} else ''
            if hasattr(self._base_config, suitename):
                suite_path = getattr(self._base_config, suitename)
                suite_class = SuiteClass.TARGET_SUITE
                if suitename != 'testing':
                    suite_class = SuiteClass.ADDITIONAL_SOURCE_SUITE if suffix else SuiteClass.PRIMARY_SOURCE_SUITE
                    suites.append(Suite(suite_class, suitename, suite_path, suite_short_name=suffix))
                else:
                    target_suite = TargetSuite(suite_class, suitename, suite_path, suite_short_name=suffix)
            else:
                if suitename in {'testing', 'unstable'}:  # pragma: no cover
                    self.logger.error(missing_config_msg, suitename.upper())
                    raise MissingRequiredConfigurationError(missing_config_msg % suitename.upper())
                self.logger.info("Optional suite %s is not defined (config option: %s) ", suitename, suitename.upper())

        assert target_suite is not None

        self._check_release_file(target_suite, missing_config_msg)
        self._setup_architectures()

        # read the source and binary packages for the involved distributions. Notes:
        # - Load testing last as some live-data tests have more complete information in
        #   unstable
        # - Load all sources before any of the binaries.
        for suite in [target_suite, *suites]:
            sources = self._read_sources(suite.path)
            self._update_suite_name(suite)
            suite.sources = sources
            (suite.binaries, suite.provides_table) = self._read_binaries(suite, self._architectures)
            self._fixup_faux_arch_all_binaries(suite)

        return Suites(target_suite, suites)

    def _fixup_faux_arch_all_binaries(self, suite: Suite) -> None:
        """remove faux arch:all binary if a real arch:all binary is available

        We don't know for which architectures bin/$something must be available
        except for arch:all, which should be available in each arch. The
        information that a source builds an arch:all binary is available during
        the loading of the sources, but we have to pick an order in which to
        load the files and the Sources is loaded before the Packages are
        read. Hence we fake an arch:all binary during source loading, but it
        shouldn't be there in the final list if real arch:all binaries are
        present in the Packages file.

        Also, if we keep the fake binary, it should be added to the lists of
        known binaries in the suite, otherwise britney2 trips later on.
        """
        all_binaries = self._all_binaries
        binaries = suite.binaries
        # Architectures on which a faux arch:all binary must be registered
        # (everything not excluded by break/outofsync/new).
        faux_arches = (set(self.architectures) - set(self.break_arches) -
                       set(self.outofsync_arches) - set(self.new_arches))

        for srcpkg in suite.sources.values():
            # BinaryPackageId index 2 is the architecture; 'faux' marks fakes.
            faux = {x for x in srcpkg.binaries if x[2] == 'faux'}
            if faux and [x for x in (srcpkg.binaries - faux)
                         if all_binaries[x].architecture == 'all']:
                # A real arch:all binary exists; drop the fakes.
                srcpkg.binaries -= faux

            # Calculate again because we may have changed the set
            faux = {x for x in srcpkg.binaries if x[2] == 'faux'}
            for binpkg_id in faux:
                bin_data = BinaryPackage(
                    binpkg_id[1],           # version
                    sys.intern('faux'),     # section
                    srcpkg.source,
                    srcpkg.version,
                    'all',                  # architecture
                    'no',                   # multi-arch
                    None,                   # depends
                    None,                   # conflicts
                    [],                     # provides
                    False,                  # essential
                    binpkg_id,
                    [],                     # built-using
                )
                for arch_all in faux_arches:
                    binaries[arch_all][binpkg_id[0]] = bin_data
                all_binaries[binpkg_id] = bin_data
        suite.binaries = binaries

    def _setup_architectures(self):
        """Re-order and intern the configured architecture list."""
        allarches = self._architectures
        # Re-order the architectures such as that the most important architectures are listed first
        # (this is to make the log easier to read as most important architectures will be listed
        # first)
        arches = [x for x in allarches if x in self._nobreakall_arches]
        arches += [x for x in allarches if x not in arches and x not in self._outofsync_arches]
        arches += [x for x in allarches if x not in arches and x not in self._break_arches]
        arches += [x for x in allarches if x not in arches and x not in self._new_arches]
        arches += [x for x in allarches if x not in arches]

        # Intern architectures for efficiency; items in this list will be used for lookups and
        # building items/keys - by intern strings we reduce memory (considerably).
        # Bug fix: intern the re-ordered list ("arches"); previously the original
        # ("allarches") order was used, which made the re-ordering above dead code.
        self._architectures = [sys.intern(arch) for arch in arches]
        assert 'all' not in self._architectures, "all not allowed in architectures"

    def _get_suite_name(self, suite, release_file):
        """Extract (name, codename) from a parsed Release file.

        Either field substitutes for the other when missing; raises KeyError
        when both are absent.
        """
        name = None
        codename = None
        if 'Suite' in release_file:
            name = release_file['Suite']
        if 'Codename' in release_file:
            codename = release_file['Codename']

        # Fall back to the other field when one of them is missing.
        if name is None:
            name = codename
        elif codename is None:
            codename = name

        if name is None:
            self.logger.warning('Either of the fields "Suite" or "Codename" ' +
                                'should be present in a release file.')
            self.logger.error('Release file for suite %s is missing both the ' +
                              '"Suite" and the "Codename" fields.', suite.name)
            raise KeyError('Suite')

        return (name, codename)

    def _update_suite_name(self, suite):
        """Update suite.name/suite.codename from its Release file, if present."""
        try:
            release_file = read_release_file(suite.path)
        except FileNotFoundError:
            self.logger.info("The %s suite does not have a Release file, unable to update the name",
                             suite.name)
            release_file = None

        if release_file is not None:
            (suite.name, suite.codename) = self._get_suite_name(
                suite, release_file)
            self.logger.info("Using suite name from Release file: %s", suite.name)
            self.logger.debug("Using suite codename from Release file: %s", suite.codename)

    def _check_release_file(self, target_suite, missing_config_msg):
        """Derive components/architectures defaults from the target suite's Release file.

        Raises MissingRequiredConfigurationError when no architectures are
        configured and no Release file exists to auto-detect them from.
        """
        try:
            release_file = read_release_file(target_suite.path)
            self.logger.info("Found a Release file in %s - using that for defaults", target_suite.name)
        except FileNotFoundError:
            self.logger.info("The %s suite does not have a Release file.", target_suite.name)
            release_file = None

        if release_file is not None:
            self._components = release_file['Components'].split()
            self.logger.info("Using components listed in Release file: %s", ' '.join(self._components))

        if self._architectures is None:
            if release_file is None:  # pragma: no cover
                self.logger.error("No configured architectures and there is no release file in the %s suite.",
                                  target_suite.name)
                self.logger.error("Please check if there is a \"Release\" file in %s",
                                  target_suite.path)
                self.logger.error("or if the config file contains a non-empty \"ARCHITECTURES\" field")
                raise MissingRequiredConfigurationError(missing_config_msg % "ARCHITECTURES")
            # 'all' is not a real architecture for migration purposes.
            self._architectures = sorted(x for x in release_file['Architectures'].split() if x != 'all')
            self.logger.info("Using architectures listed in Release file: %s", ' '.join(self._architectures))

    def _read_sources(self, basedir: str) -> dict[str, SourcePackage]:
        """Read the list of source packages from the specified directory

        The source packages are read from the `Sources' file within the
        directory specified as `basedir' parameter. Considering the
        large amount of memory needed, not all the fields are loaded
        in memory. The available fields are Version, Maintainer and Section.

        The method returns a dict mapping source package name to
        SourcePackage.
        """
        if self._components:
            sources: dict[str, SourcePackage] = {}
            for component in self._components:
                filename = os.path.join(basedir, component, "source", "Sources")
                try:
                    filename = possibly_compressed(filename)
                except FileNotFoundError:
                    # non-free-firmware is a recent component and may be
                    # legitimately absent from older suites.
                    if component == "non-free-firmware":
                        self.logger.info("Skipping %s as it doesn't exist", filename)
                        continue
                    raise
                self.logger.info("Loading source packages from %s", filename)
                read_sources_file(filename, sources,
                                  not self._base_config.archall_inconsistency_allowed)
        else:
            filename = os.path.join(basedir, "Sources")
            self.logger.info("Loading source packages from %s", filename)
            sources = read_sources_file(filename, None,
                                        not self._base_config.archall_inconsistency_allowed)

        return sources

    @staticmethod
    def merge_fields(get_field: Callable[[str], Optional[str]], *field_names: str,
                     separator: str = ', ') -> Optional[str]:
        """Merge two or more fields (filtering out empty fields; returning None if all are empty)
        """
        return separator.join(filter(None, (get_field(x) for x in field_names))) or None

    def _read_packages_file(self, filename: str, arch, srcdist, packages: Optional[dict[str, BinaryPackage]] = None,
                            intern=sys.intern) -> dict[str, BinaryPackage]:
        """Parse one Packages file into *packages* (and the global binary registry).

        Registers each binary under its source package in *srcdist*, creating
        a faux source entry when the source is unknown.  The *intern* default
        argument binds sys.intern as a local for speed in the hot loop.
        """
        self.logger.info("Loading binary packages from %s", filename)

        if packages is None:
            packages = {}

        all_binaries = self._all_binaries

        tag_file = apt_pkg.TagFile(filename)
        get_field = tag_file.section.get
        step = tag_file.step

        while step():
            pkg = get_field('Package')
            version = get_field('Version')

            # There may be multiple versions of any arch:all packages
            # (in unstable) if some architectures have out-of-date
            # binaries.  We only ever consider the package with the
            # largest version for migration.
            pkg = intern(pkg)
            version = intern(version)
            pkg_id = BinaryPackageId(pkg, version, arch)

            if pkg in packages:
                old_pkg_data = packages[pkg]
                if apt_pkg.version_compare(old_pkg_data.version, version) > 0:
                    # The version already stored is newer; skip this stanza.
                    continue
                old_pkg_id = old_pkg_data.pkg_id
                old_src_binaries = srcdist[old_pkg_data.source].binaries
                old_src_binaries.remove(old_pkg_id)
                # This may seem weird at first glance, but the current code rely
                # on this behaviour to avoid issues like #709460.  Admittedly it
                # is a special case, but Britney will attempt to remove the
                # arch:all packages without this.  Even then, this particular
                # stop-gap relies on the packages files being sorted by name
                # and the version, so it is not particularly resilient.
                if pkg_id not in old_src_binaries:
                    old_src_binaries.add(pkg_id)

            # Merge Pre-Depends with Depends and Conflicts with
            # Breaks.  Britney is not interested in the "finer
            # semantic differences" of these fields anyway.
            deps = DebMirrorLikeSuiteContentLoader.merge_fields(get_field, 'Pre-Depends', 'Depends')
            conflicts = DebMirrorLikeSuiteContentLoader.merge_fields(get_field, 'Conflicts', 'Breaks')

            ess = False
            if get_field('Essential', 'no') == 'yes':
                ess = True

            source = pkg
            source_version = version
            # retrieve the name and the version of the source package
            source_raw = get_field('Source')
            if source_raw:
                source = intern(source_raw.split(" ")[0])
                if "(" in source_raw:
                    source_version = intern(source_raw[source_raw.find("(")+1:source_raw.find(")")])

            provides_raw = get_field('Provides')
            if provides_raw:
                provides = parse_provides(provides_raw, pkg_id=pkg_id, logger=self.logger)
            else:
                provides = []

            raw_arch = intern(get_field('Architecture'))
            if raw_arch not in {'all', arch}:  # pragma: no cover
                raise AssertionError("%s has wrong architecture (%s) - should be either %s or all" % (
                    str(pkg_id), raw_arch, arch))

            builtusing_raw = get_field('Built-Using')
            if builtusing_raw:
                builtusing = parse_builtusing(builtusing_raw, pkg_id=pkg_id, logger=self.logger)
            else:
                builtusing = []

            dpkg = BinaryPackage(version,
                                 intern(get_field('Section')),
                                 source,
                                 source_version,
                                 raw_arch,
                                 get_field('Multi-Arch'),
                                 deps,
                                 conflicts,
                                 provides,
                                 ess,
                                 pkg_id,
                                 builtusing,
                                 )

            # if the source package is available in the distribution, then register this binary package
            if source in srcdist:
                # There may be multiple versions of any arch:all packages
                # (in unstable) if some architectures have out-of-date
                # binaries.  We only want to include the package in the
                # source -> binary mapping once.  It doesn't matter which
                # of the versions we include as only the package name and
                # architecture are recorded.
                srcdist[source].binaries.add(pkg_id)
            # if the source package doesn't exist, create a fake one
            else:
                srcdist[source] = SourcePackage(source, source_version, 'faux', {pkg_id}, None, True, None, None, [], [])

            # add the resulting dictionary to the package list
            # (the original code performed this assignment twice; the
            # redundant second assignment has been removed)
            packages[pkg] = dpkg
            if pkg_id in all_binaries:
                self._merge_pkg_entries(pkg, arch, all_binaries[pkg_id], dpkg)
            else:
                all_binaries[pkg_id] = dpkg

        return packages

    def _read_binaries(self, suite: Suite, architectures: Iterable[str]
                       ) -> tuple[dict[str, dict[str, BinaryPackage]], dict[str, dict[str, set[tuple[str, str]]]]]:
        """Read the list of binary packages from the specified directory

        This method reads all the binary packages for a given suite.

        If the "components" config parameter is set, the directory should
        be the "suite" directory of a local mirror (i.e. the one containing
        the "Release" file).  Otherwise, Britney will read the packages
        information from all the "Packages_${arch}" files referenced by
        the "architectures" parameter.

        Considering the
        large amount of memory needed, not all the fields are loaded
        in memory.  The available fields are Version, Source, Multi-Arch,
        Depends, Conflicts, Provides and Architecture.

        The `Provides' field is used to populate the virtual packages list.

        The method returns a tuple of two dicts with architecture as key and
        another dict as value.  The value dicts of the first dict map
        from binary package name to "BinaryPackage" objects; the value
        dicts of the second map a package name to the packages providing them.
        """
        binaries = {}
        provides_table = {}
        basedir = suite.path

        if self._components:
            release_file = read_release_file(basedir)
            listed_archs = set(release_file['Architectures'].split())
            for arch in architectures:
                packages = {}
                if arch not in listed_archs:
                    self.logger.info("Skipping arch %s for %s: It is not listed in the Release file",
                                     arch, suite.name)
                    binaries[arch] = {}
                    provides_table[arch] = {}
                    continue
                for component in self._components:
                    binary_dir = "binary-%s" % arch
                    filename = os.path.join(basedir,
                                            component,
                                            binary_dir,
                                            'Packages')
                    try:
                        filename = possibly_compressed(filename)
                    except FileNotFoundError:
                        # See _read_sources: this component may be absent.
                        if component == "non-free-firmware":
                            self.logger.info("Skipping %s as it doesn't exist", filename)
                            continue
                        raise
                    udeb_filename = os.path.join(basedir,
                                                 component,
                                                 "debian-installer",
                                                 binary_dir,
                                                 "Packages")
                    # We assume the udeb Packages file is present if the
                    # regular one is present
                    udeb_filename = possibly_compressed(udeb_filename)
                    self._read_packages_file(filename,
                                             arch,
                                             suite.sources,
                                             packages)
                    self._read_packages_file(udeb_filename,
                                             arch,
                                             suite.sources,
                                             packages)
                # create provides
                provides = create_provides_map(packages)
                binaries[arch] = packages
                provides_table[arch] = provides
        else:
            for arch in architectures:
                filename = os.path.join(basedir, "Packages_%s" % arch)
                packages = self._read_packages_file(filename,
                                                    arch,
                                                    suite.sources)
                provides = create_provides_map(packages)
                binaries[arch] = packages
                provides_table[arch] = provides

        return (binaries, provides_table)

    def _merge_pkg_entries(self, package, parch, pkg_entry1, pkg_entry2):
        """Sanity-check that two entries for the same binary are consistent.

        Raises ValueError when any of CHECK_FIELDS differs between the two
        entries.
        """
        bad = []
        for f in self.CHECK_FIELDS:
            v1 = getattr(pkg_entry1, f)
            v2 = getattr(pkg_entry2, f)
            if v1 != v2:  # pragma: no cover
                bad.append((f, v1, v2))

        if bad:  # pragma: no cover
            self.logger.error("Mismatch found %s %s %s differs", package, pkg_entry1.version, parch)
            for f, v1, v2 in bad:
                self.logger.info(" ... %s %s != %s", f, v1, v2)
            raise ValueError("Inconsistent / Unsupported data set")

        # Merge ESSENTIAL if necessary
        assert pkg_entry1.is_essential or not pkg_entry2.is_essential