Coverage for britney2/inputs/suiteloader.py: 92%

258 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-04-18 20:48 +0000

1from abc import abstractmethod 

2import apt_pkg 

3import logging 

4import os 

5import sys 

6from typing import Callable, Iterable, Optional 

7 

8from britney2 import SuiteClass, Suite, TargetSuite, Suites, BinaryPackage, BinaryPackageId, SourcePackage 

9from britney2.utils import ( 

10 read_release_file, possibly_compressed, read_sources_file, create_provides_map, parse_provides, parse_builtusing 

11) 

12 

13 

class MissingRequiredConfigurationError(RuntimeError):
    """Raised when a mandatory configuration value is neither set in the
    config nor auto-detectable (e.g. the TESTING/UNSTABLE suites or, absent
    a Release file, the ARCHITECTURES list)."""
    pass

16 

17 

class SuiteContentLoader(object):
    """Base class for loading suite content (sources and binaries).

    Holds the architecture-related configuration parsed from the base
    config plus a registry of every binary package seen so far; concrete
    subclasses implement :meth:`load_suites`.
    """

    def __init__(self, base_config):
        self._base_config = base_config
        as_list = SuiteContentLoader.config_str_as_list
        self._architectures = as_list(base_config.architectures)
        self._nobreakall_arches = as_list(base_config.nobreakall_arches, [])
        self._outofsync_arches = as_list(base_config.outofsync_arches, [])
        self._break_arches = as_list(base_config.break_arches, [])
        self._new_arches = as_list(base_config.new_arches, [])
        self._components = []
        self._all_binaries = {}
        self.logger = logging.getLogger(
            "%s.%s" % (self.__class__.__module__, self.__class__.__name__))

    @staticmethod
    def config_str_as_list(value, default_value=None):
        """Normalise a config value: None becomes *default_value*, a string
        is whitespace-split into a list, anything else passes through."""
        if value is None:
            return default_value
        return value.split() if isinstance(value, str) else value

    @property
    def architectures(self):
        """All architectures this britney instance operates on."""
        return self._architectures

    @property
    def nobreakall_arches(self):
        """Architectures where arch:all packages must stay installable."""
        return self._nobreakall_arches

    @property
    def outofsync_arches(self):
        """Architectures allowed to lag behind (out of sync)."""
        return self._outofsync_arches

    @property
    def break_arches(self):
        """Architectures that migrations are allowed to break."""
        return self._break_arches

    @property
    def new_arches(self):
        """Architectures considered new (bootstrapping)."""
        return self._new_arches

    @property
    def components(self):
        """Archive components (e.g. main/contrib) detected for the mirror."""
        return self._components

    def all_binaries(self):
        """Return the registry mapping every seen BinaryPackageId to its data."""
        return self._all_binaries

    @abstractmethod
    def load_suites(self):  # pragma: no cover
        pass

70 

71 

72class DebMirrorLikeSuiteContentLoader(SuiteContentLoader): 

73 

    # Fields that must agree between two Packages stanzas describing the same
    # (name, version, architecture) before they can be merged into a single
    # BinaryPackage entry (enforced by _merge_pkg_entries).
    CHECK_FIELDS = [
        'source',
        'source_version',
        'architecture',
        'multi_arch',
        'depends',
        'conflicts',
        'provides',
    ]

83 

    def load_suites(self):
        """Load the target suite and all configured source suites.

        The config must define 'testing' (the target) and 'unstable' (the
        primary source suite); 'pu' and 'tpu' are optional additional source
        suites.  Returns a populated ``Suites`` container; raises
        ``MissingRequiredConfigurationError`` if a mandatory suite is absent.
        """
        suites = []
        target_suite = None
        missing_config_msg = "Configuration %s is not set in the config (and cannot be auto-detected)"
        for suitename in ('testing', 'unstable', 'pu', 'tpu'):
            # Only pu/tpu carry a short-name suffix; it also distinguishes
            # "additional" from "primary" source suites below.
            suffix = suitename if suitename in {'pu', 'tpu'} else ''
            if hasattr(self._base_config, suitename):
                suite_path = getattr(self._base_config, suitename)
                suite_class = SuiteClass.TARGET_SUITE
                if suitename != 'testing':
                    suite_class = SuiteClass.ADDITIONAL_SOURCE_SUITE if suffix else SuiteClass.PRIMARY_SOURCE_SUITE
                    suites.append(Suite(suite_class, suitename, suite_path, suite_short_name=suffix))
                else:
                    target_suite = TargetSuite(suite_class, suitename, suite_path, suite_short_name=suffix)
            else:
                # testing/unstable are mandatory; pu/tpu are optional.
                if suitename in {'testing', 'unstable'}:  # pragma: no cover
                    self.logger.error(missing_config_msg, suitename.upper())
                    raise MissingRequiredConfigurationError(missing_config_msg % suitename.upper())
                self.logger.info("Optional suite %s is not defined (config option: %s) ", suitename, suitename.upper())

        # 'testing' is mandatory (checked above), so a target suite must exist.
        assert target_suite is not None

        self._check_release_file(target_suite, missing_config_msg)
        self._setup_architectures()

        # read the source and binary packages for the involved distributions. Notes:
        # - Load testing last as some live-data tests have more complete information in
        #   unstable
        # - Load all sources before any of the binaries.
        for suite in [target_suite, *suites]:
            sources = self._read_sources(suite.path)
            self._update_suite_name(suite)
            suite.sources = sources
            (suite.binaries, suite.provides_table) = self._read_binaries(suite, self._architectures)
            self._fixup_faux_arch_all_binaries(suite)

        return Suites(target_suite, suites)

121 

    def _fixup_faux_arch_all_binaries(self, suite: Suite) -> None:
        """remove faux arch:all binary if a real arch:all binary is available

        We don't know for which architectures bin/$something must be available
        except for arch:all, which should be available in each arch.  The
        information that a source builds an arch:all binary is available during
        the loading of the sources, but we have to pick an order in which to
        load the files and the Sources is loaded before the Packages are
        read.  Hence we fake an arch:all binary during source loading, but it
        shouldn't be there in the final list if real arch:all binaries are
        present in the Packages file.

        Also, if we keep the fake binary, it should be added to the lists of
        known binaries in the suite, otherwise britney2 trips later on.

        """

        all_binaries = self._all_binaries
        binaries = suite.binaries
        # Architectures on which faux arch:all entries must be registered:
        # everything except break/outofsync/new arches.
        faux_arches = (set(self.architectures) - set(self.break_arches) -
                       set(self.outofsync_arches) - set(self.new_arches))

        for srcpkg in suite.sources.values():
            # A BinaryPackageId is (name, version, architecture); index 2 is
            # 'faux' for the placeholder entries created while reading Sources.
            faux = {x for x in srcpkg.binaries if x[2] == 'faux'}
            # Drop the placeholders when at least one real arch:all binary
            # exists among the source's remaining binaries.
            if faux and [x for x in (srcpkg.binaries - faux)
                         if all_binaries[x].architecture == 'all']:
                srcpkg.binaries -= faux

            # Calculate again because we may have changed the set
            faux = {x for x in srcpkg.binaries if x[2] == 'faux'}
            for binpkg_id in faux:
                # Synthesise a minimal BinaryPackage for the surviving
                # placeholder so lookups elsewhere don't fail.
                bin_data = BinaryPackage(
                    binpkg_id[1],
                    sys.intern('faux'),
                    srcpkg.source,
                    srcpkg.version,
                    'all',
                    'no',
                    None,
                    None,
                    [],
                    False,
                    binpkg_id,
                    [],
                )
                # Register the placeholder under its package name on every
                # relevant architecture and in the global registry.
                for arch_all in faux_arches:
                    binaries[arch_all][binpkg_id[0]] = bin_data
                all_binaries[binpkg_id] = bin_data
        suite.binaries = binaries

171 

172 def _setup_architectures(self): 

173 allarches = self._architectures 

174 # Re-order the architectures such as that the most important architectures are listed first 

175 # (this is to make the log easier to read as most important architectures will be listed 

176 # first) 

177 arches = [x for x in allarches if x in self._nobreakall_arches] 

178 arches += [x for x in allarches if x not in arches and x not in self._outofsync_arches] 

179 arches += [x for x in allarches if x not in arches and x not in self._break_arches] 

180 arches += [x for x in allarches if x not in arches and x not in self._new_arches] 

181 arches += [x for x in allarches if x not in arches] 

182 

183 # Intern architectures for efficiency; items in this list will be used for lookups and 

184 # building items/keys - by intern strings we reduce memory (considerably). 

185 self._architectures = [sys.intern(arch) for arch in allarches] 

186 assert 'all' not in self._architectures, "all not allowed in architectures" 

187 

188 def _get_suite_name(self, suite, release_file): 

189 name = None 

190 codename = None 

191 if 'Suite' in release_file: 191 ↛ 193line 191 didn't jump to line 193, because the condition on line 191 was never false

192 name = release_file['Suite'] 

193 if 'Codename' in release_file: 

194 codename = release_file['Codename'] 

195 

196 if name is None: 196 ↛ 197line 196 didn't jump to line 197, because the condition on line 196 was never true

197 name = codename 

198 elif codename is None: 

199 codename = name 

200 

201 if name is None: 201 ↛ 202line 201 didn't jump to line 202, because the condition on line 201 was never true

202 self.logger.warning('Either of the fields "Suite" or "Codename" ' + 

203 'should be present in a release file.') 

204 self.logger.error('Release file for suite %s is missing both the ' + 

205 '"Suite" and the "Codename" fields.', suite.name) 

206 raise KeyError('Suite') 

207 

208 return (name, codename) 

209 

210 def _update_suite_name(self, suite): 

211 try: 

212 release_file = read_release_file(suite.path) 

213 except FileNotFoundError: 

214 self.logger.info("The %s suite does not have a Release file, unable to update the name", 

215 suite.name) 

216 release_file = None 

217 

218 if release_file is not None: 

219 (suite.name, suite.codename) = self._get_suite_name( 

220 suite, release_file) 

221 self.logger.info("Using suite name from Release file: %s", suite.name) 

222 self.logger.debug("Using suite codename from Release file: %s", suite.codename) 

223 

224 def _check_release_file(self, target_suite, missing_config_msg): 

225 try: 

226 release_file = read_release_file(target_suite.path) 

227 self.logger.info("Found a Release file in %s - using that for defaults", target_suite.name) 

228 except FileNotFoundError: 

229 self.logger.info("The %s suite does not have a Release file.", target_suite.name) 

230 release_file = None 

231 

232 if release_file is not None: 

233 self._components = release_file['Components'].split() 

234 self.logger.info("Using components listed in Release file: %s", ' '.join(self._components)) 

235 

236 if self._architectures is None: 

237 if release_file is None: # pragma: no cover 

238 self.logger.error("No configured architectures and there is no release file in the %s suite.", 

239 target_suite.name) 

240 self.logger.error("Please check if there is a \"Release\" file in %s", 

241 target_suite.path) 

242 self.logger.error("or if the config file contains a non-empty \"ARCHITECTURES\" field") 

243 raise MissingRequiredConfigurationError(missing_config_msg % "ARCHITECTURES") 

244 self._architectures = sorted(x for x in release_file['Architectures'].split() if x != 'all') 

245 self.logger.info("Using architectures listed in Release file: %s", ' '.join(self._architectures)) 

246 

247 def _read_sources(self, basedir: str) -> dict[str, SourcePackage]: 

248 """Read the list of source packages from the specified directory 

249 

250 The source packages are read from the `Sources' file within the 

251 directory specified as `basedir' parameter. Considering the 

252 large amount of memory needed, not all the fields are loaded 

253 in memory. The available fields are Version, Maintainer and Section. 

254 

255 The method returns a list where every item represents a source 

256 package as a dictionary. 

257 """ 

258 

259 if self._components: 

260 sources: dict[str, SourcePackage] = {} 

261 for component in self._components: 

262 filename = os.path.join(basedir, component, "source", "Sources") 

263 try: 

264 filename = possibly_compressed(filename) 

265 except FileNotFoundError: 

266 if component == "non-free-firmware": 

267 self.logger.info("Skipping %s as it doesn't exist", filename) 

268 continue 

269 raise 

270 self.logger.info("Loading source packages from %s", filename) 

271 read_sources_file(filename, sources, 

272 not self._base_config.archall_inconsistency_allowed) 

273 else: 

274 filename = os.path.join(basedir, "Sources") 

275 self.logger.info("Loading source packages from %s", filename) 

276 sources = read_sources_file(filename, None, 

277 not self._base_config.archall_inconsistency_allowed) 

278 

279 return sources 

280 

281 @staticmethod 

282 def merge_fields(get_field: Callable[[str], Optional[str]], *field_names: str, 

283 separator: str = ', ') -> Optional[str]: 

284 """Merge two or more fields (filtering out empty fields; returning None if all are empty) 

285 """ 

286 return separator.join(filter(None, (get_field(x) for x in field_names))) or None 

287 

288 def _read_packages_file(self, filename: str, arch, srcdist, packages: Optional[dict[str, BinaryPackage]] = None, 

289 intern=sys.intern) -> dict[str, BinaryPackage]: 

290 self.logger.info("Loading binary packages from %s", filename) 

291 

292 if packages is None: 

293 packages = {} 

294 

295 all_binaries = self._all_binaries 

296 

297 tag_file = apt_pkg.TagFile(filename) 

298 get_field = tag_file.section.get 

299 step = tag_file.step 

300 

301 while step(): 

302 pkg = get_field('Package') 

303 version = get_field('Version') 

304 

305 # There may be multiple versions of any arch:all packages 

306 # (in unstable) if some architectures have out-of-date 

307 # binaries. We only ever consider the package with the 

308 # largest version for migration. 

309 pkg = intern(pkg) 

310 version = intern(version) 

311 pkg_id = BinaryPackageId(pkg, version, arch) 

312 

313 if pkg in packages: 

314 old_pkg_data = packages[pkg] 

315 if apt_pkg.version_compare(old_pkg_data.version, version) > 0: 

316 continue 

317 old_pkg_id = old_pkg_data.pkg_id 

318 old_src_binaries = srcdist[old_pkg_data.source].binaries 

319 old_src_binaries.remove(old_pkg_id) 

320 # This may seem weird at first glance, but the current code rely 

321 # on this behaviour to avoid issues like #709460. Admittedly it 

322 # is a special case, but Britney will attempt to remove the 

323 # arch:all packages without this. Even then, this particular 

324 # stop-gap relies on the packages files being sorted by name 

325 # and the version, so it is not particularly resilient. 

326 if pkg_id not in old_src_binaries: 326 ↛ 332line 326 didn't jump to line 332, because the condition on line 326 was never false

327 old_src_binaries.add(pkg_id) 

328 

329 # Merge Pre-Depends with Depends and Conflicts with 

330 # Breaks. Britney is not interested in the "finer 

331 # semantic differences" of these fields anyway. 

332 deps = DebMirrorLikeSuiteContentLoader.merge_fields(get_field, 'Pre-Depends', 'Depends') 

333 conflicts = DebMirrorLikeSuiteContentLoader.merge_fields(get_field, 'Conflicts', 'Breaks') 

334 

335 ess = False 

336 if get_field('Essential', 'no') == 'yes': 

337 ess = True 

338 

339 source = pkg 

340 source_version = version 

341 # retrieve the name and the version of the source package 

342 source_raw = get_field('Source') 

343 if source_raw: 

344 source = intern(source_raw.split(" ")[0]) 

345 if "(" in source_raw: 

346 source_version = intern(source_raw[source_raw.find("(")+1:source_raw.find(")")]) 

347 

348 provides_raw = get_field('Provides') 

349 if provides_raw: 

350 provides = parse_provides(provides_raw, pkg_id=pkg_id, logger=self.logger) 

351 else: 

352 provides = [] 

353 

354 raw_arch = intern(get_field('Architecture')) 

355 if raw_arch not in {'all', arch}: # pragma: no cover 

356 raise AssertionError("%s has wrong architecture (%s) - should be either %s or all" % ( 

357 str(pkg_id), raw_arch, arch)) 

358 

359 builtusing_raw = get_field('Built-Using') 

360 if builtusing_raw: 

361 builtusing = parse_builtusing(builtusing_raw, pkg_id=pkg_id, logger=self.logger) 

362 else: 

363 builtusing = [] 

364 

365 dpkg = BinaryPackage(version, 

366 intern(get_field('Section')), 

367 source, 

368 source_version, 

369 raw_arch, 

370 get_field('Multi-Arch'), 

371 deps, 

372 conflicts, 

373 provides, 

374 ess, 

375 pkg_id, 

376 builtusing, 

377 ) 

378 

379 # if the source package is available in the distribution, then register this binary package 

380 if source in srcdist: 

381 # There may be multiple versions of any arch:all packages 

382 # (in unstable) if some architectures have out-of-date 

383 # binaries. We only want to include the package in the 

384 # source -> binary mapping once. It doesn't matter which 

385 # of the versions we include as only the package name and 

386 # architecture are recorded. 

387 srcdist[source].binaries.add(pkg_id) 

388 # if the source package doesn't exist, create a fake one 

389 else: 

390 srcdist[source] = SourcePackage(source, source_version, 'faux', {pkg_id}, None, True, None, None, [], []) 

391 

392 # add the resulting dictionary to the package list 

393 packages[pkg] = dpkg 

394 if pkg_id in all_binaries: 

395 self._merge_pkg_entries(pkg, arch, all_binaries[pkg_id], dpkg) 

396 else: 

397 all_binaries[pkg_id] = dpkg 

398 

399 # add the resulting dictionary to the package list 

400 packages[pkg] = dpkg 

401 

402 return packages 

403 

    def _read_binaries(self, suite: Suite, architectures: Iterable[str]
                       ) -> tuple[dict[str, dict[str, BinaryPackage]], dict[str, dict[str, set[tuple[str, str]]]]]:
        """Read the list of binary packages from the specified directory

        This method reads all the binary packages for a given suite.

        If the "components" config parameter is set, the directory should
        be the "suite" directory of a local mirror (i.e. the one containing
        the "Release" file).  Otherwise, Britney will read the packages
        information from all the "Packages_${arch}" files referenced by
        the "architectures" parameter.

        Considering the
        large amount of memory needed, not all the fields are loaded
        in memory.  The available fields are Version, Source, Multi-Arch,
        Depends, Conflicts, Provides and Architecture.

        The `Provides' field is used to populate the virtual packages list.

        The method returns a tuple of two dicts with architecture as key and
        another dict as value.  The value dicts of the first dict map
        from binary package name to "BinaryPackage" objects; the other second
        value dicts map a package name to the packages providing them.
        """
        binaries = {}
        provides_table = {}
        basedir = suite.path

        if self._components:
            # Mirror layout: walk each component/binary-$arch directory.
            release_file = read_release_file(basedir)
            listed_archs = set(release_file['Architectures'].split())
            for arch in architectures:
                packages = {}
                # Architectures absent from the Release file get empty tables
                # rather than a failed file read.
                if arch not in listed_archs:
                    self.logger.info("Skipping arch %s for %s: It is not listed in the Release file",
                                     arch, suite.name)
                    binaries[arch] = {}
                    provides_table[arch] = {}
                    continue
                for component in self._components:
                    binary_dir = "binary-%s" % arch
                    filename = os.path.join(basedir,
                                            component,
                                            binary_dir,
                                            'Packages')
                    try:
                        filename = possibly_compressed(filename)
                    except FileNotFoundError:
                        # non-free-firmware may legitimately be absent.
                        if component == "non-free-firmware":
                            self.logger.info("Skipping %s as it doesn't exist", filename)
                            continue
                        raise
                    udeb_filename = os.path.join(basedir,
                                                 component,
                                                 "debian-installer",
                                                 binary_dir,
                                                 "Packages")
                    # We assume the udeb Packages file is present if the
                    # regular one is present
                    udeb_filename = possibly_compressed(udeb_filename)
                    # debs and udebs are merged into the same per-arch dict.
                    self._read_packages_file(filename,
                                             arch,
                                             suite.sources,
                                             packages)
                    self._read_packages_file(udeb_filename,
                                             arch,
                                             suite.sources,
                                             packages)
                # create provides
                provides = create_provides_map(packages)
                binaries[arch] = packages
                provides_table[arch] = provides
        else:
            # Flat layout: one Packages_$arch file per architecture.
            for arch in architectures:
                filename = os.path.join(basedir, "Packages_%s" % arch)
                packages = self._read_packages_file(filename,
                                                    arch,
                                                    suite.sources)
                provides = create_provides_map(packages)
                binaries[arch] = packages
                provides_table[arch] = provides

        return (binaries, provides_table)

487 

    def _merge_pkg_entries(self, package, parch, pkg_entry1, pkg_entry2):
        """Verify that two entries for the same binary package are consistent.

        Compares every attribute listed in CHECK_FIELDS between the two
        entries; any difference is logged and ValueError is raised, since
        britney cannot represent two conflicting definitions of one binary.
        """
        bad = []
        for f in self.CHECK_FIELDS:
            v1 = getattr(pkg_entry1, f)
            v2 = getattr(pkg_entry2, f)
            if v1 != v2:  # pragma: no cover
                bad.append((f, v1, v2))

        if bad:  # pragma: no cover
            self.logger.error("Mismatch found %s %s %s differs", package, pkg_entry1.version, parch)
            for f, v1, v2 in bad:
                self.logger.info(" ... %s %s != %s", f, v1, v2)
            raise ValueError("Inconsistent / Unsupported data set")

        # Merge ESSENTIAL if necessary
        # (the kept entry must be at least as essential as the discarded one)
        assert pkg_entry1.is_essential or not pkg_entry2.is_essential