Introduce bioconductor lister

This commit is contained in:
Kumar Shivendu 2023-09-28 12:54:37 +00:00 committed by Antoine R. Dumont
parent a04975571c
commit 88611642fc
13 changed files with 1845 additions and 0 deletions

View file

@ -28,6 +28,7 @@ following Python modules:
- `swh.lister.pypi`
- `swh.lister.rpm`
- `swh.lister.tuleap`
- `swh.lister.bioconductor`
Dependencies
------------

View file

@ -93,6 +93,7 @@ setup(
lister.stagit=swh.lister.stagit:register
lister.tuleap=swh.lister.tuleap:register
lister.maven=swh.lister.maven:register
lister.bioconductor=swh.lister.bioconductor:register
""",
classifiers=[
"Programming Language :: Python :: 3",

View file

@ -0,0 +1,13 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def register():
    """Return the registration data of the Bioconductor lister.

    The result is a dict with two entries: ``lister``, the lister class, and
    ``task_modules``, the dotted names of the modules holding its tasks.
    """
    # The lister class is imported inside the function, at call time
    from .lister import BioconductorLister

    tasks_module = f"{__name__}.tasks"
    return dict(lister=BioconductorLister, task_modules=[tasks_module])

View file

@ -0,0 +1,314 @@
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from dataclasses import dataclass, field
import json
import logging
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from debian.deb822 import Sources
import iso8601
from packaging import version
from requests import HTTPError
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from ..pattern import CredentialsType, Lister
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Readability aliases: both releases and categories are plain strings
Release = str
Category = str

# A page is a (release, category, packages) tuple, or None
BioconductorListerPage = Optional[Tuple[Release, Category, Dict[str, Any]]]
@dataclass
class BioconductorListerState:
    """Data the Bioconductor lister keeps from one listing to the next"""

    #: Maps each package name to the set of all versions found for it
    #: during the last listing
    package_versions: Dict[str, Set[str]] = field(default_factory=dict)
class BioconductorLister(Lister[BioconductorListerState, BioconductorListerPage]):
    """List origins from Bioconductor, a collection of open source software
    for bioinformatics based on the R statistical programming language.

    Every (release, category) pair is fetched as one page. Origins are
    accumulated over all pages (one origin per package, holding every
    release/category/version found for it) and are only yielded once the
    final ``None`` page is processed.
    """

    LISTER_NAME = "bioconductor"
    VISIT_TYPE = "bioconductor"
    INSTANCE = "bioconductor"

    BIOCONDUCTOR_HOMEPAGE = "https://www.bioconductor.org"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str = BIOCONDUCTOR_HOMEPAGE,
        instance: str = INSTANCE,
        credentials: Optional[CredentialsType] = None,
        releases: Optional[List[Release]] = None,
        categories: Optional[List[Category]] = None,
        incremental: bool = False,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
        record_batch_size: int = 1000,
    ):
        """Set up the lister.

        Args:
            scheduler: forwarded to the base lister
            url: base URL used to build package index and tarball URLs;
                also forwarded to the base lister
            instance: forwarded to the base lister
            credentials: forwarded to the base lister
            releases: Bioconductor releases to list; when ``None`` they are
                fetched with :meth:`fetch_versions`
            categories: package categories to list for releases >= 2.5;
                when empty or ``None``, ``bioc``, ``workflows``,
                ``data/annotation`` and ``data/experiment`` are used
            incremental: when true, packages without any new version
                compared to the saved state are not sent, and the state is
                updated at the end of the listing
            max_origins_per_page: forwarded to the base lister
            max_pages: forwarded to the base lister
            enable_origins: forwarded to the base lister
            record_batch_size: forwarded to the base lister
        """
        super().__init__(
            scheduler=scheduler,
            url=url,
            instance=instance,
            credentials=credentials,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
            record_batch_size=record_batch_size,
        )

        if releases is None:
            self.releases = self.fetch_versions()
        else:
            self.releases = releases

        self.categories = categories or [
            "bioc",
            "workflows",
            "data/annotation",
            "data/experiment",
        ]

        self.incremental = incremental

        # origin URL -> origin, accumulated over all pages
        self.listed_origins: Dict[str, ListedOrigin] = {}
        # URLs of the origins to yield when the final ``None`` page is reached
        self.origins_to_send: Set[str] = set()
        # package name -> "release/category/version" keys found in this run
        self.package_versions: Dict[str, Set[str]] = {}

    def state_from_dict(self, d: Dict[str, Any]) -> BioconductorListerState:
        """Build the lister state from its serialized form (a mapping of
        package name to an iterable of versions)."""
        return BioconductorListerState(
            package_versions={k: set(v) for k, v in d.items()}
        )

    def state_to_dict(self, state: BioconductorListerState) -> Dict[str, Any]:
        """Serialize the lister state as a mapping of package name to a list
        of versions."""
        # Sets have no defined order: sort so that the same state always
        # serializes to the same value.
        return {k: sorted(v) for k, v in state.package_versions.items()}

    def origin_url_for_package(self, package_name: str) -> str:
        """Return the origin URL of a package, always built from
        ``BIOCONDUCTOR_HOMEPAGE``."""
        return f"{self.BIOCONDUCTOR_HOMEPAGE}/packages/{package_name}"

    def get_pages(self) -> Iterator[BioconductorListerPage]:
        """Return an iterator for each page. Every page is a (release, category) pair.

        The location and format of the package index depend on the release:
        ``PACKAGES`` files for releases < 2.5 (with a different path layout
        before 1.8) and ``packages.json`` afterwards. Pages whose request
        fails with an HTTP error are skipped. A final ``None`` is yielded
        once all pages have been produced.
        """
        for release in self.releases:
            # Parse once per release instead of once per comparison
            release_version = version.parse(release)
            if release_version < version.parse("1.8"):
                # only bioc category existed before 1.8
                url_template = urljoin(
                    self.url, "/packages/{category}/{release}/src/contrib/PACKAGES"
                )
                categories = {"bioc"}
            elif release_version < version.parse("2.5"):
                # workflows category won't exist for these
                url_template = urljoin(
                    self.url, "/packages/{release}/{category}/src/contrib/PACKAGES"
                )
                categories = {"bioc", "data/annotation", "data/experiment"}
            else:
                url_template = urljoin(
                    self.url, "/packages/json/{release}/{category}/packages.json"
                )
                categories = set(self.categories)

            for category in categories:
                url = url_template.format(release=release, category=category)
                try:
                    packages_txt = self.http_request(url).text
                    packages = self.parse_packages(packages_txt)
                except HTTPError as e:
                    logger.debug(
                        "Skipping page since got %s response for %s",
                        e.response.status_code,
                        url,
                    )
                    continue

                yield (release, category, packages)

        # Yield extra none to signal get_origins_from_page()
        # to stop iterating and yield the extracted origins
        yield None

    def fetch_versions(self) -> List[str]:
        """Return the releases found in the first table of the Bioconductor
        release announcements page.

        Rows are read from the last one to the first one, and only rows
        whose third cell contains a link are kept.
        """
        html = self.http_request(
            f"{self.BIOCONDUCTOR_HOMEPAGE}/about/release-announcements"
        ).text
        bs = BeautifulSoup(html, "html.parser")
        releases: List[str] = []
        for tr in reversed(bs.find("table").find("tbody").find_all("tr")):
            cells = tr.find_all("td")
            # Ignore rows that are too short to have a third cell instead
            # of failing on them
            if len(cells) > 2 and cells[2].find("a"):
                releases.append(cells[0].text.strip())
        return releases

    def parse_packages(self, text: str) -> Dict[str, Any]:
        """Parses packages.json and PACKAGES files

        The text is first decoded as JSON; when that fails it is parsed as
        a sequence of ``Key: value`` paragraphs, indexed by their
        ``Package`` field.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Not JSON: fall back to the PACKAGES paragraph format
            pass

        sources = Sources.iter_paragraphs(text)
        return {s["Package"]: dict(s) for s in sources}

    def get_origins_from_page(
        self, page: BioconductorListerPage
    ) -> Iterator[ListedOrigin]:
        """Convert a page of BioconductorLister PACKAGES/packages.json
        metadata into a list of ListedOrigins

        Regular pages yield nothing: their packages are merged into
        ``self.listed_origins`` and the URLs to send are recorded in
        ``self.origins_to_send``. The recorded origins are yielded when the
        final ``None`` page is received.
        """
        assert self.lister_obj.id is not None

        if page is None:
            for origin_url in self.origins_to_send:
                yield self.listed_origins[origin_url]
            return

        release, category, packages = page
        # The release is the same for the whole page: parse it only once
        release_version = version.parse(release)

        origins_to_send: Set[str] = set()

        for pkg_name, pkg_metadata in packages.items():
            pkg_version = pkg_metadata["Version"]
            last_update_date = None
            last_update_str = ""

            if release_version < version.parse("1.8"):
                tar_url = urljoin(
                    self.url,
                    f"/packages/{category}/{release}/src/contrib/Source/{pkg_name}_{pkg_version}.tar.gz",
                )
            elif release_version < version.parse("2.5"):
                tar_url = urljoin(
                    self.url,
                    f"/packages/{release}/{category}/src/contrib/{pkg_name}_{pkg_version}.tar.gz",
                )
            else:
                # Some packages don't have a download URL (based on source.ver)
                # and hence can't be archived. For example see the package
                # maEndToEnd at the end of
                # https://bioconductor.org/packages/json/3.17/workflows/packages.json
                # Even guessing tar url path based on the expected url format doesn't work. i.e.
                # https://bioconductor.org/packages/3.17/workflows/src/contrib/maEndToEnd_2.20.0.tar.gz
                # doesn't respond with a tar file. Plus, the mirror clearly shows
                # that maEndToEnd tar is missing.
                # https://ftp.gwdg.de/pub/misc/bioconductor/packages/3.17/workflows/src/contrib/
                # So skipping such packages
                if "source.ver" not in pkg_metadata:
                    logger.info(
                        (
                            "Skipping package %s listed in release %s "
                            "category %s since it doesn't have a download URL"
                        ),
                        pkg_name,
                        release,
                        category,
                    )
                    continue

                if "git_url" in pkg_metadata:
                    # Along with the .tar.gz files grab the git repo as well
                    git_origin_url = pkg_metadata["git_url"]
                    git_last_update_str = pkg_metadata.get("git_last_commit_date")
                    self.listed_origins[git_origin_url] = ListedOrigin(
                        lister_id=self.lister_obj.id,
                        visit_type="git",
                        url=git_origin_url,
                        last_update=(
                            iso8601.parse_date(git_last_update_str)
                            if git_last_update_str
                            else None
                        ),
                    )
                    origins_to_send.add(git_origin_url)

                tar_url = urljoin(
                    self.url,
                    f"/packages/{release}/{category}/{pkg_metadata['source.ver']}",
                )

                last_update_str = pkg_metadata.get(
                    "Date/Publication", pkg_metadata.get("git_last_commit_date")
                )
                last_update_date = (
                    iso8601.parse_date(last_update_str) if last_update_str else None
                )
                # For some packages in releases >= 2.5, last_update can still
                # remain None. Example: See "adme16cod.db" entry in
                # https://bioconductor.org/packages/json/3.17/data/annotation/packages.json

            origin_url = self.origin_url_for_package(pkg_name)
            package_version_key = f"{release}/{category}/{pkg_version}"

            if origin_url not in self.listed_origins:
                self.listed_origins[origin_url] = ListedOrigin(
                    lister_id=self.lister_obj.id,
                    visit_type=self.VISIT_TYPE,
                    url=origin_url,
                    last_update=last_update_date,
                    extra_loader_arguments={"packages": {}},
                )
                self.package_versions[pkg_name] = set()

            origin = self.listed_origins[origin_url]
            origins_to_send.add(origin_url)

            optional_fields: Dict[str, Any] = {}
            if "MD5sum" in pkg_metadata:
                optional_fields["checksums"] = {"md5": pkg_metadata["MD5sum"]}
            if last_update_str:
                optional_fields["last_update_date"] = last_update_str

            origin.extra_loader_arguments["packages"][package_version_key] = {
                "release": release,
                "version": pkg_version,
                "category": category,
                "package": pkg_name,
                "tar_url": tar_url,
                **optional_fields,
            }

            # Keep the most recent date seen for the package. An origin first
            # created from an entry without any date (always the case for
            # releases < 2.5) must still get the first date found afterwards.
            if last_update_date is not None and (
                origin.last_update is None or last_update_date > origin.last_update
            ):
                origin.last_update = last_update_date

            self.package_versions[pkg_name].add(package_version_key)

            # package has been listed during a previous listing
            if self.incremental and pkg_name in self.state.package_versions:
                new_versions = (
                    self.package_versions[pkg_name]
                    - self.state.package_versions[pkg_name]
                )
                # no new versions, no need to send the origin to the scheduler
                if not new_versions:
                    origins_to_send.discard(origin_url)

        self.origins_to_send.update(origins_to_send)

    def finalize(self) -> None:
        """Save the listed package versions as the new state when running
        incrementally, and flag the lister as updated when at least one
        origin was listed."""
        if self.incremental:
            self.state.package_versions = self.package_versions

        self.updated = len(self.listed_origins) > 0

View file

@ -0,0 +1,28 @@
# Copyright (C) 2022-2023 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Dict
from celery import shared_task
from .lister import BioconductorLister
@shared_task(name=__name__ + ".BioconductorListerTask")
def list_bioconductor_full(**lister_args) -> Dict[str, int]:
    """Run a full listing of Bioconductor packages and return the result of
    the run as a dict"""
    stats = BioconductorLister.from_configfile(**lister_args).run()
    return stats.dict()
@shared_task(name=__name__ + ".BioconductorIncrementalListerTask")
def list_bioconductor_incremental(**lister_args) -> Dict[str, int]:
    """Run a listing of Bioconductor packages with ``incremental=True`` and
    return the result of the run as a dict"""
    stats = BioconductorLister.from_configfile(
        **lister_args, incremental=True
    ).run()
    return stats.dict()
@shared_task(name=__name__ + ".ping")
def _ping() -> str:
    """Trivial task that always answers "OK" """
    reply = "OK"
    return reply

View file

@ -0,0 +1,13 @@
Package: affylmGUI
Version: 1.4.0
Depends: limma, tcltk, affy
Suggests: tkrplot, affyPLM, R2HTML, xtable
Package: affypdnn
Version: 1.4.0
Depends: R (>= 1.9.0), affy (>= 1.5), affydata, hgu95av2probe
Package: affyPLM
Version: 1.6.0
Depends: R (>= 2.0.0), affy (>= 1.5.0), affydata, Biobase, methods,
gcrma

View file

@ -0,0 +1,12 @@
Package: ABarray
Version: 1.8.0
Depends: Biobase, multtest, tcltk
Suggests: limma, LPE
Package: AnnotationDbi
Version: 1.2.2
Depends: R (>= 2.7.0), methods, utils, Biobase (>= 1.17.0), DBI (>=
0.2-4), RSQLite (>= 0.6-4)
Imports: methods, utils, Biobase, DBI, RSQLite
Suggests: hgu95av2.db, hgu95av2, GO.db, GO, human.db0, mouse.db0,
rat.db0, fly.db0, yeast.db0, arabidopsis.db0

View file

@ -0,0 +1,166 @@
{
"annotation": {
"Package": "annotation",
"Version": "1.24.1",
"Depends": [
"R (>= 3.3.0)",
"VariantAnnotation",
"AnnotationHub",
"Organism.dplyr",
"TxDb.Hsapiens.UCSC.hg19.knownGene",
"TxDb.Hsapiens.UCSC.hg38.knownGene",
"TxDb.Mmusculus.UCSC.mm10.ensGene",
"org.Hs.eg.db",
"org.Mm.eg.db",
"Homo.sapiens",
"BSgenome.Hsapiens.UCSC.hg19",
"biomaRt",
"BSgenome",
"TxDb.Athaliana.BioMart.plantsmart22"
],
"Suggests": [
"knitr",
"rmarkdown",
"BiocStyle"
],
"License": "Artistic-2.0",
"MD5sum": "4cb4db8807acb2e164985636091faa93",
"NeedsCompilation": "no",
"Title": "Genomic Annotation Resources",
"Description": "Annotation resources make up a significant proportion of the Bioconductor project. And there are also a diverse set of online resources available which are accessed using specific packages. This walkthrough will describe the most popular of these resources and give some high level examples on how to use them.",
"biocViews": [
"AnnotationWorkflow",
"Workflow"
],
"Author": "Marc RJ Carlson [aut], Herve Pages [aut], Sonali Arora [aut], Valerie Obenchain [aut], Martin Morgan [aut], Bioconductor Package Maintainer [cre]",
"Maintainer": "Bioconductor Package Maintainer <maintainer@bioconductor.org>",
"URL": "http://bioconductor.org/help/workflows/annotation/Annotation_Resources/",
"VignetteBuilder": "knitr",
"git_url": "https://git.bioconductor.org/packages/annotation",
"git_branch": "RELEASE_3_17",
"git_last_commit": "4568557",
"git_last_commit_date": "2023-06-28",
"Date/Publication": "2023-06-30",
"source.ver": "src/contrib/annotation_1.24.1.tar.gz",
"vignettes": [
"vignettes/annotation/inst/doc/Annotating_Genomic_Ranges.html",
"vignettes/annotation/inst/doc/Annotation_Resources.html"
],
"vignetteTitles": [
"Annotating Genomic Ranges",
"Genomic Annotation Resources"
],
"hasREADME": false,
"hasNEWS": false,
"hasINSTALL": false,
"hasLICENSE": false,
"Rfiles": [
"vignettes/annotation/inst/doc/Annotating_Genomic_Ranges.R",
"vignettes/annotation/inst/doc/Annotation_Resources.R"
],
"dependencyCount": "143",
"Rank": 23
},
"variants": {
"Package": "variants",
"Version": "1.24.0",
"Depends": [
"R (>= 3.3.0)",
"VariantAnnotation",
"org.Hs.eg.db",
"TxDb.Hsapiens.UCSC.hg19.knownGene",
"BSgenome.Hsapiens.UCSC.hg19",
"PolyPhen.Hsapiens.dbSNP131"
],
"Suggests": [
"knitr",
"rmarkdown",
"BiocStyle"
],
"License": "Artistic-2.0",
"MD5sum": "38f2c00b73e1a695f5ef4c9b4a728923",
"NeedsCompilation": "no",
"Title": "Annotating Genomic Variants",
"Description": "Read and write VCF files. Identify structural location of variants and compute amino acid coding changes for non-synonymous variants. Use SIFT and PolyPhen database packages to predict consequence of amino acid coding changes.",
"biocViews": [
"AnnotationWorkflow",
"ImmunoOncologyWorkflow",
"Workflow"
],
"Author": "Valerie Obenchain [aut], Martin Morgan [ctb], Bioconductor Package Maintainer [cre]",
"Maintainer": "Bioconductor Package Maintainer <maintainer@bioconductor.org>",
"URL": "https://bioconductor.org/help/workflows/variants/",
"VignetteBuilder": "knitr",
"git_url": "https://git.bioconductor.org/packages/variants",
"git_branch": "RELEASE_3_17",
"git_last_commit": "d311e59",
"git_last_commit_date": "2023-04-25",
"Date/Publication": "2023-04-28",
"source.ver": "src/contrib/variants_1.24.0.tar.gz",
"vignettes": [
"vignettes/variants/inst/doc/Annotating_Genomic_Variants.html"
],
"vignetteTitles": [
"Annotating Genomic Variants"
],
"hasREADME": false,
"hasNEWS": false,
"hasINSTALL": false,
"hasLICENSE": false,
"Rfiles": [
"vignettes/variants/inst/doc/Annotating_Genomic_Variants.R"
],
"dependencyCount": "103",
"Rank": 16
},
"maEndToEnd": {
"Package": "maEndToEnd",
"Version": "2.20.0",
"Depends": [
"R (>= 3.5.0)",
"Biobase",
"oligoClasses",
"ArrayExpress",
"pd.hugene.1.0.st.v1",
"hugene10sttranscriptcluster.db",
"oligo",
"arrayQualityMetrics",
"limma",
"topGO",
"ReactomePA",
"clusterProfiler",
"gplots",
"ggplot2",
"geneplotter",
"pheatmap",
"RColorBrewer",
"dplyr",
"tidyr",
"stringr",
"matrixStats",
"genefilter",
"openxlsx",
"Rgraphviz",
"enrichplot"
],
"Suggests": [
"BiocStyle",
"knitr",
"devtools",
"rmarkdown"
],
"License": "MIT + file LICENSE",
"NeedsCompilation": "no",
"Title": "An end to end workflow for differential gene expression using Affymetrix microarrays",
"Description": "In this article, we walk through an end-to-end Affymetrix microarray differential expression workflow using Bioconductor packages. This workflow is directly applicable to current \"Gene\" type arrays, e.g. the HuGene or MoGene arrays, but can easily be adapted to similar platforms. The data analyzed here is a typical clinical microarray data set that compares inflamed and non-inflamed colon tissue in two disease subtypes. For each disease, the differential gene expression between inflamed- and non-inflamed colon tissue was analyzed. We will start from the raw data CEL files, show how to import them into a Bioconductor ExpressionSet, perform quality control and normalization and finally differential gene expression (DE) analysis, followed by some enrichment analysis.",
"biocViews": [
"GeneExpressionWorkflow",
"Workflow"
],
"Author": "Bernd Klaus [aut], Stefanie Reisenauer [aut, cre]",
"Maintainer": "Stefanie Reisenauer <steffi.reisenauer@tum.de>",
"URL": "https://www.bioconductor.org/help/workflows/",
"VignetteBuilder": "knitr",
"Rank": 21
}
}

View file

@ -0,0 +1,162 @@
{
"annotation": {
"Package": "annotation",
"Version": "1.24.1",
"Depends": [
"R (>= 3.3.0)",
"VariantAnnotation",
"AnnotationHub",
"Organism.dplyr",
"TxDb.Hsapiens.UCSC.hg19.knownGene",
"TxDb.Hsapiens.UCSC.hg38.knownGene",
"TxDb.Mmusculus.UCSC.mm10.ensGene",
"org.Hs.eg.db",
"org.Mm.eg.db",
"Homo.sapiens",
"BSgenome.Hsapiens.UCSC.hg19",
"biomaRt",
"BSgenome",
"TxDb.Athaliana.BioMart.plantsmart22"
],
"Suggests": [
"knitr",
"rmarkdown",
"BiocStyle"
],
"License": "Artistic-2.0",
"MD5sum": "4cb4db8807acb2e164985636091faa93",
"NeedsCompilation": "no",
"Title": "Genomic Annotation Resources",
"Description": "Annotation resources make up a significant proportion of the Bioconductor project. And there are also a diverse set of online resources available which are accessed using specific packages. This walkthrough will describe the most popular of these resources and give some high level examples on how to use them.",
"biocViews": [
"AnnotationWorkflow",
"Workflow"
],
"Author": "Marc RJ Carlson [aut], Herve Pages [aut], Sonali Arora [aut], Valerie Obenchain [aut], Martin Morgan [aut], Bioconductor Package Maintainer [cre]",
"Maintainer": "Bioconductor Package Maintainer <maintainer@bioconductor.org>",
"URL": "http://bioconductor.org/help/workflows/annotation/Annotation_Resources/",
"VignetteBuilder": "knitr",
"git_url": "https://git.bioconductor.org/packages/annotation",
"git_branch": "RELEASE_3_17",
"git_last_commit": "4568557",
"git_last_commit_date": "2023-06-28",
"Date/Publication": "2023-06-30",
"source.ver": "src/contrib/annotation_1.24.1.tar.gz",
"vignettes": [
"vignettes/annotation/inst/doc/Annotating_Genomic_Ranges.html",
"vignettes/annotation/inst/doc/Annotation_Resources.html"
],
"vignetteTitles": [
"Annotating Genomic Ranges",
"Genomic Annotation Resources"
],
"hasREADME": false,
"hasNEWS": false,
"hasINSTALL": false,
"hasLICENSE": false,
"Rfiles": [
"vignettes/annotation/inst/doc/Annotating_Genomic_Ranges.R",
"vignettes/annotation/inst/doc/Annotation_Resources.R"
],
"dependencyCount": "144",
"Rank": 8
},
"arrays": {
"Package": "arrays",
"Version": "1.26.0",
"Depends": [
"R (>= 3.0.0)"
],
"Suggests": [
"affy",
"limma",
"hgfocuscdf",
"knitr",
"rmarkdown",
"BiocStyle"
],
"License": "Artistic-2.0",
"MD5sum": "009ef917ebc047246b8c62c48e02a237",
"NeedsCompilation": "no",
"Title": "Using Bioconductor for Microarray Analysis",
"Description": "Using Bioconductor for Microarray Analysis workflow",
"biocViews": [
"BasicWorkflow",
"Workflow"
],
"Author": "Bioconductor Package Maintainer [aut, cre]",
"Maintainer": "Bioconductor Package Maintainer <maintainer@bioconductor.org>",
"VignetteBuilder": "knitr",
"git_url": "https://git.bioconductor.org/packages/arrays",
"git_branch": "RELEASE_3_17",
"git_last_commit": "9981a8c",
"git_last_commit_date": "2023-04-25",
"Date/Publication": "2023-04-28",
"source.ver": "src/contrib/arrays_1.26.0.tar.gz",
"vignettes": [
"vignettes/arrays/inst/doc/arrays.html"
],
"vignetteTitles": [
"Using Bioconductor for Microarray Analysis"
],
"hasREADME": false,
"hasNEWS": false,
"hasINSTALL": false,
"hasLICENSE": false,
"Rfiles": [
"vignettes/arrays/inst/doc/arrays.R"
],
"dependencyCount": "0",
"Rank": 13
},
"maEndToEnd": {
"Package": "maEndToEnd",
"Version": "2.20.0",
"Depends": [
"R (>= 3.5.0)",
"Biobase",
"oligoClasses",
"ArrayExpress",
"pd.hugene.1.0.st.v1",
"hugene10sttranscriptcluster.db",
"oligo",
"arrayQualityMetrics",
"limma",
"topGO",
"ReactomePA",
"clusterProfiler",
"gplots",
"ggplot2",
"geneplotter",
"pheatmap",
"RColorBrewer",
"dplyr",
"tidyr",
"stringr",
"matrixStats",
"genefilter",
"openxlsx",
"Rgraphviz",
"enrichplot"
],
"Suggests": [
"BiocStyle",
"knitr",
"devtools",
"rmarkdown"
],
"License": "MIT + file LICENSE",
"NeedsCompilation": "no",
"Title": "An end to end workflow for differential gene expression using Affymetrix microarrays",
"Description": "In this article, we walk through an end-to-end Affymetrix microarray differential expression workflow using Bioconductor packages. This workflow is directly applicable to current \"Gene\" type arrays, e.g. the HuGene or MoGene arrays, but can easily be adapted to similar platforms. The data analyzed here is a typical clinical microarray data set that compares inflamed and non-inflamed colon tissue in two disease subtypes. For each disease, the differential gene expression between inflamed- and non-inflamed colon tissue was analyzed. We will start from the raw data CEL files, show how to import them into a Bioconductor ExpressionSet, perform quality control and normalization and finally differential gene expression (DE) analysis, followed by some enrichment analysis.",
"biocViews": [
"GeneExpressionWorkflow",
"Workflow"
],
"Author": "Bernd Klaus [aut], Stefanie Reisenauer [aut, cre]",
"Maintainer": "Stefanie Reisenauer <steffi.reisenauer@tum.de>",
"URL": "https://www.bioconductor.org/help/workflows/",
"VignetteBuilder": "knitr",
"Rank": 10
}
}

View file

@ -0,0 +1,553 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" dir="ltr" lang="en-US"
prefix="og: http://ogp.me/ns#">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Language" content="en-us" />
<meta name="robots" content="all" />
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-WJMEEH1J58"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-WJMEEH1J58');
</script>
<script type="text/javascript" src="/js/jquery.js"></script>
<script src="/js/jquery.tools.min.js"></script>
<title>Bioconductor - Release Announcements</title>
<link rel="stylesheet" type="text/css" href="/style/bioconductor.css" media="screen" />
<link rel="SHORTCUT ICON" type="image/x-icon" href="/favicon.ico" />
<link rel="ICON" type="image/x-icon" href="/favicon.ico" />
<script type="text/javascript" src="/js/bioconductor.js"></script>
<script type="text/javascript" src="/js/jquery.corner.js"></script>
<script type="text/javascript" src="/js/jquery.timeago.js"></script>
<script type="text/javascript" src="/js/bioc-style.js"></script>
<script type="text/javascript" src="/js/versions.js"></script>
</head>
<body>
<a name="top"></a>
<!-- a few hooks for screen readers -->
<a href="#site-navigation" title="Jump to site nav"></a>
<a href="#section-navigation" title="Jump to section nav"></a>
<a href="#site-map" title="Jump to site map"></a>
<div id="SiteContainer" class="SiteContainer">
<div id="PageContent" class="PageContent WithRightRail">
<div id="PageBreadcrumbs" class="PageBreadcrumbs">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/about/">About</a></li>
<li>Release Announcements</li>
</ul>
</div>
<h1 id="bioconductor-releases">Bioconductor releases</h1>
<p>Each <em>Bioconductor</em> release is designed to work with a specific
version of <em>R</em>. The following table summarizes the relationship, and
links to packages designed to work with the corresponding <em>R</em> /
<em>Bioconductor</em> version.</p>
<p><em>Bioconductor</em> versions are linked to their release announcement (when
available). Release announcements summarize new package additions,
updates to existing packages, and package removals.</p>
<table>
<thead>
<tr>
<th style="text-align: left">Release</th>
<th style="text-align: left">Date</th>
<th style="text-align: right">Software packages</th>
<th style="text-align: left">R</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_17_release">3.17</a></td>
<td style="text-align: left">April 26, 2023</td>
<td style="text-align: right"><a href="/packages/3.17/">2230</a></td>
<td style="text-align: left">4.3</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_16_release">3.16</a></td>
<td style="text-align: left">November 2, 2022</td>
<td style="text-align: right"><a href="/packages/3.16/">2183</a></td>
<td style="text-align: left">4.2</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_15_release">3.15</a></td>
<td style="text-align: left">April 27, 2022</td>
<td style="text-align: right"><a href="/packages/3.15/">2140</a></td>
<td style="text-align: left">4.2</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_14_release">3.14</a></td>
<td style="text-align: left">October 27, 2021</td>
<td style="text-align: right"><a href="/packages/3.14/">2083</a></td>
<td style="text-align: left">4.1</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_13_release">3.13</a></td>
<td style="text-align: left">May 20, 2021</td>
<td style="text-align: right"><a href="/packages/3.13/">2042</a></td>
<td style="text-align: left">4.1</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_12_release">3.12</a></td>
<td style="text-align: left">October 28, 2020</td>
<td style="text-align: right"><a href="/packages/3.12/">1974</a></td>
<td style="text-align: left">4.0</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_11_release">3.11</a></td>
<td style="text-align: left">April 28, 2020</td>
<td style="text-align: right"><a href="/packages/3.11/">1903</a></td>
<td style="text-align: left">4.0</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_10_release">3.10</a></td>
<td style="text-align: left">October 30, 2019</td>
<td style="text-align: right"><a href="/packages/3.10/">1823</a></td>
<td style="text-align: left">3.6</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_9_release">3.9</a></td>
<td style="text-align: left">May 3, 2019</td>
<td style="text-align: right"><a href="/packages/3.9/">1741</a></td>
<td style="text-align: left">3.6</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_8_release">3.8</a></td>
<td style="text-align: left">October 31, 2018</td>
<td style="text-align: right"><a href="/packages/3.8/">1649</a></td>
<td style="text-align: left">3.5</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_7_release">3.7</a></td>
<td style="text-align: left">May 1, 2018</td>
<td style="text-align: right"><a href="/packages/3.7/">1560</a></td>
<td style="text-align: left">3.5</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_6_release">3.6</a></td>
<td style="text-align: left">October 31, 2017</td>
<td style="text-align: right"><a href="/packages/3.6/">1473</a></td>
<td style="text-align: left">3.4</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_5_release">3.5</a></td>
<td style="text-align: left">April 25, 2017</td>
<td style="text-align: right"><a href="/packages/3.5/">1383</a></td>
<td style="text-align: left">3.4</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_4_release">3.4</a></td>
<td style="text-align: left">October 18, 2016</td>
<td style="text-align: right"><a href="/packages/3.4/">1296</a></td>
<td style="text-align: left">3.3</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_3_release">3.3</a></td>
<td style="text-align: left">May 4, 2016</td>
<td style="text-align: right"><a href="/packages/3.3/">1211</a></td>
<td style="text-align: left">3.3</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_2_release">3.2</a></td>
<td style="text-align: left">October 14, 2015</td>
<td style="text-align: right"><a href="/packages/3.2/">1104</a></td>
<td style="text-align: left">3.2</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_1_release">3.1</a></td>
<td style="text-align: left">April 17, 2015</td>
<td style="text-align: right"><a href="/packages/3.1/">1024</a></td>
<td style="text-align: left">3.2</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_3_0_release">3.0</a></td>
<td style="text-align: left">October 14, 2014</td>
<td style="text-align: right"><a href="/packages/3.0/">934</a></td>
<td style="text-align: left">3.1</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_14_release">2.14</a></td>
<td style="text-align: left">April 14, 2014</td>
<td style="text-align: right"><a href="/packages/2.14/">824</a></td>
<td style="text-align: left">3.1</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_13_release">2.13</a></td>
<td style="text-align: left">October 15, 2013</td>
<td style="text-align: right"><a href="/packages/2.13/">749</a></td>
<td style="text-align: left">3.0</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_12_release">2.12</a></td>
<td style="text-align: left">April 4, 2013</td>
<td style="text-align: right"><a href="/packages/2.12/">671</a></td>
<td style="text-align: left">3.0</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_11_release">2.11</a></td>
<td style="text-align: left">October 3, 2012</td>
<td style="text-align: right"><a href="/packages/2.11/">610</a></td>
<td style="text-align: left">2.15</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_10_release">2.10</a></td>
<td style="text-align: left">April 2, 2012</td>
<td style="text-align: right"><a href="/packages/2.10/">554</a></td>
<td style="text-align: left">2.15</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_9_release">2.9</a></td>
<td style="text-align: left">November 1, 2011</td>
<td style="text-align: right"><a href="/packages/2.9/">517</a></td>
<td style="text-align: left">2.14</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_8_release">2.8</a></td>
<td style="text-align: left">April 14, 2011</td>
<td style="text-align: right"><a href="/packages/2.8/">466</a></td>
<td style="text-align: left">2.13</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_7_release">2.7</a></td>
<td style="text-align: left">October 18, 2010</td>
<td style="text-align: right"><a href="/packages/2.7/">418</a></td>
<td style="text-align: left">2.12</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_6_release">2.6</a></td>
<td style="text-align: left">April 23, 2010</td>
<td style="text-align: right"><a href="/packages/2.6/">389</a></td>
<td style="text-align: left">2.11</td>
</tr>
<tr>
<td style="text-align: left"><a href="/news/bioc_2_5_release">2.5</a></td>
<td style="text-align: left">October 28, 2009</td>
<td style="text-align: right"><a href="/packages/2.5/">352</a></td>
<td style="text-align: left">2.10</td>
</tr>
<tr>
<td style="text-align: left">2.4</td>
<td style="text-align: left">April 21, 2009</td>
<td style="text-align: right"><a href="/packages/2.4/BiocViews.html">320</a></td>
<td style="text-align: left">2.9</td>
</tr>
<tr>
<td style="text-align: left">2.3</td>
<td style="text-align: left">October 22, 2008</td>
<td style="text-align: right"><a href="/packages/2.3/BiocViews.html">294</a></td>
<td style="text-align: left">2.8</td>
</tr>
<tr>
<td style="text-align: left">2.2</td>
<td style="text-align: left">May 1, 2008</td>
<td style="text-align: right"><a href="/packages/2.2/BiocViews.html">260</a></td>
<td style="text-align: left">2.7</td>
</tr>
<tr>
<td style="text-align: left">2.1</td>
<td style="text-align: left">October 8, 2007</td>
<td style="text-align: right"><a href="/packages/2.1/BiocViews.html">233</a></td>
<td style="text-align: left">2.6</td>
</tr>
<tr>
<td style="text-align: left">2.0</td>
<td style="text-align: left">April 26, 2007</td>
<td style="text-align: right"><a href="/packages/2.0/BiocViews.html">214</a></td>
<td style="text-align: left">2.5</td>
</tr>
<tr>
<td style="text-align: left">1.9</td>
<td style="text-align: left">October 4, 2006</td>
<td style="text-align: right"><a href="/packages/1.9/BiocViews.html">188</a></td>
<td style="text-align: left">2.4</td>
</tr>
<tr>
<td style="text-align: left">1.8</td>
<td style="text-align: left">April 27, 2006</td>
<td style="text-align: right"><a href="/packages/1.8/BiocViews.html">172</a></td>
<td style="text-align: left">2.3</td>
</tr>
<tr>
<td style="text-align: left">1.7</td>
<td style="text-align: left">October 14, 2005</td>
<td style="text-align: right"><a href="/packages/bioc/1.7/src/contrib/html/">141</a></td>
<td style="text-align: left">2.2</td>
</tr>
<tr>
<td style="text-align: left">1.6</td>
<td style="text-align: left">May 18, 2005</td>
<td style="text-align: right"><a href="/packages/bioc/1.6/src/contrib/html/">123</a></td>
<td style="text-align: left">2.1</td>
</tr>
<tr>
<td style="text-align: left">1.5</td>
<td style="text-align: left">October 25, 2004</td>
<td style="text-align: right"><a href="/packages/bioc/1.5/src/contrib/html/">100</a></td>
<td style="text-align: left">2.0</td>
</tr>
<tr>
<td style="text-align: left">1.4</td>
<td style="text-align: left">May 17, 2004</td>
<td style="text-align: right">81</td>
<td style="text-align: left">1.9</td>
</tr>
<tr>
<td style="text-align: left">1.3</td>
<td style="text-align: left">October 30, 2003</td>
<td style="text-align: right">49</td>
<td style="text-align: left">1.8</td>
</tr>
<tr>
<td style="text-align: left">1.2</td>
<td style="text-align: left">May 29, 2003</td>
<td style="text-align: right">30</td>
<td style="text-align: left">1.7</td>
</tr>
<tr>
<td style="text-align: left">1.1</td>
<td style="text-align: left">November 19, 2002</td>
<td style="text-align: right">20</td>
<td style="text-align: left">1.6</td>
</tr>
<tr>
<td style="text-align: left">1.0</td>
<td style="text-align: left">May 1, 2002</td>
<td style="text-align: right">15</td>
<td style="text-align: left">1.5</td>
</tr>
</tbody>
</table>
</div>
<div id="RightRail" class="RightRail">
<a name="section-navigation"></a> <!-- accessibility anchor -->
<ul class="section_nav">
<li><a href="/about/community-advisory-board/">Advisory Board -- Community</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/scientific-advisory-board/">Advisory Board -- Scientific</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/technical-advisory-board/">Advisory Board -- Technical</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/annual-reports/">Annual Reports</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/awards/">BiocAwards</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/code-of-conduct/">Code of Conduct Policy</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/core-team/">Core Team</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/european-bioconductor-society/">European Bioconductor Society</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/logo/">Logos</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/mirrors/">Mirrors</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/package-reviewers/">Package Reviewers</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/related-projects/">Related Projects</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/release-announcements/">Release Announcements</a></li>
</ul>
<ul class="section_nav">
<li><a href="/about/removed-packages/">Removed Packages</a></li>
</ul>
</div>
<div id="SiteGlobalFooter" class="SiteGlobalFooter">
<span class="global_footer_logo_bc noprint"><img src="/images/logo_global_footer_bioconductor.gif" height="51" width="154" alt=""/></span>
<div class="attribution_copyright">
<p>Contact us: <a href="https://support.bioconductor.org/">support.bioconductor.org</a></p>
<p>Copyright &copy; 2003 - 2023, Bioconductor</p>
</div>
<a name="site-map"></a> <!-- accessibility anchor -->
<ul class="footer_nav_list noprint">
<li class="footer_nav_list_element footer_nav_list_element_0">
<b><a href="/index.html">Home</a></b>
</li>
<li class="footer_nav_list_element footer_nav_list_element_1">
<ul>
<li><b><a href="/install/index.html">Install</a></b></li>
<li><a href="/install/index.html#install-R">Install R</a></li>
<li><a href="/install/index.html#find-bioconductor-packages">Find Bioconductor Packages</a></li>
<li><a href="/install/index.html#install-bioconductor-packages">Install Bioconductor Packages</a></li>
<li><a href="/install/index.html#update-bioconductor-packages">Update Bioconductor Packages</a></li>
</ul>
</li>
<li class="footer_nav_list_element footer_nav_list_element_2">
<ul>
<li><b><a href="/help/index.html">Help</a></b></li>
<li><a href="/packages/release/workflows/">Workflows</a></li>
<li><a href="/help/package-vignettes/">Package Vignettes</a></li>
<li><a href="/help/faq/">FAQ</a></li>
<li><a href="/help/support/">Support</a></li>
<li><a href="http://cran.r-project.org/">Using R</a></li>
<li><a href="/help/course-materials/">Courses</a></li>
<li><a href="/help/publications/">Publications</a></li>
<li><a href="/help/docker/">Docker Images</a></li>
<li><a href="https://anvil.bioconductor.org/">Bioc in AnVIL</a></li>
<li><a href="/help/community/">Community Resources</a></li>
</ul>
</li>
<li class="footer_nav_list_element footer_nav_list_element_3">
<ul>
<li><b><a href="/developers/index.html">Developers</a></b></li>
<li><a href="https://contributions.bioconductor.org/develop-overview.html">Package Guidelines</a></li>
<li><a href="https://contributions.bioconductor.org/submission-overview.html">Package Submission</a></li>
<li><a href="/developers/release-schedule/">Release Schedule</a></li>
<li><a href="https://contributions.bioconductor.org/git-version-control.html">Source Control</a></li>
</ul>
</li>
<li class="footer_nav_list_element footer_nav_list_element_4">
<ul>
<li><b><a href="/about/index.html">About</a></b></li>
<li><a href="/about/annual-reports/">Annual Reports</a></li>
<li><a href="/about/core-team/">Core Team</a></li>
<li><a href="/about/mirrors/">Mirrors</a></li>
<li><a href="/about/related-projects/">Related Projects</a></li>
<li><a href="/about/code-of-conduct/">Code of Conduct</a></li>
</ul>
</li>
</ul>
<br style="clear:both"/>
&nbsp;
</div>
<div id="SiteMasthead" class="SiteMasthead">
<a name="site-navigation"></a> <!-- accessibility anchor -->
<span class="logo_vanity_bar noprint"></span>
<a href="/">
<img src="/images/logo_bioconductor.gif" border="0" class="masthead_logo" height="78" width="260" alt="Bioconductor - open source software for bioinformatics"/>
</a>
<div id="SiteMastheadRight" class="SiteMastheadRight">
<div id="SiteMastheadRightBackground" class="SiteMastheadRightBackground">
<a name="site-search"></a> <!-- accessibility anchor -->
<form class="site_search" id="search_form" method="GET"
action="/help/search/index.html">
Search: <input id="q" name="q" />
</form>
<ul class="masthead_nav noprint">
<li class="masthead_nav_element" id="masthead_nav_element_1">
<a href="/">Home</a>
</li>
<li class="masthead_nav_element" id="masthead_nav_element_2">
<a href="/install/">Install</a>
</li>
<li class="masthead_nav_element" id="masthead_nav_element_3">
<a href="/help/">Help</a>
</li>
<li class="masthead_nav_element" id="masthead_nav_element_4">
<a href="/developers/">Developers</a>
</li>
<li class="masthead_nav_element" id="masthead_nav_element_5">
<a href="/about/">About</a>
</li>
</ul>
</div>
</div>
</div>
</div>
</body>
</html>

View file

@ -0,0 +1,501 @@
# Copyright (C) 2022-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from pathlib import Path
from unittest.mock import Mock
import pytest
from requests_mock.mocker import Mocker as RequestsMocker
from swh.lister.bioconductor.lister import BioconductorLister
from swh.scheduler.interface import SchedulerInterface
# Base URL of the Bioconductor website; the tests below expect a lister built
# without an explicit URL to expose this value as ``lister.url``.
BIOCONDUCTOR_URL = "https://www.bioconductor.org"
@pytest.fixture
def packages_json1(datadir):
    """Return the saved ``3.17-bioc-packages.json`` index and empty headers.

    The ``(text, headers)`` pair is meant to be passed to ``requests_mock``
    as the body and response headers of a mocked package index.
    """
    fixture_path = Path(datadir, "https_bioconductor.org", "3.17-bioc-packages.json")
    # Decode explicitly as UTF-8 so the fixture content does not depend on the
    # locale of the machine running the tests.
    return fixture_path.read_text(encoding="utf-8"), {}
@pytest.fixture
def packages_json2(datadir):
    """Return the saved ``3.17-workflows-packages.json`` index and empty headers.

    The ``(text, headers)`` pair is meant to be passed to ``requests_mock``
    as the body and response headers of a mocked package index.
    """
    fixture_path = Path(
        datadir, "https_bioconductor.org", "3.17-workflows-packages.json"
    )
    # Decode explicitly as UTF-8 so the fixture content does not depend on the
    # locale of the machine running the tests.
    return fixture_path.read_text(encoding="utf-8"), {}
@pytest.fixture
def packages_txt1(datadir):
    """Return the saved ``1.17-PACKAGES`` file content and empty headers.

    The tests serve this text as the ``PACKAGES`` index of release 1.7; the
    ``(text, headers)`` pair is shaped for ``requests_mock``.
    """
    fixture_path = Path(datadir, "https_bioconductor.org", "1.17-PACKAGES")
    # Decode explicitly as UTF-8 so the fixture content does not depend on the
    # locale of the machine running the tests.
    return fixture_path.read_text(encoding="utf-8"), {}
@pytest.fixture
def packages_txt2(datadir):
    """Return the saved ``2.2-PACKAGES`` file content and empty headers.

    The tests serve this text as the ``bioc`` ``PACKAGES`` index of release
    2.2; the ``(text, headers)`` pair is shaped for ``requests_mock``.
    """
    fixture_path = Path(datadir, "https_bioconductor.org", "2.2-PACKAGES")
    # Decode explicitly as UTF-8 so the fixture content does not depend on the
    # locale of the machine running the tests.
    return fixture_path.read_text(encoding="utf-8"), {}
@pytest.fixture(autouse=True)
def mock_release_announcements(datadir, requests_mock):
    """Mock the release-announcements page for every test of this module.

    The saved HTML page is registered on ``requests_mock`` so that any GET on
    the release-announcements URL is answered from the data directory.
    """
    page = Path(datadir, "https_bioconductor.org", "about", "release-announcements")
    requests_mock.get(
        "https://www.bioconductor.org/about/release-announcements",
        # Decode explicitly as UTF-8 so the page content does not depend on
        # the locale of the machine running the tests.
        text=page.read_text(encoding="utf-8"),
        headers={},
    )
def test_bioconductor_incremental_listing(
    swh_scheduler, requests_mock, mocker, packages_json1, packages_json2
):
    """Run an incremental listing of release 3.17 and check what gets recorded.

    The ``bioc`` and ``workflows`` package indexes are both mocked.  The test
    then checks the run statistics, the origins stored in the scheduler, the
    ``packages`` loader arguments attached to them and the persisted state.
    """
    lister = BioconductorLister(
        scheduler=swh_scheduler,
        releases=["3.17"],
        categories=["bioc", "workflows"],
        incremental=True,
    )
    assert lister.url == BIOCONDUCTOR_URL

    lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")

    # One mocked JSON index per requested category.
    mocked_indexes = {"bioc": packages_json1, "workflows": packages_json2}
    for category, (body, response_headers) in mocked_indexes.items():
        index_url = (
            "https://www.bioconductor.org/packages/"
            f"json/3.17/{category}/packages.json"
        )
        requests_mock.get(index_url, text=body, headers=response_headers)

    status = lister.run()
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    lister_state = lister.get_state_from_scheduler()

    assert status.pages == 3  # 2 categories for 3.17 + 1 None page
    # annotation pkg origin is in 2 categories
    # and we collect git origins as well
    assert status.origins == 6
    assert lister.get_origins_from_page.call_count == 3

    listed_urls = [origin.url for origin in scheduler_origins]
    assert listed_urls == [
        "https://git.bioconductor.org/packages/annotation",
        "https://git.bioconductor.org/packages/arrays",
        "https://git.bioconductor.org/packages/variants",
        "https://www.bioconductor.org/packages/annotation",
        "https://www.bioconductor.org/packages/arrays",
        "https://www.bioconductor.org/packages/variants",
    ]

    # Expected ``packages`` loader argument of each origin that carries one.
    annotation_packages = {
        "3.17/bioc/1.24.1": {
            "package": "annotation",
            "version": "1.24.1",
            "release": "3.17",
            "category": "bioc",
            "tar_url": (
                "https://www.bioconductor.org/packages/3.17/"
                "bioc/src/contrib/annotation_1.24.1.tar.gz"
            ),
            "checksums": {"md5": "4cb4db8807acb2e164985636091faa93"},
            "last_update_date": "2023-06-30",
        },
        "3.17/workflows/1.24.1": {
            "package": "annotation",
            "version": "1.24.1",
            "release": "3.17",
            "category": "workflows",
            "tar_url": (
                "https://www.bioconductor.org/packages/3.17/"
                "workflows/src/contrib/annotation_1.24.1.tar.gz"
            ),
            "checksums": {"md5": "4cb4db8807acb2e164985636091faa93"},
            "last_update_date": "2023-06-30",
        },
    }
    arrays_packages = {
        "3.17/workflows/1.26.0": {
            "package": "arrays",
            "version": "1.26.0",
            "release": "3.17",
            "category": "workflows",
            "tar_url": (
                "https://www.bioconductor.org/packages/3.17/"
                "workflows/src/contrib/arrays_1.26.0.tar.gz"
            ),
            "checksums": {"md5": "009ef917ebc047246b8c62c48e02a237"},
            "last_update_date": "2023-04-28",
        }
    }
    variants_packages = {
        "3.17/bioc/1.24.0": {
            "package": "variants",
            "version": "1.24.0",
            "release": "3.17",
            "category": "bioc",
            "tar_url": (
                "https://www.bioconductor.org/packages/3.17/"
                "bioc/src/contrib/variants_1.24.0.tar.gz"
            ),
            "checksums": {"md5": "38f2c00b73e1a695f5ef4c9b4a728923"},
            "last_update_date": "2023-04-28",
        }
    }

    listed_packages = [
        origin.extra_loader_arguments["packages"]
        for origin in scheduler_origins
        if "packages" in origin.extra_loader_arguments
    ]
    assert listed_packages == [
        annotation_packages,
        arrays_packages,
        variants_packages,
    ]

    assert lister_state.package_versions == {
        "annotation": {"3.17/workflows/1.24.1", "3.17/bioc/1.24.1"},
        "arrays": {"3.17/workflows/1.26.0"},
        "variants": {"3.17/bioc/1.24.0"},
    }
@pytest.mark.parametrize("status_code", [400, 500, 404])
def test_bioconductor_lister_http_error(
    swh_scheduler: SchedulerInterface,
    requests_mock: RequestsMocker,
    packages_json1,
    status_code: int,
):
    """Check that an HTTP error on one package index does not stop the listing.

    The ``workflows`` index of release 3.8 answers with ``status_code`` while
    the ``bioc`` index is served normally; only the ``bioc`` packages are then
    expected in the scheduler and in the lister state.
    """
    # Index answering with the parametrized error status.
    requests_mock.get(
        "https://www.bioconductor.org/packages/json/3.8/workflows/packages.json",
        status_code=status_code,
        text="Something went wrong",
    )
    # Index served normally from the fixture.
    bioc_body, bioc_headers = packages_json1
    requests_mock.get(
        "https://www.bioconductor.org/packages/json/3.8/bioc/packages.json",
        text=bioc_body,
        headers=bioc_headers,
    )

    lister = BioconductorLister(
        scheduler=swh_scheduler,
        releases=["3.8"],
        categories=["workflows", "bioc"],
        incremental=True,
    )

    # On facing HTTP errors, it should continue
    # to crawl other releases/categories
    stats = lister.run()

    # 1 None page + 3.8 bioc page
    assert stats.pages == 2
    # Both packages have git and bioconductor urls.
    assert stats.origins == 4

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    listed_urls = [origin.url for origin in scheduler_origins]
    assert listed_urls == [
        "https://git.bioconductor.org/packages/annotation",
        "https://git.bioconductor.org/packages/variants",
        "https://www.bioconductor.org/packages/annotation",
        "https://www.bioconductor.org/packages/variants",
    ]

    # Expected ``packages`` loader argument of each origin that carries one.
    annotation_packages = {
        "3.8/bioc/1.24.1": {
            "package": "annotation",
            "version": "1.24.1",
            "release": "3.8",
            "category": "bioc",
            "tar_url": (
                "https://www.bioconductor.org/packages/"
                "3.8/bioc/src/contrib/annotation_1.24.1.tar.gz"
            ),
            "checksums": {"md5": "4cb4db8807acb2e164985636091faa93"},
            "last_update_date": "2023-06-30",
        }
    }
    variants_packages = {
        "3.8/bioc/1.24.0": {
            "package": "variants",
            "version": "1.24.0",
            "release": "3.8",
            "category": "bioc",
            "tar_url": (
                "https://www.bioconductor.org/packages/"
                "3.8/bioc/src/contrib/variants_1.24.0.tar.gz"
            ),
            "checksums": {"md5": "38f2c00b73e1a695f5ef4c9b4a728923"},
            "last_update_date": "2023-04-28",
        }
    }

    listed_packages = [
        origin.extra_loader_arguments["packages"]
        for origin in scheduler_origins
        if "packages" in origin.extra_loader_arguments
    ]
    assert listed_packages == [annotation_packages, variants_packages]

    lister_state = lister.get_state_from_scheduler()
    assert lister_state.package_versions == {
        "annotation": {"3.8/bioc/1.24.1"},
        "variants": {"3.8/bioc/1.24.0"},
    }
def test_bioconductor_fetch_versions(swh_scheduler: SchedulerInterface):
    """Check the releases exposed by a lister built without ``releases``.

    The expected list runs from 1.5 to 3.17 in ascending order; it is written
    out below one major series at a time.
    """
    lister = BioconductorLister(scheduler=swh_scheduler)

    series_1 = ["1.5", "1.6", "1.7", "1.8", "1.9"]
    series_2 = [
        "2.0",
        "2.1",
        "2.2",
        "2.3",
        "2.4",
        "2.5",
        "2.6",
        "2.7",
        "2.8",
        "2.9",
        "2.10",
        "2.11",
        "2.12",
        "2.13",
        "2.14",
    ]
    series_3 = [
        "3.0",
        "3.1",
        "3.2",
        "3.3",
        "3.4",
        "3.5",
        "3.6",
        "3.7",
        "3.8",
        "3.9",
        "3.10",
        "3.11",
        "3.12",
        "3.13",
        "3.14",
        "3.15",
        "3.16",
        "3.17",
    ]

    assert lister.releases == series_1 + series_2 + series_3
def test_bioconductor_lister_parse_packages_txt(
    swh_scheduler: SchedulerInterface, packages_json1, packages_txt1
):
    """Exercise ``BioconductorLister.parse_packages`` on two fixture texts.

    The text of ``packages_json1`` is checked through the ``Version`` of each
    parsed package only, whereas the result for ``packages_txt1`` is compared
    in full.
    """
    lister = BioconductorLister(
        scheduler=swh_scheduler, releases=["3.8"], categories=["bioc"]
    )

    first_text, _ = packages_json1
    first_parsed = lister.parse_packages(first_text)
    versions_by_name = {
        name: metadata["Version"] for name, metadata in first_parsed.items()
    }
    assert versions_by_name == {
        "annotation": "1.24.1",
        "maEndToEnd": "2.20.0",
        "variants": "1.24.0",
    }

    second_text, _ = packages_txt1
    second_parsed = lister.parse_packages(second_text)
    expected_metadata = {
        "affylmGUI": {
            "Package": "affylmGUI",
            "Version": "1.4.0",
            "Depends": "limma, tcltk, affy",
            "Suggests": "tkrplot, affyPLM, R2HTML, xtable",
        },
        "affypdnn": {
            "Package": "affypdnn",
            "Version": "1.4.0",
            "Depends": "R (>= 1.9.0), affy (>= 1.5), affydata, hgu95av2probe",
        },
        "affyPLM": {
            "Package": "affyPLM",
            "Version": "1.6.0",
            "Depends": (
                "R (>= 2.0.0), affy (>= 1.5.0), affydata, "
                "Biobase, methods,\n gcrma"
            ),
        },
    }
    assert second_parsed == expected_metadata
def test_bioconductor_lister_old_releases(
    swh_scheduler, mocker, requests_mock, packages_txt1, packages_txt2
):
    """List old releases whose package indexes are ``PACKAGES`` text files.

    A first lister, built with ``incremental=True``, covers release 1.7 only.
    A second lister, built without the ``incremental`` flag, covers releases
    1.7 and 2.2; after it ran, the stored state is still expected to contain
    the release 1.7 entries only.
    """
    releases = ["1.7"]
    categories = ["workflows", "bioc"]

    index_1_7, headers_1_7 = packages_txt1
    requests_mock.get(
        "https://www.bioconductor.org/packages/bioc/1.7/src/contrib/PACKAGES",
        text=index_1_7,
        headers=headers_1_7,
    )
    index_2_2, headers_2_2 = packages_txt2
    requests_mock.get(
        "/packages/2.2/bioc/src/contrib/PACKAGES",
        text=index_2_2,
        headers=headers_2_2,
    )
    # The data indexes of release 2.2 answer 404.
    for data_category in ("experiment", "annotation"):
        requests_mock.get(
            f"/packages/2.2/data/{data_category}/src/contrib/PACKAGES",
            status_code=404,
        )

    # First run: release 1.7 only, incremental.
    lister = BioconductorLister(
        scheduler=swh_scheduler,
        releases=releases,
        categories=categories,
        incremental=True,
    )
    lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")

    stats = lister.run()
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    lister_state = lister.get_state_from_scheduler()

    assert stats.pages == 2  # 1.7 'bioc' + None page
    assert stats.origins == 3
    assert lister.get_origins_from_page.call_count == 2

    expected_origins = [
        "https://www.bioconductor.org/packages/affyPLM",
        "https://www.bioconductor.org/packages/affylmGUI",
        "https://www.bioconductor.org/packages/affypdnn",
    ]
    assert [origin.url for origin in scheduler_origins] == expected_origins

    expected_loader_packages = [
        {
            "1.7/bioc/1.6.0": {
                "package": "affyPLM",
                "version": "1.6.0",
                "release": "1.7",
                "category": "bioc",
                "tar_url": (
                    "https://www.bioconductor.org/packages/"
                    "bioc/1.7/src/contrib/Source/affyPLM_1.6.0.tar.gz"
                ),
            }
        },
        {
            "1.7/bioc/1.4.0": {
                "package": "affylmGUI",
                "version": "1.4.0",
                "release": "1.7",
                "category": "bioc",
                "tar_url": (
                    "https://www.bioconductor.org/packages/"
                    "bioc/1.7/src/contrib/Source/affylmGUI_1.4.0.tar.gz"
                ),
            }
        },
        {
            "1.7/bioc/1.4.0": {
                "package": "affypdnn",
                "version": "1.4.0",
                "release": "1.7",
                "category": "bioc",
                "tar_url": (
                    "https://www.bioconductor.org/packages/"
                    "bioc/1.7/src/contrib/Source/affypdnn_1.4.0.tar.gz"
                ),
            }
        },
    ]
    listed_packages = [
        origin.extra_loader_arguments["packages"] for origin in scheduler_origins
    ]
    assert listed_packages == expected_loader_packages

    assert lister_state.package_versions == {
        "affyPLM": {"1.7/bioc/1.6.0"},
        "affylmGUI": {"1.7/bioc/1.4.0"},
        "affypdnn": {"1.7/bioc/1.4.0"},
    }

    # Second run: releases 1.7 and 2.2, without the incremental flag.
    releases.append("2.2")
    lister = BioconductorLister(
        scheduler=swh_scheduler, releases=releases, categories=categories
    )

    stats = lister.run()
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    lister_state = lister.get_state_from_scheduler()

    # The release 2.2 origins are expected before the ones already listed.
    expected_origins = [
        "https://www.bioconductor.org/packages/ABarray",
        "https://www.bioconductor.org/packages/AnnotationDbi",
    ] + expected_origins
    assert [origin.url for origin in scheduler_origins] == expected_origins

    expected_loader_packages = [
        {
            "2.2/bioc/1.8.0": {
                "package": "ABarray",
                "version": "1.8.0",
                "release": "2.2",
                "category": "bioc",
                "tar_url": (
                    "https://www.bioconductor.org/packages/"
                    "2.2/bioc/src/contrib/ABarray_1.8.0.tar.gz"
                ),
            }
        },
        {
            "2.2/bioc/1.2.2": {
                "package": "AnnotationDbi",
                "version": "1.2.2",
                "release": "2.2",
                "category": "bioc",
                "tar_url": (
                    "https://www.bioconductor.org/packages/"
                    "2.2/bioc/src/contrib/AnnotationDbi_1.2.2.tar.gz"
                ),
            }
        },
    ] + expected_loader_packages
    listed_packages = [
        origin.extra_loader_arguments["packages"] for origin in scheduler_origins
    ]
    assert listed_packages == expected_loader_packages

    assert lister_state.package_versions == {
        "affyPLM": {"1.7/bioc/1.6.0"},
        "affypdnn": {"1.7/bioc/1.4.0"},
        "affylmGUI": {"1.7/bioc/1.4.0"},
    }

View file

@ -0,0 +1,81 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from unittest.mock import patch
from swh.lister.pattern import ListerStats
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the bioconductor ``ping`` task and expect it to answer ``"OK"``."""
    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.bioconductor.tasks.ping"
    )
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == "OK"
@patch("swh.lister.bioconductor.tasks.BioconductorLister")
def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the full listing task and check how it drives the lister.

    ``BioconductorLister`` is patched in the tasks module, so only the calls
    to ``from_configfile`` and ``run`` are verified.
    """
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    task_kwargs = {"url": "https://www.bioconductor.org"}
    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.bioconductor.tasks.BioconductorListerTask",
        kwargs=task_kwargs,
    )
    assert async_result
    async_result.wait()
    assert async_result.successful()

    lister.from_configfile.assert_called_once_with(**task_kwargs)
    lister.run.assert_called_once_with()
@patch("swh.lister.bioconductor.tasks.BioconductorLister")
def test_incremental_listing(
    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
    """Send the incremental listing task and check how it drives the lister.

    ``BioconductorLister`` is patched in the tasks module; the lister is
    expected to be configured with the task kwargs plus ``incremental=True``.
    """
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    task_kwargs = {"url": "https://www.bioconductor.org"}
    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.bioconductor.tasks.BioconductorIncrementalListerTask",
        kwargs=task_kwargs,
    )
    assert async_result
    async_result.wait()
    assert async_result.successful()

    # The flag is not part of what was sent with the task.
    lister.from_configfile.assert_called_once_with(**task_kwargs, incremental=True)
    lister.run.assert_called_once_with()
@patch("swh.lister.bioconductor.tasks.BioconductorLister")
def test_full_listing_with_params(
    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
    """Send the full listing task with extra parameters.

    ``BioconductorLister`` is patched in the tasks module; every keyword
    argument sent with the task is expected to reach ``from_configfile``
    unchanged.
    """
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    task_kwargs = {
        "url": "https://www.bioconductor.org",
        "instance": "bioconductor-test",
        "releases": ["3.7"],
        "categories": ["bioc", "workflows"],
    }
    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.bioconductor.tasks.BioconductorListerTask",
        kwargs=task_kwargs,
    )
    assert async_result
    async_result.wait()
    assert async_result.successful()

    lister.from_configfile.assert_called_once_with(**task_kwargs)
    lister.run.assert_called_once_with()