Reimplement Bitbucket lister using new Lister API

The new lister supports both incremental and full listing.
It can query the Bitbucket API either anonymously or using HTTP basic
authentication. Bitbucket's rate limiting is not aggressive, and the lister handles it.
This commit is contained in:
tenma 2021-01-13 15:44:07 +01:00
parent 9fd91f007d
commit 565e7423e3
13 changed files with 627 additions and 1089 deletions

View file

@ -1,4 +1,5 @@
pytest
pytest-mock
requests_mock
sqlalchemy-stubs
testing.postgresql

View file

@ -1,14 +1,13 @@
# Copyright (C) 2019 the Software Heritage developers
# Copyright (C) 2019-2021 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def register():
from .lister import BitBucketLister
from .models import BitBucketModel
from .lister import BitbucketLister
return {
"models": [BitBucketModel],
"lister": BitBucketLister,
"models": [],
"lister": BitbucketLister,
"task_modules": ["%s.tasks" % __name__],
}

View file

@ -1,85 +1,201 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timezone
from dataclasses import asdict, dataclass
from datetime import datetime
import logging
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Iterator, List, Optional
from urllib import parse
import iso8601
from requests import Response
import requests
from tenacity.before_sleep import before_sleep_log
from swh.lister.bitbucket.models import BitBucketModel
from swh.lister.core.indexing_lister import IndexingHttpLister
from swh.lister.utils import throttling_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from .. import USER_AGENT
from ..pattern import CredentialsType, Lister
logger = logging.getLogger(__name__)
class BitBucketLister(IndexingHttpLister):
PATH_TEMPLATE = "/repositories?after=%s"
MODEL = BitBucketModel
@dataclass
class BitbucketListerState:
    """State of Bitbucket lister

    The lister converts this state to and from a plain dict through its
    ``state_to_dict`` / ``state_from_dict`` methods, where the datetime is
    written with ``isoformat()`` and read back with ``iso8601.parse_date``.
    """

    # None means no repository creation date has been recorded yet.
    last_repo_cdate: Optional[datetime] = None
    """Creation date and time of the last listed repository during an
    incremental pass"""
class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
"""List origins from Bitbucket using its REST API.
Bitbucket API has the following rate-limit configuration:
* 60 requests per hour for anonymous users
* 1000 requests per hour for authenticated users
The lister is working in anonymous mode by default but Bitbucket account
credentials can be provided to perform authenticated requests.
"""
LISTER_NAME = "bitbucket"
DEFAULT_URL = "https://api.bitbucket.org/2.0"
instance = "bitbucket"
default_min_bound = datetime.fromtimestamp(0, timezone.utc) # type: Any
INSTANCE = "bitbucket"
API_URL = "https://api.bitbucket.org/2.0/repositories"
def __init__(
self, url: str = None, override_config=None, per_page: int = 100
) -> None:
super().__init__(url=url, override_config=override_config)
per_page = self.config.get("per_page", per_page)
self,
scheduler: SchedulerInterface,
page_size: int = 1000,
incremental: bool = True,
credentials: CredentialsType = None,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
url=self.API_URL,
instance=self.INSTANCE,
)
self.PATH_TEMPLATE = "%s&pagelen=%s" % (self.PATH_TEMPLATE, per_page)
self.incremental = incremental
def get_model_from_repo(self, repo: Dict) -> Dict[str, Any]:
return {
"uid": repo["uuid"],
"indexable": iso8601.parse_date(repo["created_on"]),
"name": repo["name"],
"full_name": repo["full_name"],
"html_url": repo["links"]["html"]["href"],
"origin_url": repo["links"]["clone"][0]["href"],
"origin_type": repo["scm"],
self.url_params = {
"pagelen": page_size,
# only return needed JSON fields in bitbucket API responses
# (also prevent errors 500 when listing)
"fields": (
"next,values.links.clone.href,values.scm,values.updated_on,"
"values.created_on"
),
}
def get_next_target_from_response(self, response: Response) -> Optional[datetime]:
"""This will read the 'next' link from the api response if any
and return it as a datetime.
self.session = requests.Session()
self.session.headers.update(
{"Accept": "application/json", "User-Agent": USER_AGENT}
)
Args:
response (Response): requests' response from api call
if len(self.credentials) > 0:
if len(self.credentials) > 1:
logger.warning(
"Bitbucket lister support only one username:password"
" pair as of now. Will use the first one."
)
cred = self.credentials[0]
self.set_credentials(cred["username"], cred["password"])
Returns:
next date as a datetime
def state_from_dict(self, d: Dict[str, Any]) -> BitbucketListerState:
    """Build a lister state from its serialized dict form.

    Args:
        d: mapping of state field names to values; ``last_repo_cdate``
            may be absent, ``None`` or a date string accepted by
            ``iso8601.parse_date``.

    Returns:
        A ``BitbucketListerState`` whose ``last_repo_cdate`` is a parsed
        datetime, or ``None`` when no date was stored.
    """
    # Work on a shallow copy so the caller's dict keeps its original
    # (string) value instead of being overwritten with a datetime.
    fields = dict(d)
    last_repo_cdate = fields.get("last_repo_cdate")
    if last_repo_cdate is not None:
        fields["last_repo_cdate"] = iso8601.parse_date(last_repo_cdate)
    return BitbucketListerState(**fields)
def state_to_dict(self, state: BitbucketListerState) -> Dict[str, Any]:
    """Serialize a lister state into a plain dict.

    The dataclass is converted with ``asdict``; when ``last_repo_cdate`` is
    set, it is replaced by its ``isoformat()`` string.
    """
    serialized = asdict(state)
    cdate = serialized.get("last_repo_cdate")
    if cdate is None:
        # Nothing to convert: the date is unset.
        return serialized
    serialized["last_repo_cdate"] = cdate.isoformat()
    return serialized
def set_credentials(self, username: Optional[str], password: Optional[str]) -> None:
    """Set basic authentication headers with given credentials.

    The session's ``auth`` attribute is only assigned when both the
    username and the password are provided; otherwise it is left untouched.
    """
    if username is None or password is None:
        return
    self.session.auth = (username, password)
@throttling_retry(before_sleep=before_sleep_log(logger, logging.DEBUG))
def page_request(self, last_repo_cdate: str) -> requests.Response:
    """Fetch one page of repositories created after ``last_repo_cdate``.

    The ``after`` entry of ``self.url_params`` is set to the given value and
    a GET request is sent to ``self.url`` with those parameters. A non-200
    status is logged as a warning, then ``raise_for_status()`` is called on
    the response before it is returned.

    NOTE(review): the call is wrapped by ``throttling_retry``; presumably it
    retries on rate limiting - confirm in ``swh.lister.utils``.
    """
    params = self.url_params
    params["after"] = last_repo_cdate

    logger.debug("Fetching URL %s with params %s", self.url, params)
    resp = self.session.get(self.url, params=params)

    if resp.status_code != 200:
        logger.warning(
            "Unexpected HTTP status code %s on %s: %s",
            resp.status_code,
            resp.url,
            resp.content,
        )
    resp.raise_for_status()
    return resp
def get_pages(self) -> Iterator[List[Dict[str, Any]]]:
    """Yield successive pages of repositories from the Bitbucket API.

    Listing starts at ``"1970-01-01"`` or, for an incremental lister whose
    state holds a ``last_repo_cdate``, at that date in ISO format. After
    each page, the ``after`` query parameter of the response's ``next``
    link becomes the starting point of the following request.

    Listing stops when the response has no ``next`` link, or when that link
    carries no usable ``after`` parameter (a warning is logged in that case).

    Yields:
        The ``values`` list of each response body.
    """
    last_repo_cdate: str = "1970-01-01"
    if (
        self.incremental
        and self.state is not None
        and self.state.last_repo_cdate is not None
    ):
        last_repo_cdate = self.state.last_repo_cdate.isoformat()

    while True:
        body = self.page_request(last_repo_cdate).json()
        yield body["values"]

        next_page_url = body.get("next")
        if next_page_url is None:
            # last page
            break

        # Keep the raw URL string around so the warning below shows the
        # actual link rather than a parsed structure.
        query = parse.urlparse(next_page_url).query
        after_values = parse.parse_qs(query).get("after")
        if not after_values:
            # Covers both an empty query string and a query without an
            # "after" parameter; stop instead of failing with KeyError.
            logger.warning("Failed to parse url %s", next_page_url)
            break
        last_repo_cdate = after_values[0]
def get_origins_from_page(
self, page: List[Dict[str, Any]]
) -> Iterator[ListedOrigin]:
"""Convert a page of Bitbucket repositories into a list of ListedOrigins.
"""
body = response.json()
next_ = body.get("next")
if next_ is not None:
next_ = parse.urlparse(next_)
return iso8601.parse_date(parse.parse_qs(next_.query)["after"][0])
return None
assert self.lister_obj.id is not None
def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
repos = response.json()["values"]
return [self.get_model_from_repo(repo) for repo in repos]
for repo in page:
last_update = iso8601.parse_date(repo["updated_on"])
origin_url = repo["links"]["clone"][0]["href"]
origin_type = repo["scm"]
def request_uri(self, identifier: datetime) -> str: # type: ignore
identifier_str = parse.quote(identifier.isoformat())
return super().request_uri(identifier_str or "1970-01-01")
yield ListedOrigin(
lister_id=self.lister_obj.id,
url=origin_url,
visit_type=origin_type,
last_update=last_update,
)
def is_within_bounds(
    self, inner: int, lower: Optional[int] = None, upper: Optional[int] = None
) -> bool:
    """Tell whether ``inner`` lies between ``lower`` and ``upper``, inclusive.

    A bound given as ``None`` is not checked; with both bounds ``None`` the
    result is always ``True``.
    """
    # values are expected to be datetimes
    if lower is None:
        return True if upper is None else inner <= upper  # type: ignore
    if upper is None:
        return inner >= lower
    return lower <= inner <= upper
def commit_page(self, page: List[Dict[str, Any]]) -> None:
    """Update the currently stored state using the latest listed page.

    Only applies to incremental listing. The ``created_on`` value of the
    last repository of the page is parsed, and replaces
    ``self.state.last_repo_cdate`` when that one is unset or older.

    Args:
        page: repositories of the page that was just listed; an empty
            page leaves the state unchanged.
    """
    # An empty page carries no creation date, and page[-1] would raise
    # IndexError, so there is nothing to record.
    if not self.incremental or not page:
        return

    last_repo_cdate = iso8601.parse_date(page[-1]["created_on"])
    if (
        self.state.last_repo_cdate is None
        or last_repo_cdate > self.state.last_repo_cdate
    ):
        self.state.last_repo_cdate = last_repo_cdate
def finalize(self) -> None:
    """Flag the lister as updated when this run advanced the listing.

    Only applies to incremental listing. The state is fetched from the
    scheduler, and ``self.updated`` is set to ``True`` when the creation
    date recorded by this run is set and is more recent than the one held
    by the scheduler (or the scheduler holds none).
    """
    if not self.incremental:
        return

    stored_state = self.get_state_from_scheduler()
    current_cdate = self.state.last_repo_cdate
    if current_cdate is None:
        # This run recorded no creation date: nothing to flag.
        return

    # Flag an update only if the creation date seen during the current run
    # is more recent than the one stored in the backend.
    stored_cdate = stored_state.last_repo_cdate
    if stored_cdate is None or current_cdate > stored_cdate:
        self.updated = True

View file

@ -1,16 +0,0 @@
# Copyright (C) 2017-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, DateTime, String
from swh.lister.core.models import IndexingModelBase
class BitBucketModel(IndexingModelBase):
    """a BitBucket repository

    Mapped to the ``bitbucket_repo`` table.
    """

    __tablename__ = "bitbucket_repo"
    # String primary key identifying the repository.
    uid = Column(String, primary_key=True)
    # Indexed, timezone-aware timestamp column.
    indexable = Column(DateTime(timezone=True), index=True)

View file

@ -1,53 +1,36 @@
# Copyright (C) 2017-2019 the Software Heritage developers
# Copyright (C) 2017-2021 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import random
from typing import Optional
from celery import group, shared_task
from celery import shared_task
from .lister import BitBucketLister
GROUP_SPLIT = 10000
from .lister import BitbucketLister
@shared_task(name=__name__ + ".IncrementalBitBucketLister")
def list_bitbucket_incremental(**lister_args):
    """Incremental update of the BitBucket forge

    Runs the lister from the last index known to its database, with no
    upper bound.
    """
    lister = BitBucketLister(**lister_args)
    start = lister.db_last_index()
    return lister.run(min_bound=start, max_bound=None)
def list_bitbucket_incremental(
    page_size: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
):
    """Incremental listing of the public Bitbucket repositories.

    Builds the lister from its configuration file in incremental mode,
    hands it the given credentials, runs it and returns the run result
    converted with ``.dict()``.
    """
    lister_kwargs = {"page_size": page_size, "incremental": True}
    lister = BitbucketLister.from_configfile(**lister_kwargs)
    lister.set_credentials(username, password)
    stats = lister.run()
    return stats.dict()
@shared_task(name=__name__ + ".RangeBitBucketLister")
def _range_bitbucket_lister(start, end, **lister_args):
    """Run a BitBucket lister restricted to the ``start``..``end`` bounds."""
    return BitBucketLister(**lister_args).run(min_bound=start, max_bound=end)
@shared_task(name=__name__ + ".FullBitBucketRelister", bind=True)
def list_bitbucket_full(self, split=None, **lister_args):
    """Full update of the BitBucket forge

    It's not to be called for an initial listing.

    Args:
        split: value handed to the lister's ``db_partition_indices``;
            ``GROUP_SPLIT`` is used when it is falsy.
        **lister_args: keyword arguments forwarded to ``BitBucketLister``
            and to every range subtask.

    Returns:
        The id of the spawned group of range subtasks, or ``None`` when
        there is nothing to list.
    """
    lister = BitBucketLister(**lister_args)
    ranges = lister.db_partition_indices(split or GROUP_SPLIT)
    if not ranges:
        self.log.info("Nothing to list")
        return

    random.shuffle(ranges)
    promise = group(
        _range_bitbucket_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
    )()
    # Each placeholder needs its own logging argument; a single tuple would
    # leave the second %s without a value and break message formatting.
    self.log.debug("%s OK (spawned %s subtasks)", self.name, len(ranges))
    try:
        promise.save()  # so that we can restore the GroupResult in tests
    except (NotImplementedError, AttributeError):
        self.log.info("Unable to call save_group with current result backend.")
    # FIXME: what to do in terms of return here?
    return promise.id
@shared_task(name=__name__ + ".FullBitBucketRelister")
def list_bitbucket_full(
    page_size: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
):
    """Full listing of the public Bitbucket repositories.

    Builds the lister from its configuration file with incremental mode
    disabled, hands it the given credentials, runs it and returns the run
    result converted with ``.dict()``.
    """
    lister_kwargs = {"page_size": page_size, "incremental": False}
    lister = BitbucketLister.from_configfile(**lister_kwargs)
    lister.set_credentials(username, password)
    stats = lister.run()
    return stats.dict()
@shared_task(name=__name__ + ".ping")

View file

@ -1,4 +1,4 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

View file

@ -0,0 +1,124 @@
{
"values": [{
"scm": "git",
"updated_on": "2014-11-16T23:19:16.674082+00:00",
"created_on": "2011-06-06T03:40:09.505792+00:00",
"slug": "xwork",
"links": {
"clone": [{
"href": "https://bitbucket.org/opensymphony/xwork.git"
}, {
"href": "git@bitbucket.org:opensymphony/xwork.git"
}]
}
}, {
"scm": "git",
"updated_on": "2013-08-16T05:17:12.385393+00:00",
"created_on": "2011-06-07T02:25:57.515877+00:00",
"slug": "webwork",
"links": {
"clone": [{
"href": "https://bitbucket.org/opensymphony/webwork.git"
}, {
"href": "git@bitbucket.org:opensymphony/webwork.git"
}]
}
}, {
"scm": "git",
"updated_on": "2017-02-05T20:25:16.398281+00:00",
"created_on": "2011-06-07T04:13:28.097554+00:00",
"slug": "propertyset",
"links": {
"clone": [{
"href": "https://bitbucket.org/opensymphony/propertyset.git"
}, {
"href": "git@bitbucket.org:opensymphony/propertyset.git"
}]
}
}, {
"scm": "git",
"updated_on": "2012-07-06T23:05:13.437602+00:00",
"created_on": "2011-06-07T04:15:47.909191+00:00",
"slug": "quartz",
"links": {
"clone": [{
"href": "https://bitbucket.org/opensymphony/quartz.git"
}, {
"href": "git@bitbucket.org:opensymphony/quartz.git"
}]
}
}, {
"scm": "git",
"updated_on": "2018-02-06T04:36:52.369420+00:00",
"created_on": "2011-06-16T09:16:27.957216+00:00",
"slug": "opup",
"links": {
"clone": [{
"href": "https://bitbucket.org/jwalton/opup.git"
}, {
"href": "git@bitbucket.org:jwalton/opup.git"
}]
}
}, {
"scm": "git",
"updated_on": "2017-03-19T16:09:30.336053+00:00",
"created_on": "2011-07-08T08:59:53.298617+00:00",
"slug": "git-scripts",
"links": {
"clone": [{
"href": "https://bitbucket.org/jwalton/git-scripts.git"
}, {
"href": "git@bitbucket.org:jwalton/git-scripts.git"
}]
}
}, {
"scm": "git",
"updated_on": "2015-10-15T17:35:06.978690+00:00",
"created_on": "2011-08-10T00:42:35.509559+00:00",
"slug": "git-tests",
"links": {
"clone": [{
"href": "https://bitbucket.org/evzijst/git-tests.git"
}, {
"href": "git@bitbucket.org:evzijst/git-tests.git"
}]
}
}, {
"scm": "git",
"updated_on": "2013-07-17T23:08:05.997544+00:00",
"created_on": "2011-08-10T03:48:05.820933+00:00",
"slug": "libgit2",
"links": {
"clone": [{
"href": "https://bitbucket.org/brodie/libgit2.git"
}, {
"href": "git@bitbucket.org:brodie/libgit2.git"
}]
}
}, {
"scm": "git",
"updated_on": "2013-10-10T23:43:15.183665+00:00",
"created_on": "2011-08-15T05:19:11.022316+00:00",
"slug": "git",
"links": {
"clone": [{
"href": "https://bitbucket.org/evzijst/git.git"
}, {
"href": "git@bitbucket.org:evzijst/git.git"
}]
}
}, {
"scm": "git",
"updated_on": "2013-06-12T22:42:52.654728+00:00",
"created_on": "2011-08-18T00:17:00.862842+00:00",
"slug": "streams-jira-delete-issue-plugin",
"links": {
"clone": [{
"href": "https://bitbucket.org/atlassian_tutorial/streams-jira-delete-issue-plugin.git"
}, {
"href": "git@bitbucket.org:atlassian_tutorial/streams-jira-delete-issue-plugin.git"
}]
}
}],
"next": "https://api.bitbucket.org/2.0/repositories?pagelen=10&after=2011-09-03T12%3A33%3A16.028393%2B00%3A00&fields=next%2Cvalues.links.clone.href%2Cvalues.slug%2Cvalues.scm%2Cvalues.updated_on%2Cvalues.created_on"
}

View file

@ -0,0 +1,123 @@
{
"values": [{
"scm": "git",
"updated_on": "2012-08-08T21:49:39.837528+00:00",
"created_on": "2011-09-03T12:33:16.028393+00:00",
"slug": "jreversepro",
"links": {
"clone": [{
"href": "https://bitbucket.org/puffnfresh/jreversepro.git"
}, {
"href": "git@bitbucket.org:puffnfresh/jreversepro.git"
}]
}
}, {
"scm": "git",
"updated_on": "2012-07-24T08:11:00.229299+00:00",
"created_on": "2011-09-08T01:43:21.182004+00:00",
"slug": "jira4-compat",
"links": {
"clone": [{
"href": "https://bitbucket.org/mrdon/jira4-compat.git"
}, {
"href": "git@bitbucket.org:mrdon/jira4-compat.git"
}]
}
}, {
"scm": "git",
"updated_on": "2019-03-29T16:07:35.214957+00:00",
"created_on": "2011-09-12T20:21:47.109184+00:00",
"slug": "chrome-confluence-activity-stream",
"links": {
"clone": [{
"href": "https://bitbucket.org/rmanalan/chrome-confluence-activity-stream.git"
}, {
"href": "git@bitbucket.org:rmanalan/chrome-confluence-activity-stream.git"
}]
}
}, {
"scm": "git",
"updated_on": "2014-03-31T14:30:43.850637+00:00",
"created_on": "2011-09-14T05:21:02.811713+00:00",
"slug": "anode",
"links": {
"clone": [{
"href": "https://bitbucket.org/tarkasteve/anode.git"
}, {
"href": "git@bitbucket.org:tarkasteve/anode.git"
}]
}
}, {
"scm": "git",
"updated_on": "2011-11-02T07:45:17.681629+00:00",
"created_on": "2011-09-20T04:27:56.852255+00:00",
"slug": "pac-release-plugin",
"links": {
"clone": [{
"href": "https://bitbucket.org/jschumacher/pac-release-plugin.git"
}, {
"href": "git@bitbucket.org:jschumacher/pac-release-plugin.git"
}]
}
}, {
"scm": "git",
"updated_on": "2012-07-27T00:54:30.098265+00:00",
"created_on": "2011-09-21T00:05:50.970472+00:00",
"slug": "metadata-confluence-plugin",
"links": {
"clone": [{
"href": "https://bitbucket.org/jwalton/metadata-confluence-plugin.git"
}, {
"href": "git@bitbucket.org:jwalton/metadata-confluence-plugin.git"
}]
}
}, {
"scm": "git",
"updated_on": "2014-01-18T05:28:48.832287+00:00",
"created_on": "2011-09-21T22:05:29.955410+00:00",
"slug": "coffee-script",
"links": {
"clone": [{
"href": "https://bitbucket.org/detkin/coffee-script.git"
}, {
"href": "git@bitbucket.org:detkin/coffee-script.git"
}]
}
}, {
"scm": "git",
"updated_on": "2012-07-17T23:32:25.879023+00:00",
"created_on": "2011-09-27T08:37:17.132670+00:00",
"slug": "taleo-link-fix",
"links": {
"clone": [{
"href": "https://bitbucket.org/christo/taleo-link-fix.git"
}, {
"href": "git@bitbucket.org:christo/taleo-link-fix.git"
}]
}
}, {
"scm": "git",
"updated_on": "2012-06-26T22:55:05.634860+00:00",
"created_on": "2011-09-27T21:10:47.586400+00:00",
"slug": "bdoc",
"links": {
"clone": [{
"href": "https://bitbucket.org/rmanalan/bdoc.git"
}, {
"href": "git@bitbucket.org:rmanalan/bdoc.git"
}]
}
}, {
"scm": "git",
"updated_on": "2020-04-20T18:16:50.540634+00:00",
"created_on": "2011-09-29T23:36:49.719055+00:00",
"slug": "git",
"links": {
"clone": [{
"href": "https://bitbucket.org/mirror/git.git"
}, {
"href": "git@bitbucket.org:mirror/git.git"
}]
}
}]
}

View file

@ -1,806 +0,0 @@
{
"pagelen": 10,
"values": [
{
"scm": "hg",
"website": "",
"has_wiki": true,
"name": "app-template",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/commits"
},
"clone": [
{
"href": "https://bitbucket.org/bebac/app-template",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/bebac/app-template",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template"
},
"html": {
"href": "https://bitbucket.org/bebac/app-template"
},
"avatar": {
"href": "https://bitbucket.org/bebac/app-template/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{0cf80a6e-e91f-4a4c-a61b-8c8ff51ca3ec}",
"language": "c++",
"created_on": "2008-07-12T07:44:01.476818+00:00",
"full_name": "bebac/app-template",
"has_issues": true,
"owner": {
"username": "bebac",
"display_name": "Benny Bach",
"type": "user",
"uuid": "{d1a83a2a-be1b-4034-8c1d-386a6690cddb}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/bebac"
},
"html": {
"href": "https://bitbucket.org/bebac/"
},
"avatar": {
"href": "https://bitbucket.org/account/bebac/avatar/32/"
}
}
},
"updated_on": "2011-10-05T15:36:19.409008+00:00",
"size": 71548,
"type": "repository",
"slug": "app-template",
"is_private": false,
"description": "Basic files and directory structure for a C++ project. Intended as a starting point for a new project. Includes a basic cross platform core library."
},
{
"scm": "git",
"website": "",
"has_wiki": true,
"name": "mercurialeclipse",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/commits"
},
"clone": [
{
"href": "https://bitbucket.org/bastiand/mercurialeclipse",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/bastiand/mercurialeclipse",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse"
},
"html": {
"href": "https://bitbucket.org/bastiand/mercurialeclipse"
},
"avatar": {
"href": "https://bitbucket.org/bastiand/mercurialeclipse/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{f7a08670-bdd1-4465-aa97-7a5ce8d1a25b}",
"language": "",
"created_on": "2008-07-12T09:37:06.254721+00:00",
"full_name": "bastiand/mercurialeclipse",
"has_issues": false,
"owner": {
"username": "bastiand",
"display_name": "Bastian Doetsch",
"type": "user",
"uuid": "{3742cd48-adad-4205-ab0d-04fc992a1728}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/bastiand"
},
"html": {
"href": "https://bitbucket.org/bastiand/"
},
"avatar": {
"href": "https://bitbucket.org/account/bastiand/avatar/32/"
}
}
},
"updated_on": "2011-09-17T02:36:59.062596+00:00",
"size": 6445145,
"type": "repository",
"slug": "mercurialeclipse",
"is_private": false,
"description": "my own repo for MercurialEclipse."
},
{
"scm": "hg",
"website": "",
"has_wiki": true,
"name": "sandboxpublic",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/commits"
},
"clone": [
{
"href": "https://bitbucket.org/aleax/sandboxpublic",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/aleax/sandboxpublic",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic"
},
"html": {
"href": "https://bitbucket.org/aleax/sandboxpublic"
},
"avatar": {
"href": "https://bitbucket.org/aleax/sandboxpublic/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{452c716c-a1ce-42bc-a95b-d38da49cbb37}",
"language": "",
"created_on": "2008-07-14T01:59:23.568048+00:00",
"full_name": "aleax/sandboxpublic",
"has_issues": true,
"owner": {
"username": "aleax",
"display_name": "Alex Martelli",
"type": "user",
"uuid": "{1155d94d-fb48-43fe-a431-ec07c900b636}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/aleax"
},
"html": {
"href": "https://bitbucket.org/aleax/"
},
"avatar": {
"href": "https://bitbucket.org/account/aleax/avatar/32/"
}
}
},
"updated_on": "2012-06-22T21:55:28.753727+00:00",
"size": 3120,
"type": "repository",
"slug": "sandboxpublic",
"is_private": false,
"description": "to help debug ACLs for private vs public bitbucket repos"
},
{
"scm": "hg",
"website": "",
"has_wiki": true,
"name": "otrsfix-ng",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/commits"
},
"clone": [
{
"href": "https://bitbucket.org/adiakin/otrsfix-ng",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/adiakin/otrsfix-ng",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng"
},
"html": {
"href": "https://bitbucket.org/adiakin/otrsfix-ng"
},
"avatar": {
"href": "https://bitbucket.org/adiakin/otrsfix-ng/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{05b1b9dc-a7b6-46d6-ae1b-e66a17aa4f55}",
"language": "",
"created_on": "2008-07-15T06:14:39.306314+00:00",
"full_name": "adiakin/otrsfix-ng",
"has_issues": true,
"owner": {
"username": "adiakin",
"display_name": "adiakin",
"type": "user",
"uuid": "{414012b5-1ac9-4096-9f46-8893cfa3cda5}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/adiakin"
},
"html": {
"href": "https://bitbucket.org/adiakin/"
},
"avatar": {
"href": "https://bitbucket.org/account/adiakin/avatar/32/"
}
}
},
"updated_on": "2016-06-02T18:56:34.868302+00:00",
"size": 211631,
"type": "repository",
"slug": "otrsfix-ng",
"is_private": false,
"description": "OTRS greasemonkey extension"
},
{
"scm": "hg",
"website": "",
"has_wiki": true,
"name": "pida-pypaned",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/commits"
},
"clone": [
{
"href": "https://bitbucket.org/aafshar/pida-pypaned",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/aafshar/pida-pypaned",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned"
},
"html": {
"href": "https://bitbucket.org/aafshar/pida-pypaned"
},
"avatar": {
"href": "https://bitbucket.org/aafshar/pida-pypaned/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{94cb830a-1784-4e51-9791-8f5cc93990a9}",
"language": "",
"created_on": "2008-07-16T22:47:38.682491+00:00",
"full_name": "aafshar/pida-pypaned",
"has_issues": true,
"owner": {
"username": "aafshar",
"display_name": "Ali Afshar",
"type": "user",
"uuid": "{bcb87110-6a92-41fc-b95c-680feeea5512}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/aafshar"
},
"html": {
"href": "https://bitbucket.org/aafshar/"
},
"avatar": {
"href": "https://bitbucket.org/account/aafshar/avatar/32/"
}
}
},
"updated_on": "2012-06-22T21:55:42.451431+00:00",
"size": 4680652,
"type": "repository",
"slug": "pida-pypaned",
"is_private": false,
"description": ""
},
{
"scm": "hg",
"website": "",
"has_wiki": true,
"name": "TLOMM-testing",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/commits"
},
"clone": [
{
"href": "https://bitbucket.org/tgrimley/tlomm-testing",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/tgrimley/tlomm-testing",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing"
},
"html": {
"href": "https://bitbucket.org/tgrimley/tlomm-testing"
},
"avatar": {
"href": "https://bitbucket.org/tgrimley/tlomm-testing/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{95283ca1-f77e-40d6-b3ed-5bfae6ed2d15}",
"language": "",
"created_on": "2008-07-18T21:05:17.750587+00:00",
"full_name": "tgrimley/tlomm-testing",
"has_issues": true,
"owner": {
"username": "tgrimley",
"display_name": "Thomas Grimley",
"type": "user",
"uuid": "{c958a08f-4669-4c77-81ec-4e2faa8ebf35}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/tgrimley"
},
"html": {
"href": "https://bitbucket.org/tgrimley/"
},
"avatar": {
"href": "https://bitbucket.org/account/tgrimley/avatar/32/"
}
}
},
"updated_on": "2012-06-22T21:55:46.627825+00:00",
"size": 3128,
"type": "repository",
"slug": "tlomm-testing",
"is_private": false,
"description": "File related to testing functionality of TLOMM->TLOTTS transition"
},
{
"scm": "hg",
"website": "",
"has_wiki": true,
"name": "test",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/commits"
},
"clone": [
{
"href": "https://bitbucket.org/tingle/test",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/tingle/test",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test"
},
"html": {
"href": "https://bitbucket.org/tingle/test"
},
"avatar": {
"href": "https://bitbucket.org/tingle/test/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/tingle/test/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{457953ec-fe87-41b9-b659-94756fb40ece}",
"language": "",
"created_on": "2008-07-18T22:24:31.984981+00:00",
"full_name": "tingle/test",
"has_issues": true,
"owner": {
"username": "tingle",
"display_name": "tingle",
"type": "user",
"uuid": "{dddce42b-bd19-417b-90ff-72cdbfb6eb7d}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/tingle"
},
"html": {
"href": "https://bitbucket.org/tingle/"
},
"avatar": {
"href": "https://bitbucket.org/account/tingle/avatar/32/"
}
}
},
"updated_on": "2012-06-22T21:55:49.860564+00:00",
"size": 3090,
"type": "repository",
"slug": "test",
"is_private": false,
"description": ""
},
{
"scm": "hg",
"website": "http://shaze.myopenid.com/",
"has_wiki": true,
"name": "Repository",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/commits"
},
"clone": [
{
"href": "https://bitbucket.org/Shaze/repository",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/Shaze/repository",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository"
},
"html": {
"href": "https://bitbucket.org/Shaze/repository"
},
"avatar": {
"href": "https://bitbucket.org/Shaze/repository/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{3c0b8076-caef-465a-8d08-a184459f659b}",
"language": "",
"created_on": "2008-07-18T22:39:51.380134+00:00",
"full_name": "Shaze/repository",
"has_issues": true,
"owner": {
"username": "Shaze",
"display_name": "Shaze",
"type": "user",
"uuid": "{f57817e9-bfe4-4c65-84dd-662152430323}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/Shaze"
},
"html": {
"href": "https://bitbucket.org/Shaze/"
},
"avatar": {
"href": "https://bitbucket.org/account/Shaze/avatar/32/"
}
}
},
"updated_on": "2012-06-22T21:55:51.570502+00:00",
"size": 3052,
"type": "repository",
"slug": "repository",
"is_private": false,
"description": "Mine, all mine!"
},
{
"scm": "hg",
"website": "http://bitbucket.org/copiesofcopies/identifox/",
"has_wiki": true,
"name": "identifox",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/commits"
},
"clone": [
{
"href": "https://bitbucket.org/uncryptic/identifox",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/uncryptic/identifox",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox"
},
"html": {
"href": "https://bitbucket.org/uncryptic/identifox"
},
"avatar": {
"href": "https://bitbucket.org/uncryptic/identifox/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{78a1a080-a77e-4d0d-823a-b107484477a8}",
"language": "",
"created_on": "2008-07-19T00:33:14.065946+00:00",
"full_name": "uncryptic/identifox",
"has_issues": true,
"owner": {
"username": "uncryptic",
"display_name": "Uncryptic Communications",
"type": "user",
"uuid": "{db87bb9a-9980-4840-bd4a-61f7748a56b4}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/uncryptic"
},
"html": {
"href": "https://bitbucket.org/uncryptic/"
},
"avatar": {
"href": "https://bitbucket.org/account/uncryptic/avatar/32/"
}
}
},
"updated_on": "2008-07-19T00:33:14+00:00",
"size": 1918,
"type": "repository",
"slug": "identifox",
"is_private": false,
"description": "TwitterFox, modified to work with Identi.ca, including cosmetic and subtle code changes. For the most part, the code is nearly identical to the TwitterFox base: http://www.naan.net/trac/wiki/TwitterFox"
},
{
"scm": "hg",
"website": "http://rforce.rubyforge.org",
"has_wiki": false,
"name": "rforce",
"links": {
"watchers": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/watchers"
},
"branches": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/refs/branches"
},
"tags": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/refs/tags"
},
"commits": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/commits"
},
"clone": [
{
"href": "https://bitbucket.org/undees/rforce",
"name": "https"
},
{
"href": "ssh://hg@bitbucket.org/undees/rforce",
"name": "ssh"
}
],
"self": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce"
},
"html": {
"href": "https://bitbucket.org/undees/rforce"
},
"avatar": {
"href": "https://bitbucket.org/undees/rforce/avatar/32/"
},
"hooks": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/hooks"
},
"forks": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/forks"
},
"downloads": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/downloads"
},
"pullrequests": {
"href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/pullrequests"
}
},
"fork_policy": "allow_forks",
"uuid": "{ec2ffee7-bfcd-4e95-83c8-22ac31e69fa3}",
"language": "",
"created_on": "2008-07-19T06:16:43.044743+00:00",
"full_name": "undees/rforce",
"has_issues": false,
"owner": {
"username": "undees",
"display_name": "Ian Dees",
"type": "user",
"uuid": "{6ff66a34-6412-4f28-bf57-707a2a5c6d7b}",
"links": {
"self": {
"href": "https://api.bitbucket.org/2.0/users/undees"
},
"html": {
"href": "https://bitbucket.org/undees/"
},
"avatar": {
"href": "https://bitbucket.org/account/undees/avatar/32/"
}
}
},
"updated_on": "2015-02-09T00:48:15.408680+00:00",
"size": 338402,
"type": "repository",
"slug": "rforce",
"is_private": false,
"description": "A simple, usable binding to the SalesForce API."
}
],
"next": "https://api.bitbucket.org/2.0/repositories?after=2008-07-19T19%3A53%3A07.031845%2B00%3A00"
}

View file

@ -1,4 +0,0 @@
{
"pagelen": 10,
"values": []
}

View file

@ -1 +0,0 @@
2.0_repositories,after=1970-01-01T00:00:00+00:00,pagelen=100

View file

@ -1,117 +1,181 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import timedelta
import re
import unittest
from urllib.parse import unquote
from datetime import datetime
import json
import os
import iso8601
import requests_mock
import pytest
from swh.lister.bitbucket.lister import BitBucketLister
from swh.lister.core.tests.test_lister import HttpListerTester
from swh.lister.bitbucket.lister import BitbucketLister
def _convert_type(req_index):
    """Turn a URL-quoted index taken from a request into the type of the
    model's "indexable" column.

    The value is percent-decoded with ``unquote`` and the result is parsed
    with ``iso8601.parse_date``.

    """
    decoded_index = unquote(req_index)
    return iso8601.parse_date(decoded_index)
@pytest.fixture
def bb_api_repositories_page1(datadir):
    """Return the parsed JSON content of ``bb_api_repositories_page1.json``,
    read from the directory given by ``datadir``"""
    page_path = os.path.join(datadir, "bb_api_repositories_page1.json")
    with open(page_path, "r") as stream:
        payload = json.load(stream)
    return payload
class BitBucketListerTester(HttpListerTester, unittest.TestCase):
    # Configuration of the generic HttpListerTester checks for BitBucketLister.
    # Indexes are produced by _convert_type, i.e. they are parsed dates rather
    # than integers or strings.
    Lister = BitBucketLister
    test_re = re.compile(r"/repositories\?after=([^?&]+)")
    lister_subdir = "bitbucket"
    good_api_response_file = "data/https_api.bitbucket.org/response.json"
    bad_api_response_file = "data/https_api.bitbucket.org/empty_response.json"
    first_index = _convert_type("2008-07-12T07:44:01.476818+00:00")
    last_index = _convert_type("2008-07-19T06:16:43.044743+00:00")
    entries_per_page = 10
    convert_type = _convert_type

    def request_index(self, request):
        """(Override) Extract the index from the ``after`` parameter of the
        request path.

        When that index equals ``Lister.default_min_bound`` (the listing
        bootstrap, when no min_bound is provided to run), ``first_index`` is
        returned instead.

        """
        match = self.test_re.search(request.path_url)
        index = _convert_type(match.group(1))
        return self.first_index if index == self.Lister.default_min_bound else index

    @requests_mock.Mocker()
    def test_fetch_none_nodb(self, http_mocker):
        """Overridden because the index is neither an integer nor a string

        """
        http_mocker.get(self.test_re, text=self.mock_response)
        lister = self.get_fl()
        self.disable_scheduler(lister)
        self.disable_db(lister)

        # stores no results
        upper_bound = self.first_index
        lister.run(min_bound=upper_bound - timedelta(days=3), max_bound=upper_bound)

    def test_is_within_bounds(self):
        lister = self.get_fl()
        low, high = self.first_index, self.last_index

        # A date between first_index and last_index is within bounds
        inside = iso8601.parse_date("2008-07-15")
        self.assertTrue(lister.is_within_bounds(inside, low, high))

        # One date after last_index, then one before first_index
        for outside in ("2008-07-20", "2008-07-11"):
            self.assertFalse(
                lister.is_within_bounds(iso8601.parse_date(outside), low, high)
            )
@pytest.fixture
def bb_api_repositories_page2(datadir):
    """Return the parsed JSON content of ``bb_api_repositories_page2.json``,
    read from the directory given by ``datadir``"""
    page_path = os.path.join(datadir, "bb_api_repositories_page2.json")
    with open(page_path, "r") as stream:
        payload = json.load(stream)
    return payload
def test_lister_bitbucket(lister_bitbucket, requests_mock_datadir):
"""Simple bitbucket listing should create scheduled tasks (git, hg)
def _check_listed_origins(lister_origins, scheduler_origins):
"""Asserts that the two collections have the same origins from the point of view of
the lister"""
"""
lister_bitbucket.run()
sorted_lister_origins = list(sorted(lister_origins))
sorted_scheduler_origins = list(sorted(scheduler_origins))
r = lister_bitbucket.scheduler.search_tasks(task_type="load-hg")
assert len(r) == 9
assert len(sorted_lister_origins) == len(sorted_scheduler_origins)
for row in r:
args = row["arguments"]["args"]
kwargs = row["arguments"]["kwargs"]
for lo, so in zip(sorted_lister_origins, sorted_scheduler_origins):
assert lo.url == so.url
assert lo.last_update == so.last_update
assert len(args) == 0
assert len(kwargs) == 1
url = kwargs["url"]
assert url.startswith("https://bitbucket.org")
def test_bitbucket_incremental_lister(
swh_scheduler,
requests_mock,
mocker,
bb_api_repositories_page1,
bb_api_repositories_page2,
):
"""Simple Bitbucket listing with two pages containing 10 origins"""
assert row["policy"] == "recurring"
assert row["priority"] is None
requests_mock.get(
BitbucketLister.API_URL,
[{"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2},],
)
r = lister_bitbucket.scheduler.search_tasks(task_type="load-git")
assert len(r) == 1
lister = BitbucketLister(scheduler=swh_scheduler, page_size=10)
for row in r:
args = row["arguments"]["args"]
kwargs = row["arguments"]["kwargs"]
assert len(args) == 0
assert len(kwargs) == 1
url = kwargs["url"]
# First listing
stats = lister.run()
assert url.startswith("https://bitbucket.org")
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins
assert row["policy"] == "recurring"
assert row["priority"] is None
assert stats.pages == 2
assert stats.origins == 20
assert len(scheduler_origins) == 20
assert lister.updated
lister_state = lister.get_state_from_scheduler()
last_repo_cdate = lister_state.last_repo_cdate.isoformat()
assert hasattr(lister_state, "last_repo_cdate")
assert last_repo_cdate == bb_api_repositories_page2["values"][-1]["created_on"]
# Second listing, restarting from last state
lister.session.get = mocker.spy(lister.session, "get")
lister.run()
url_params = lister.url_params
url_params["after"] = last_repo_cdate
lister.session.get.assert_called_once_with(lister.API_URL, params=url_params)
all_origins = (
bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"]
)
_check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins)
def test_bitbucket_lister_rate_limit_hit(
    swh_scheduler,
    requests_mock,
    mocker,
    bb_api_repositories_page1,
    bb_api_repositories_page2,
):
    """Bitbucket listing of two pages containing 10 origins each, with two
    HTTP 429 responses registered between the first and the second page"""

    # Registered responses: page 1, two 429 responses, then page 2
    responses = [{"json": bb_api_repositories_page1, "status_code": 200}]
    responses += [{"json": None, "status_code": 429} for _ in range(2)]
    responses.append({"json": bb_api_repositories_page2, "status_code": 200})
    requests_mock.get(BitbucketLister.API_URL, responses)

    lister = BitbucketLister(scheduler=swh_scheduler, page_size=10)

    # Replace the "sleep" attribute of the retry object attached to
    # page_request (presumably so that retries do not really wait)
    mocker.patch.object(lister.page_request.retry, "sleep")

    stats = lister.run()

    # Both pages must have been listed despite the 429 responses
    assert (stats.pages, stats.origins) == (2, 20)

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins
    assert len(listed) == 20
def test_bitbucket_full_lister(
    swh_scheduler,
    requests_mock,
    mocker,
    bb_api_repositories_page1,
    bb_api_repositories_page2,
):
    """Full (non incremental) Bitbucket listing done after an incremental one:
    the two pages of 10 origins are listed again and the lister state saved by
    the incremental run is expected to be unchanged"""

    # The same two pages are registered twice, in order: once per lister run
    pages = (bb_api_repositories_page1, bb_api_repositories_page2)
    requests_mock.get(
        BitbucketLister.API_URL, [{"json": page} for page in pages * 2]
    )

    # First do an incremental run, with credentials, to have an initial
    # lister state
    credentials = {"bitbucket": {"bitbucket": [{"username": "u", "password": "p"}]}}
    incremental_lister = BitbucketLister(
        scheduler=swh_scheduler, page_size=10, incremental=True, credentials=credentials
    )
    assert incremental_lister.session.auth is not None

    first_stats = incremental_lister.run()
    saved_state = incremental_lister.get_state_from_scheduler()
    assert first_stats.origins == 20

    # Change both dates of the last repository of page 2 before the full run,
    # to check that the new creation date is not saved to the lister state
    newest_repo = bb_api_repositories_page2["values"][-1]
    for date_key in ("created_on", "updated_on"):
        newest_repo[date_key] = datetime.now().isoformat()

    # Then do the full run, without credentials
    full_lister = BitbucketLister(
        scheduler=swh_scheduler, page_size=10, incremental=False
    )
    assert full_lister.session.auth is None

    full_stats = full_lister.run()
    assert full_stats.pages == 2
    assert full_stats.origins == 20

    scheduler_origins = swh_scheduler.get_listed_origins(
        full_lister.lister_obj.id
    ).origins

    # 20 because scheduler upserts based on (id, type, url)
    assert len(scheduler_origins) == 20

    # Modification on created_on SHOULD NOT impact lister state
    assert full_lister.get_state_from_scheduler() == saved_state

    # The origins recorded in the scheduler must match the ones built from
    # the (modified) pages, last_update included
    expected_origins = full_lister.get_origins_from_page(
        bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"]
    )
    _check_listed_origins(expected_origins, scheduler_origins)

View file

@ -1,12 +1,11 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from time import sleep
from unittest.mock import patch
from celery.result import GroupResult
from swh.lister.pattern import ListerStats
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
@ -17,79 +16,35 @@ def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
assert res.result == "OK"
@patch("swh.lister.bitbucket.tasks.BitBucketLister")
def test_incremental(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
# setup the mocked BitbucketLister
lister.return_value = lister
lister.db_last_index.return_value = 42
lister.run.return_value = None
@patch("swh.lister.bitbucket.tasks.BitbucketLister")
def test_incremental_listing(
lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
lister.from_configfile.return_value = lister
lister.run.return_value = ListerStats(pages=5, origins=5000)
res = swh_scheduler_celery_app.send_task(
"swh.lister.bitbucket.tasks.IncrementalBitBucketLister"
"swh.lister.bitbucket.tasks.IncrementalBitBucketLister",
kwargs=dict(page_size=100, username="username", password="password",),
)
assert res
res.wait()
assert res.successful()
lister.assert_called_once_with()
lister.db_last_index.assert_called_once_with()
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
lister.run.assert_called_once()
@patch("swh.lister.bitbucket.tasks.BitBucketLister")
def test_range(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
# setup the mocked BitbucketLister
lister.return_value = lister
lister.run.return_value = None
@patch("swh.lister.bitbucket.tasks.BitbucketLister")
def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
lister.from_configfile.return_value = lister
lister.run.return_value = ListerStats(pages=5, origins=5000)
res = swh_scheduler_celery_app.send_task(
"swh.lister.bitbucket.tasks.RangeBitBucketLister", kwargs=dict(start=12, end=42)
"swh.lister.bitbucket.tasks.FullBitBucketRelister",
kwargs=dict(page_size=100, username="username", password="password",),
)
assert res
res.wait()
assert res.successful()
lister.assert_called_once_with()
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@patch("swh.lister.bitbucket.tasks.BitBucketLister")
def test_relister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
# setup the mocked BitbucketLister
lister.return_value = lister
lister.run.return_value = None
lister.db_partition_indices.return_value = [(i, i + 9) for i in range(0, 50, 10)]
res = swh_scheduler_celery_app.send_task(
"swh.lister.bitbucket.tasks.FullBitBucketRelister"
)
assert res
res.wait()
assert res.successful()
# retrieve the GroupResult for this task and wait for all the subtasks
# to complete
promise_id = res.result
assert promise_id
promise = GroupResult.restore(promise_id, app=swh_scheduler_celery_app)
for i in range(5):
if promise.ready():
break
sleep(1)
lister.assert_called_with()
# one by the FullBitbucketRelister task
# + 5 for the RangeBitbucketLister subtasks
assert lister.call_count == 6
lister.db_last_index.assert_not_called()
lister.db_partition_indices.assert_called_once_with(10000)
# lister.run should have been called once per partition interval
for i in range(5):
assert (
dict(min_bound=10 * i, max_bound=10 * i + 9),
) in lister.run.call_args_list
lister.run.assert_called_once()