swh.lister.pypi: Use pypi's legacy html based api to list packages
The XML-RPC API is marked as deprecated [1]. Even if it is not being retired right away, the legacy API is not marked as deprecated, so moving towards that one sounds more reasonable [2]. [1] https://warehouse.readthedocs.io/api-reference/xml-rpc/#pypi-s-xml-rpc-methods [2] https://warehouse.readthedocs.io/api-reference/legacy/#simple-project-api Related T422
This commit is contained in:
parent
6ff3b90859
commit
3a65fbb4c8
3 changed files with 47 additions and 20 deletions
|
@ -7,6 +7,7 @@ import random
|
|||
from datetime import datetime
|
||||
from email.utils import parsedate
|
||||
from pprint import pformat
|
||||
from xmlrpc import client
|
||||
|
||||
import requests
|
||||
import xmltodict
|
||||
|
@ -35,15 +36,8 @@ class ListerXMLRPCTransport(abc.ABC):
|
|||
"""Initialize client to query for result
|
||||
|
||||
"""
|
||||
from xmlrpc import client
|
||||
return client.ServerProxy(path)
|
||||
|
||||
def list_packages(self, client):
|
||||
"""Listing method
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
def request_uri(self, _):
|
||||
"""Same uri called once
|
||||
|
||||
|
@ -64,24 +58,22 @@ class ListerXMLRPCTransport(abc.ABC):
|
|||
return False, 0
|
||||
|
||||
def transport_request(self, identifier):
|
||||
"""Implements SWHListerBase.transport_request for HTTP using Requests.
|
||||
"""Implements SWHListerBase.transport_request
|
||||
|
||||
"""
|
||||
path = self.request_uri(identifier)
|
||||
# params = self.request_params(identifier) # we cannot use this...
|
||||
|
||||
try:
|
||||
_client = self.get_client(path)
|
||||
return self.list_packages(_client)
|
||||
return self.get_client(path)
|
||||
except Exception as e:
|
||||
raise FetchError(e)
|
||||
|
||||
def transport_response_to_string(self, response):
|
||||
"""Implements SWHListerBase.transport_response_to_string for XMLRPC
|
||||
given responses.
|
||||
|
||||
"""
|
||||
s = pformat(self.SERVER)
|
||||
s += '\n#\n' + pformat(response)
|
||||
s += '\n#\n' + pformat(response) # Note: will potentially be big
|
||||
return s
|
||||
|
||||
|
||||
|
@ -216,3 +208,25 @@ class SWHListerHttpTransport(abc.ABC):
|
|||
except Exception: # not xml
|
||||
s += pformat(response.text)
|
||||
return s
|
||||
|
||||
|
||||
class ListerOnePageApiTransport(SWHListerHttpTransport):
    """Transport relying on the requests library to retrieve one basic
    html page, whose result is then parsed.

    To be used in conjunction with SWHListerBase or a subclass of it.

    """
    # Expected to be overridden by concrete listers with the page to fetch.
    PAGE = AbstractAttribute("The server api's unique page to retrieve and "
                             "parse for information")
    PATH_TEMPLATE = None  # we do not use it

    def __init__(self, api_baseurl=None):
        """Record the lister version and open a requests session.

        api_baseurl is accepted but not used by this transport.

        """
        self.lister_version = __version__
        self.session = requests.Session()

    def request_uri(self, _):
        """Return the URI to request: always PAGE, whatever identifier
        transport_request was given.

        """
        return self.PAGE
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import abc
|
||||
import logging
|
||||
|
||||
from .lister_base import SWHListerBase
|
||||
|
@ -17,6 +18,13 @@ class SimpleLister(SWHListerBase):
|
|||
information and stores those in db
|
||||
|
||||
"""
|
||||
@abc.abstractmethod
def list_packages(self, *args):
    """Hook listing the packages; meant to be overridden by concrete
    listers.

    ingest_data calls it with the response returned by
    safely_issue_request and uses its result as the new response.

    """
|
||||
|
||||
def ingest_data(self, identifier, checks=False):
|
||||
"""Rework the base ingest_data.
|
||||
Request server endpoint which gives all in one go.
|
||||
|
@ -32,6 +40,7 @@ class SimpleLister(SWHListerBase):
|
|||
"""
|
||||
# Request (partial?) list of repositories info
|
||||
response = self.safely_issue_request(identifier)
|
||||
response = self.list_packages(response)
|
||||
if not response:
|
||||
return response, []
|
||||
models_list = self.transport_response_simplified(response)
|
||||
|
|
|
@ -2,21 +2,23 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import xmltodict
|
||||
|
||||
from .models import PyPiModel
|
||||
|
||||
from swh.scheduler import utils
|
||||
from swh.lister.core.simple_lister import SimpleLister
|
||||
from swh.lister.core.lister_transports import ListerXMLRPCTransport
|
||||
from swh.lister.core.lister_transports import ListerOnePageApiTransport
|
||||
|
||||
|
||||
class PyPiLister(ListerXMLRPCTransport, SimpleLister):
|
||||
class PyPiLister(ListerOnePageApiTransport, SimpleLister):
|
||||
# Template path expecting an integer that represents the page id
|
||||
MODEL = PyPiModel
|
||||
LISTER_NAME = 'pypi'
|
||||
SERVER = 'https://pypi.org/pypi'
|
||||
PAGE = 'https://pypi.org/simple/'
|
||||
|
||||
def __init__(self, override_config=None):
|
||||
ListerXMLRPCTransport.__init__(self)
|
||||
ListerOnePageApiTransport .__init__(self)
|
||||
SimpleLister.__init__(self, override_config=override_config)
|
||||
|
||||
def task_dict(self, origin_type, origin_url, **kwargs):
|
||||
|
@ -33,11 +35,13 @@ class PyPiLister(ListerXMLRPCTransport, SimpleLister):
|
|||
_type, _policy, origin_url,
|
||||
project_metadata_url=project_metadata_url)
|
||||
|
||||
def list_packages(self, client):
|
||||
"""(Override) List the actual pypi origins from the api.
|
||||
def list_packages(self, response):
    """(Override) List the actual pypi origins from the response.

    Args:
        response: response object whose ``content`` holds the html of
            the page to parse, with one ``<a>`` element per package.

    Returns:
        list of the text of every ``<a>`` found under ``html/body``;
        an empty list when the body holds no such element.

    """
    result = xmltodict.parse(response.content)
    body = result['html']['body']
    # An empty <body> parses to None (and a text-only one to a str):
    # nothing to list in either case.
    anchors = body.get('a') if isinstance(body, dict) else None
    if anchors is None:
        return []
    # xmltodict only builds a list when the element is repeated; a lone
    # <a> comes back as a single mapping, so normalise before iterating.
    if not isinstance(anchors, list):
        anchors = [anchors]
    return [package['#text'] for package in anchors]
|
||||
|
||||
def _compute_urls(self, repo_name):
|
||||
"""Returns a tuple (project_url, project_metadata_url)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue