diff --git a/mypy.ini b/mypy.ini index c84a7e7..8aab2fa 100644 --- a/mypy.ini +++ b/mypy.ini @@ -36,6 +36,3 @@ ignore_missing_imports = True [mypy-urllib3.util.*] ignore_missing_imports = True - -[mypy-xmltodict.*] -ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt index 34bf339..4f6c24e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ python_debian requests setuptools -xmltodict iso8601 beautifulsoup4 launchpadlib diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py index ae9874b..ad52e22 100644 --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -6,8 +6,8 @@ import logging from typing import Iterator, List, Optional +from bs4 import BeautifulSoup import requests -import xmltodict from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin @@ -54,8 +54,9 @@ class PyPILister(StatelessLister[PackageListPage]): response.raise_for_status() - page_xmldict = xmltodict.parse(response.content) - page_results = [p["#text"] for p in page_xmldict["html"]["body"]["a"]] + page = BeautifulSoup(response.content, features="html.parser") + + page_results = [p.text for p in page.find_all("a")] yield page_results