"""Download certificate files linked from a fixed set of CA web pages.

Every page listed in ``ENTRY_URLS`` is fetched and parsed; each hyperlink
whose path ends in one of ``CERT_EXTENSIONS`` is downloaded into a directory
named after the ``ENTRY_URLS`` key the page is listed under (relative to the
current working directory).
"""

import os
from urllib.parse import urljoin, urlsplit, urlunsplit

import requests
from bs4 import BeautifulSoup

# Maps an output directory name to the pages that are scanned for links.
ENTRY_URLS = {
    "cacert": [
        "https://www.cacert.org/index.php?id=3",
    ],
    "certum": [
        "https://www.certum.eu/en/cert_expertise_root_certificates/",
    ],
    "dtrust": [
        "https://www.d-trust.net/de/support/repository",
    ],
    "globalsign": [
        "https://support.globalsign.com/ca-certificates/root-certificates/globalsign-root-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/alphassl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/domainssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/extendedssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/organizationssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/personalsign-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/code-signing-standard-ev-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/intranetssl-root-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/cloudssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/timestamping-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/g3-intermediate-certificates",
    ],
    "letsencrypt": [
        "https://letsencrypt.org/de/certificates/",
    ],
    "sectigo": [
        "https://sectigo.com/resource-library/sectigo-root-intermediate-certificate-files",
        "https://secure.sectigo.com/products/publiclyDisclosedSubCACerts",
    ],
}

# File extensions (without the dot) that mark a link as a certificate file.
CERT_EXTENSIONS = ("crt", "der", "cer", "pem")

# Seconds to wait for a server before giving up; requests has no default
# timeout, so without this a stalled connection blocks the script forever.
REQUEST_TIMEOUT = 60

USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0"
)


def handle_link(base_url, sess: requests.Session, dir, link):
    """Download *link* into *dir* if it points at a certificate file.

    Args:
        base_url: URL of the page the link was found on; relative links are
            resolved against it.
        sess: Session used to perform the download.
        dir: Directory the file is stored in. It is created (mode ``0o750``)
            when missing. The name shadows the builtin but is kept so that
            existing keyword callers continue to work.
        link: Value of the anchor's ``href`` attribute. ``None`` or an empty
            string is ignored.

    Links are skipped silently when they are not http(s), when their path
    does not end in one of ``CERT_EXTENSIONS``, or when the server answers
    with a non-success status. A file that already exists locally is not
    downloaded again.
    """
    if not link:
        return

    # Query string and fragment are dropped: only the path identifies the file.
    scheme, netloc, path, _, _ = urlsplit(urljoin(base_url, link))

    # requests can only fetch http(s); e.g. a "mailto:ca@example.pem" href
    # would otherwise pass the extension test and raise InvalidSchema.
    if scheme not in ("http", "https"):
        return

    # Test the path only, so a dot in the host name can never be mistaken
    # for a file extension.
    if path.rsplit(".", 1)[-1] not in CERT_EXTENSIONS:
        return

    fetch_url = urlunsplit((scheme, netloc, path, "", ""))

    os.makedirs(dir, mode=0o750, exist_ok=True)

    # basename() keeps the download inside *dir* regardless of the URL path.
    cert_filename = os.path.join(dir, os.path.basename(path))
    if os.path.exists(cert_filename):
        print(f"{cert_filename} exist, skip download of {fetch_url}")
        return

    resp = sess.get(fetch_url, timeout=REQUEST_TIMEOUT)
    if not resp.ok:
        # Best effort: a missing or forbidden file must not abort the run.
        return

    with open(cert_filename, "wb") as cert_file:
        cert_file.write(resp.content)
    print(f"downloaded {fetch_url} as {cert_filename}")


def main():
    """Scan every page in ``ENTRY_URLS`` and download the linked certificates.

    Raises:
        requests.HTTPError: If one of the entry pages answers with an error
            status (via ``raise_for_status``).
    """
    sess = requests.Session()
    # Update the existing header mapping. Assigning a set literal
    # ({"User-Agent", "..."}) replaced it with a non-mapping, so the
    # User-Agent was never actually sent.
    sess.headers.update({"User-Agent": USER_AGENT})
    # NOTE(review): TLS verification is disabled for every request, so the
    # downloaded files are not authenticated by the transport. Presumably
    # deliberate (some of these sites may not chain to a locally trusted
    # root) - confirm, and verify certificate fingerprints out of band.
    sess.verify = False

    for target_dir, urls in ENTRY_URLS.items():
        for url in urls:
            response = sess.get(
                url, allow_redirects=True, timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            html = BeautifulSoup(response.text, features="html.parser")
            for anchor in html.find_all("a"):
                # Resolve relative links against the final URL after any
                # redirects, which is where the document was served from.
                handle_link(
                    response.url, sess, target_dir, anchor.get("href")
                )


if __name__ == "__main__":
    main()