# certificate_analysis/fetch_ca_certs.py
#
# 85 lines
# 3.3 KiB
# Python
# Raw Normal View History
#
# 2022-07-03 14:38:15 +00:00
import os
from urllib.parse import urljoin, urlsplit, urlunsplit
import requests
from bs4 import BeautifulSoup
# Pages to scan for certificate download links, grouped by CA.  Each key is
# also used as the name of the directory the CA's certificates are saved to.
ENTRY_URLS = {
    "cacert": [
        "https://www.cacert.org/index.php?id=3",
    ],
    "certum": [
        "https://www.certum.eu/en/cert_expertise_root_certificates/",
    ],
    "dtrust": [
        "https://www.d-trust.net/de/support/repository",
    ],
    "globalsign": [
        "https://support.globalsign.com/ca-certificates/root-certificates/globalsign-root-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/alphassl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/domainssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/extendedssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/organizationssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/personalsign-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/code-signing-standard-ev-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/intranetssl-root-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/cloudssl-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/timestamping-intermediate-certificates",
        "https://support.globalsign.com/ca-certificates/intermediate-certificates/g3-intermediate-certificates",
    ],
    "letsencrypt": [
        "https://letsencrypt.org/de/certificates/",
    ],
    "sectigo": [
        "https://sectigo.com/resource-library/sectigo-root-intermediate-certificate-files",
        "https://secure.sectigo.com/products/publiclyDisclosedSubCACerts",
    ],
}
def handle_link(base_url, sess: "requests.Session", dir, link, timeout: float = 30.0):
    """Download the certificate that ``link`` points to, if it is one.

    ``link`` is resolved against ``base_url`` and stripped of its query
    string and fragment.  When the last component of the resulting URL path
    ends in a known certificate extension (compared case-insensitively),
    the file is fetched through ``sess`` and written to ``dir`` under that
    component's name.  A file that already exists locally is not fetched
    again.

    Args:
        base_url: URL of the page the link was found on.
        sess: Session used to perform the download.
        dir: Target directory; created with mode 0o750 when missing.
        link: Raw ``href`` value; ``None`` or empty values are ignored.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.  Progress and failures are reported on stdout; a failed
        download is skipped rather than raised.
    """
    if not link:
        # Anchor without a target (e.g. a named anchor): nothing to fetch.
        return

    scheme, netloc, path, _, _ = urlsplit(urljoin(base_url, link))
    cert_name = os.path.basename(path)
    # Inspect only the file name from the URL path so that dots in the host
    # name can never be mistaken for a file extension.
    extension = os.path.splitext(cert_name)[1].lower()
    if extension not in (".crt", ".der", ".cer", ".pem"):
        return

    fetch_url = urlunsplit((scheme, netloc, path, "", ""))
    os.makedirs(dir, mode=0o750, exist_ok=True)
    cert_filename = os.path.join(dir, cert_name)
    if os.path.exists(cert_filename):
        print(f"{cert_filename} exist, skip download of {fetch_url}")
        return

    try:
        resp = sess.get(fetch_url, timeout=timeout)
    except requests.RequestException as err:
        # One unreachable certificate must not abort the whole crawl.
        print(f"failed to fetch {fetch_url}: {err}")
        return
    if not resp.ok:
        print(f"failed to fetch {fetch_url}: HTTP {resp.status_code}")
        return

    with open(cert_filename, "wb") as cert_file:
        cert_file.write(resp.content)
    print(f"downloaded {fetch_url} as {cert_filename}")
def main():
    """Crawl every page in ``ENTRY_URLS`` and download the linked certificates.

    Each entry page is fetched and parsed, and every anchor's ``href`` is
    handed to ``handle_link`` together with the CA's directory name.

    Raises:
        requests.RequestException: If an entry page cannot be fetched or
            answers with an HTTP error status.
    """
    with requests.Session() as sess:
        # Update the existing header mapping; assigning a ``{"k", "v"}`` set
        # literal here would replace it with a set and the User-Agent would
        # never be sent.
        sess.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
            }
        )
        # NOTE(review): TLS verification is switched off for every request, so
        # the downloaded certificates are NOT authenticated by the transport.
        # Presumably needed for sites whose chain is not in the default trust
        # store - confirm, and verify fingerprints out of band before
        # trusting anything fetched here.
        sess.verify = False

        for ca_dir, entry_urls in ENTRY_URLS.items():
            for url in entry_urls:
                response = sess.get(url, allow_redirects=True, timeout=30)
                response.raise_for_status()
                html = BeautifulSoup(response.text, features="html.parser")
                for link in html.find_all("a"):
                    # Resolve relative links against the final URL so they
                    # stay correct when the entry page redirected.
                    handle_link(response.url, sess, ca_dir, link.get("href"))


if __name__ == "__main__":
    main()