#!/usr/bin/env python3
"""Report which ``http://`` links in local HTML files also work over HTTPS.

Walks the current directory, collects every double-quoted ``http://`` URL
found in ``*.html`` files, requests the ``https://`` form of each one, and
prints the original URLs for which that request received a response.
"""

import os
import re

import requests

# A double-quoted http:// URL; group 1 is the URL without the quotes.
HTTP_URL_RE = re.compile(r'"(http:\/\/[^"]*)"')

urls = set()


def check_url(url):
    """Return True if a GET request to *url* receives any HTTP response.

    The response object (or the error) is printed so the outcome is visible
    in the run log. Any status code counts as success; only a request that
    fails to complete returns False.

    Args:
        url: The URL to request.

    Returns:
        True when ``requests.get`` returns a response, False when it raises
        a ``requests`` exception.
    """
    try:
        r = requests.get(url, timeout=5)
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of ConnectionError, Timeout,
        # TooManyRedirects, InvalidURL, etc. Catching only ConnectionError
        # let a read timeout or redirect loop abort the whole scan.
        print(e)
        return False
    print(r)
    return True


for root, dirs, files in os.walk('.'):
    for f in files:
        if not f.endswith('.html'):
            continue
        full = os.path.join(root, f)
        # Close the file deterministically, and tolerate stray bytes: the
        # URLs of interest are ASCII, so a replaced character elsewhere in
        # the page must not stop the scan.
        with open(full, encoding='utf-8', errors='replace') as fh:
            for l in fh:
                # findall, not search: one line may hold several links.
                urls.update(HTTP_URL_RE.findall(l))

urls = sorted(urls)

to_replace = []
# Count from 1 so the progress line reads "1/N" .. "N/N".
for i, url in enumerate(urls, start=1):
    https = url.replace('http://', 'https://')
    print('%d/%d: %s' % (i, len(urls), https))
    if check_url(https):
        to_replace.append(url)

print()
print('Can use HTTPS for %d of links:' % (len(to_replace)))
for url in to_replace:
    print(url)