Source code for nefelibata.assistants.mirror_images

import hashlib
import logging
import mimetypes
import re
from pathlib import Path

import requests
from bs4 import BeautifulSoup
from nefelibata.assistants import Assistant
from nefelibata.assistants import Scope
from nefelibata.post import Post
from nefelibata.utils import modify_html


# Number of bytes requested per chunk when streaming a downloaded image to disk.
CHUNK_SIZE = 2048


def get_resource_extension(url: str) -> str:
    """Return the file extension matching the content type served at ``url``.

    A ``HEAD`` request is issued and the ``Content-Type`` response header is
    mapped to an extension with :func:`mimetypes.guess_extension`.

    :param url: Address of the remote resource.
    :returns: The extension including the leading dot (e.g. ``".png"``), or
        an empty string when the header is missing or the type is unknown.

    Exceptions raised by ``requests.head`` propagate to the caller.
    """
    # ``requests.head`` does not follow redirects unless asked to; without
    # this the content type of the redirect response itself would be used.
    response = requests.head(url, allow_redirects=True)

    # The header is optional, so do not assume it is present.
    content_type = response.headers.get("content-type", "")

    # Drop parameters such as "; charset=utf-8": ``guess_extension`` only
    # recognises the bare "type/subtype" form.
    mime_type = content_type.split(";", 1)[0].strip().lower()

    return mimetypes.guess_extension(mime_type) or ""
class MirrorImagesAssistant(Assistant):
    """Assistant that stores local copies of externally hosted images.

    Every ``<img>`` tag whose ``src`` is an absolute ``http``/``https`` URL is
    downloaded into an ``img`` directory next to the HTML file, and the tag is
    rewritten to reference the local copy.
    """

    # Declared for both posts and the site; how scopes are dispatched is
    # decided by the ``Assistant`` base class (not visible here).
    scopes = [Scope.POST, Scope.SITE]

    def process_post(self, post: Post, force: bool = False) -> None:
        """Mirror the images of the HTML file that sits next to ``post``.

        The file processed is ``post.file_path`` with its suffix replaced by
        ``.html``. ``force`` is accepted but not used by this assistant.
        """
        self._process_file(post.file_path.with_suffix(".html"))

    def process_site(self, force: bool = False) -> None:
        """Mirror the images of every ``*.html`` file directly under ``build``.

        ``force`` is accepted but not used by this assistant.
        """
        for path in (self.root / "build").glob("*.html"):
            self._process_file(path)

    def _process_file(self, file_path: Path) -> None:
        """Download the external images of ``file_path`` and rewrite their ``src``.

        Images that cannot be fetched (non-success HTTP status) keep their
        original ``src`` so the page is not pointed at a bogus local file.
        """
        mirror = file_path.parent / "img"
        mirror.mkdir(exist_ok=True)

        # Anchor the pattern: an unanchored "http" would also match local
        # paths that merely contain those letters (e.g. "img/http-flow.png").
        external_url = re.compile(r"^https?://")

        soup: BeautifulSoup
        with modify_html(file_path) as soup:
            for image in soup.find_all("img", src=external_url):
                url = image.attrs["src"]

                # The local name is derived from the URL so that the same
                # resource always maps to the same file.
                digest = hashlib.md5(url.encode("utf-8")).hexdigest()
                local = mirror / f"{digest}{get_resource_extension(url)}"

                # download and store locally
                if not local.exists() and not self._download(url, local):
                    continue

                image.attrs["src"] = f"img/{local.name}"

    def _download(self, url: str, destination: Path) -> bool:
        """Stream ``url`` into ``destination``; return whether it was stored.

        The body is written to a temporary ``.part`` file that is renamed
        only after the transfer completes, so an interrupted download never
        leaves a truncated file that later runs would mistake for a finished
        mirror.
        """
        partial = destination.with_name(destination.name + ".part")

        # The context manager releases the streamed connection in all cases.
        with requests.get(url, stream=True) as response:
            if not response.ok:
                # Do not save an error page under an image file name.
                logging.getLogger(__name__).warning(
                    "Unable to mirror %s: HTTP status %s",
                    url,
                    response.status_code,
                )
                return False

            try:
                with open(partial, "wb") as outp:
                    for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                        outp.write(chunk)
            except Exception:
                # Remove the incomplete file before re-raising.
                if partial.exists():
                    partial.unlink()
                raise

        partial.replace(destination)
        return True