Source code for nefelibata.assistants.relativize_links
import logging
import urllib.parse
from pathlib import Path
from bs4 import BeautifulSoup
from nefelibata.assistants import Assistant
from nefelibata.assistants import Scope
from nefelibata.post import Post
_logger = logging.getLogger(__name__)
class RelativizeLinksAssistant(Assistant):
    """Assistant that rewrites site-absolute links in HTML files as relative.

    Links found in ``<a href>``, ``<img src>``, ``<link href>`` and
    ``<script src>`` that start with the configured site URL
    (``self.config["url"]``) or with a single ``/`` are replaced by a path
    relative to the HTML file that contains them. All other links are left
    untouched.
    """

    scopes = [Scope.POST, Scope.SITE]

    def process_post(self, post: Post, force: bool = False) -> None:
        """Relativize the links in the HTML file belonging to ``post``.

        The file processed is ``post.file_path`` with its suffix replaced by
        ``.html``. ``force`` is not used by this assistant.
        """
        self._process_file(post.file_path.with_suffix(".html"))

    def process_site(self, force: bool = False) -> None:
        """Relativize the links in every ``*.html`` file directly in ``build``.

        Only files matching ``<root>/build/*.html`` are visited (the glob is
        not recursive). ``force`` is not used by this assistant.
        """
        for path in (self.root / "build").glob("*.html"):
            self._process_file(path)

    def _process_file(self, file_path: Path) -> None:
        """Rewrite site-absolute links in ``file_path`` as relative links.

        The file is parsed with BeautifulSoup and written back in place only
        when at least one attribute was changed.

        Raises:
            ValueError: if ``file_path`` is not located under ``self.root``.
        """
        with open(file_path) as fp:
            html = fp.read()

        tag_attributes = [
            ("a", "href"),
            ("img", "src"),
            ("link", "href"),
            ("script", "src"),
        ]

        # First path component below the root: this should be "posts" when
        # `process_post` is called, or "build" when `process_site` is called.
        # It is the directory that site-absolute links are resolved against.
        base_dir = self.root / file_path.relative_to(self.root).parts[0]
        site_url = self.config["url"]

        soup = BeautifulSoup(html, "html.parser")
        modified = False
        for tag, attr in tag_attributes:
            for el in soup.find_all(tag):
                resource = el.attrs.get(attr)
                if not resource:
                    continue

                # An empty site URL would be a prefix of every link, so it
                # must never be used for matching.
                if site_url and resource.startswith(site_url):
                    site_path = resource[len(site_url) :]
                    # When the site URL has no trailing slash, make sure the
                    # match ended on a URL boundary; otherwise a link such as
                    # "https://a.com.evil/" would match site "https://a.com".
                    on_boundary = site_url.endswith("/") or site_path[:1] in (
                        "",
                        "/",
                        "?",
                        "#",
                    )
                    if not on_boundary:
                        _logger.debug("External link found, ignoring")
                        continue
                elif resource.startswith("//"):
                    # "//host/path" points at another host; it is not a path
                    # on this site even though it starts with a slash.
                    _logger.debug("Protocol-relative link found, ignoring")
                    continue
                elif resource.startswith("/"):
                    site_path = resource[1:]
                else:
                    _logger.debug("Relative link found, ignoring")
                    continue

                # Drop leading slashes: joining an absolute component would
                # make pathlib discard `base_dir` entirely.
                resource_path = base_dir / site_path.lstrip("/")
                relative_url = self._relative_path(resource_path, file_path.parent)

                # Store a plain string with forward slashes so the emitted
                # URL does not depend on the OS path separator.
                el.attrs[attr] = relative_url.as_posix()
                modified = True

        if modified:
            html = str(soup)
            with open(file_path, "w") as fp:
                fp.write(html)

    @staticmethod
    def _relative_path(target: Path, start: Path) -> Path:
        """Return ``target`` expressed relative to the directory ``start``.

        Climbs from ``start`` one directory at a time, prepending one ``..``
        per level, until ``target`` is lexically below the directory reached.

        Raises:
            ValueError: if no ancestor of ``start`` contains ``target``.
        """
        up = Path(".")
        parent = start
        while True:
            try:
                below = parent_relative = target.relative_to(parent)
            except ValueError:
                # The target is not below `parent`; go one level up.
                if parent.parent == parent:
                    # Nothing left to climb: fail instead of looping forever.
                    raise
                parent = parent.parent
                up = up / ".."
            else:
                return up / below