Source code for nefelibata.announcers.fawm

import logging
import re
import textwrap
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import dateutil.parser
import requests
from bs4 import BeautifulSoup
from bs4 import NavigableString
from bs4.element import Tag
from dateutil.parser._parser import ParserError
from nefelibata.announcers import Announcer
from nefelibata.announcers import Response
from nefelibata.post import Post

_logger = logging.getLogger(__name__)


def extract_params(post: Post, root: Path, config: Dict[str, Any]) -> Dict[str, Any]:
    """Extract the FAWM song-form parameters from a standard FAWM post.

    The liner notes are the text found between the "Liner Notes" ``<h1>`` and
    the next ``<h1>`` of the rendered post; the lyrics are the dedented text of
    the first ``<pre>`` element. A post without a "Liner Notes" heading gets
    empty notes, and a post without a ``<pre>`` gets ``"N/A"`` as lyrics.

    :param post: the post to announce; its ``html``, ``parsed``, ``title`` and
        ``file_path`` attributes are read.
    :param root: the blog root; the MP3 path is made relative to
        ``root / "posts"``.
    :param config: the blog configuration; ``config["url"]`` is used as the
        prefix of the demo URL.
    :returns: the form fields to be posted to FAWM.
    :raises Exception: if more than one MP3 is found in the post directory.
    """
    soup = BeautifulSoup(post.html, "html.parser")

    # liner notes are between <h1>s
    notes_h1 = soup.find("h1", text="Liner Notes")
    lines = []
    # a post without the heading has no notes, instead of crashing on `None`
    next_sibling = notes_h1.next_sibling if notes_h1 is not None else None
    # compare against None explicitly: a falsy sibling (eg, an empty text node)
    # should not end the walk early
    while next_sibling is not None and next_sibling.name != "h1":
        try:
            content = next_sibling.get_text()
        except AttributeError:
            content = next_sibling.string
        lines.append(content)
        if next_sibling.name == "p":
            # keep paragraphs separated in the plain-text notes
            lines.append("\n")
        next_sibling = next_sibling.next_sibling
    notes = "".join(lines).strip()

    # lyrics are inside a <pre> element; only a missing element is expected,
    # so test for it instead of swallowing every exception
    pre = soup.find("pre")
    lyrics = textwrap.dedent(pre.text).strip() if pre is not None else "N/A"

    # tags are separated by space, not comma
    # NOTE(review): the lookup may yield None when the post has no keywords
    # (depends on the type of `post.parsed`) -- treated here as "no tags"
    keywords = post.parsed["keywords"] or ""
    tags = re.sub(r",\s?", " ", keywords)

    # search for a single MP3 in the post directory to use as demo
    post_directory = post.file_path.parent
    mp3s = list(post_directory.glob("**/*.mp3"))
    if len(mp3s) > 1:
        _logger.error("Multiple MP3s found, aborting!")
        raise Exception("Only posts with a single MP3 can be announced on FAWM")
    if mp3s:
        mp3_path = mp3s[0].relative_to(root / "posts")
        # URLs always use forward slashes, regardless of the OS separator
        demo = f'{config["url"]}{mp3_path.as_posix()}'
    else:
        demo = ""

    return {
        "id": "",
        "title": post.title,
        "tags": tags,
        "demo": demo,
        "notes": notes,
        "lyrics": lyrics,
        "status": "public",
        "collab": 0,
        "downloadable": 1,
        "submit": "Save+It!",
    }


def get_response_from_li(url: str, el: Tag) -> Response:
    """Build a standard response from one comment ``<li>`` of a FAWM song page.

    :param url: URL of the song page the comment was found on.
    :param el: the ``<li>`` element of the comment; its ``id`` is expected to
        be the letter ``c`` followed by the comment number.
    :returns: a response with the comment text, its author and a timestamp.
    """
    site = "https://fawm.org"

    # drop the leading `c` from the element id to get the comment number
    comment_id = el.attrs["id"][1:]

    # FAWM shows either a date or a relative age such as "1 day"
    shown_when = el.find("small", {"class": "text-muted"}).text
    try:
        parsed = dateutil.parser.parse(shown_when)
    except ParserError:
        # not a date: read it as "<amount> <unit>" and subtract it from now
        amount, unit = shown_when.split()
        # timedelta keywords are plural ("days", "hours", ...)
        keyword = unit.rstrip("s") + "s"
        age = timedelta(**{keyword: int(amount)})
        moment = datetime.now(tz=timezone.utc) - age
    else:
        moment = parsed.replace(tzinfo=timezone.utc)
    timestamp = moment.isoformat()

    # author name and profile link; hrefs on the page are site-relative
    author_link = el.find("a", {"class": "user-ref"})
    author = {"name": author_link.text.strip()}
    profile_path = author_link.attrs["href"]

    avatar_path = el.find("img", {"class": "comment-avatar"}).attrs["src"]
    author["image"] = f"{site}{avatar_path}"
    author["url"] = f"{site}{profile_path}"

    # the comment body lives in a <p> whose id is `q` plus the comment number
    body = el.find("p", {"id": f"q{comment_id}"}).text

    return {
        "source": "FAWM",
        "url": url,
        "color": "#cc6600",
        "id": f"fawm:{comment_id}",
        "timestamp": timestamp,
        "user": author,
        "comment": {"text": body, "url": f"{url}#c{comment_id}"},
    }


def get_comments_from_fawm_page(
    url: str, username: str, password: str,
) -> List[Response]:
    """Extract comments from a given FAWM page.

    :param url: URL of the FAWM song page.
    :param username: FAWM username, sent as HTTP basic auth.
    :param password: FAWM password, sent as HTTP basic auth.
    :returns: one response per comment found in the page, in document order.
    """
    response = requests.get(url, auth=(username, password))
    # decode the body as UTF-8 regardless of what was detected from the headers
    response.encoding = "UTF-8"
    soup = BeautifulSoup(response.text, "html.parser")

    # there are non-comments with the class "comment-item", so we need to
    # narrow down by id. The pattern is anchored because the filter is applied
    # with `search()`: unanchored, an id that merely contains `c<digits>` would
    # match and produce a bogus comment id downstream.
    comment_li_id = re.compile(r"^c\d+$")
    return [
        get_response_from_li(url, el)
        for el in soup.find_all("li", {"class": "comment-item", "id": comment_li_id})
    ]


class FAWMAnnouncer(Announcer):

    """Announcer for FAWM (February Album Writing Month).

    FAWM is a website where, every February, thousands of people take on the
    challenge of writing 14 songs in 28 days. The site reboots every year and
    the comments are lost, which makes it a good fit for Nefelibata.

    A post can be published as a song when it is structured like this:

        # Liner Notes

        <notes about the song>

        # Lyrics

        <pre>
            <lyrics go here>
        </pre>

    Anything outside those sections is ignored, so an audio player (for
    example) can be added elsewhere in the post.

    For the song to be published with a demo, the post directory must also
    contain an MP3.

    """

    id = "fawm"
    name = "FAWM"
    url_header = "fawm-url"

    def __init__(
        self, root: Path, config: Dict[str, Any], username: str, password: str,
    ):
        # `root` and `config` are handed to the base announcer; the FAWM
        # credentials are kept for the HTTP requests made below
        super().__init__(root, config)

        self.username = username
        self.password = password

    def announce(self, post: Post) -> str:
        """Create the song on FAWM and return the URL it was published to.

        The form fields are built by ``extract_params`` and the URL is read
        from the ``Location`` header of the response.
        """
        _logger.info("Creating new song on FAWM")

        form_fields = extract_params(post, self.root, self.config)
        credentials = (self.username, self.password)
        reply = requests.post(
            "https://fawm.org/songs/add", data=form_fields, auth=credentials,
        )
        song_url = reply.headers["Location"]

        _logger.info("Success!")

        return song_url

    def collect(self, post: Post) -> List[Response]:
        """Fetch the comments left on the FAWM page of the song.

        The page URL is read from the post header named by ``url_header``.
        """
        _logger.info("Collecting comments from FAWM")

        song_url = post.parsed[self.url_header]
        comments = get_comments_from_fawm_page(
            song_url, self.username, self.password,
        )

        _logger.info("Success!")

        return comments
Source code for nefelibata.announcers.fawm

nefelibata

Navigation

Related Topics