From 3b9b559803ca242570de0c3e9162d4dc64b66bdd Mon Sep 17 00:00:00 2001 From: egeakman Date: Fri, 24 May 2024 03:53:51 +0300 Subject: [PATCH 01/27] lots of stuff --- Dockerfile | 4 +- data/public/{europython-2024 => }/.gitignore | 0 data/raw/{europython-2024 => }/.gitignore | 0 deploy/hosts.ini | 2 +- deploy/nginx.conf.j2 | 4 +- src/config.py | 2 +- src/download.py | 3 + src/transform.py | 174 +++++++++++++++++-- test.json | 0 9 files changed, 165 insertions(+), 24 deletions(-) rename data/public/{europython-2024 => }/.gitignore (100%) rename data/raw/{europython-2024 => }/.gitignore (100%) create mode 100644 test.json diff --git a/Dockerfile b/Dockerfile index a9b7831..b8bce97 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,8 +8,8 @@ RUN pip install -r requirements.txt COPY src/ ./src/ COPY Makefile . -RUN mkdir -p /srv/data/raw/europython-2024/ -RUN mkdir -p /srv/data/public/europython-2024/ +RUN mkdir -p /srv/data/raw/europython-2023/ +RUN mkdir -p /srv/data/public/europython-2023/ CMD ["make", "all"] diff --git a/data/public/europython-2024/.gitignore b/data/public/.gitignore similarity index 100% rename from data/public/europython-2024/.gitignore rename to data/public/.gitignore diff --git a/data/raw/europython-2024/.gitignore b/data/raw/.gitignore similarity index 100% rename from data/raw/europython-2024/.gitignore rename to data/raw/.gitignore diff --git a/deploy/hosts.ini b/deploy/hosts.ini index 9154b04..6e9414d 100644 --- a/deploy/hosts.ini +++ b/deploy/hosts.ini @@ -1,2 +1,2 @@ [hetzner] -49.13.23.199 ansible_user=root domain_name=programapi24.europython.eu ansible_ssh_common_args='-o StrictHostKeyChecking=no' +49.13.23.199 ansible_user=root domain_name=programapi23.europython.eu ansible_ssh_common_args='-o StrictHostKeyChecking=no' diff --git a/deploy/nginx.conf.j2 b/deploy/nginx.conf.j2 index 77c8958..b1f62d0 100644 --- a/deploy/nginx.conf.j2 +++ b/deploy/nginx.conf.j2 @@ -75,8 +75,8 @@ http { include /etc/letsencrypt/options-ssl-nginx.conf; ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; - location /2024 { - alias /usr/share/static/europython-2024; + location /2023 { + alias /usr/share/static/europython-2023; } } } diff --git a/src/config.py b/src/config.py index 9a3944f..1e9a27b 100644 --- a/src/config.py +++ b/src/config.py @@ -3,7 +3,7 @@ class Config: - event = "europython-2024" + event = "europython-2023" project_root = Path(__file__).resolve().parents[1] raw_path = Path(f"{project_root}/data/raw/{event}") public_path = Path(f"{project_root}/data/public/{event}") diff --git a/src/download.py b/src/download.py index 9afd165..901b2f9 100644 --- a/src/download.py +++ b/src/download.py @@ -19,6 +19,9 @@ "speakers?questions=all", ] +if not Config.raw_path.exists(): + Config.raw_path.mkdir(parents=True) + for resource in resources: url = base_url + f"{resource}" diff --git a/src/transform.py b/src/transform.py index 6c42b9e..cfd5aa7 100644 --- a/src/transform.py +++ b/src/transform.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json from datetime import datetime @@ -8,9 +10,9 @@ class SpeakerQuestion: - affiliation = "Company / Organization / Educational Institution" - homepage = "Social (Homepage)" - twitter = "Social (X/Twitter)" + affiliation = "Company / Institute" + homepage = "Homepage" + twitter = "Twitter / Mastodon handle(s)" mastodon = "Social (Mastodon)" @@ -105,14 +107,16 @@ class PretalxSubmission(BaseModel): # This is embedding a slot inside a submission for easier lookup later room: str | None = None - start: datetime | None = None - end: datetime | None = None + start: datetime | str | None = None + end: datetime | str | None = None # TODO: once we have schedule data then we can prefill those in the code here + # These are added after the model is created talks_in_parallel: list[str] | None = None talks_after: list[str] | None = None - next_talk_code: str | None = None - prev_talk_code: str | None = None + talks_before: list[str] | None = None + next_talk: str | None = None + prev_talk: str | None = None website_url: str | None = None @@ -153,9 +157,19 @@ def extract(cls, values): if isinstance(values["duration"], int): values["duration"] = str(values["duration"]) + if cls.is_publishable and values["slot"]: + slot = values["slot"] + values["room"] = slot["room"]["en"] if slot["room"] else None + values["start"] = ( + datetime.fromisoformat(slot["start"]) if slot["start"] else None + ) + values["end"] = datetime.fromisoformat(slot["end"]) if slot["end"] else None + slug = slugify(values["title"]) values["slug"] = slug - values["website_url"] = f"https://ep2024.europython.eu/session/{slug}" + values["website_url"] = ( + f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{slug}" + ) return values @@ -171,6 +185,120 @@ def is_confirmed(self): def is_publishable(self): return self.is_accepted or self.is_confirmed + @staticmethod + def set_talks_in_parallel( + submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] + ): + parallel = [] + for session in all_sessions.values(): + if ( + session.code == submission.code + or session.start is None + or submission.start is None + ): + continue + + # If they intersect, they are in parallel + if session.start < submission.end and session.end > submission.start: + parallel.append(session.code) + + submission.talks_in_parallel = parallel + + @staticmethod + def set_talks_after( + submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] + ): + if submission.start is None: + return + + # Sort sessions based on start time, early first + all_sessions_sorted = sorted( + all_sessions.values(), key=lambda x: (x.start is None, x.start) + ) + + # Filter out sessions + remaining_sessions = [ + session + for session in all_sessions_sorted + if session.start is not None + and session.start >= submission.end + and session.code not in submission.talks_in_parallel + and session.code != submission.code + and submission.start.day == session.start.day + and not submission.submission_type + == session.submission_type + == "Announcements" + ] + + # Add sessions to the list if they are in different rooms + seen_rooms = set() + unique_sessions = [] + + for session in remaining_sessions: + if session.room not in seen_rooms: + unique_sessions.append(session) + seen_rooms.add(session.room) + + # If there is a keynote next, only show that + if any(s.submission_type == "Keynote" for s in unique_sessions): + unique_sessions = [ + s for s in unique_sessions if s.submission_type == "Keynote" + ] + + # Set the next talks in all rooms + submission.talks_after = [session.code for session in unique_sessions] + + # Set the next talk in the same room + for session in unique_sessions: + if session.room == submission.room: + submission.next_talk = session.code + break + + @staticmethod + def set_talks_before( + submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] + ): + if submission.start is None: + return + + # Sort sessions based on start time, late first + all_sessions_sorted = sorted( + all_sessions.values(), + key=lambda x: (x.start is None, x.start), + reverse=True, + ) + + remaining_sessions = [ + session + for session in all_sessions_sorted + if session.start is not None + and session.code not in submission.talks_in_parallel + and session.start <= submission.start + and session.code != submission.code + and submission.start.day == session.start.day + and session.submission_type != "Announcements" + ] + + seen_rooms = set() + unique_sessions = [] + + for session in remaining_sessions: + if session.room not in seen_rooms: + unique_sessions.append(session) + seen_rooms.add(session.room) + + submission.talks_before = [session.code for session in unique_sessions] + + for session in unique_sessions: + if session.room == submission.room: + submission.prev_talk = session.code + break + + def model_dump(self): + self.start = self.start.isoformat() if self.start else None + self.end = self.end.isoformat() if self.end else None + return super().model_dump() + def parse_submissions() -> list[PretalxSubmission]: """ @@ -209,20 +337,22 @@ def publishable_speakers(accepted_proposals: set[str]) -> dict[str, PretalxSpeak return output -def save_publishable_sessions(): +def save_publishable_sessions(publishable: dict[str, PretalxSubmission]): path = Config.public_path / "sessions.json" - publishable = publishable_submissions() + for sub in publishable.values(): + PretalxSubmission.set_talks_in_parallel(sub, publishable) + PretalxSubmission.set_talks_after(sub, publishable) + PretalxSubmission.set_talks_before(sub, publishable) data = {k: v.model_dump() for k, v in publishable.items()} with open(path, "w") as fd: json.dump(data, fd, indent=2) -def save_publishable_speakers(): +def save_publishable_speakers(publishable: dict[str, PretalxSubmission]): path = Config.public_path / "speakers.json" - publishable = publishable_submissions() speakers = publishable_speakers(publishable.keys()) data = {k: v.model_dump() for k, v in speakers.items()} @@ -230,12 +360,20 @@ def save_publishable_speakers(): json.dump(data, fd, indent=2) +def save_all(): + if not Config.public_path.exists(): + Config.public_path.mkdir(parents=True) + + publishable = publishable_submissions() + save_publishable_sessions(publishable) + save_publishable_speakers(publishable) + + if __name__ == "__main__": - print("Checking for duplicate slugs...") - assert len(set(s.slug for s in publishable_submissions().values())) == len( - publishable_submissions() - ) + # print("Checking for duplicate slugs...") + # assert len(set(s.slug for s in publishable_submissions().values())) == len( + # publishable_submissions() + # ) print("Saving publishable data...") - save_publishable_sessions() - save_publishable_speakers() + save_all() print("Done") diff --git a/test.json b/test.json new file mode 100644 index 0000000..e69de29 From a3db579a575d5b5feae49175274f1e50e5a419e3 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sat, 25 May 2024 22:21:56 +0300 Subject: [PATCH 02/27] port funcs to 2024 --- Dockerfile | 4 ++-- data/examples/output/sessions.json | 10 ++++++---- deploy/nginx.conf.j2 | 4 ++-- src/config.py | 2 +- src/transform.py | 14 +++++++------- test.json | 0 6 files changed, 18 insertions(+), 16 deletions(-) delete mode 100644 test.json diff --git a/Dockerfile b/Dockerfile index b8bce97..a9b7831 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,8 +8,8 @@ RUN pip install -r requirements.txt COPY src/ ./src/ COPY Makefile . -RUN mkdir -p /srv/data/raw/europython-2023/ -RUN mkdir -p /srv/data/public/europython-2023/ +RUN mkdir -p /srv/data/raw/europython-2024/ +RUN mkdir -p /srv/data/public/europython-2024/ CMD ["make", "all"] diff --git a/data/examples/output/sessions.json b/data/examples/output/sessions.json index a5a8467..457a443 100644 --- a/data/examples/output/sessions.json +++ b/data/examples/output/sessions.json @@ -19,8 +19,9 @@ "end": null, "talks_in_parallel": null, "talks_after": null, - "next_talk_code": null, - "prev_talk_code": null, + "talks_before": null, + "next_talk": null, + "prev_talk": null, "website_url": "https://ep2024.europython.eu/session/this-is-a-test-talk-from-a-test-speaker-about-a-test-topic" }, "B8CD4F": { @@ -43,8 +44,9 @@ "end": null, "talks_in_parallel": null, "talks_after": null, - "next_talk_code": null, - "prev_talk_code": null, + "talks_before": null, + "next_talk": null, + "prev_talk": null, "website_url": "https://ep2024.europython.eu/session/a-talk-with-shorter-title" } } diff --git a/deploy/nginx.conf.j2 b/deploy/nginx.conf.j2 index b1f62d0..1224283 100644 --- a/deploy/nginx.conf.j2 +++ b/deploy/nginx.conf.j2 @@ -75,8 +75,8 @@ http { include /etc/letsencrypt/options-ssl-nginx.conf; ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; - location /2023 { - alias /usr/share/static/europython-2023; + location /2024{ + alias /usr/share/static/europython-2024; } } } diff --git a/src/config.py b/src/config.py index 1e9a27b..9a3944f 100644 --- a/src/config.py +++ b/src/config.py @@ -3,7 +3,7 @@ class Config: - event = "europython-2023" + event = "europython-2024" project_root = Path(__file__).resolve().parents[1] raw_path = Path(f"{project_root}/data/raw/{event}") public_path = Path(f"{project_root}/data/public/{event}") diff --git a/src/transform.py b/src/transform.py index cfd5aa7..b385d5b 100644 --- a/src/transform.py +++ b/src/transform.py @@ -10,9 +10,9 @@ class SpeakerQuestion: - affiliation = "Company / Institute" - homepage = "Homepage" - twitter = "Twitter / Mastodon handle(s)" + affiliation = "Company / Organization / Educational Institution" + homepage = "Social (Homepage)" + twitter = "Social (X/Twitter)" mastodon = "Social (Mastodon)" @@ -370,10 +370,10 @@ def save_all(): if __name__ == "__main__": - # print("Checking for duplicate slugs...") - # assert len(set(s.slug for s in publishable_submissions().values())) == len( - # publishable_submissions() - # ) + print("Checking for duplicate slugs...") + assert len(set(s.slug for s in publishable_submissions().values())) == len( + publishable_submissions() + ) print("Saving publishable data...") save_all() print("Done") diff --git a/test.json b/test.json deleted file mode 100644 index e69de29..0000000 From 0c50a627e290aa61ad56c84d36e780a3c5e8c6f7 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sat, 25 May 2024 23:31:45 +0300 Subject: [PATCH 03/27] update --- deploy/hosts.ini | 2 +- deploy/nginx.conf.j2 | 2 +- src/transform.py | 13 ++++--------- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/deploy/hosts.ini b/deploy/hosts.ini index 6e9414d..9154b04 100644 --- a/deploy/hosts.ini +++ b/deploy/hosts.ini @@ -1,2 +1,2 @@ [hetzner] -49.13.23.199 ansible_user=root domain_name=programapi23.europython.eu ansible_ssh_common_args='-o StrictHostKeyChecking=no' +49.13.23.199 ansible_user=root domain_name=programapi24.europython.eu ansible_ssh_common_args='-o StrictHostKeyChecking=no' diff --git a/deploy/nginx.conf.j2 b/deploy/nginx.conf.j2 index 1224283..77c8958 100644 --- a/deploy/nginx.conf.j2 +++ b/deploy/nginx.conf.j2 @@ -75,7 +75,7 @@ http { include /etc/letsencrypt/options-ssl-nginx.conf; ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; - location /2024{ + location /2024 { alias /usr/share/static/europython-2024; } } diff --git a/src/transform.py b/src/transform.py index b385d5b..66ca7f0 100644 --- a/src/transform.py +++ b/src/transform.py @@ -208,9 +208,6 @@ def set_talks_in_parallel( def set_talks_after( submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] ): - if submission.start is None: - return - # Sort sessions based on start time, early first all_sessions_sorted = sorted( all_sessions.values(), key=lambda x: (x.start is None, x.start) @@ -258,9 +255,6 @@ def set_talks_after( def set_talks_before( submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] ): - if submission.start is None: - return - # Sort sessions based on start time, late first all_sessions_sorted = sorted( all_sessions.values(), @@ -341,9 +335,10 @@ def save_publishable_sessions(publishable: dict[str, PretalxSubmission]): path = Config.public_path / "sessions.json" for sub in publishable.values(): - PretalxSubmission.set_talks_in_parallel(sub, publishable) - PretalxSubmission.set_talks_after(sub, publishable) - PretalxSubmission.set_talks_before(sub, publishable) + if sub.start is None: + PretalxSubmission.set_talks_in_parallel(sub, publishable) + PretalxSubmission.set_talks_after(sub, publishable) + PretalxSubmission.set_talks_before(sub, publishable) data = {k: v.model_dump() for k, v in publishable.items()} with open(path, "w") as fd: From 1d106e00adee96ed51596fd2249da816f3661ac0 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sat, 25 May 2024 23:55:49 +0300 Subject: [PATCH 04/27] oops + more readable + tell what event are we transforming --- src/transform.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/transform.py b/src/transform.py index 66ca7f0..ea3a46a 100644 --- a/src/transform.py +++ b/src/transform.py @@ -32,9 +32,9 @@ class SubmissionState: class PretalxAnswer(BaseModel): question_text: str answer_text: str - answer_file: str | None - submission_id: str | None - speaker_id: str | None + answer_file: str | None = None + submission_id: str | None = None + speaker_id: str | None = None @model_validator(mode="before") @classmethod @@ -50,8 +50,8 @@ def extract(cls, values): class PretalxSpeaker(BaseModel): code: str name: str - biography: str | None - avatar: str | None + biography: str | None = None + avatar: str | None = None slug: str answers: list[PretalxAnswer] = Field(..., exclude=True) submissions: list[str] @@ -95,7 +95,7 @@ class PretalxSubmission(BaseModel): speakers: list[str] # We only want the code, not the full info submission_type: str slug: str - track: str | None + track: str | None = None state: str abstract: str answers: list[PretalxAnswer] = Field(..., exclude=True) @@ -159,11 +159,13 @@ def extract(cls, values): if cls.is_publishable and values["slot"]: slot = values["slot"] - values["room"] = slot["room"]["en"] if slot["room"] else None - values["start"] = ( - datetime.fromisoformat(slot["start"]) if slot["start"] else None - ) - values["end"] = datetime.fromisoformat(slot["end"]) if slot["end"] else None + + if isinstance(slot["room"], dict): + values["room"] = slot["room"]["en"] + + if slot["start"]: + values["start"] = datetime.fromisoformat(slot["start"]) + values["end"] = datetime.fromisoformat(slot["end"]) slug = slugify(values["title"]) values["slug"] = slug @@ -335,7 +337,7 @@ def save_publishable_sessions(publishable: dict[str, PretalxSubmission]): path = Config.public_path / "sessions.json" for sub in publishable.values(): - if sub.start is None: + if sub.start is not None: PretalxSubmission.set_talks_in_parallel(sub, publishable) PretalxSubmission.set_talks_after(sub, publishable) PretalxSubmission.set_talks_before(sub, publishable) @@ -365,6 +367,7 @@ def save_all(): if __name__ == "__main__": + print(f"Transforming {Config.event} data...") print("Checking for duplicate slugs...") assert len(set(s.slug for s in publishable_submissions().values())) == len( publishable_submissions() From 96111abb4fe2d714e6233efa252d3e9afaefd187 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sun, 26 May 2024 01:23:02 +0300 Subject: [PATCH 05/27] better slug dupe check + optimize --- Makefile | 5 ++++- src/transform.py | 45 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 62f2b4b..05bc266 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,11 @@ download: python -m src.download transform: +ifeq ($(ALLOW_DUPES), true) + python -m src.transform --allow-dupes +else python -m src.transform - +endif all: download transform diff --git a/src/transform.py b/src/transform.py index ea3a46a..70e60c2 100644 --- a/src/transform.py +++ b/src/transform.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import sys from datetime import datetime from pydantic import BaseModel, Field, model_validator @@ -357,21 +358,49 @@ def save_publishable_speakers(publishable: dict[str, PretalxSubmission]): json.dump(data, fd, indent=2) -def save_all(): +def save_all(all_sessions: dict[str, PretalxSubmission]): if not Config.public_path.exists(): Config.public_path.mkdir(parents=True) - publishable = publishable_submissions() - save_publishable_sessions(publishable) - save_publishable_speakers(publishable) + save_publishable_sessions(all_sessions) + save_publishable_speakers(all_sessions) + + +def check_duplicate_slugs(all_sessions: dict[str, PretalxSubmission]) -> bool: + all_speakers = publishable_speakers(all_sessions.keys()) + + session_slugs = [s.slug for s in all_sessions.values()] + speaker_slugs = [s.slug for s in all_speakers.values()] + + session_duplicates = [ + slug for slug in set(session_slugs) if session_slugs.count(slug) > 1 + ] + speaker_duplicates = [ + slug for slug in set(speaker_slugs) if speaker_slugs.count(slug) > 1 + ] + + if session_duplicates or speaker_duplicates: + print("Found duplicate slugs:") + for slug in session_duplicates: + print(f"Session: {slug}") + for slug in speaker_duplicates: + print(f"Speaker: {slug}") + return False + return True if __name__ == "__main__": print(f"Transforming {Config.event} data...") print("Checking for duplicate slugs...") - assert len(set(s.slug for s in publishable_submissions().values())) == len( - publishable_submissions() - ) + + all_sessions = publishable_submissions() + + if not check_duplicate_slugs(all_sessions) and ( + len(sys.argv) <= 1 or sys.argv[1] != "--allow-dupes" + ): + print("Exiting. Use ``make transform ALLOW_DUPES=true`` to continue.") + sys.exit(1) + print("Saving publishable data...") - save_all() + save_all(all_sessions) print("Done") From 08bcbde2ba90d4fca82d56c15167dcebcd024878 Mon Sep 17 00:00:00 2001 From: egeakman Date: Wed, 29 May 2024 23:36:37 +0300 Subject: [PATCH 06/27] add documentation --- README.md | 33 ++++++++++- data/examples/README.md | 123 ++++++++++++++++++++++++++++++++++++++++ src/transform.py | 4 +- 3 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 data/examples/README.md diff --git a/README.md b/README.md index 4e26c29..9293604 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,33 @@ # programapi -Program API + +This project processes, saves, and serves the static JSON files containing details of accepted speakers and submissions via an API. + +What this project does step-by-step: + +1. Downloads the Pretalx speaker and submission data, and saves it as JSON files. +2. Transforms the JSON files into a format that is easier to work with and OK to serve publicly. This includes removing unnecessary/private fields, and adding new fields. +3. Serves the JSON files via an API. + +## Installation + +1. Clone the repository. +2. Install the dependency management tool: ``make deps/pre`` +3. Install the dependencies: ``make deps/install`` +4. Set up ``pre-commit``: ``make pre-commit`` + +## Usage + +- Run the whole process: ``make all`` +- Run only the download process: ``make download`` +- Run only the transformation process: ``make transform`` + +**Note:** Don't forget to set the ``PRETALX_TOKEN`` environment variable before running the download process. And please don't make too many requests to the Pretalx API, it might get angry 🤪 + +## API + +The API is served at ``programapi24.europython.eu``. It has two endpoints: + +- ``/speakers.json``: Returns the list of confirmed speakers. +- ``/sessions.json``: Returns the list of confirmed sessions. + +**Note:** See [this page](data/examples/README.md) for the explanations of the fields in the returned JSON files. diff --git a/data/examples/README.md b/data/examples/README.md new file mode 100644 index 0000000..8f20161 --- /dev/null +++ b/data/examples/README.md @@ -0,0 +1,123 @@ +# Explaining the data + +**Note:** Some of the fields may be `null` or empty (`""`). + +## `sessions.json` + +
+Example session data JSON + +```json +{ + "A1B2C3": { + "code": "A1B2C3", + "title": "Example talk", + "speakers": [ + "B4D5E6", + ... + ], + "submission_type": "Talk", + "slug": "example-talk", + "track": "Some Track", + "state": "confirmed", + "abstract": "This is an example talk. It is a great talk.", + "tweet": "This is an example talk.", + "duration": "60", + "level": "intermediate", + "delivery": "in-person", + "room": "South Hall 2A", + "start": "2024-07-10T14:00:00+02:00", + "end": "2024-07-10T15:00:00+02:00", + "talks_in_parallel": [ + "F7G8H9", + ... + ], + "talks_after": [ + "I0J1K2", + ... + ], + "talks_before": [ + "L3M4N5", + ... + ], + "next_talk": "O6P7Q8", + "prev_talk": "R9S0T1", + "website_url": "https://ep2024.europython.eu/session/example-talk/" + }, +} +``` +
+ +  + +The fields are as follows: + +| Key | Type | Notes | +|--------------------|-----------------------------------|---------------------------------------------------------------| +| `code` | `string` | Unique identifier for the session | +| `title` | `string` | Title of the session | +| `speakers` | `list[string]` | List of codes of the speakers | +| `submission_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | +| `slug` | `string` | URL-friendly version of the title | +| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | +| `state` | `string` | State of the session (e.g. confirmed, canceled, etc.) | +| `abstract` | `string` | Abstract of the session | +| `tweet` | `string` | Tweet-length description of the session | +| `duration` | `string` | Duration of the session in minutes | +| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | +| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | +| `room` | `string` \| `null` | Room where the session will be held | +| `start` | `datetime (ISO format)` \| `null` | Start time of the session | +| `end` | `datetime (ISO format)` \| `null` | End time of the session | +| `talks_in_parallel`| `list[string]` \| `null` | List of codes of sessions happening in parallel | +| `talks_after` | `list[string]` \| `null` | List of codes of sessions happening after this session | +| `talks_before` | `list[string]` \| `null` | List of codes of sessions happening before this session | +| `next_talk` | `string` \| `null` | Code of the next session in the same room | +| `prev_talk` | `string` \| `null` | Code of the previous session in the same room | +| `website_url` | `string` | URL of the session on the conference website | + +  + +## `speakers.json` + +
+Example speaker data JSON + +```json +{ + "B4D5E6": { + "code": "B4D5E6", + "name": "A Speaker", + "biography": "Some bio", + "avatar": "https://pretalx.com/media/avatars/picture.jpg", + "slug": "a-speaker", + "submissions": [ + "A1B2C3", + ... + ], + "affiliation": "A Company", + "homepage": "https://example.com", + "twitter": "example", + "mastodon": "example" + }, + ... +} +``` +
+ +  + +The fields are as follows: + +| Key | Type | Notes | +|----------------|--------------------|-----------------------------------------------------------------------| +| `code` | `string` | Unique identifier for the speaker | +| `name` | `string` | Name of the speaker | +| `biography` | `string` \| `null` | Biography of the speaker | +| `avatar` | `string` \| `null` | URL of the speaker's avatar | +| `slug` | `string` | URL-friendly version of the name | +| `submissions` | `list[string]` | List of codes of the sessions the speaker is speaking at | +| `affiliation` | `string` \| `null` | Affiliation of the speaker | +| `homepage` | `string` \| `null` | URL of the speaker's homepage | +| `twitter` | `string` \| `null` | Twitter handle of the speaker | +| `mastodon` | `string` \| `null` | Mastodon handle of the speaker | diff --git a/src/transform.py b/src/transform.py index 70e60c2..105fecb 100644 --- a/src/transform.py +++ b/src/transform.py @@ -104,7 +104,7 @@ class PretalxSubmission(BaseModel): duration: str level: str = "" - delivery: str | None = "" + delivery: str = "" # This is embedding a slot inside a submission for easier lookup later room: str | None = None @@ -119,7 +119,7 @@ class PretalxSubmission(BaseModel): next_talk: str | None = None prev_talk: str | None = None - website_url: str | None = None + website_url: str @model_validator(mode="before") @classmethod From 39a96e3c863a829799cce4b9aebbcbd482a5dc74 Mon Sep 17 00:00:00 2001 From: Ege Akman Date: Wed, 29 May 2024 23:41:10 +0300 Subject: [PATCH 07/27] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9293604..7165733 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # programapi -This project processes, saves, and serves the static JSON files containing details of accepted speakers and submissions via an API. +This project downloads, processes, saves, and serves the static JSON files containing details of accepted speakers and submissions via an API. What this project does step-by-step: @@ -25,7 +25,7 @@ What this project does step-by-step: ## API -The API is served at ``programapi24.europython.eu``. It has two endpoints: +The API is served at ``programapi24.europython.eu/2024``. It has two endpoints (for now): - ``/speakers.json``: Returns the list of confirmed speakers. - ``/sessions.json``: Returns the list of confirmed sessions. From ecb1cc30fad0396bc272daa3765f53463e6a9076 Mon Sep 17 00:00:00 2001 From: Ege Akman Date: Wed, 29 May 2024 23:42:16 +0300 Subject: [PATCH 08/27] Update README.md --- data/examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/examples/README.md b/data/examples/README.md index 8f20161..d5673cf 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -1,4 +1,4 @@ -# Explaining the data +# Explaining the output data **Note:** Some of the fields may be `null` or empty (`""`). From 4276fa56a2789b3832dad07569a4f5c7a165151b Mon Sep 17 00:00:00 2001 From: egeakman Date: Wed, 29 May 2024 23:49:19 +0300 Subject: [PATCH 09/27] add configuration to readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7165733..210f338 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,10 @@ What this project does step-by-step: 3. Install the dependencies: ``make deps/install`` 4. Set up ``pre-commit``: ``make pre-commit`` +## Configuration + +You can change the event in the [``config.py``](src/config.py) file. It is set to ``europython-2024`` right now. + ## Usage - Run the whole process: ``make all`` From aba49d6bc8daa82d5dbdae5cb4acc01d216cab08 Mon Sep 17 00:00:00 2001 From: egeakman Date: Thu, 30 May 2024 00:26:07 +0300 Subject: [PATCH 10/27] Use model_dump_json to be able to serialize datetime --- src/transform.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/transform.py b/src/transform.py index 105fecb..baa826b 100644 --- a/src/transform.py +++ b/src/transform.py @@ -108,8 +108,8 @@ class PretalxSubmission(BaseModel): # This is embedding a slot inside a submission for easier lookup later room: str | None = None - start: datetime | str | None = None - end: datetime | str | None = None + start: datetime | None = None + end: datetime | None = None # TODO: once we have schedule data then we can prefill those in the code here # These are added after the model is created @@ -291,11 +291,6 @@ def set_talks_before( submission.prev_talk = session.code break - def model_dump(self): - self.start = self.start.isoformat() if self.start else None - self.end = self.end.isoformat() if self.end else None - return super().model_dump() - def parse_submissions() -> list[PretalxSubmission]: """ @@ -343,7 +338,7 @@ def save_publishable_sessions(publishable: dict[str, PretalxSubmission]): PretalxSubmission.set_talks_after(sub, publishable) PretalxSubmission.set_talks_before(sub, publishable) - data = {k: v.model_dump() for k, v in publishable.items()} + data = {k: json.loads(v.model_dump_json()) for k, v in publishable.items()} with open(path, "w") as fd: json.dump(data, fd, indent=2) From 4e433ec512bd04daa26fcd62ee10208d8e207fe6 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sat, 1 Jun 2024 01:59:26 +0300 Subject: [PATCH 11/27] .env + documentation + extract more socials --- README.md | 6 +- data/examples/README.md | 60 +++++++------ data/examples/output/speakers.json | 6 +- data/examples/pretalx/speakers.json | 2 +- requirements.in | 1 + requirements.txt | 11 ++- src/config.py | 14 ++- src/transform.py | 135 ++++++++++++++++++---------- 8 files changed, 147 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 210f338..08ffc41 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ This project downloads, processes, saves, and serves the static JSON files containing details of accepted speakers and submissions via an API. -What this project does step-by-step: +Used by the EuroPython 2024 website and the Discord bot. + +**What this project does step-by-step:** 1. Downloads the Pretalx speaker and submission data, and saves it as JSON files. 2. Transforms the JSON files into a format that is easier to work with and OK to serve publicly. This includes removing unnecessary/private fields, and adding new fields. @@ -25,7 +27,7 @@ You can change the event in the [``config.py``](src/config.py) file. It is set t - Run only the download process: ``make download`` - Run only the transformation process: ``make transform`` -**Note:** Don't forget to set the ``PRETALX_TOKEN`` environment variable before running the download process. And please don't make too many requests to the Pretalx API, it might get angry 🤪 +**Note:** Don't forget to set ``PRETALX_TOKEN`` in your ``.env`` file at the root of the project. And please don't make too many requests to the Pretalx API, it might get angry 🤪 ## API diff --git a/data/examples/README.md b/data/examples/README.md index d5673cf..17e6155 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -52,29 +52,29 @@ The fields are as follows: -| Key | Type | Notes | -|--------------------|-----------------------------------|---------------------------------------------------------------| -| `code` | `string` | Unique identifier for the session | -| `title` | `string` | Title of the session | -| `speakers` | `list[string]` | List of codes of the speakers | -| `submission_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | -| `slug` | `string` | URL-friendly version of the title | -| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | -| `state` | `string` | State of the session (e.g. confirmed, canceled, etc.) | -| `abstract` | `string` | Abstract of the session | -| `tweet` | `string` | Tweet-length description of the session | -| `duration` | `string` | Duration of the session in minutes | -| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | -| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | -| `room` | `string` \| `null` | Room where the session will be held | -| `start` | `datetime (ISO format)` \| `null` | Start time of the session | -| `end` | `datetime (ISO format)` \| `null` | End time of the session | -| `talks_in_parallel`| `list[string]` \| `null` | List of codes of sessions happening in parallel | -| `talks_after` | `list[string]` \| `null` | List of codes of sessions happening after this session | -| `talks_before` | `list[string]` \| `null` | List of codes of sessions happening before this session | -| `next_talk` | `string` \| `null` | Code of the next session in the same room | -| `prev_talk` | `string` \| `null` | Code of the previous session in the same room | -| `website_url` | `string` | URL of the session on the conference website | +| Key | Type | Notes | +|---------------------|-----------------------------------|---------------------------------------------------------------| +| `code` | `string` | Unique identifier for the session | +| `title` | `string` | Title of the session | +| `speakers` | `list[string]` | List of codes of the speakers | +| `submission_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | +| `slug` | `string` | URL-friendly version of the title | +| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | +| `state` | `string` | State of the session (e.g. confirmed, canceled, etc.) | +| `abstract` | `string` | Abstract of the session | +| `tweet` | `string` | Tweet-length description of the session | +| `duration` | `string` | Duration of the session in minutes | +| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | +| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | +| `room` | `string` \| `null` | Room where the session will be held | +| `start` | `datetime (ISO format)` \| `null` | Start time of the session | +| `end` | `datetime (ISO format)` \| `null` | End time of the session | +| `talks_in_parallel` | `list[string]` \| `null` | List of codes of sessions happening in parallel | +| `talks_after` | `list[string]` \| `null` | List of codes of sessions happening after this session | +| `talks_before` | `list[string]` \| `null` | List of codes of sessions happening before this session | +| `next_talk` | `string` \| `null` | Code of the next session in the same room | +| `prev_talk` | `string` \| `null` | Code of the previous session in the same room | +| `website_url` | `string` | URL of the session on the conference website |   @@ -97,8 +97,10 @@ The fields are as follows: ], "affiliation": "A Company", "homepage": "https://example.com", - "twitter": "example", - "mastodon": "example" + "gitx_url": "https://github.com/B4D5E6", + "linkedin_url": "https://www.linkedin.com/in/B4D5E6", + "mastodon_url": "https://mastodon.social/@B4D5E6", + "twitter_url": "https://x.com/B4D5E6" }, ... } @@ -114,10 +116,12 @@ The fields are as follows: | `code` | `string` | Unique identifier for the speaker | | `name` | `string` | Name of the speaker | | `biography` | `string` \| `null` | Biography of the speaker | -| `avatar` | `string` \| `null` | URL of the speaker's avatar | +| `avatar` | `string` | URL of the speaker's avatar | | `slug` | `string` | URL-friendly version of the name | | `submissions` | `list[string]` | List of codes of the sessions the speaker is speaking at | | `affiliation` | `string` \| `null` | Affiliation of the speaker | | `homepage` | `string` \| `null` | URL of the speaker's homepage | -| `twitter` | `string` \| `null` | Twitter handle of the speaker | -| `mastodon` | `string` \| `null` | Mastodon handle of the speaker | +| `gitx_url` | `string` \| `null` | URL of the speaker's GitHub/GitLab/etc. profile | +| `linkedin_url` | `string` \| `null` | URL of the speaker's LinkedIn profile | +| `twitter_url` | `string` \| `null` | URL of the speaker's Twitter profile | +| `mastodon_url` | `string` \| `null` | URL of the speaker's Mastodon profile | diff --git a/data/examples/output/speakers.json b/data/examples/output/speakers.json index 23c45a6..66925e6 100644 --- a/data/examples/output/speakers.json +++ b/data/examples/output/speakers.json @@ -8,7 +8,9 @@ "submissions": ["A8CD3F"], "affiliation": "A Company", "homepage": null, - "twitter": null, - "mastodon": null + "gitx_url": "https://github.com/F3DC8A", + "linkedin_url": "https://www.linkedin.com/in/F3DC8A", + "mastodon_url": null, + "twitter_url": null } } diff --git a/data/examples/pretalx/speakers.json b/data/examples/pretalx/speakers.json index 73d5917..7c961a0 100644 --- a/data/examples/pretalx/speakers.json +++ b/data/examples/pretalx/speakers.json @@ -83,7 +83,7 @@ "en": "Social (LinkedIn)" } }, - "answer": "https://www.linkedin.com/in/F3DC8A/", + "answer": "https://www.linkedin.com/in/F3DC8A", "answer_file": null, "submission": null, "review": null, diff --git a/requirements.in b/requirements.in index 06779cd..14d29d5 100644 --- a/requirements.in +++ b/requirements.in @@ -4,5 +4,6 @@ pre-commit requests pydantic +python-dotenv python-slugify tqdm diff --git a/requirements.txt b/requirements.txt index 3741855..0cc46a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile # -annotated-types==0.6.0 +annotated-types==0.7.0 # via pydantic attrs==23.2.0 # via wmctrl @@ -26,7 +26,7 @@ idna==3.7 # via requests iniconfig==2.0.0 # via pytest -nodeenv==1.8.0 +nodeenv==1.9.0 # via pre-commit packaging==24.0 # via pytest @@ -48,6 +48,8 @@ pyrepl==0.9.0 # via fancycompleter pytest==8.2.1 # via -r requirements.in +python-dotenv==1.0.1 + # via -r requirements.in python-slugify==8.0.4 # via -r requirements.in pyyaml==6.0.1 @@ -58,7 +60,7 @@ text-unidecode==1.3 # via python-slugify tqdm==4.66.4 # via -r requirements.in -typing-extensions==4.11.0 +typing-extensions==4.12.0 # via # pydantic # pydantic-core @@ -68,6 +70,3 @@ virtualenv==20.26.2 # via pre-commit wmctrl==0.5 # via pdbpp - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/src/config.py b/src/config.py index 9a3944f..3a235a2 100644 --- a/src/config.py +++ b/src/config.py @@ -1,6 +1,8 @@ import os from pathlib import Path +from dotenv import load_dotenv + class Config: event = "europython-2024" @@ -8,6 +10,12 @@ class Config: raw_path = Path(f"{project_root}/data/raw/{event}") public_path = Path(f"{project_root}/data/public/{event}") - @staticmethod - def token(): - return os.environ["PRETALX_TOKEN"] + @classmethod + def token(cls) -> str: + dotenv_exists = load_dotenv(cls.project_root / ".env") + if (token := os.getenv("PRETALX_TOKEN")) and not dotenv_exists: + print("Please prefer .env file to store your token! It's more secure!") + return token + elif token is None: + raise Exception("Please set your token in .env file!") + return token diff --git a/src/transform.py b/src/transform.py index baa826b..9a71647 100644 --- a/src/transform.py +++ b/src/transform.py @@ -4,7 +4,7 @@ import sys from datetime import datetime -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator from slugify import slugify from src.config import Config @@ -15,6 +15,8 @@ class SpeakerQuestion: homepage = "Social (Homepage)" twitter = "Social (X/Twitter)" mastodon = "Social (Mastodon)" + linkedin = "Social (LinkedIn)" + gitx = "Social (Github/Gitlab)" class SubmissionQuestion: @@ -48,11 +50,24 @@ def extract(cls, values): return values +class PretalxSlot(BaseModel): + room: str | None = None + start: datetime | None = None + end: datetime | None = None + + @field_validator("room", mode="before") + @classmethod + def handle_localized(cls, v): + if isinstance(v, dict): + return v.get("en") + return v + + class PretalxSpeaker(BaseModel): code: str name: str biography: str | None = None - avatar: str | None = None + avatar: str slug: str answers: list[PretalxAnswer] = Field(..., exclude=True) submissions: list[str] @@ -60,12 +75,14 @@ class PretalxSpeaker(BaseModel): # Extracted affiliation: str | None = None homepage: str | None = None - twitter: str | None = None - mastodon: str | None = None + twitter_url: str | None = None + mastodon_url: str | None = None + linkedin_url: str | None = None + gitx_url: str | None = None @model_validator(mode="before") @classmethod - def extract(cls, values): + def extract(cls, values) -> dict: values["slug"] = slugify(values["name"]) answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] @@ -77,15 +94,47 @@ def extract(cls, values): if answer.question_text == SpeakerQuestion.homepage: values["homepage"] = answer.answer_text - # NOTE: in practice the format of the data here is different, - # depending on the speaker. We could fix this here by parsing the - # the answer_text to some standardised format (either @handle or - # https://twitter.com/handle url, etc) + # Handle handles (pun intended) if answer.question_text == SpeakerQuestion.twitter: - values["twitter"] = answer.answer_text + twitter_url = answer.answer_text.strip().split()[0] + if twitter_url.startswith("@"): + twitter_url = f"https://x.com/{twitter_url[1:]}" + elif not twitter_url.startswith(("https://", "http://", "www.")): + twitter_url = f"https://x.com/{twitter_url}" + else: + twitter_url = f"https://{twitter_url.removeprefix('https://').removeprefix('http://')}" + + values["twitter_url"] = twitter_url.split("?")[0] + + # If it's like @user@instance, we need to convert it to a URL if answer.question_text == SpeakerQuestion.mastodon: - values["mastodon"] = answer.answer_text + mastodon_url = answer.answer_text.strip().split()[0] + + if ( + not mastodon_url.startswith(("https://", "http://")) + and mastodon_url.count("@") == 2 + ): + mastodon_url = f"https://{mastodon_url.split('@')[2]}/@{mastodon_url.split('@')[1]}" + else: + mastodon_url = f"https://{mastodon_url.removeprefix('https://').removeprefix('http://')}" + + values["mastodon_url"] = mastodon_url.split("?")[0] + + if answer.question_text == SpeakerQuestion.linkedin: + linkedin_url = answer.answer_text.strip().split()[0] + + if linkedin_url.startswith("in/"): + linkedin_url = f"https://linkedin.com/{linkedin_url}" + elif not linkedin_url.startswith(("https://", "http://", "www.")): + linkedin_url = f"https://linkedin.com/in/{linkedin_url}" + else: + linkedin_url = f"https://{linkedin_url.removeprefix('https://').removeprefix('http://')}" + + values["linkedin_url"] = linkedin_url.split("?")[0] + + if answer.question_text == SpeakerQuestion.gitx: + values["gitx_url"] = answer.answer_text.strip().split()[0] return values @@ -100,6 +149,7 @@ class PretalxSubmission(BaseModel): state: str abstract: str answers: list[PretalxAnswer] = Field(..., exclude=True) + slot: PretalxSlot | None = Field(..., exclude=True) tweet: str = "" duration: str @@ -121,23 +171,24 @@ class PretalxSubmission(BaseModel): website_url: str - @model_validator(mode="before") + @field_validator("submission_type", "track", mode="before") @classmethod - def extract(cls, values): - # # SubmissionType and Track have localised names. For this project we - # # only care about their english versions, so we can extract them here - for field in ["submission_type", "track"]: - if values[field] is None: - continue - else: - # In 2024 some of those are localised, and some are not. - # Instead of figuring out why and fixing the data, there's this - # hack: - if isinstance(values[field], dict): - values[field] = values[field]["en"] + def handle_localized(cls, v) -> str: + if isinstance(v, dict): + return v.get("en") + return v - values["speakers"] = sorted([s["code"] for s in values["speakers"]]) + @field_validator("duration", mode="before") + @classmethod + def duration_to_string(cls, v) -> str: + if isinstance(v, int): + return str(v) + return v + @model_validator(mode="before") + @classmethod + def extract(cls, values) -> dict: + values["speakers"] = sorted([s["code"] for s in values["speakers"]]) answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] for answer in answers: @@ -154,19 +205,11 @@ def extract(cls, values): if answer.question_text == SubmissionQuestion.level: values["level"] = answer.answer_text.lower() - # Convert duration to string for model validation - if isinstance(values["duration"], int): - values["duration"] = str(values["duration"]) - - if cls.is_publishable and values["slot"]: - slot = values["slot"] - - if isinstance(slot["room"], dict): - values["room"] = slot["room"]["en"] - - if slot["start"]: - values["start"] = datetime.fromisoformat(slot["start"]) - values["end"] = datetime.fromisoformat(slot["end"]) + if values.get("slot"): + slot = PretalxSlot.model_validate(values["slot"]) + values["room"] = slot.room + values["start"] = slot.start + values["end"] = slot.end slug = slugify(values["title"]) values["slug"] = slug @@ -177,21 +220,21 @@ def extract(cls, values): return values @property - def is_accepted(self): + def is_accepted(self) -> bool: return self.state == SubmissionState.accepted @property - def is_confirmed(self): + def is_confirmed(self) -> bool: return self.state == SubmissionState.confirmed @property - def is_publishable(self): + def is_publishable(self) -> bool: return self.is_accepted or self.is_confirmed @staticmethod def set_talks_in_parallel( submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] - ): + ) -> None: parallel = [] for session in all_sessions.values(): if ( @@ -210,7 +253,7 @@ def set_talks_in_parallel( @staticmethod def set_talks_after( submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] - ): + ) -> None: # Sort sessions based on start time, early first all_sessions_sorted = sorted( all_sessions.values(), key=lambda x: (x.start is None, x.start) @@ -248,16 +291,16 @@ def set_talks_after( # Set the next talks in all rooms submission.talks_after = [session.code for session in unique_sessions] - # Set the next talk in the same room + # Set the next talk in the same room, or a keynote for session in unique_sessions: - if session.room == submission.room: + if session.room == submission.room or session.submission_type == "Keynote": submission.next_talk = session.code break @staticmethod def set_talks_before( submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] - ): + ) -> None: # Sort sessions based on start time, late first all_sessions_sorted = sorted( all_sessions.values(), From fcceb66f4bd32bf57068fc29bdadd4dbbc048706 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sat, 1 Jun 2024 19:32:36 +0300 Subject: [PATCH 12/27] exist_ok --- src/download.py | 3 +-- src/transform.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/download.py b/src/download.py index 901b2f9..0a8b20f 100644 --- a/src/download.py +++ b/src/download.py @@ -19,8 +19,7 @@ "speakers?questions=all", ] -if not Config.raw_path.exists(): - Config.raw_path.mkdir(parents=True) +Config.raw_path.mkdir(parents=True, exist_ok=True) for resource in resources: url = base_url + f"{resource}" diff --git a/src/transform.py b/src/transform.py index 9a71647..31588a9 100644 --- a/src/transform.py +++ b/src/transform.py @@ -397,8 +397,7 @@ def save_publishable_speakers(publishable: dict[str, PretalxSubmission]): def save_all(all_sessions: dict[str, PretalxSubmission]): - if not Config.public_path.exists(): - Config.public_path.mkdir(parents=True) + Config.public_path.mkdir(parents=True, exist_ok=True) save_publishable_sessions(all_sessions) save_publishable_speakers(all_sessions) From b6669716611e0211b700fdbe9baea6c17f90a31d Mon Sep 17 00:00:00 2001 From: egeakman Date: Sun, 2 Jun 2024 00:32:40 +0300 Subject: [PATCH 13/27] url extraction functions --- .pre-commit-config.yaml | 1 - src/transform.py | 82 ++++++++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e4315c4..313dba6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,6 @@ repos: hooks: - id: ruff - id: ruff-format - args: ["--check"] - repo: local hooks: diff --git a/src/transform.py b/src/transform.py index 31588a9..51bc418 100644 --- a/src/transform.py +++ b/src/transform.py @@ -83,7 +83,42 @@ class PretalxSpeaker(BaseModel): @model_validator(mode="before") @classmethod def extract(cls, values) -> dict: - values["slug"] = slugify(values["name"]) + # Extract the twitter URL from the answer + def extract_twitter_url(text: str) -> str: + if text.startswith("@"): + twitter_url = f"https://x.com/{text[1:]}" + elif not text.startswith(("https://", "http://", "www.")): + twitter_url = f"https://x.com/{text}" + else: + twitter_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) + + return twitter_url.split("?")[0] + + # If it's like @user@instance, we need to convert it to a URL + def extract_mastodon_url(text: str) -> str: + if not text.startswith(("https://", "http://")) and text.count("@") == 2: + mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}" + else: + mastodon_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) + + return mastodon_url.split("?")[0] + + # Extract the linkedin URL from the answer + def extract_linkedin_url(text: str) -> str: + if text.startswith("in/"): + linkedin_url = f"https://linkedin.com/{text}" + elif not text.startswith(("https://", "http://", "www.")): + linkedin_url = f"https://linkedin.com/in/{text}" + else: + linkedin_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) + + return linkedin_url.split("?")[0] answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] @@ -94,48 +129,27 @@ def extract(cls, values) -> dict: if answer.question_text == SpeakerQuestion.homepage: values["homepage"] = answer.answer_text - # Handle handles (pun intended) if answer.question_text == SpeakerQuestion.twitter: - twitter_url = answer.answer_text.strip().split()[0] - - if twitter_url.startswith("@"): - twitter_url = f"https://x.com/{twitter_url[1:]}" - elif not twitter_url.startswith(("https://", "http://", "www.")): - twitter_url = f"https://x.com/{twitter_url}" - else: - twitter_url = f"https://{twitter_url.removeprefix('https://').removeprefix('http://')}" + values["twitter_url"] = extract_twitter_url( + answer.answer_text.strip().split()[0] + ) - values["twitter_url"] = twitter_url.split("?")[0] - - # If it's like @user@instance, we need to convert it to a URL if answer.question_text == SpeakerQuestion.mastodon: - mastodon_url = answer.answer_text.strip().split()[0] - - if ( - not mastodon_url.startswith(("https://", "http://")) - and mastodon_url.count("@") == 2 - ): - mastodon_url = f"https://{mastodon_url.split('@')[2]}/@{mastodon_url.split('@')[1]}" - else: - mastodon_url = f"https://{mastodon_url.removeprefix('https://').removeprefix('http://')}" - - values["mastodon_url"] = mastodon_url.split("?")[0] + values["mastodon_url"] = extract_mastodon_url( + answer.answer_text.strip().split()[0] + ) if answer.question_text == SpeakerQuestion.linkedin: - linkedin_url = answer.answer_text.strip().split()[0] - - if linkedin_url.startswith("in/"): - linkedin_url = f"https://linkedin.com/{linkedin_url}" - elif not linkedin_url.startswith(("https://", "http://", "www.")): - linkedin_url = f"https://linkedin.com/in/{linkedin_url}" - else: - linkedin_url = f"https://{linkedin_url.removeprefix('https://').removeprefix('http://')}" - - values["linkedin_url"] = linkedin_url.split("?")[0] + values["linkedin_url"] = extract_linkedin_url( + answer.answer_text.strip().split()[0] + ) if answer.question_text == SpeakerQuestion.gitx: values["gitx_url"] = answer.answer_text.strip().split()[0] + # Set the slug + values["slug"] = slugify(values["name"]) + return values From 5798b4b3f6a237a286b13fbcb378a52a11bec4c5 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sun, 2 Jun 2024 04:58:04 +0300 Subject: [PATCH 14/27] Tried to put timings under a different model --- pyproject.toml | 2 + src/transform.py | 314 +++++++++++++++----------- tests/test_examples_are_up_to_date.py | 4 +- 3 files changed, 192 insertions(+), 128 deletions(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5d7bf33 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.isort] +profile = "black" diff --git a/src/transform.py b/src/transform.py index 51bc418..7f1221d 100644 --- a/src/transform.py +++ b/src/transform.py @@ -4,7 +4,7 @@ import sys from datetime import datetime -from pydantic import BaseModel, Field, field_validator, model_validator +from pydantic import BaseModel, Field, RootModel, field_validator, model_validator from slugify import slugify from src.config import Config @@ -63,6 +63,152 @@ def handle_localized(cls, v): return v +class TimingRelationship(BaseModel): + talks_in_parallel: list[str] + talks_after: list[str] + talks_before: list[str] + next_talk: str | None = None + prev_talk: str | None = None + + @model_validator(mode="before") + @classmethod + def compute(cls, values): + session = values["session"] + all_sessions = values["all_sessions"] + + talks_in_parallel = cls.compute_talks_in_parallel(session, all_sessions) + talks_after_data = cls.compute_talks_after( + session, all_sessions, talks_in_parallel + ) + talks_before_data = cls.compute_talks_before( + session, all_sessions, talks_in_parallel + ) + + values["talks_in_parallel"] = talks_in_parallel + values["talks_after"] = talks_after_data.get("talks_after") + values["talks_before"] = talks_before_data.get("talks_before") + values["next_talk"] = talks_after_data.get("next_talk") + values["prev_talk"] = talks_before_data.get("prev_talk") + + return values + + @staticmethod + def compute_talks_in_parallel( + session: PretalxSession, all_sessions: list[PretalxSession] + ) -> list[str]: + talks_parallel = [] + for other_session in all_sessions: + if ( + other_session.code == session.code + or other_session.start is None + or session.start is None + ): + continue + + # If they intersect, they are in parallel + if other_session.start < session.end and other_session.end > session.start: + talks_parallel.append(other_session.code) + + return talks_parallel + + @staticmethod + def compute_talks_after( + session: PretalxSession, + all_sessions: list[PretalxSession], + talks_in_parallel: list[str] = [], + ) -> dict[str, list[str] | str | None]: + # Sort sessions based on start time, early first + all_sessions_sorted = sorted( + all_sessions, key=lambda x: (x.start is None, x.start) + ) + + # Filter out sessions + remaining_sessions = [ + other_session + for other_session in all_sessions_sorted + if other_session.start is not None + and other_session.start >= session.end + and other_session.code not in talks_in_parallel + and other_session.code != session.code + and other_session.start.day == session.start.day + and not other_session.submission_type + == session.submission_type + == "Announcements" + ] + + # Add sessions to the list if they are in different rooms + seen_rooms = set() + unique_sessions = [] + + for other_session in remaining_sessions: + if other_session.room not in seen_rooms: + unique_sessions.append(other_session) + seen_rooms.add(other_session.room) + + # If there is a keynote next, only show that + if any(s.submission_type == "Keynote" for s in unique_sessions): + unique_sessions = [ + s for s in unique_sessions if s.submission_type == "Keynote" + ] + + # Set the next talks in all rooms + talks_after = [s.code for s in unique_sessions] + + # Set the next talk in the same room, or a keynote + next_talk = None + for other_session in unique_sessions: + if ( + other_session.room == session.room + or other_session.submission_type == "Keynote" + ): + next_talk = other_session.code + break + + return {"talks_after": talks_after, "next_talk": next_talk} + + @staticmethod + def compute_talks_before( + session: PretalxSession, + all_sessions: list[PretalxSession], + talks_in_parallel: list[str] = [], + ) -> dict[str, list[str] | str | None]: + # Sort sessions based on start time, late first + all_sessions_sorted = sorted( + all_sessions, + key=lambda x: (x.start is None, x.start), + reverse=True, + ) + + remaining_sessions = [ + other_session + for other_session in all_sessions_sorted + if other_session.start is not None + and other_session.code not in talks_in_parallel + and other_session.start <= session.start + and other_session.code != session.code + and other_session.start.day == session.start.day + and other_session.submission_type != "Announcements" + ] + + seen_rooms = set() + unique_sessions = [] + + for other_session in remaining_sessions: + if other_session.room not in seen_rooms: + unique_sessions.append(other_session) + seen_rooms.add(other_session.room) + + talks_before = [session.code for session in unique_sessions] + + prev_talk = None + for other_session in unique_sessions: + if other_session.room == session.room: + prev_talk = other_session.code + break + + return {"talks_before": talks_before, "prev_talk": prev_talk} + + class PretalxSpeaker(BaseModel): code: str name: str @@ -153,7 +299,37 @@ def extract_linkedin_url(text: str) -> str: return values -class PretalxSubmission(BaseModel): +class PretalxSubmissions(RootModel): + root: list[PretalxSession] + + @model_validator(mode="before") + @classmethod + def initiate(cls, root): + """ + Returns only the publishable sessions, and computes their timings + """ + publishable = [] + for submission in root: + sub = PretalxSession.model_validate(submission) + if sub.is_publishable: + publishable.append(sub) + + for session in publishable: + if session.start: + timings = TimingRelationship.model_validate( + dict(session=session, all_sessions=publishable) + ) + + session.talks_in_parallel = timings.talks_in_parallel + session.talks_after = timings.talks_after + session.talks_before = timings.talks_before + session.next_talk = timings.next_talk + session.prev_talk = timings.prev_talk + + return publishable + + +class PretalxSession(BaseModel): code: str title: str speakers: list[str] # We only want the code, not the full info @@ -175,8 +351,6 @@ class PretalxSubmission(BaseModel): start: datetime | None = None end: datetime | None = None - # TODO: once we have schedule data then we can prefill those in the code here - # These are added after the model is created talks_in_parallel: list[str] | None = None talks_after: list[str] | None = None talks_before: list[str] | None = None @@ -245,123 +419,21 @@ def is_confirmed(self) -> bool: def is_publishable(self) -> bool: return self.is_accepted or self.is_confirmed - @staticmethod - def set_talks_in_parallel( - submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] - ) -> None: - parallel = [] - for session in all_sessions.values(): - if ( - session.code == submission.code - or session.start is None - or submission.start is None - ): - continue - - # If they intersect, they are in parallel - if session.start < submission.end and session.end > submission.start: - parallel.append(session.code) - - submission.talks_in_parallel = parallel - - @staticmethod - def set_talks_after( - submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] - ) -> None: - # Sort sessions based on start time, early first - all_sessions_sorted = sorted( - all_sessions.values(), key=lambda x: (x.start is None, x.start) - ) - - # Filter out sessions - remaining_sessions = [ - session - for session in all_sessions_sorted - if session.start is not None - and session.start >= submission.end - and session.code not in submission.talks_in_parallel - and session.code != submission.code - and submission.start.day == session.start.day - and not submission.submission_type - == session.submission_type - == "Announcements" - ] - - # Add sessions to the list if they are in different rooms - seen_rooms = set() - unique_sessions = [] - for session in remaining_sessions: - if session.room not in seen_rooms: - unique_sessions.append(session) - seen_rooms.add(session.room) - - # If there is a keynote next, only show that - if any(s.submission_type == "Keynote" for s in unique_sessions): - unique_sessions = [ - s for s in unique_sessions if s.submission_type == "Keynote" - ] - - # Set the next talks in all rooms - submission.talks_after = [session.code for session in unique_sessions] - - # Set the next talk in the same room, or a keynote - for session in unique_sessions: - if session.room == submission.room or session.submission_type == "Keynote": - submission.next_talk = session.code - break - - @staticmethod - def set_talks_before( - submission: PretalxSubmission, all_sessions: dict[str, PretalxSubmission] - ) -> None: - # Sort sessions based on start time, late first - all_sessions_sorted = sorted( - all_sessions.values(), - key=lambda x: (x.start is None, x.start), - reverse=True, - ) - - remaining_sessions = [ - session - for session in all_sessions_sorted - if session.start is not None - and session.code not in submission.talks_in_parallel - and session.start <= submission.start - and session.code != submission.code - and submission.start.day == session.start.day - and session.submission_type != "Announcements" - ] - - seen_rooms = set() - unique_sessions = [] - - for session in remaining_sessions: - if session.room not in seen_rooms: - unique_sessions.append(session) - seen_rooms.add(session.room) - - submission.talks_before = [session.code for session in unique_sessions] - - for session in unique_sessions: - if session.room == submission.room: - submission.prev_talk = session.code - break - - -def parse_submissions() -> list[PretalxSubmission]: +def parse_publishable_submissions() -> list[PretalxSession]: """ - Returns only confirmed talks + Returns only publishable sessions """ with open(Config.raw_path / "submissions_latest.json") as fd: js = json.load(fd) - subs = [PretalxSubmission.model_validate(item) for item in js] + subs = PretalxSubmissions.model_validate(js).root + subs = {s.code: s for s in subs} return subs def parse_speakers() -> list[PretalxSpeaker]: """ - Returns only speakers with confirmed talks + Returns only speakers with publishable sessions """ with open(Config.raw_path / "speakers_latest.json") as fd: js = json.load(fd) @@ -369,10 +441,6 @@ def parse_speakers() -> list[PretalxSpeaker]: return speakers -def publishable_submissions() -> dict[str, PretalxSubmission]: - return {s.code: s for s in parse_submissions() if s.is_publishable} - - def publishable_speakers(accepted_proposals: set[str]) -> dict[str, PretalxSpeaker]: sp = parse_speakers() output = {} @@ -386,21 +454,15 @@ def publishable_speakers(accepted_proposals: set[str]) -> dict[str, PretalxSpeak return output -def save_publishable_sessions(publishable: dict[str, PretalxSubmission]): +def save_publishable_sessions(publishable: dict[str, PretalxSession]): path = Config.public_path / "sessions.json" - for sub in publishable.values(): - if sub.start is not None: - PretalxSubmission.set_talks_in_parallel(sub, publishable) - PretalxSubmission.set_talks_after(sub, publishable) - PretalxSubmission.set_talks_before(sub, publishable) - data = {k: json.loads(v.model_dump_json()) for k, v in publishable.items()} with open(path, "w") as fd: json.dump(data, fd, indent=2) -def save_publishable_speakers(publishable: dict[str, PretalxSubmission]): +def save_publishable_speakers(publishable: dict[str, PretalxSession]): path = Config.public_path / "speakers.json" speakers = publishable_speakers(publishable.keys()) @@ -410,14 +472,14 @@ def save_publishable_speakers(publishable: dict[str, PretalxSubmission]): json.dump(data, fd, indent=2) -def save_all(all_sessions: dict[str, PretalxSubmission]): +def save_all(all_sessions: dict[str, PretalxSession]): Config.public_path.mkdir(parents=True, exist_ok=True) save_publishable_sessions(all_sessions) save_publishable_speakers(all_sessions) -def check_duplicate_slugs(all_sessions: dict[str, PretalxSubmission]) -> bool: +def check_duplicate_slugs(all_sessions: dict[str, PretalxSession]) -> bool: all_speakers = publishable_speakers(all_sessions.keys()) session_slugs = [s.slug for s in all_sessions.values()] @@ -444,7 +506,7 @@ def check_duplicate_slugs(all_sessions: dict[str, PretalxSubmission]) -> bool: print(f"Transforming {Config.event} data...") print("Checking for duplicate slugs...") - all_sessions = publishable_submissions() + all_sessions = parse_publishable_submissions() if not check_duplicate_slugs(all_sessions) and ( len(sys.argv) <= 1 or sys.argv[1] != "--allow-dupes" diff --git a/tests/test_examples_are_up_to_date.py b/tests/test_examples_are_up_to_date.py index 5a5b987..33fa58c 100644 --- a/tests/test_examples_are_up_to_date.py +++ b/tests/test_examples_are_up_to_date.py @@ -1,6 +1,6 @@ import json -from src.transform import PretalxSpeaker, PretalxSubmission +from src.transform import PretalxSession, PretalxSpeaker with open("./data/examples/pretalx/submissions.json") as fd: pretalx_submissions = json.load(fd) @@ -13,7 +13,7 @@ def test_sessions_example(): assert pretalx_submissions[0]["code"] == "A8CD3F" pretalx = pretalx_submissions[0] - transformed = PretalxSubmission.model_validate(pretalx) + transformed = PretalxSession.model_validate(pretalx) with open("./data/examples/output/sessions.json") as fd: sessions = json.load(fd) From 7818471a7afc8cc69cfbd42e0d75464951a967a3 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sun, 2 Jun 2024 19:53:05 +0300 Subject: [PATCH 15/27] correct typing at some places --- data/.gitignore | 3 +++ data/public/.gitignore | 4 ---- data/raw/.gitignore | 1 - src/config.py | 2 +- src/download.py | 5 +++-- src/transform.py | 15 +++++++++------ 6 files changed, 16 insertions(+), 14 deletions(-) create mode 100644 data/.gitignore delete mode 100644 data/public/.gitignore delete mode 100644 data/raw/.gitignore diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..2070759 --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,3 @@ +# JSON files except the ones in examples/ +*.json +!examples/** diff --git a/data/public/.gitignore b/data/public/.gitignore deleted file mode 100644 index 5f55c43..0000000 --- a/data/public/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# In this folder we have public data -# This may in the future actually end up in this repository -# But for now it's a bit too much noise -*.json diff --git a/data/raw/.gitignore b/data/raw/.gitignore deleted file mode 100644 index a6c57f5..0000000 --- a/data/raw/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.json diff --git a/src/config.py b/src/config.py index 3a235a2..1ee8d0b 100644 --- a/src/config.py +++ b/src/config.py @@ -17,5 +17,5 @@ def token(cls) -> str: print("Please prefer .env file to store your token! It's more secure!") return token elif token is None: - raise Exception("Please set your token in .env file!") + raise ValueError("Please set your token in .env file!") return token diff --git a/src/download.py b/src/download.py index 0a8b20f..8027584 100644 --- a/src/download.py +++ b/src/download.py @@ -1,4 +1,5 @@ import json +from typing import Any import requests from tqdm import tqdm @@ -24,8 +25,8 @@ for resource in resources: url = base_url + f"{resource}" - res0 = [] - data = {"next": url} + res0: list[dict[str, Any]] = [] + data: dict[str, Any] = {"next": url} n = 0 pbar = tqdm(desc=f"Downloading {resource}", unit=" page", dynamic_ncols=True) diff --git a/src/transform.py b/src/transform.py index 7f1221d..fd72d0f 100644 --- a/src/transform.py +++ b/src/transform.py @@ -2,6 +2,7 @@ import json import sys +from collections.abc import KeysView from datetime import datetime from pydantic import BaseModel, Field, RootModel, field_validator, model_validator @@ -315,7 +316,7 @@ def initiate(cls, root): publishable.append(sub) for session in publishable: - if session.start: + if session.start and session.end: timings = TimingRelationship.model_validate( dict(session=session, all_sessions=publishable) ) @@ -361,7 +362,7 @@ class PretalxSession(BaseModel): @field_validator("submission_type", "track", mode="before") @classmethod - def handle_localized(cls, v) -> str: + def handle_localized(cls, v) -> str | None: if isinstance(v, dict): return v.get("en") return v @@ -420,15 +421,15 @@ def is_publishable(self) -> bool: return self.is_accepted or self.is_confirmed -def parse_publishable_submissions() -> list[PretalxSession]: +def parse_publishable_submissions() -> dict[str, PretalxSession]: """ Returns only publishable sessions """ with open(Config.raw_path / "submissions_latest.json") as fd: js = json.load(fd) subs = PretalxSubmissions.model_validate(js).root - subs = {s.code: s for s in subs} - return subs + subs_dict = {s.code: s for s in subs} + return subs_dict def parse_speakers() -> list[PretalxSpeaker]: @@ -441,7 +442,9 @@ def parse_speakers() -> list[PretalxSpeaker]: return speakers -def publishable_speakers(accepted_proposals: set[str]) -> dict[str, PretalxSpeaker]: +def publishable_speakers( + accepted_proposals: KeysView[str], +) -> dict[str, PretalxSpeaker]: sp = parse_speakers() output = {} for speaker in sp: From 84d338752b0a623360a1ea7e7657deb01c3297fb Mon Sep 17 00:00:00 2001 From: egeakman Date: Sun, 2 Jun 2024 23:33:09 +0300 Subject: [PATCH 16/27] better overall structure --- src/transform.py | 372 ++++++++++++++++++++++++++++------------------- 1 file changed, 223 insertions(+), 149 deletions(-) diff --git a/src/transform.py b/src/transform.py index fd72d0f..721d841 100644 --- a/src/transform.py +++ b/src/transform.py @@ -5,7 +5,14 @@ from collections.abc import KeysView from datetime import datetime -from pydantic import BaseModel, Field, RootModel, field_validator, model_validator +from pydantic import ( + BaseModel, + Field, + RootModel, + computed_field, + field_validator, + model_validator, +) from slugify import slugify from src.config import Config @@ -64,34 +71,37 @@ def handle_localized(cls, v): return v -class TimingRelationship(BaseModel): - talks_in_parallel: list[str] - talks_after: list[str] - talks_before: list[str] - next_talk: str | None = None - prev_talk: str | None = None +class TimingRelationship: + # Relationships are stored in a dictionary with the session code as the key + relationships: dict[str, dict[str, list[str] | str | None]] = {} - @model_validator(mode="before") @classmethod - def compute(cls, values): - session = values["session"] - all_sessions = values["all_sessions"] - - talks_in_parallel = cls.compute_talks_in_parallel(session, all_sessions) - talks_after_data = cls.compute_talks_after( - session, all_sessions, talks_in_parallel - ) - talks_before_data = cls.compute_talks_before( - session, all_sessions, talks_in_parallel - ) - - values["talks_in_parallel"] = talks_in_parallel - values["talks_after"] = talks_after_data.get("talks_after") - values["talks_before"] = talks_before_data.get("talks_before") - values["next_talk"] = talks_after_data.get("next_talk") - values["prev_talk"] = talks_before_data.get("prev_talk") + def compute_relationships( + cls, all_sessions: list[PretalxSession] + ) -> dict[str, dict[str, list[str] | str | None]]: + relationships = {} + for session in all_sessions: + talks_in_parallel = cls.compute_talks_in_parallel(session, all_sessions) + talks_after_data = cls.compute_talks_after( + session, all_sessions, talks_in_parallel + ) + talks_before_data = cls.compute_talks_before( + session, all_sessions, talks_in_parallel + ) + + relationships[session.code] = { + "talks_in_parallel": talks_in_parallel, + "talks_after": talks_after_data.get("talks_after"), + "next_talk": talks_after_data.get("next_talk"), + "talks_before": talks_before_data.get("talks_before"), + "prev_talk": talks_before_data.get("prev_talk"), + } + + cls.relationships = relationships - return values + @classmethod + def get_relationships(cls, code: str) -> dict[str, list[str] | str | None]: + return cls.relationships[code] @staticmethod def compute_talks_in_parallel( @@ -230,45 +240,9 @@ class PretalxSpeaker(BaseModel): @model_validator(mode="before") @classmethod def extract(cls, values) -> dict: - # Extract the twitter URL from the answer - def extract_twitter_url(text: str) -> str: - if text.startswith("@"): - twitter_url = f"https://x.com/{text[1:]}" - elif not text.startswith(("https://", "http://", "www.")): - twitter_url = f"https://x.com/{text}" - else: - twitter_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) - - return twitter_url.split("?")[0] - - # If it's like @user@instance, we need to convert it to a URL - def extract_mastodon_url(text: str) -> str: - if not text.startswith(("https://", "http://")) and text.count("@") == 2: - mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}" - else: - mastodon_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) - - return mastodon_url.split("?")[0] - - # Extract the linkedin URL from the answer - def extract_linkedin_url(text: str) -> str: - if text.startswith("in/"): - linkedin_url = f"https://linkedin.com/{text}" - elif not text.startswith(("https://", "http://", "www.")): - linkedin_url = f"https://linkedin.com/in/{text}" - else: - linkedin_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) - - return linkedin_url.split("?")[0] - answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] + # Extract the answers for answer in answers: if answer.question_text == SpeakerQuestion.affiliation: values["affiliation"] = answer.answer_text @@ -277,17 +251,17 @@ def extract_linkedin_url(text: str) -> str: values["homepage"] = answer.answer_text if answer.question_text == SpeakerQuestion.twitter: - values["twitter_url"] = extract_twitter_url( + values["twitter_url"] = cls.extract_twitter_url( answer.answer_text.strip().split()[0] ) if answer.question_text == SpeakerQuestion.mastodon: - values["mastodon_url"] = extract_mastodon_url( + values["mastodon_url"] = cls.extract_mastodon_url( answer.answer_text.strip().split()[0] ) if answer.question_text == SpeakerQuestion.linkedin: - values["linkedin_url"] = extract_linkedin_url( + values["linkedin_url"] = cls.extract_linkedin_url( answer.answer_text.strip().split()[0] ) @@ -299,38 +273,52 @@ def extract_linkedin_url(text: str) -> str: return values + # Extract the twitter URL from the answer + @staticmethod + def extract_twitter_url(text: str) -> str: + if text.startswith("@"): + twitter_url = f"https://x.com/{text[1:]}" + elif not text.startswith(("https://", "http://", "www.")): + twitter_url = f"https://x.com/{text}" + else: + twitter_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) + + return twitter_url.split("?")[0] + + # If it's like @user@instance, we need to convert it to a URL + @staticmethod + def extract_mastodon_url(text: str) -> str: + if not text.startswith(("https://", "http://")) and text.count("@") == 2: + mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}" + else: + mastodon_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) -class PretalxSubmissions(RootModel): - root: list[PretalxSession] - - @model_validator(mode="before") - @classmethod - def initiate(cls, root): - """ - Returns only the publishable sessions, and computes their timings - """ - publishable = [] - for submission in root: - sub = PretalxSession.model_validate(submission) - if sub.is_publishable: - publishable.append(sub) - - for session in publishable: - if session.start and session.end: - timings = TimingRelationship.model_validate( - dict(session=session, all_sessions=publishable) - ) + return mastodon_url.split("?")[0] - session.talks_in_parallel = timings.talks_in_parallel - session.talks_after = timings.talks_after - session.talks_before = timings.talks_before - session.next_talk = timings.next_talk - session.prev_talk = timings.prev_talk + # Extract the linkedin URL from the answer + @staticmethod + def extract_linkedin_url(text: str) -> str: + if text.startswith("in/"): + linkedin_url = f"https://linkedin.com/{text}" + elif not text.startswith(("https://", "http://", "www.")): + linkedin_url = f"https://linkedin.com/in/{text}" + else: + linkedin_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) - return publishable + return linkedin_url.split("?")[0] class PretalxSession(BaseModel): + """ + Model for only confirmed and accepted sessions + """ + code: str title: str speakers: list[str] # We only want the code, not the full info @@ -347,19 +335,11 @@ class PretalxSession(BaseModel): level: str = "" delivery: str = "" - # This is embedding a slot inside a submission for easier lookup later + # Extracted room: str | None = None start: datetime | None = None end: datetime | None = None - talks_in_parallel: list[str] | None = None - talks_after: list[str] | None = None - talks_before: list[str] | None = None - next_talk: str | None = None - prev_talk: str | None = None - - website_url: str - @field_validator("submission_type", "track", mode="before") @classmethod def handle_localized(cls, v) -> str | None: @@ -374,12 +354,44 @@ def duration_to_string(cls, v) -> str: return str(v) return v + @computed_field + def website_url(self) -> str: + return ( + f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{self.slug}" + ) + + @computed_field + def talks_in_parallel(self) -> list[str] | None: + if self.start and self.end: + return TimingRelationship.get_relationships(self.code)["talks_in_parallel"] + + @computed_field + def talks_after(self) -> list[str] | None: + if self.start and self.end: + return TimingRelationship.get_relationships(self.code)["talks_after"] + + @computed_field + def talks_before(self) -> list[str] | None: + if self.start and self.end: + return TimingRelationship.get_relationships(self.code)["talks_before"] + + @computed_field + def next_talk(self) -> str | None: + if self.start and self.end: + return TimingRelationship.get_relationships(self.code)["next_talk"] + + @computed_field + def prev_talk(self) -> str | None: + if self.start and self.end: + return TimingRelationship.get_relationships(self.code)["prev_talk"] + @model_validator(mode="before") @classmethod def extract(cls, values) -> dict: values["speakers"] = sorted([s["code"] for s in values["speakers"]]) answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] + # Extract the answers for answer in answers: # TODO if we need any other questions if answer.question_text == SubmissionQuestion.tweet: @@ -394,31 +406,100 @@ def extract(cls, values) -> dict: if answer.question_text == SubmissionQuestion.level: values["level"] = answer.answer_text.lower() + # Set slot information if values.get("slot"): slot = PretalxSlot.model_validate(values["slot"]) values["room"] = slot.room values["start"] = slot.start values["end"] = slot.end - slug = slugify(values["title"]) - values["slug"] = slug - values["website_url"] = ( - f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{slug}" - ) + # Set the slug + values["slug"] = slugify(values["title"]) return values - @property - def is_accepted(self) -> bool: - return self.state == SubmissionState.accepted - @property - def is_confirmed(self) -> bool: - return self.state == SubmissionState.confirmed +class PretalxData(RootModel): + root: list[PretalxSession | PretalxSpeaker] + + @staticmethod + def replace_duplicate_slugs(objects: list[PretalxSession | PretalxSpeaker]): + slug_count = {} + seen_slugs = set() + + for obj in objects: + original_slug = obj.slug + + if original_slug in seen_slugs: + if original_slug in slug_count: + slug_count[original_slug] += 1 + else: + slug_count[original_slug] = 1 + obj.slug = f"{original_slug}-{slug_count[original_slug]}" + else: + seen_slugs.add(original_slug) + + +class PretalxSubmissions(PretalxData): + root: list[PretalxSession] + + @model_validator(mode="before") + @classmethod + def initiate_publishable_sessions(cls, root) -> PretalxSubmissions: + """ + Returns only the publishable sessions + """ + sessions = [] + for submission in root: + if cls.is_submission_publishable(submission): + sessions.append(PretalxSession.model_validate(submission)) + + # Sort by start time (early first) for deterministic slug replacement + sessions = sorted(sessions, key=lambda x: (x.start is None, x.start)) + + cls.replace_duplicate_slugs(sessions) + + # Compute the relationships of all sessions + TimingRelationship.compute_relationships( + [s for s in sessions if s.start and s.end] + ) + + return sessions + + @staticmethod + def is_submission_publishable(submission: PretalxSession) -> bool: + return submission.get("state") in ( + SubmissionState.accepted, + SubmissionState.confirmed, + ) + + +class PretalxSpeakers(PretalxData): + root: list[PretalxSpeaker] + + # Overriden to be able to pass the accepted_proposals + @classmethod + def model_validate(cls, root, accepted_proposals: KeysView[str]) -> PretalxSpeakers: + """ + Returns only speakers with publishable sessions + """ + speakers = [] + for speaker in root: + if cls.is_speaker_publishable(speaker, accepted_proposals): + speakers.append(PretalxSpeaker.model_validate(speaker)) + + # Sort by code for deterministic slug replacement + speakers = sorted(speakers, key=lambda x: x.code) - @property - def is_publishable(self) -> bool: - return self.is_accepted or self.is_confirmed + cls.replace_duplicate_slugs(speakers) + + return cls(root=speakers) + + @staticmethod + def is_speaker_publishable( + speaker: PretalxSpeaker, accepted_proposals: KeysView[str] + ) -> bool: + return set(speaker.get("submissions")) & accepted_proposals def parse_publishable_submissions() -> dict[str, PretalxSession]: @@ -432,61 +513,53 @@ def parse_publishable_submissions() -> dict[str, PretalxSession]: return subs_dict -def parse_speakers() -> list[PretalxSpeaker]: +def parse_publishable_speakers( + publishable_sessions: KeysView[str], +) -> dict[str, PretalxSpeaker]: """ Returns only speakers with publishable sessions """ with open(Config.raw_path / "speakers_latest.json") as fd: js = json.load(fd) - speakers = [PretalxSpeaker.model_validate(item) for item in js] - return speakers - - -def publishable_speakers( - accepted_proposals: KeysView[str], -) -> dict[str, PretalxSpeaker]: - sp = parse_speakers() - output = {} - for speaker in sp: - accepted = set(speaker.submissions) & accepted_proposals - if accepted: - # Overwrite with only the accepted proposals - speaker.submissions = list(accepted) - output[speaker.code] = speaker - - return output + speakers = PretalxSpeakers.model_validate( + js, accepted_proposals=publishable_sessions + ).root + speakers_dict = {s.code: s for s in speakers} + return speakers_dict -def save_publishable_sessions(publishable: dict[str, PretalxSession]): +def save_publishable_sessions(sessions: dict[str, PretalxSession]): path = Config.public_path / "sessions.json" - data = {k: json.loads(v.model_dump_json()) for k, v in publishable.items()} + data = {k: json.loads(v.model_dump_json()) for k, v in sessions.items()} with open(path, "w") as fd: json.dump(data, fd, indent=2) -def save_publishable_speakers(publishable: dict[str, PretalxSession]): +def save_publishable_speakers(speakers: dict[str, PretalxSession]): path = Config.public_path / "speakers.json" - speakers = publishable_speakers(publishable.keys()) - data = {k: v.model_dump() for k, v in speakers.items()} with open(path, "w") as fd: json.dump(data, fd, indent=2) -def save_all(all_sessions: dict[str, PretalxSession]): +def save_all( + publishable_sessions: dict[str, PretalxSession], + publishable_speakers: dict[str, PretalxSpeaker], +): Config.public_path.mkdir(parents=True, exist_ok=True) - save_publishable_sessions(all_sessions) - save_publishable_speakers(all_sessions) - + save_publishable_sessions(publishable_sessions) + save_publishable_speakers(publishable_speakers) -def check_duplicate_slugs(all_sessions: dict[str, PretalxSession]) -> bool: - all_speakers = publishable_speakers(all_sessions.keys()) - session_slugs = [s.slug for s in all_sessions.values()] - speaker_slugs = [s.slug for s in all_speakers.values()] +def check_duplicate_slugs( + publishable_sessions: dict[str, PretalxSession], + publishable_speakers: dict[str, PretalxSpeaker], +) -> bool: + session_slugs = [s.slug for s in publishable_sessions.values()] + speaker_slugs = [s.slug for s in publishable_speakers.values()] session_duplicates = [ slug for slug in set(session_slugs) if session_slugs.count(slug) > 1 @@ -509,14 +582,15 @@ def check_duplicate_slugs(all_sessions: dict[str, PretalxSession]) -> bool: print(f"Transforming {Config.event} data...") print("Checking for duplicate slugs...") - all_sessions = parse_publishable_submissions() + publishable_sessions = parse_publishable_submissions() + publishable_speakers = parse_publishable_speakers(publishable_sessions.keys()) - if not check_duplicate_slugs(all_sessions) and ( + if not check_duplicate_slugs(publishable_sessions, publishable_speakers) and ( len(sys.argv) <= 1 or sys.argv[1] != "--allow-dupes" ): print("Exiting. Use ``make transform ALLOW_DUPES=true`` to continue.") sys.exit(1) print("Saving publishable data...") - save_all(all_sessions) + save_all(publishable_sessions, publishable_speakers) print("Done") From 339ba501687ab29a8ad55dc5cb029340423bfdba Mon Sep 17 00:00:00 2001 From: egeakman Date: Mon, 3 Jun 2024 00:41:32 +0300 Subject: [PATCH 17/27] typing --- src/transform.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/transform.py b/src/transform.py index 721d841..60ad34e 100644 --- a/src/transform.py +++ b/src/transform.py @@ -9,6 +9,7 @@ BaseModel, Field, RootModel, + ValidationInfo, computed_field, field_validator, model_validator, @@ -76,9 +77,7 @@ class TimingRelationship: relationships: dict[str, dict[str, list[str] | str | None]] = {} @classmethod - def compute_relationships( - cls, all_sessions: list[PretalxSession] - ) -> dict[str, dict[str, list[str] | str | None]]: + def compute_relationships(cls, all_sessions: list[PretalxSession]) -> None: relationships = {} for session in all_sessions: talks_in_parallel = cls.compute_talks_in_parallel(session, all_sessions) @@ -424,8 +423,8 @@ class PretalxData(RootModel): @staticmethod def replace_duplicate_slugs(objects: list[PretalxSession | PretalxSpeaker]): - slug_count = {} - seen_slugs = set() + slug_count: dict[str, int] = {} + seen_slugs: set[str] = set() for obj in objects: original_slug = obj.slug @@ -445,7 +444,7 @@ class PretalxSubmissions(PretalxData): @model_validator(mode="before") @classmethod - def initiate_publishable_sessions(cls, root) -> PretalxSubmissions: + def initiate_publishable_sessions(cls, root) -> list[PretalxSession]: """ Returns only the publishable sessions """ @@ -467,7 +466,7 @@ def initiate_publishable_sessions(cls, root) -> PretalxSubmissions: return sessions @staticmethod - def is_submission_publishable(submission: PretalxSession) -> bool: + def is_submission_publishable(submission: dict) -> bool: return submission.get("state") in ( SubmissionState.accepted, SubmissionState.confirmed, @@ -478,11 +477,15 @@ class PretalxSpeakers(PretalxData): root: list[PretalxSpeaker] # Overriden to be able to pass the accepted_proposals + @model_validator(mode="before") @classmethod - def model_validate(cls, root, accepted_proposals: KeysView[str]) -> PretalxSpeakers: + def initiate_publishable_speakers( + cls, root, context: ValidationInfo + ) -> list[PretalxSpeaker]: """ Returns only speakers with publishable sessions """ + accepted_proposals = context.context["accepted_proposals"] speakers = [] for speaker in root: if cls.is_speaker_publishable(speaker, accepted_proposals): @@ -493,12 +496,12 @@ def model_validate(cls, root, accepted_proposals: KeysView[str]) -> PretalxSpeak cls.replace_duplicate_slugs(speakers) - return cls(root=speakers) + return speakers @staticmethod def is_speaker_publishable( - speaker: PretalxSpeaker, accepted_proposals: KeysView[str] - ) -> bool: + speaker: dict, accepted_proposals: KeysView[str] + ) -> set[str]: return set(speaker.get("submissions")) & accepted_proposals @@ -522,7 +525,7 @@ def parse_publishable_speakers( with open(Config.raw_path / "speakers_latest.json") as fd: js = json.load(fd) speakers = PretalxSpeakers.model_validate( - js, accepted_proposals=publishable_sessions + js, context={"accepted_proposals": publishable_sessions} ).root speakers_dict = {s.code: s for s in speakers} return speakers_dict @@ -536,7 +539,7 @@ def save_publishable_sessions(sessions: dict[str, PretalxSession]): json.dump(data, fd, indent=2) -def save_publishable_speakers(speakers: dict[str, PretalxSession]): +def save_publishable_speakers(speakers: dict[str, PretalxSpeaker]): path = Config.public_path / "speakers.json" data = {k: v.model_dump() for k, v in speakers.items()} @@ -583,6 +586,8 @@ def check_duplicate_slugs( print("Checking for duplicate slugs...") publishable_sessions = parse_publishable_submissions() + + # Pass only the keys (session codes) publishable_speakers = parse_publishable_speakers(publishable_sessions.keys()) if not check_duplicate_slugs(publishable_sessions, publishable_speakers) and ( From df0ad5f2c0f16ebc8f81f19a44d249a80b9c82d5 Mon Sep 17 00:00:00 2001 From: egeakman Date: Mon, 3 Jun 2024 01:57:14 +0300 Subject: [PATCH 18/27] Add resources to the schema --- README.md | 4 +- data/examples/README.md | 64 +++++++++++++++----------- data/examples/output/sessions.json | 11 +++++ data/examples/pretalx/submissions.json | 11 ++++- src/transform.py | 11 +++-- 5 files changed, 70 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 08ffc41..78f2de8 100644 --- a/README.md +++ b/README.md @@ -36,4 +36,6 @@ The API is served at ``programapi24.europython.eu/2024``. It has two endpoints ( - ``/speakers.json``: Returns the list of confirmed speakers. - ``/sessions.json``: Returns the list of confirmed sessions. -**Note:** See [this page](data/examples/README.md) for the explanations of the fields in the returned JSON files. +## Schema + +See [this page](data/examples/README.md) for the explanations of the fields in the returned JSON files. diff --git a/data/examples/README.md b/data/examples/README.md index 17e6155..45dcd25 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -25,9 +25,21 @@ "duration": "60", "level": "intermediate", "delivery": "in-person", + "resources": [ + { + "resource": "https://example.com/notebook.ipynb", + "description": "Notebook used in the talk" + }, + { + "resource": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "description": "Video of the robot in action" + } + ... + ], "room": "South Hall 2A", "start": "2024-07-10T14:00:00+02:00", "end": "2024-07-10T15:00:00+02:00", + "website_url": "https://ep2024.europython.eu/session/example-talk/", "talks_in_parallel": [ "F7G8H9", ... @@ -41,8 +53,7 @@ ... ], "next_talk": "O6P7Q8", - "prev_talk": "R9S0T1", - "website_url": "https://ep2024.europython.eu/session/example-talk/" + "prev_talk": "R9S0T1" }, } ``` @@ -52,29 +63,30 @@ The fields are as follows: -| Key | Type | Notes | -|---------------------|-----------------------------------|---------------------------------------------------------------| -| `code` | `string` | Unique identifier for the session | -| `title` | `string` | Title of the session | -| `speakers` | `list[string]` | List of codes of the speakers | -| `submission_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | -| `slug` | `string` | URL-friendly version of the title | -| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | -| `state` | `string` | State of the session (e.g. confirmed, canceled, etc.) | -| `abstract` | `string` | Abstract of the session | -| `tweet` | `string` | Tweet-length description of the session | -| `duration` | `string` | Duration of the session in minutes | -| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | -| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | -| `room` | `string` \| `null` | Room where the session will be held | -| `start` | `datetime (ISO format)` \| `null` | Start time of the session | -| `end` | `datetime (ISO format)` \| `null` | End time of the session | -| `talks_in_parallel` | `list[string]` \| `null` | List of codes of sessions happening in parallel | -| `talks_after` | `list[string]` \| `null` | List of codes of sessions happening after this session | -| `talks_before` | `list[string]` \| `null` | List of codes of sessions happening before this session | -| `next_talk` | `string` \| `null` | Code of the next session in the same room | -| `prev_talk` | `string` \| `null` | Code of the previous session in the same room | -| `website_url` | `string` | URL of the session on the conference website | +| Key | Type | Notes | +|---------------------|-------------------------------------------|---------------------------------------------------------------| +| `code` | `string` | Unique identifier for the session | +| `title` | `string` | Title of the session | +| `speakers` | `array[string]` | List of codes of the speakers | +| `submission_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | +| `slug` | `string` | URL-friendly version of the title | +| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | +| `state` | `string` | State of the session (e.g. confirmed, canceled, etc.) | +| `abstract` | `string` | Abstract of the session | +| `tweet` | `string` | Tweet-length description of the session | +| `duration` | `string` | Duration of the session in minutes | +| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | +| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | +| `resources` | `array[object[string, string]]` \| `null` | List of resources for the session: `{"resource": \, "description": \}` | +| `room` | `string` \| `null` | Room where the session will be held | +| `start` | `string (datetime ISO format)` \| `null` | Start time of the session | +| `end` | `string (datetime ISO format)` \| `null` | End time of the session | +| `website_url` | `string` | URL of the session on the conference website | +| `talks_in_parallel` | `array[string]` \| `null` | List of codes of sessions happening in parallel | +| `talks_after` | `array[string]` \| `null` | List of codes of sessions happening after this session | +| `talks_before` | `array[string]` \| `null` | List of codes of sessions happening before this session | +| `next_talk` | `string` \| `null` | Code of the next session in the same room | +| `prev_talk` | `string` \| `null` | Code of the previous session in the same room |   @@ -118,7 +130,7 @@ The fields are as follows: | `biography` | `string` \| `null` | Biography of the speaker | | `avatar` | `string` | URL of the speaker's avatar | | `slug` | `string` | URL-friendly version of the name | -| `submissions` | `list[string]` | List of codes of the sessions the speaker is speaking at | +| `submissions` | `array[string]` | List of codes of the sessions the speaker is speaking at | | `affiliation` | `string` \| `null` | Affiliation of the speaker | | `homepage` | `string` \| `null` | URL of the speaker's homepage | | `gitx_url` | `string` \| `null` | URL of the speaker's GitHub/GitLab/etc. profile | diff --git a/data/examples/output/sessions.json b/data/examples/output/sessions.json index 457a443..20a3d05 100644 --- a/data/examples/output/sessions.json +++ b/data/examples/output/sessions.json @@ -14,6 +14,16 @@ "duration": "45", "level": "intermediate", "delivery": "in-person", + "resources": [ + { + "resource": "https://example.com/notebook.ipynb", + "description": "Notebook used in the talk" + }, + { + "resource": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "description": "Video of the robot in action" + } + ], "room": null, "start": null, "end": null, @@ -39,6 +49,7 @@ "duration": "30", "level": "beginner", "delivery": "in-person", + "resources": null, "room": null, "start": null, "end": null, diff --git a/data/examples/pretalx/submissions.json b/data/examples/pretalx/submissions.json index de98184..ee24db7 100644 --- a/data/examples/pretalx/submissions.json +++ b/data/examples/pretalx/submissions.json @@ -28,13 +28,22 @@ "abstract": "This is the abstract of the talk, it should be about Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec condimentum viverra ante in dignissim. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec molestie lorem enim, id dignissim mi faucibus a. Suspendisse mollis lobortis mollis. Praesent eu lorem id velit maximus blandit eget at nisl. Quisque fringilla pharetra euismod. Morbi id ante vitae tortor volutpat interdum fermentum id tortor. Vivamus ligula nisl, mattis molestie purus vel, interdum venenatis nulla. Nam suscipit scelerisque ornare. Ut consequat sem vel sapien porta pretium. Nullam non lacinia nulla, a tincidunt dui. Sed consequat nibh in nibh ornare, rhoncus sollicitudin sem lobortis. Etiam molestie est et felis sollicitudin, commodo facilisis mi vehicula. Quisque pharetra consectetur ligula, sit amet tincidunt nibh consectetur fringilla. Suspendisse eu libero sed magna malesuada bibendum sed et enim. Phasellus convallis tortor nec lectus venenatis, id tristique quam finibus.", "description": null, "duration": 45, + "resources": [ + { + "resource": "https://example.com/notebook.ipynb", + "description": "Notebook used in the talk" + }, + { + "resource": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "description": "Video of the robot in action" + } + ], "slot_count": 1, "do_not_record": false, "is_featured": false, "content_locale": "en", "slot": null, "image": null, - "resources": [], "answers": [ { "question": { diff --git a/src/transform.py b/src/transform.py index 60ad34e..c3087dd 100644 --- a/src/transform.py +++ b/src/transform.py @@ -326,13 +326,13 @@ class PretalxSession(BaseModel): track: str | None = None state: str abstract: str - answers: list[PretalxAnswer] = Field(..., exclude=True) - slot: PretalxSlot | None = Field(..., exclude=True) tweet: str = "" duration: str - level: str = "" delivery: str = "" + resources: list[dict[str, str]] | None = None + answers: list[PretalxAnswer] = Field(..., exclude=True) + slot: PretalxSlot | None = Field(..., exclude=True) # Extracted room: str | None = None @@ -353,6 +353,11 @@ def duration_to_string(cls, v) -> str: return str(v) return v + @field_validator("resources", mode="before") + @classmethod + def handle_resources(cls, v) -> list[dict[str, str]] | None: + return v or None + @computed_field def website_url(self) -> str: return ( From f5e635f8a06513791d7e9256d9cee02bc080099c Mon Sep 17 00:00:00 2001 From: Ege Akman Date: Mon, 3 Jun 2024 02:11:29 +0300 Subject: [PATCH 19/27] Update README.md --- data/examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/examples/README.md b/data/examples/README.md index 45dcd25..38eadb3 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -77,7 +77,7 @@ The fields are as follows: | `duration` | `string` | Duration of the session in minutes | | `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | | `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | -| `resources` | `array[object[string, string]]` \| `null` | List of resources for the session: `{"resource": \, "description": \}` | +| `resources` | `array[object[string, string]]` \| `null` | List of resources for the session: `{"resource": , "description": }` | | `room` | `string` \| `null` | Room where the session will be held | | `start` | `string (datetime ISO format)` \| `null` | Start time of the session | | `end` | `string (datetime ISO format)` \| `null` | End time of the session | From 66fa79f9d66ab73e61de7a62790a07e8f5dcf921 Mon Sep 17 00:00:00 2001 From: egeakman Date: Mon, 3 Jun 2024 02:32:14 +0300 Subject: [PATCH 20/27] oops missed this one --- src/transform.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/transform.py b/src/transform.py index c3087dd..b6bed18 100644 --- a/src/transform.py +++ b/src/transform.py @@ -493,8 +493,11 @@ def initiate_publishable_speakers( accepted_proposals = context.context["accepted_proposals"] speakers = [] for speaker in root: - if cls.is_speaker_publishable(speaker, accepted_proposals): - speakers.append(PretalxSpeaker.model_validate(speaker)) + # Overwrite the submissions with only the publishable ones + if submissions := cls.is_speaker_publishable(speaker, accepted_proposals): + speaker = PretalxSpeaker.model_validate(speaker) + speaker.submissions = list(submissions) + speakers.append(speaker) # Sort by code for deterministic slug replacement speakers = sorted(speakers, key=lambda x: x.code) From ee3f0180f5f04378980e1ea10568413aa662ff0e Mon Sep 17 00:00:00 2001 From: egeakman Date: Mon, 3 Jun 2024 02:52:46 +0300 Subject: [PATCH 21/27] change gitx_url to gitx --- data/examples/README.md | 6 +++--- data/examples/output/speakers.json | 2 +- src/transform.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/data/examples/README.md b/data/examples/README.md index 38eadb3..d6837ab 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -109,7 +109,7 @@ The fields are as follows: ], "affiliation": "A Company", "homepage": "https://example.com", - "gitx_url": "https://github.com/B4D5E6", + "gitx": "https://github.com/B4D5E6", "linkedin_url": "https://www.linkedin.com/in/B4D5E6", "mastodon_url": "https://mastodon.social/@B4D5E6", "twitter_url": "https://x.com/B4D5E6" @@ -132,8 +132,8 @@ The fields are as follows: | `slug` | `string` | URL-friendly version of the name | | `submissions` | `array[string]` | List of codes of the sessions the speaker is speaking at | | `affiliation` | `string` \| `null` | Affiliation of the speaker | -| `homepage` | `string` \| `null` | URL of the speaker's homepage | -| `gitx_url` | `string` \| `null` | URL of the speaker's GitHub/GitLab/etc. profile | +| `homepage` | `string` \| `null` | URL/text of the speaker's homepage | +| `gitx` | `string` \| `null` | URL/text of the speaker's GitHub/GitLab/etc. profile | | `linkedin_url` | `string` \| `null` | URL of the speaker's LinkedIn profile | | `twitter_url` | `string` \| `null` | URL of the speaker's Twitter profile | | `mastodon_url` | `string` \| `null` | URL of the speaker's Mastodon profile | diff --git a/data/examples/output/speakers.json b/data/examples/output/speakers.json index 66925e6..885f3d6 100644 --- a/data/examples/output/speakers.json +++ b/data/examples/output/speakers.json @@ -8,7 +8,7 @@ "submissions": ["A8CD3F"], "affiliation": "A Company", "homepage": null, - "gitx_url": "https://github.com/F3DC8A", + "gitx": "https://github.com/F3DC8A", "linkedin_url": "https://www.linkedin.com/in/F3DC8A", "mastodon_url": null, "twitter_url": null diff --git a/src/transform.py b/src/transform.py index b6bed18..fa5fbef 100644 --- a/src/transform.py +++ b/src/transform.py @@ -234,7 +234,7 @@ class PretalxSpeaker(BaseModel): twitter_url: str | None = None mastodon_url: str | None = None linkedin_url: str | None = None - gitx_url: str | None = None + gitx: str | None = None @model_validator(mode="before") @classmethod @@ -265,7 +265,7 @@ def extract(cls, values) -> dict: ) if answer.question_text == SpeakerQuestion.gitx: - values["gitx_url"] = answer.answer_text.strip().split()[0] + values["gitx"] = answer.answer_text.strip().split()[0] # Set the slug values["slug"] = slugify(values["name"]) From 96eb6142d91addb0a2e2e70d7f248469a31be07c Mon Sep 17 00:00:00 2001 From: NMertsch Date: Mon, 3 Jun 2024 19:41:22 +0200 Subject: [PATCH 22/27] Add tests for mastodon and linkedin url extraction --- tests/test_social_media_extraction.py | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/test_social_media_extraction.py diff --git a/tests/test_social_media_extraction.py b/tests/test_social_media_extraction.py new file mode 100644 index 0000000..4fc0404 --- /dev/null +++ b/tests/test_social_media_extraction.py @@ -0,0 +1,30 @@ +import pytest + +from src.transform import PretalxSpeaker + + +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("http://mastodon.social/@username", "https://mastodon.social/@username"), + ("https://mastodon.social/@username", "https://mastodon.social/@username"), + ("https://mastodon.social/@username?something=true", "https://mastodon.social/@username"), + ("@username@mastodon.social", "https://mastodon.social/@username"), + ] +) +def test_extract_mastodon_url(input_string: str, result: str) -> None: + assert PretalxSpeaker.extract_mastodon_url(input_string) == result + + +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("username", "https://linkedin.com/in/username"), + ("in/username", "https://linkedin.com/in/username"), + ("www.linkedin.com/in/username", "https://www.linkedin.com/in/username"), + ("http://linkedin.com/in/username", "https://linkedin.com/in/username"), + ("https://linkedin.com/in/username", "https://linkedin.com/in/username"), + ] +) +def test_extract_linked_url(input_string: str, result: str) -> None: + assert PretalxSpeaker.extract_linkedin_url(input_string) == result From 1dec5c812ca088c4c339608adb26118571b5b0f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:44:08 +0000 Subject: [PATCH 23/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_social_media_extraction.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_social_media_extraction.py b/tests/test_social_media_extraction.py index 4fc0404..6c2cd5c 100644 --- a/tests/test_social_media_extraction.py +++ b/tests/test_social_media_extraction.py @@ -8,9 +8,12 @@ [ ("http://mastodon.social/@username", "https://mastodon.social/@username"), ("https://mastodon.social/@username", "https://mastodon.social/@username"), - ("https://mastodon.social/@username?something=true", "https://mastodon.social/@username"), + ( + "https://mastodon.social/@username?something=true", + "https://mastodon.social/@username", + ), ("@username@mastodon.social", "https://mastodon.social/@username"), - ] + ], ) def test_extract_mastodon_url(input_string: str, result: str) -> None: assert PretalxSpeaker.extract_mastodon_url(input_string) == result @@ -24,7 +27,7 @@ def test_extract_mastodon_url(input_string: str, result: str) -> None: ("www.linkedin.com/in/username", "https://www.linkedin.com/in/username"), ("http://linkedin.com/in/username", "https://linkedin.com/in/username"), ("https://linkedin.com/in/username", "https://linkedin.com/in/username"), - ] + ], ) def test_extract_linked_url(input_string: str, result: str) -> None: assert PretalxSpeaker.extract_linkedin_url(input_string) == result From ce1de63f733a84ba5e5fc3d24af13d08bd0bdc4b Mon Sep 17 00:00:00 2001 From: egeakman Date: Tue, 4 Jun 2024 15:28:11 +0300 Subject: [PATCH 24/27] better code structure --- Makefile | 4 +- README.md | 2 +- data/examples/README.md | 59 +- .../{output => europython}/sessions.json | 28 +- .../{output => europython}/speakers.json | 0 data/examples/pretalx/submissions.json | 15 +- src/transform.py | 636 +++++++++++------- tests/test_examples_are_up_to_date.py | 33 - tests/test_social_media_extraction.py | 8 +- tests/test_transform_end_to_end.py | 38 ++ 10 files changed, 481 insertions(+), 342 deletions(-) rename data/examples/{output => europython}/sessions.json (82%) rename data/examples/{output => europython}/speakers.json (100%) delete mode 100644 tests/test_examples_are_up_to_date.py create mode 100644 tests/test_transform_end_to_end.py diff --git a/Makefile b/Makefile index 05bc266..4cd2511 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,8 @@ download: python -m src.download transform: -ifeq ($(ALLOW_DUPES), true) - python -m src.transform --allow-dupes +ifeq ($(WARN_DUPES), true) + python -m src.transform --warn-dupes else python -m src.transform endif diff --git a/README.md b/README.md index 78f2de8..9cd7e0f 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ You can change the event in the [``config.py``](src/config.py) file. It is set t ## API -The API is served at ``programapi24.europython.eu/2024``. It has two endpoints (for now): +The API is served at ``https://programapi24.europython.eu/2024``. It has two endpoints (for now): - ``/speakers.json``: Returns the list of confirmed speakers. - ``/sessions.json``: Returns the list of confirmed sessions. diff --git a/data/examples/README.md b/data/examples/README.md index d6837ab..b0d440c 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -16,7 +16,7 @@ "B4D5E6", ... ], - "submission_type": "Talk", + "session_type": "Talk", "slug": "example-talk", "track": "Some Track", "state": "confirmed", @@ -40,20 +40,20 @@ "start": "2024-07-10T14:00:00+02:00", "end": "2024-07-10T15:00:00+02:00", "website_url": "https://ep2024.europython.eu/session/example-talk/", - "talks_in_parallel": [ + "sessions_in_parallel": [ "F7G8H9", ... ], - "talks_after": [ + "sessions_after": [ "I0J1K2", ... ], - "talks_before": [ + "sessions_before": [ "L3M4N5", ... ], - "next_talk": "O6P7Q8", - "prev_talk": "R9S0T1" + "next_session": "O6P7Q8", + "prev_session": "R9S0T1" }, } ``` @@ -63,30 +63,29 @@ The fields are as follows: -| Key | Type | Notes | -|---------------------|-------------------------------------------|---------------------------------------------------------------| -| `code` | `string` | Unique identifier for the session | -| `title` | `string` | Title of the session | -| `speakers` | `array[string]` | List of codes of the speakers | -| `submission_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | -| `slug` | `string` | URL-friendly version of the title | -| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | -| `state` | `string` | State of the session (e.g. confirmed, canceled, etc.) | -| `abstract` | `string` | Abstract of the session | -| `tweet` | `string` | Tweet-length description of the session | -| `duration` | `string` | Duration of the session in minutes | -| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | -| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | -| `resources` | `array[object[string, string]]` \| `null` | List of resources for the session: `{"resource": , "description": }` | -| `room` | `string` \| `null` | Room where the session will be held | -| `start` | `string (datetime ISO format)` \| `null` | Start time of the session | -| `end` | `string (datetime ISO format)` \| `null` | End time of the session | -| `website_url` | `string` | URL of the session on the conference website | -| `talks_in_parallel` | `array[string]` \| `null` | List of codes of sessions happening in parallel | -| `talks_after` | `array[string]` \| `null` | List of codes of sessions happening after this session | -| `talks_before` | `array[string]` \| `null` | List of codes of sessions happening before this session | -| `next_talk` | `string` \| `null` | Code of the next session in the same room | -| `prev_talk` | `string` \| `null` | Code of the previous session in the same room | +| Key | Type | Notes | +|------------------------|-------------------------------------------|---------------------------------------------------------------| +| `code` | `string` | Unique identifier for the session | +| `title` | `string` | Title of the session | +| `speakers` | `array[string]` | List of codes of the speakers | +| `session_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | +| `slug` | `string` | URL-friendly version of the title | +| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | +| `abstract` | `string` | Abstract of the session | +| `tweet` | `string` | Tweet-length description of the session | +| `duration` | `string` | Duration of the session in minutes | +| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | +| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | +| `resources` | `array[object[string, string]]` \| `null` | List of resources for the session: `{"resource": , "description": }` | +| `room` | `string` \| `null` | Room where the session will be held | +| `start` | `string (datetime ISO format)` \| `null` | Start time of the session | +| `end` | `string (datetime ISO format)` \| `null` | End time of the session | +| `website_url` | `string` | URL of the session on the conference website | +| `sessions_in_parallel` | `array[string]` \| `null` | List of codes of sessions happening in parallel | +| `sessions_after` | `array[string]` \| `null` | List of codes of sessions happening after this session | +| `sessions_before` | `array[string]` \| `null` | List of codes of sessions happening before this session | +| `next_session` | `string` \| `null` | Code of the next session in the same room | +| `prev_session` | `string` \| `null` | Code of the previous session in the same room |   diff --git a/data/examples/output/sessions.json b/data/examples/europython/sessions.json similarity index 82% rename from data/examples/output/sessions.json rename to data/examples/europython/sessions.json index 20a3d05..530655a 100644 --- a/data/examples/output/sessions.json +++ b/data/examples/europython/sessions.json @@ -5,10 +5,9 @@ "speakers": [ "F3DC8A", "ZXCVBN" ], - "submission_type": "Talk (long session)", + "session_type": "Talk (long session)", "slug": "this-is-a-test-talk-from-a-test-speaker-about-a-test-topic", "track": "Software Engineering & Architecture", - "state": "confirmed", "abstract": "This is the abstract of the talk, it should be about Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec condimentum viverra ante in dignissim. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec molestie lorem enim, id dignissim mi faucibus a. Suspendisse mollis lobortis mollis. Praesent eu lorem id velit maximus blandit eget at nisl. Quisque fringilla pharetra euismod. Morbi id ante vitae tortor volutpat interdum fermentum id tortor. Vivamus ligula nisl, mattis molestie purus vel, interdum venenatis nulla. Nam suscipit scelerisque ornare. Ut consequat sem vel sapien porta pretium. Nullam non lacinia nulla, a tincidunt dui. Sed consequat nibh in nibh ornare, rhoncus sollicitudin sem lobortis. Etiam molestie est et felis sollicitudin, commodo facilisis mi vehicula. Quisque pharetra consectetur ligula, sit amet tincidunt nibh consectetur fringilla. Suspendisse eu libero sed magna malesuada bibendum sed et enim. Phasellus convallis tortor nec lectus venenatis, id tristique quam finibus.", "tweet": "This is a short version of this talk, as a tweet.", "duration": "45", @@ -27,11 +26,11 @@ "room": null, "start": null, "end": null, - "talks_in_parallel": null, - "talks_after": null, - "talks_before": null, - "next_talk": null, - "prev_talk": null, + "sessions_in_parallel": null, + "sessions_after": null, + "sessions_before": null, + "next_session": null, + "prev_session": null, "website_url": "https://ep2024.europython.eu/session/this-is-a-test-talk-from-a-test-speaker-about-a-test-topic" }, "B8CD4F": { @@ -40,11 +39,10 @@ "speakers": [ "G3DC8A" ], - "submission_type": "Talk", + "session_type": "Talk", "slug": "a-talk-with-shorter-title", "track": "PyData: LLMs", - "state": "confirmed", - "abstract": "This is the abstract of the shoerter talk, it should be about Lorem ipsum dolor sit amet", + "abstract": "This is the abstract of the shorter talk, it should be about Lorem ipsum dolor sit amet", "tweet": "Hey, short tweet", "duration": "30", "level": "beginner", @@ -53,11 +51,11 @@ "room": null, "start": null, "end": null, - "talks_in_parallel": null, - "talks_after": null, - "talks_before": null, - "next_talk": null, - "prev_talk": null, + "sessions_in_parallel": null, + "sessions_after": null, + "sessions_before": null, + "next_session": null, + "prev_session": null, "website_url": "https://ep2024.europython.eu/session/a-talk-with-shorter-title" } } diff --git a/data/examples/output/speakers.json b/data/examples/europython/speakers.json similarity index 100% rename from data/examples/output/speakers.json rename to data/examples/europython/speakers.json diff --git a/data/examples/pretalx/submissions.json b/data/examples/pretalx/submissions.json index ee24db7..1da2cf7 100644 --- a/data/examples/pretalx/submissions.json +++ b/data/examples/pretalx/submissions.json @@ -141,7 +141,7 @@ }, "track_id": 4493, "state": "confirmed", - "abstract": "This is the abstract of the talk, it should be about Lorem ipsum dolor sit amet", + "abstract": "This is the abstract of the shorter talk, it should be about Lorem ipsum dolor sit amet", "description": null, "duration": 30, "slot_count": 1, @@ -166,6 +166,19 @@ "person": null, "options": [] }, + { + "question": { + "id": 3412, + "question": { + "en": "Abstract as a tweet / toot" + } + }, + "answer": "Hey, short tweet", + "answer_file": null, + "submission": "B8CD4F", + "review": null, + "person": null + }, { "question": { "id": 3412, diff --git a/src/transform.py b/src/transform.py index fa5fbef..17b3746 100644 --- a/src/transform.py +++ b/src/transform.py @@ -4,16 +4,9 @@ import sys from collections.abc import KeysView from datetime import datetime +from enum import Enum -from pydantic import ( - BaseModel, - Field, - RootModel, - ValidationInfo, - computed_field, - field_validator, - model_validator, -) +from pydantic import BaseModel, Field, computed_field, field_validator, model_validator from slugify import slugify from src.config import Config @@ -35,10 +28,13 @@ class SubmissionQuestion: level = "Expected audience expertise" -class SubmissionState: +class SubmissionState(Enum): accepted = "accepted" confirmed = "confirmed" withdrawn = "withdrawn" + rejected = "rejected" + canceled = "canceled" + submitted = "submitted" class PretalxAnswer(BaseModel): @@ -72,41 +68,78 @@ def handle_localized(cls, v): return v -class TimingRelationship: - # Relationships are stored in a dictionary with the session code as the key - relationships: dict[str, dict[str, list[str] | str | None]] = {} +class TimingRelationships: + all_sessions_in_parallel: dict[str, list[str]] = {} + all_sessions_after: dict[str, list[str]] = {} + all_sessions_before: dict[str, list[str]] = {} + all_next_session: dict[str, str | None] = {} + all_prev_session: dict[str, str | None] = {} @classmethod - def compute_relationships(cls, all_sessions: list[PretalxSession]) -> None: - relationships = {} + def compute(cls, all_sessions: list[PretalxSubmission]) -> None: for session in all_sessions: - talks_in_parallel = cls.compute_talks_in_parallel(session, all_sessions) - talks_after_data = cls.compute_talks_after( - session, all_sessions, talks_in_parallel + if not session.start or not session.end: + continue + + sessions_in_parallel = cls.compute_sessions_in_parallel( + session, all_sessions + ) + sessions_after_data = cls.compute_sessions_after( + session, all_sessions, sessions_in_parallel ) - talks_before_data = cls.compute_talks_before( - session, all_sessions, talks_in_parallel + sessions_before_data = cls.compute_sessions_before( + session, all_sessions, sessions_in_parallel ) - relationships[session.code] = { - "talks_in_parallel": talks_in_parallel, - "talks_after": talks_after_data.get("talks_after"), - "next_talk": talks_after_data.get("next_talk"), - "talks_before": talks_before_data.get("talks_before"), - "prev_talk": talks_before_data.get("prev_talk"), - } + cls.all_sessions_in_parallel[session.code] = sessions_in_parallel + cls.all_sessions_after[session.code] = sessions_after_data.get( + "sessions_after" + ) + cls.all_sessions_before[session.code] = sessions_before_data.get( + "sessions_before" + ) + cls.all_next_session[session.code] = sessions_after_data.get("next_session") + cls.all_prev_session[session.code] = sessions_before_data.get( + "prev_session" + ) - cls.relationships = relationships + @classmethod + def get_sessions_in_parallel( + cls, session_code: str | None = None + ) -> list[str] | None: + if session_code: + return cls.all_sessions_in_parallel.get(session_code) + return cls.all_sessions_in_parallel @classmethod - def get_relationships(cls, code: str) -> dict[str, list[str] | str | None]: - return cls.relationships[code] + def get_sessions_after(cls, session_code: str | None = None) -> list[str] | None: + if session_code: + return cls.all_sessions_after.get(session_code) + return cls.all_sessions_after + + @classmethod + def get_sessions_before(cls, session_code: str | None = None) -> list[str] | None: + if session_code: + return cls.all_sessions_before.get(session_code) + return cls.all_sessions_before + + @classmethod + def get_next_session(cls, session_code: str | None = None) -> str | None: + if session_code: + return cls.all_next_session.get(session_code) + return cls.all_next_session + + @classmethod + def get_prev_session(cls, session_code: str | None = None) -> str | None: + if session_code: + return cls.all_prev_session.get(session_code) + return cls.all_prev_session @staticmethod - def compute_talks_in_parallel( - session: PretalxSession, all_sessions: list[PretalxSession] + def compute_sessions_in_parallel( + session: PretalxSubmission, all_sessions: list[PretalxSubmission] ) -> list[str]: - talks_parallel = [] + sessions_parallel = [] for other_session in all_sessions: if ( other_session.code == session.code @@ -117,15 +150,15 @@ def compute_talks_in_parallel( # If they intersect, they are in parallel if other_session.start < session.end and other_session.end > session.start: - talks_parallel.append(other_session.code) + sessions_parallel.append(other_session.code) - return talks_parallel + return sessions_parallel @staticmethod - def compute_talks_after( - session: PretalxSession, - all_sessions: list[PretalxSession], - talks_in_parallel: list[str] = [], + def compute_sessions_after( + session: PretalxSubmission, + all_sessions: list[PretalxSubmission], + sessions_in_parallel: list[str], ) -> dict[str, list[str] | str | None]: # Sort sessions based on start time, early first all_sessions_sorted = sorted( @@ -138,7 +171,7 @@ def compute_talks_after( for other_session in all_sessions_sorted if other_session.start is not None and other_session.start >= session.end - and other_session.code not in talks_in_parallel + and other_session.code not in sessions_in_parallel and other_session.code != session.code and other_session.start.day == session.start.day and not other_session.submission_type @@ -148,7 +181,7 @@ def compute_talks_after( # Add sessions to the list if they are in different rooms seen_rooms = set() - unique_sessions = [] + unique_sessions: list[PretalxSubmission] = [] for other_session in remaining_sessions: if other_session.room not in seen_rooms: @@ -161,26 +194,26 @@ def compute_talks_after( s for s in unique_sessions if s.submission_type == "Keynote" ] - # Set the next talks in all rooms - talks_after = [s.code for s in unique_sessions] + # Set the next sessions in all rooms + sessions_after = [s.code for s in unique_sessions] - # Set the next talk in the same room, or a keynote - next_talk = None + # Set the next session in the same room, or a keynote + next_session = None for other_session in unique_sessions: if ( other_session.room == session.room or other_session.submission_type == "Keynote" ): - next_talk = other_session.code + next_session = other_session.code break - return {"talks_after": talks_after, "next_talk": next_talk} + return {"sessions_after": sessions_after, "next_session": next_session} @staticmethod - def compute_talks_before( - session: PretalxSession, - all_sessions: list[PretalxSession], - talks_in_parallel: list[str] = [], + def compute_sessions_before( + session: PretalxSubmission, + all_sessions: list[PretalxSubmission], + sessions_in_parallel: list[str], ) -> dict[str, list[str] | str | None]: # Sort sessions based on start time, late first all_sessions_sorted = sorted( @@ -193,7 +226,7 @@ def compute_talks_before( other_session for other_session in all_sessions_sorted if other_session.start is not None - and other_session.code not in talks_in_parallel + and other_session.code not in sessions_in_parallel and other_session.start <= session.start and other_session.code != session.code and other_session.start.day == session.start.day @@ -208,18 +241,35 @@ def compute_talks_before( unique_sessions.append(other_session) seen_rooms.add(other_session.room) - talks_before = [session.code for session in unique_sessions] + sessions_before = [session.code for session in unique_sessions] - prev_talk = None + prev_session = None for other_session in unique_sessions: if other_session.room == session.room: - prev_talk = other_session.code + prev_session = other_session.code break - return {"talks_before": talks_before, "prev_talk": prev_talk} + return {"sessions_before": sessions_before, "prev_session": prev_session} class PretalxSpeaker(BaseModel): + """ + Model for Pretalx speaker data + """ + + code: str + name: str + biography: str | None = None + avatar: str + submissions: list[str] + answers: list[PretalxAnswer] + + +class EuroPythonSpeaker(BaseModel): + """ + Model for EuroPython speaker data, transformed from Pretalx data + """ + code: str name: str biography: str | None = None @@ -238,10 +288,9 @@ class PretalxSpeaker(BaseModel): @model_validator(mode="before") @classmethod - def extract(cls, values) -> dict: + def extract_answers(cls, values) -> dict: answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] - # Extract the answers for answer in answers: if answer.question_text == SpeakerQuestion.affiliation: values["affiliation"] = answer.answer_text @@ -267,14 +316,13 @@ def extract(cls, values) -> dict: if answer.question_text == SpeakerQuestion.gitx: values["gitx"] = answer.answer_text.strip().split()[0] - # Set the slug - values["slug"] = slugify(values["name"]) - return values - # Extract the twitter URL from the answer @staticmethod def extract_twitter_url(text: str) -> str: + """ + Extract the Twitter URL from the answer + """ if text.startswith("@"): twitter_url = f"https://x.com/{text[1:]}" elif not text.startswith(("https://", "http://", "www.")): @@ -286,9 +334,11 @@ def extract_twitter_url(text: str) -> str: return twitter_url.split("?")[0] - # If it's like @user@instance, we need to convert it to a URL @staticmethod def extract_mastodon_url(text: str) -> str: + """ + Extract the Mastodon URL from the answer, handle @username@instance format + """ if not text.startswith(("https://", "http://")) and text.count("@") == 2: mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}" else: @@ -298,9 +348,11 @@ def extract_mastodon_url(text: str) -> str: return mastodon_url.split("?")[0] - # Extract the linkedin URL from the answer @staticmethod def extract_linkedin_url(text: str) -> str: + """ + Extract the LinkedIn URL from the answer + """ if text.startswith("in/"): linkedin_url = f"https://linkedin.com/{text}" elif not text.startswith(("https://", "http://", "www.")): @@ -313,28 +365,24 @@ def extract_linkedin_url(text: str) -> str: return linkedin_url.split("?")[0] -class PretalxSession(BaseModel): +class PretalxSubmission(BaseModel): """ - Model for only confirmed and accepted sessions + Model for Pretalx submission data """ code: str title: str speakers: list[str] # We only want the code, not the full info submission_type: str - slug: str track: str | None = None - state: str - abstract: str - tweet: str = "" - duration: str - level: str = "" - delivery: str = "" + state: SubmissionState + abstract: str = "" + duration: str = "" resources: list[dict[str, str]] | None = None - answers: list[PretalxAnswer] = Field(..., exclude=True) + answers: list[PretalxAnswer] slot: PretalxSlot | None = Field(..., exclude=True) - # Extracted + # Extracted from slot data room: str | None = None start: datetime | None = None end: datetime | None = None @@ -358,44 +406,63 @@ def duration_to_string(cls, v) -> str: def handle_resources(cls, v) -> list[dict[str, str]] | None: return v or None - @computed_field - def website_url(self) -> str: - return ( - f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{self.slug}" - ) + @model_validator(mode="before") + @classmethod + def process_values(cls, values) -> dict: + values["speakers"] = sorted([s["code"] for s in values["speakers"]]) - @computed_field - def talks_in_parallel(self) -> list[str] | None: - if self.start and self.end: - return TimingRelationship.get_relationships(self.code)["talks_in_parallel"] + # Set slot information + if values.get("slot"): + slot = PretalxSlot.model_validate(values["slot"]) + values["room"] = slot.room + values["start"] = slot.start + values["end"] = slot.end - @computed_field - def talks_after(self) -> list[str] | None: - if self.start and self.end: - return TimingRelationship.get_relationships(self.code)["talks_after"] + return values - @computed_field - def talks_before(self) -> list[str] | None: - if self.start and self.end: - return TimingRelationship.get_relationships(self.code)["talks_before"] + @property + def is_publishable(self) -> bool: + return self.state in (SubmissionState.accepted, SubmissionState.confirmed) - @computed_field - def next_talk(self) -> str | None: - if self.start and self.end: - return TimingRelationship.get_relationships(self.code)["next_talk"] + +class EuroPythonSession(BaseModel): + """ + Model for EuroPython session data, transformed from Pretalx data + """ + + code: str + title: str + speakers: list[str] + session_type: str + slug: str + track: str | None = None + abstract: str = "" + tweet: str = "" + duration: str = "" + level: str = "" + delivery: str = "" + resources: list[dict[str, str]] | None = None + room: str | None = None + start: datetime | None = None + end: datetime | None = None + answers: list[PretalxAnswer] = Field(..., exclude=True) + sessions_in_parallel: list[str] | None = None + sessions_after: list[str] | None = None + sessions_before: list[str] | None = None + next_session: str | None = None + prev_session: str | None = None @computed_field - def prev_talk(self) -> str | None: - if self.start and self.end: - return TimingRelationship.get_relationships(self.code)["prev_talk"] + def website_url(self) -> str: + return ( + f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{self.slug}" + ) @model_validator(mode="before") @classmethod - def extract(cls, values) -> dict: - values["speakers"] = sorted([s["code"] for s in values["speakers"]]) + def extract_answers(cls, values) -> dict: answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] - # Extract the answers for answer in answers: # TODO if we need any other questions if answer.question_text == SubmissionQuestion.tweet: @@ -410,200 +477,257 @@ def extract(cls, values) -> dict: if answer.question_text == SubmissionQuestion.level: values["level"] = answer.answer_text.lower() - # Set slot information - if values.get("slot"): - slot = PretalxSlot.model_validate(values["slot"]) - values["room"] = slot.room - values["start"] = slot.start - values["end"] = slot.end + return values - # Set the slug - values["slug"] = slugify(values["title"]) - return values +class Parse: + @staticmethod + def publishable_submissions(input_file: str) -> dict[str, PretalxSubmission]: + """ + Returns only publishable submissions + """ + with open(input_file) as fd: + js = json.load(fd) + all_submissions = [PretalxSubmission.model_validate(s) for s in js] + publishable_submissions = [s for s in all_submissions if s.is_publishable] + publishable_submissions_by_code = { + s.code: s for s in publishable_submissions + } + return publishable_submissions_by_code + + @staticmethod + def publishable_speakers( + input_file: str, + publishable_sessions_keys: KeysView[str], + ) -> dict[str, PretalxSpeaker]: + """ + Returns only speakers with publishable sessions + """ + with open(input_file) as fd: + js = json.load(fd) + all_speakers = [PretalxSpeaker.model_validate(s) for s in js] + speakers_with_publishable_sessions = [ + s + for s in all_speakers + if Utils.publishable_sessions_of_speaker(s, publishable_sessions_keys) + ] + publishable_speakers_by_code = { + s.code: s for s in speakers_with_publishable_sessions + } + + return publishable_speakers_by_code -class PretalxData(RootModel): - root: list[PretalxSession | PretalxSpeaker] +class Utils: @staticmethod - def replace_duplicate_slugs(objects: list[PretalxSession | PretalxSpeaker]): + def publishable_sessions_of_speaker( + speaker: PretalxSpeaker, accepted_proposals: KeysView[str] + ) -> set[str]: + return set(speaker.submissions) & accepted_proposals + + @staticmethod + def find_duplicate_attributes(objects, attributes): + """ + Find duplicates in the given objects based on the given attributes + + Returns: dict[attribute_value, list[object_code]] + """ + duplicates = {} + for obj in objects.values(): + for attribute in attributes: + value = getattr(obj, attribute) + if value in duplicates: + duplicates[value].append(obj.code) + else: + duplicates[value] = [obj.code] + + return duplicates + + @staticmethod + def replace_duplicate_slugs(code_to_slug: dict[str, str]) -> dict[str, str]: slug_count: dict[str, int] = {} seen_slugs: set[str] = set() - for obj in objects: - original_slug = obj.slug + for code, slug in code_to_slug.items(): + original_slug = slug if original_slug in seen_slugs: if original_slug in slug_count: slug_count[original_slug] += 1 else: slug_count[original_slug] = 1 - obj.slug = f"{original_slug}-{slug_count[original_slug]}" + code_to_slug[code] = f"{original_slug}-{slug_count[original_slug]}" else: seen_slugs.add(original_slug) + return code_to_slug -class PretalxSubmissions(PretalxData): - root: list[PretalxSession] - - @model_validator(mode="before") - @classmethod - def initiate_publishable_sessions(cls, root) -> list[PretalxSession]: + @staticmethod + def warn_duplicates( + session_attributes_to_check: list[str], + speaker_attributes_to_check: list[str], + sessions_to_check: dict[str, EuroPythonSession], + speakers_to_check: dict[str, PretalxSpeaker], + ) -> None: """ - Returns only the publishable sessions + Warns about duplicate attributes in the given objects """ - sessions = [] - for submission in root: - if cls.is_submission_publishable(submission): - sessions.append(PretalxSession.model_validate(submission)) - - # Sort by start time (early first) for deterministic slug replacement - sessions = sorted(sessions, key=lambda x: (x.start is None, x.start)) - - cls.replace_duplicate_slugs(sessions) - - # Compute the relationships of all sessions - TimingRelationship.compute_relationships( - [s for s in sessions if s.start and s.end] + print( + f"Checking for duplicate {'s, '.join(session_attributes_to_check)}s in sessions..." + ) + duplicate_sessions = Utils.find_duplicate_attributes( + sessions_to_check, session_attributes_to_check ) - return sessions + for attribute, codes in duplicate_sessions.items(): + if len(codes) > 1: + print(f"Duplicate ``{attribute}`` in sessions: {codes}") - @staticmethod - def is_submission_publishable(submission: dict) -> bool: - return submission.get("state") in ( - SubmissionState.accepted, - SubmissionState.confirmed, + print( + f"Checking for duplicate {'s, '.join(speaker_attributes_to_check)}s in speakers..." + ) + duplicate_speakers = Utils.find_duplicate_attributes( + speakers_to_check, speaker_attributes_to_check ) + for attribute, codes in duplicate_speakers.items(): + if len(codes) > 1: + print(f"Duplicate ``{attribute}`` in speakers: {codes}") -class PretalxSpeakers(PretalxData): - root: list[PretalxSpeaker] - - # Overriden to be able to pass the accepted_proposals - @model_validator(mode="before") - @classmethod - def initiate_publishable_speakers( - cls, root, context: ValidationInfo - ) -> list[PretalxSpeaker]: - """ - Returns only speakers with publishable sessions + @staticmethod + def compute_unique_slugs_by_attribute( + objects: dict[str, BaseModel], attribute: str + ): """ - accepted_proposals = context.context["accepted_proposals"] - speakers = [] - for speaker in root: - # Overwrite the submissions with only the publishable ones - if submissions := cls.is_speaker_publishable(speaker, accepted_proposals): - speaker = PretalxSpeaker.model_validate(speaker) - speaker.submissions = list(submissions) - speakers.append(speaker) - - # Sort by code for deterministic slug replacement - speakers = sorted(speakers, key=lambda x: x.code) + Compute the slugs based on the given attribute + and replace the duplicate slugs with incrementing + numbers at the end. - cls.replace_duplicate_slugs(speakers) + Returns: dict[code, slug] + """ + object_code_to_slug = {} + for obj in objects.values(): + object_code_to_slug[obj.code] = slugify(getattr(obj, attribute)) - return speakers + return Utils.replace_duplicate_slugs(object_code_to_slug) @staticmethod - def is_speaker_publishable( - speaker: dict, accepted_proposals: KeysView[str] - ) -> set[str]: - return set(speaker.get("submissions")) & accepted_proposals - - -def parse_publishable_submissions() -> dict[str, PretalxSession]: - """ - Returns only publishable sessions - """ - with open(Config.raw_path / "submissions_latest.json") as fd: - js = json.load(fd) - subs = PretalxSubmissions.model_validate(js).root - subs_dict = {s.code: s for s in subs} - return subs_dict - - -def parse_publishable_speakers( - publishable_sessions: KeysView[str], -) -> dict[str, PretalxSpeaker]: - """ - Returns only speakers with publishable sessions - """ - with open(Config.raw_path / "speakers_latest.json") as fd: - js = json.load(fd) - speakers = PretalxSpeakers.model_validate( - js, context={"accepted_proposals": publishable_sessions} - ).root - speakers_dict = {s.code: s for s in speakers} - return speakers_dict - - -def save_publishable_sessions(sessions: dict[str, PretalxSession]): - path = Config.public_path / "sessions.json" - - data = {k: json.loads(v.model_dump_json()) for k, v in sessions.items()} - with open(path, "w") as fd: - json.dump(data, fd, indent=2) - - -def save_publishable_speakers(speakers: dict[str, PretalxSpeaker]): - path = Config.public_path / "speakers.json" - - data = {k: v.model_dump() for k, v in speakers.items()} - with open(path, "w") as fd: - json.dump(data, fd, indent=2) + def write_to_file(output_file: str, data: dict[str, BaseModel]): + with open(output_file, "w") as fd: + json.dump( + {k: json.loads(v.model_dump_json()) for k, v in data.items()}, + fd, + indent=2, + ) -def save_all( - publishable_sessions: dict[str, PretalxSession], - publishable_speakers: dict[str, PretalxSpeaker], -): - Config.public_path.mkdir(parents=True, exist_ok=True) +class Transform: + @staticmethod + def pretalx_submissions_to_europython_sessions( + submissions: dict[str, PretalxSubmission], + ) -> dict[str, EuroPythonSession]: + """ + Transforms the given Pretalx submissions to EuroPython sessions + """ + # Sort the submissions based on start time for deterministic slug computation + submissions = { + k: v + for k, v in sorted( + submissions.items(), + key=lambda item: (item[1].start is None, item[1].start), + ) + } - save_publishable_sessions(publishable_sessions) - save_publishable_speakers(publishable_speakers) + session_code_to_slug = Utils.compute_unique_slugs_by_attribute( + submissions, "title" + ) + sessions = {} + for code, submission in submissions.items(): + session = EuroPythonSession( + code=submission.code, + title=submission.title, + speakers=submission.speakers, + session_type=submission.submission_type, + slug=session_code_to_slug[submission.code], + track=submission.track, + abstract=submission.abstract, + duration=submission.duration, + resources=submission.resources, + room=submission.room, + start=submission.start, + end=submission.end, + answers=submission.answers, + sessions_in_parallel=TimingRelationships.get_sessions_in_parallel( + submission.code + ), + sessions_after=TimingRelationships.get_sessions_after(submission.code), + sessions_before=TimingRelationships.get_sessions_before( + submission.code + ), + next_session=TimingRelationships.get_next_session(submission.code), + prev_session=TimingRelationships.get_prev_session(submission.code), + ) + sessions[code] = session -def check_duplicate_slugs( - publishable_sessions: dict[str, PretalxSession], - publishable_speakers: dict[str, PretalxSpeaker], -) -> bool: - session_slugs = [s.slug for s in publishable_sessions.values()] - speaker_slugs = [s.slug for s in publishable_speakers.values()] + return sessions - session_duplicates = [ - slug for slug in set(session_slugs) if session_slugs.count(slug) > 1 - ] - speaker_duplicates = [ - slug for slug in set(speaker_slugs) if speaker_slugs.count(slug) > 1 - ] + @staticmethod + def pretalx_speakers_to_europython_speakers( + speakers: dict[str, PretalxSpeaker], + ) -> dict[str, EuroPythonSpeaker]: + """ + Transforms the given Pretalx speakers to EuroPython speakers + """ + # Sort the speakers based on code for deterministic slug computation + speakers = {k: v for k, v in sorted(speakers.items(), key=lambda item: item[0])} + + speaker_code_to_slug = Utils.compute_unique_slugs_by_attribute(speakers, "name") + + euro_python_speakers = {} + for code, speaker in speakers.items(): + euro_python_speaker = EuroPythonSpeaker( + code=speaker.code, + name=speaker.name, + biography=speaker.biography, + avatar=speaker.avatar, + slug=speaker_code_to_slug[speaker.code], + answers=speaker.answers, + submissions=speaker.submissions, + ) + euro_python_speakers[code] = euro_python_speaker - if session_duplicates or speaker_duplicates: - print("Found duplicate slugs:") - for slug in session_duplicates: - print(f"Session: {slug}") - for slug in speaker_duplicates: - print(f"Speaker: {slug}") - return False - return True + return euro_python_speakers if __name__ == "__main__": - print(f"Transforming {Config.event} data...") - print("Checking for duplicate slugs...") - - publishable_sessions = parse_publishable_submissions() - - # Pass only the keys (session codes) - publishable_speakers = parse_publishable_speakers(publishable_sessions.keys()) - - if not check_duplicate_slugs(publishable_sessions, publishable_speakers) and ( - len(sys.argv) <= 1 or sys.argv[1] != "--allow-dupes" - ): - print("Exiting. Use ``make transform ALLOW_DUPES=true`` to continue.") - sys.exit(1) + print(f"Parsing the data from {Config.raw_path}...") + pretalx_submissions = Parse.publishable_submissions( + Config.raw_path / "submissions_latest.json" + ) + pretalx_speakers = Parse.publishable_speakers( + Config.raw_path / "speakers_latest.json", pretalx_submissions.keys() + ) + + print("Computing timing relationships...") + TimingRelationships.compute(pretalx_submissions.values()) + + print("Transforming the data...") + ep_sessions = Transform.pretalx_submissions_to_europython_sessions( + pretalx_submissions + ) + ep_speakers = Transform.pretalx_speakers_to_europython_speakers(pretalx_speakers) + + # Warn about duplicates if the flag is set + if len(sys.argv) > 1 and sys.argv[1] == "--warn-dupes": + Utils.warn_duplicates( + session_attributes_to_check=["slug"], + speaker_attributes_to_check=["slug"], + sessions_to_check=ep_sessions, + speakers_to_check=ep_speakers, + ) - print("Saving publishable data...") - save_all(publishable_sessions, publishable_speakers) - print("Done") + print(f"Writing the data to {Config.public_path}...") + Utils.write_to_file(Config.public_path / "sessions.json", ep_sessions) + Utils.write_to_file(Config.public_path / "speakers.json", ep_speakers) diff --git a/tests/test_examples_are_up_to_date.py b/tests/test_examples_are_up_to_date.py deleted file mode 100644 index 33fa58c..0000000 --- a/tests/test_examples_are_up_to_date.py +++ /dev/null @@ -1,33 +0,0 @@ -import json - -from src.transform import PretalxSession, PretalxSpeaker - -with open("./data/examples/pretalx/submissions.json") as fd: - pretalx_submissions = json.load(fd) - -with open("./data/examples/pretalx/speakers.json") as fd: - pretalx_speakers = json.load(fd) - - -def test_sessions_example(): - assert pretalx_submissions[0]["code"] == "A8CD3F" - pretalx = pretalx_submissions[0] - - transformed = PretalxSession.model_validate(pretalx) - - with open("./data/examples/output/sessions.json") as fd: - sessions = json.load(fd) - - assert transformed.model_dump() == sessions["A8CD3F"] - - -def test_speakers_example(): - assert pretalx_speakers[0]["code"] == "F3DC8A" - pretalx = pretalx_speakers[0] - - transformed = PretalxSpeaker.model_validate(pretalx) - - with open("./data/examples/output/speakers.json") as fd: - speakers = json.load(fd) - - assert transformed.model_dump() == speakers["F3DC8A"] diff --git a/tests/test_social_media_extraction.py b/tests/test_social_media_extraction.py index 6c2cd5c..64758cc 100644 --- a/tests/test_social_media_extraction.py +++ b/tests/test_social_media_extraction.py @@ -1,6 +1,6 @@ import pytest -from src.transform import PretalxSpeaker +from src.transform import EuroPythonSpeaker @pytest.mark.parametrize( @@ -16,7 +16,7 @@ ], ) def test_extract_mastodon_url(input_string: str, result: str) -> None: - assert PretalxSpeaker.extract_mastodon_url(input_string) == result + assert EuroPythonSpeaker.extract_mastodon_url(input_string) == result @pytest.mark.parametrize( @@ -29,5 +29,5 @@ def test_extract_mastodon_url(input_string: str, result: str) -> None: ("https://linkedin.com/in/username", "https://linkedin.com/in/username"), ], ) -def test_extract_linked_url(input_string: str, result: str) -> None: - assert PretalxSpeaker.extract_linkedin_url(input_string) == result +def test_extract_linkedin_url(input_string: str, result: str) -> None: + assert EuroPythonSpeaker.extract_linkedin_url(input_string) == result diff --git a/tests/test_transform_end_to_end.py b/tests/test_transform_end_to_end.py new file mode 100644 index 0000000..db819a3 --- /dev/null +++ b/tests/test_transform_end_to_end.py @@ -0,0 +1,38 @@ +import json + +from src.transform import Parse, TimingRelationships, Transform + +pretalx_submissions = Parse.publishable_submissions( + "./data/examples/pretalx/submissions.json" +) + + +def test_e2e_sessions(): + TimingRelationships.compute(pretalx_submissions.values()) + + ep_sessions = Transform.pretalx_submissions_to_europython_sessions( + pretalx_submissions + ) + ep_sessions_dump = { + k: json.loads(v.model_dump_json()) for k, v in ep_sessions.items() + } + + with open("./data/examples/europython/sessions.json") as fd: + ep_sessions_expected = json.load(fd) + + assert ep_sessions_dump == ep_sessions_expected + + +def test_e2e_speakers(): + pretalx_speakers = Parse.publishable_speakers( + "./data/examples/pretalx/speakers.json", pretalx_submissions.keys() + ) + ep_speakers = Transform.pretalx_speakers_to_europython_speakers(pretalx_speakers) + ep_speakers_dump = { + k: json.loads(v.model_dump_json()) for k, v in ep_speakers.items() + } + + with open("./data/examples/europython/speakers.json") as fd: + ep_speakers_expected = json.load(fd) + + assert ep_speakers_dump == ep_speakers_expected From de3f67d21a3af79ba45b1a085c8976ff0534a567 Mon Sep 17 00:00:00 2001 From: egeakman Date: Tue, 4 Jun 2024 17:37:08 +0300 Subject: [PATCH 25/27] Separate files --- src/misc.py | 26 + src/models/__init__.py | 0 src/models/europython.py | 162 ++++++ src/models/pretalx.py | 109 ++++ src/transform.py | 707 +------------------------- src/utils/__init__.py | 0 src/utils/parse.py | 45 ++ src/utils/timing_relationships.py | 193 +++++++ src/utils/transform.py | 83 +++ src/utils/utils.py | 123 +++++ tests/test_social_media_extraction.py | 2 +- tests/test_transform_end_to_end.py | 8 +- 12 files changed, 753 insertions(+), 705 deletions(-) create mode 100644 src/misc.py create mode 100644 src/models/__init__.py create mode 100644 src/models/europython.py create mode 100644 src/models/pretalx.py create mode 100644 src/utils/__init__.py create mode 100644 src/utils/parse.py create mode 100644 src/utils/timing_relationships.py create mode 100644 src/utils/transform.py create mode 100644 src/utils/utils.py diff --git a/src/misc.py b/src/misc.py new file mode 100644 index 0000000..2aac11c --- /dev/null +++ b/src/misc.py @@ -0,0 +1,26 @@ +from enum import Enum + + +class SpeakerQuestion: + affiliation = "Company / Organization / Educational Institution" + homepage = "Social (Homepage)" + twitter = "Social (X/Twitter)" + mastodon = "Social (Mastodon)" + linkedin = "Social (LinkedIn)" + gitx = "Social (Github/Gitlab)" + + +class SubmissionQuestion: + outline = "Outline" + tweet = "Abstract as a tweet / toot" + delivery = "My presentation can be delivered" + level = "Expected audience expertise" + + +class SubmissionState(Enum): + accepted = "accepted" + confirmed = "confirmed" + withdrawn = "withdrawn" + rejected = "rejected" + canceled = "canceled" + submitted = "submitted" diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/europython.py b/src/models/europython.py new file mode 100644 index 0000000..3fbe282 --- /dev/null +++ b/src/models/europython.py @@ -0,0 +1,162 @@ +from datetime import datetime + +from pydantic import BaseModel, Field, computed_field, model_validator + +from src.config import Config +from src.misc import SpeakerQuestion, SubmissionQuestion +from src.models.pretalx import PretalxAnswer + + +class EuroPythonSpeaker(BaseModel): + """ + Model for EuroPython speaker data, transformed from Pretalx data + """ + + code: str + name: str + biography: str | None = None + avatar: str + slug: str + answers: list[PretalxAnswer] = Field(..., exclude=True) + submissions: list[str] + + # Extracted + affiliation: str | None = None + homepage: str | None = None + twitter_url: str | None = None + mastodon_url: str | None = None + linkedin_url: str | None = None + gitx: str | None = None + + @model_validator(mode="before") + @classmethod + def extract_answers(cls, values) -> dict: + answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] + + for answer in answers: + if answer.question_text == SpeakerQuestion.affiliation: + values["affiliation"] = answer.answer_text + + if answer.question_text == SpeakerQuestion.homepage: + values["homepage"] = answer.answer_text + + if answer.question_text == SpeakerQuestion.twitter: + values["twitter_url"] = cls.extract_twitter_url( + answer.answer_text.strip().split()[0] + ) + + if answer.question_text == SpeakerQuestion.mastodon: + values["mastodon_url"] = cls.extract_mastodon_url( + answer.answer_text.strip().split()[0] + ) + + if answer.question_text == SpeakerQuestion.linkedin: + values["linkedin_url"] = cls.extract_linkedin_url( + answer.answer_text.strip().split()[0] + ) + + if answer.question_text == SpeakerQuestion.gitx: + values["gitx"] = answer.answer_text.strip().split()[0] + + return values + + @staticmethod + def extract_twitter_url(text: str) -> str: + """ + Extract the Twitter URL from the answer + """ + if text.startswith("@"): + twitter_url = f"https://x.com/{text[1:]}" + elif not text.startswith(("https://", "http://", "www.")): + twitter_url = f"https://x.com/{text}" + else: + twitter_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) + + return twitter_url.split("?")[0] + + @staticmethod + def extract_mastodon_url(text: str) -> str: + """ + Extract the Mastodon URL from the answer, handle @username@instance format + """ + if not text.startswith(("https://", "http://")) and text.count("@") == 2: + mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}" + else: + mastodon_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) + + return mastodon_url.split("?")[0] + + @staticmethod + def extract_linkedin_url(text: str) -> str: + """ + Extract the LinkedIn URL from the answer + """ + if text.startswith("in/"): + linkedin_url = f"https://linkedin.com/{text}" + elif not text.startswith(("https://", "http://", "www.")): + linkedin_url = f"https://linkedin.com/in/{text}" + else: + linkedin_url = ( + f"https://{text.removeprefix('https://').removeprefix('http://')}" + ) + + return linkedin_url.split("?")[0] + + +class EuroPythonSession(BaseModel): + """ + Model for EuroPython session data, transformed from Pretalx data + """ + + code: str + title: str + speakers: list[str] + session_type: str + slug: str + track: str | None = None + abstract: str = "" + tweet: str = "" + duration: str = "" + level: str = "" + delivery: str = "" + resources: list[dict[str, str]] | None = None + room: str | None = None + start: datetime | None = None + end: datetime | None = None + answers: list[PretalxAnswer] = Field(..., exclude=True) + sessions_in_parallel: list[str] | None = None + sessions_after: list[str] | None = None + sessions_before: list[str] | None = None + next_session: str | None = None + prev_session: str | None = None + + @computed_field + def website_url(self) -> str: + return ( + f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{self.slug}" + ) + + @model_validator(mode="before") + @classmethod + def extract_answers(cls, values) -> dict: + answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] + + for answer in answers: + # TODO if we need any other questions + if answer.question_text == SubmissionQuestion.tweet: + values["tweet"] = answer.answer_text + + if answer.question_text == SubmissionQuestion.delivery: + if "in-person" in answer.answer_text: + values["delivery"] = "in-person" + else: + values["delivery"] = "remote" + + if answer.question_text == SubmissionQuestion.level: + values["level"] = answer.answer_text.lower() + + return values diff --git a/src/models/pretalx.py b/src/models/pretalx.py new file mode 100644 index 0000000..ea19b42 --- /dev/null +++ b/src/models/pretalx.py @@ -0,0 +1,109 @@ +from datetime import datetime + +from pydantic import BaseModel, Field, field_validator, model_validator + +from src.misc import SubmissionState + + +class PretalxAnswer(BaseModel): + question_text: str + answer_text: str + answer_file: str | None = None + submission_id: str | None = None + speaker_id: str | None = None + + @model_validator(mode="before") + @classmethod + def extract(cls, values) -> dict: + values["question_text"] = values["question"]["question"]["en"] + values["answer_text"] = values["answer"] + values["answer_file"] = values["answer_file"] + values["submission_id"] = values["submission"] + values["speaker_id"] = values["person"] + return values + + +class PretalxSlot(BaseModel): + room: str | None = None + start: datetime | None = None + end: datetime | None = None + + @field_validator("room", mode="before") + @classmethod + def handle_localized(cls, v) -> str | None: + if isinstance(v, dict): + return v.get("en") + return v + + +class PretalxSpeaker(BaseModel): + """ + Model for Pretalx speaker data + """ + + code: str + name: str + biography: str | None = None + avatar: str + submissions: list[str] + answers: list[PretalxAnswer] + + +class PretalxSubmission(BaseModel): + """ + Model for Pretalx submission data + """ + + code: str + title: str + speakers: list[str] # We only want the code, not the full info + submission_type: str + track: str | None = None + state: SubmissionState + abstract: str = "" + duration: str = "" + resources: list[dict[str, str]] | None = None + answers: list[PretalxAnswer] + slot: PretalxSlot | None = Field(..., exclude=True) + + # Extracted from slot data + room: str | None = None + start: datetime | None = None + end: datetime | None = None + + @field_validator("submission_type", "track", mode="before") + @classmethod + def handle_localized(cls, v) -> str | None: + if isinstance(v, dict): + return v.get("en") + return v + + @field_validator("duration", mode="before") + @classmethod + def duration_to_string(cls, v) -> str: + if isinstance(v, int): + return str(v) + return v + + @field_validator("resources", mode="before") + @classmethod + def handle_resources(cls, v) -> list[dict[str, str]] | None: + return v or None + + @model_validator(mode="before") + @classmethod + def process_values(cls, values) -> dict: + values["speakers"] = sorted([s["code"] for s in values["speakers"]]) + + # Set slot information + if values.get("slot"): + slot = PretalxSlot.model_validate(values["slot"]) + values["room"] = slot.room + values["start"] = slot.start + values["end"] = slot.end + + return values + + @property + def is_publishable(self) -> bool: + return self.state in (SubmissionState.accepted, SubmissionState.confirmed) diff --git a/src/transform.py b/src/transform.py index 17b3746..6bbfbaf 100644 --- a/src/transform.py +++ b/src/transform.py @@ -1,705 +1,10 @@ -from __future__ import annotations - -import json import sys -from collections.abc import KeysView -from datetime import datetime -from enum import Enum - -from pydantic import BaseModel, Field, computed_field, field_validator, model_validator -from slugify import slugify from src.config import Config - - -class SpeakerQuestion: - affiliation = "Company / Organization / Educational Institution" - homepage = "Social (Homepage)" - twitter = "Social (X/Twitter)" - mastodon = "Social (Mastodon)" - linkedin = "Social (LinkedIn)" - gitx = "Social (Github/Gitlab)" - - -class SubmissionQuestion: - outline = "Outline" - tweet = "Abstract as a tweet / toot" - delivery = "My presentation can be delivered" - level = "Expected audience expertise" - - -class SubmissionState(Enum): - accepted = "accepted" - confirmed = "confirmed" - withdrawn = "withdrawn" - rejected = "rejected" - canceled = "canceled" - submitted = "submitted" - - -class PretalxAnswer(BaseModel): - question_text: str - answer_text: str - answer_file: str | None = None - submission_id: str | None = None - speaker_id: str | None = None - - @model_validator(mode="before") - @classmethod - def extract(cls, values): - values["question_text"] = values["question"]["question"]["en"] - values["answer_text"] = values["answer"] - values["answer_file"] = values["answer_file"] - values["submission_id"] = values["submission"] - values["speaker_id"] = values["person"] - return values - - -class PretalxSlot(BaseModel): - room: str | None = None - start: datetime | None = None - end: datetime | None = None - - @field_validator("room", mode="before") - @classmethod - def handle_localized(cls, v): - if isinstance(v, dict): - return v.get("en") - return v - - -class TimingRelationships: - all_sessions_in_parallel: dict[str, list[str]] = {} - all_sessions_after: dict[str, list[str]] = {} - all_sessions_before: dict[str, list[str]] = {} - all_next_session: dict[str, str | None] = {} - all_prev_session: dict[str, str | None] = {} - - @classmethod - def compute(cls, all_sessions: list[PretalxSubmission]) -> None: - for session in all_sessions: - if not session.start or not session.end: - continue - - sessions_in_parallel = cls.compute_sessions_in_parallel( - session, all_sessions - ) - sessions_after_data = cls.compute_sessions_after( - session, all_sessions, sessions_in_parallel - ) - sessions_before_data = cls.compute_sessions_before( - session, all_sessions, sessions_in_parallel - ) - - cls.all_sessions_in_parallel[session.code] = sessions_in_parallel - cls.all_sessions_after[session.code] = sessions_after_data.get( - "sessions_after" - ) - cls.all_sessions_before[session.code] = sessions_before_data.get( - "sessions_before" - ) - cls.all_next_session[session.code] = sessions_after_data.get("next_session") - cls.all_prev_session[session.code] = sessions_before_data.get( - "prev_session" - ) - - @classmethod - def get_sessions_in_parallel( - cls, session_code: str | None = None - ) -> list[str] | None: - if session_code: - return cls.all_sessions_in_parallel.get(session_code) - return cls.all_sessions_in_parallel - - @classmethod - def get_sessions_after(cls, session_code: str | None = None) -> list[str] | None: - if session_code: - return cls.all_sessions_after.get(session_code) - return cls.all_sessions_after - - @classmethod - def get_sessions_before(cls, session_code: str | None = None) -> list[str] | None: - if session_code: - return cls.all_sessions_before.get(session_code) - return cls.all_sessions_before - - @classmethod - def get_next_session(cls, session_code: str | None = None) -> str | None: - if session_code: - return cls.all_next_session.get(session_code) - return cls.all_next_session - - @classmethod - def get_prev_session(cls, session_code: str | None = None) -> str | None: - if session_code: - return cls.all_prev_session.get(session_code) - return cls.all_prev_session - - @staticmethod - def compute_sessions_in_parallel( - session: PretalxSubmission, all_sessions: list[PretalxSubmission] - ) -> list[str]: - sessions_parallel = [] - for other_session in all_sessions: - if ( - other_session.code == session.code - or other_session.start is None - or session.start is None - ): - continue - - # If they intersect, they are in parallel - if other_session.start < session.end and other_session.end > session.start: - sessions_parallel.append(other_session.code) - - return sessions_parallel - - @staticmethod - def compute_sessions_after( - session: PretalxSubmission, - all_sessions: list[PretalxSubmission], - sessions_in_parallel: list[str], - ) -> dict[str, list[str] | str | None]: - # Sort sessions based on start time, early first - all_sessions_sorted = sorted( - all_sessions, key=lambda x: (x.start is None, x.start) - ) - - # Filter out sessions - remaining_sessions = [ - other_session - for other_session in all_sessions_sorted - if other_session.start is not None - and other_session.start >= session.end - and other_session.code not in sessions_in_parallel - and other_session.code != session.code - and other_session.start.day == session.start.day - and not other_session.submission_type - == session.submission_type - == "Announcements" - ] - - # Add sessions to the list if they are in different rooms - seen_rooms = set() - unique_sessions: list[PretalxSubmission] = [] - - for other_session in remaining_sessions: - if other_session.room not in seen_rooms: - unique_sessions.append(other_session) - seen_rooms.add(other_session.room) - - # If there is a keynote next, only show that - if any(s.submission_type == "Keynote" for s in unique_sessions): - unique_sessions = [ - s for s in unique_sessions if s.submission_type == "Keynote" - ] - - # Set the next sessions in all rooms - sessions_after = [s.code for s in unique_sessions] - - # Set the next session in the same room, or a keynote - next_session = None - for other_session in unique_sessions: - if ( - other_session.room == session.room - or other_session.submission_type == "Keynote" - ): - next_session = other_session.code - break - - return {"sessions_after": sessions_after, "next_session": next_session} - - @staticmethod - def compute_sessions_before( - session: PretalxSubmission, - all_sessions: list[PretalxSubmission], - sessions_in_parallel: list[str], - ) -> dict[str, list[str] | str | None]: - # Sort sessions based on start time, late first - all_sessions_sorted = sorted( - all_sessions, - key=lambda x: (x.start is None, x.start), - reverse=True, - ) - - remaining_sessions = [ - other_session - for other_session in all_sessions_sorted - if other_session.start is not None - and other_session.code not in sessions_in_parallel - and other_session.start <= session.start - and other_session.code != session.code - and other_session.start.day == session.start.day - and other_session.submission_type != "Announcements" - ] - - seen_rooms = set() - unique_sessions = [] - - for other_session in remaining_sessions: - if other_session.room not in seen_rooms: - unique_sessions.append(other_session) - seen_rooms.add(other_session.room) - - sessions_before = [session.code for session in unique_sessions] - - prev_session = None - for other_session in unique_sessions: - if other_session.room == session.room: - prev_session = other_session.code - break - - return {"sessions_before": sessions_before, "prev_session": prev_session} - - -class PretalxSpeaker(BaseModel): - """ - Model for Pretalx speaker data - """ - - code: str - name: str - biography: str | None = None - avatar: str - submissions: list[str] - answers: list[PretalxAnswer] - - -class EuroPythonSpeaker(BaseModel): - """ - Model for EuroPython speaker data, transformed from Pretalx data - """ - - code: str - name: str - biography: str | None = None - avatar: str - slug: str - answers: list[PretalxAnswer] = Field(..., exclude=True) - submissions: list[str] - - # Extracted - affiliation: str | None = None - homepage: str | None = None - twitter_url: str | None = None - mastodon_url: str | None = None - linkedin_url: str | None = None - gitx: str | None = None - - @model_validator(mode="before") - @classmethod - def extract_answers(cls, values) -> dict: - answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] - - for answer in answers: - if answer.question_text == SpeakerQuestion.affiliation: - values["affiliation"] = answer.answer_text - - if answer.question_text == SpeakerQuestion.homepage: - values["homepage"] = answer.answer_text - - if answer.question_text == SpeakerQuestion.twitter: - values["twitter_url"] = cls.extract_twitter_url( - answer.answer_text.strip().split()[0] - ) - - if answer.question_text == SpeakerQuestion.mastodon: - values["mastodon_url"] = cls.extract_mastodon_url( - answer.answer_text.strip().split()[0] - ) - - if answer.question_text == SpeakerQuestion.linkedin: - values["linkedin_url"] = cls.extract_linkedin_url( - answer.answer_text.strip().split()[0] - ) - - if answer.question_text == SpeakerQuestion.gitx: - values["gitx"] = answer.answer_text.strip().split()[0] - - return values - - @staticmethod - def extract_twitter_url(text: str) -> str: - """ - Extract the Twitter URL from the answer - """ - if text.startswith("@"): - twitter_url = f"https://x.com/{text[1:]}" - elif not text.startswith(("https://", "http://", "www.")): - twitter_url = f"https://x.com/{text}" - else: - twitter_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) - - return twitter_url.split("?")[0] - - @staticmethod - def extract_mastodon_url(text: str) -> str: - """ - Extract the Mastodon URL from the answer, handle @username@instance format - """ - if not text.startswith(("https://", "http://")) and text.count("@") == 2: - mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}" - else: - mastodon_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) - - return mastodon_url.split("?")[0] - - @staticmethod - def extract_linkedin_url(text: str) -> str: - """ - Extract the LinkedIn URL from the answer - """ - if text.startswith("in/"): - linkedin_url = f"https://linkedin.com/{text}" - elif not text.startswith(("https://", "http://", "www.")): - linkedin_url = f"https://linkedin.com/in/{text}" - else: - linkedin_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) - - return linkedin_url.split("?")[0] - - -class PretalxSubmission(BaseModel): - """ - Model for Pretalx submission data - """ - - code: str - title: str - speakers: list[str] # We only want the code, not the full info - submission_type: str - track: str | None = None - state: SubmissionState - abstract: str = "" - duration: str = "" - resources: list[dict[str, str]] | None = None - answers: list[PretalxAnswer] - slot: PretalxSlot | None = Field(..., exclude=True) - - # Extracted from slot data - room: str | None = None - start: datetime | None = None - end: datetime | None = None - - @field_validator("submission_type", "track", mode="before") - @classmethod - def handle_localized(cls, v) -> str | None: - if isinstance(v, dict): - return v.get("en") - return v - - @field_validator("duration", mode="before") - @classmethod - def duration_to_string(cls, v) -> str: - if isinstance(v, int): - return str(v) - return v - - @field_validator("resources", mode="before") - @classmethod - def handle_resources(cls, v) -> list[dict[str, str]] | None: - return v or None - - @model_validator(mode="before") - @classmethod - def process_values(cls, values) -> dict: - values["speakers"] = sorted([s["code"] for s in values["speakers"]]) - - # Set slot information - if values.get("slot"): - slot = PretalxSlot.model_validate(values["slot"]) - values["room"] = slot.room - values["start"] = slot.start - values["end"] = slot.end - - return values - - @property - def is_publishable(self) -> bool: - return self.state in (SubmissionState.accepted, SubmissionState.confirmed) - - -class EuroPythonSession(BaseModel): - """ - Model for EuroPython session data, transformed from Pretalx data - """ - - code: str - title: str - speakers: list[str] - session_type: str - slug: str - track: str | None = None - abstract: str = "" - tweet: str = "" - duration: str = "" - level: str = "" - delivery: str = "" - resources: list[dict[str, str]] | None = None - room: str | None = None - start: datetime | None = None - end: datetime | None = None - answers: list[PretalxAnswer] = Field(..., exclude=True) - sessions_in_parallel: list[str] | None = None - sessions_after: list[str] | None = None - sessions_before: list[str] | None = None - next_session: str | None = None - prev_session: str | None = None - - @computed_field - def website_url(self) -> str: - return ( - f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{self.slug}" - ) - - @model_validator(mode="before") - @classmethod - def extract_answers(cls, values) -> dict: - answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]] - - for answer in answers: - # TODO if we need any other questions - if answer.question_text == SubmissionQuestion.tweet: - values["tweet"] = answer.answer_text - - if answer.question_text == SubmissionQuestion.delivery: - if "in-person" in answer.answer_text: - values["delivery"] = "in-person" - else: - values["delivery"] = "remote" - - if answer.question_text == SubmissionQuestion.level: - values["level"] = answer.answer_text.lower() - - return values - - -class Parse: - @staticmethod - def publishable_submissions(input_file: str) -> dict[str, PretalxSubmission]: - """ - Returns only publishable submissions - """ - with open(input_file) as fd: - js = json.load(fd) - all_submissions = [PretalxSubmission.model_validate(s) for s in js] - publishable_submissions = [s for s in all_submissions if s.is_publishable] - publishable_submissions_by_code = { - s.code: s for s in publishable_submissions - } - - return publishable_submissions_by_code - - @staticmethod - def publishable_speakers( - input_file: str, - publishable_sessions_keys: KeysView[str], - ) -> dict[str, PretalxSpeaker]: - """ - Returns only speakers with publishable sessions - """ - with open(input_file) as fd: - js = json.load(fd) - all_speakers = [PretalxSpeaker.model_validate(s) for s in js] - speakers_with_publishable_sessions = [ - s - for s in all_speakers - if Utils.publishable_sessions_of_speaker(s, publishable_sessions_keys) - ] - publishable_speakers_by_code = { - s.code: s for s in speakers_with_publishable_sessions - } - - return publishable_speakers_by_code - - -class Utils: - @staticmethod - def publishable_sessions_of_speaker( - speaker: PretalxSpeaker, accepted_proposals: KeysView[str] - ) -> set[str]: - return set(speaker.submissions) & accepted_proposals - - @staticmethod - def find_duplicate_attributes(objects, attributes): - """ - Find duplicates in the given objects based on the given attributes - - Returns: dict[attribute_value, list[object_code]] - """ - duplicates = {} - for obj in objects.values(): - for attribute in attributes: - value = getattr(obj, attribute) - if value in duplicates: - duplicates[value].append(obj.code) - else: - duplicates[value] = [obj.code] - - return duplicates - - @staticmethod - def replace_duplicate_slugs(code_to_slug: dict[str, str]) -> dict[str, str]: - slug_count: dict[str, int] = {} - seen_slugs: set[str] = set() - - for code, slug in code_to_slug.items(): - original_slug = slug - - if original_slug in seen_slugs: - if original_slug in slug_count: - slug_count[original_slug] += 1 - else: - slug_count[original_slug] = 1 - code_to_slug[code] = f"{original_slug}-{slug_count[original_slug]}" - else: - seen_slugs.add(original_slug) - - return code_to_slug - - @staticmethod - def warn_duplicates( - session_attributes_to_check: list[str], - speaker_attributes_to_check: list[str], - sessions_to_check: dict[str, EuroPythonSession], - speakers_to_check: dict[str, PretalxSpeaker], - ) -> None: - """ - Warns about duplicate attributes in the given objects - """ - print( - f"Checking for duplicate {'s, '.join(session_attributes_to_check)}s in sessions..." - ) - duplicate_sessions = Utils.find_duplicate_attributes( - sessions_to_check, session_attributes_to_check - ) - - for attribute, codes in duplicate_sessions.items(): - if len(codes) > 1: - print(f"Duplicate ``{attribute}`` in sessions: {codes}") - - print( - f"Checking for duplicate {'s, '.join(speaker_attributes_to_check)}s in speakers..." - ) - duplicate_speakers = Utils.find_duplicate_attributes( - speakers_to_check, speaker_attributes_to_check - ) - - for attribute, codes in duplicate_speakers.items(): - if len(codes) > 1: - print(f"Duplicate ``{attribute}`` in speakers: {codes}") - - @staticmethod - def compute_unique_slugs_by_attribute( - objects: dict[str, BaseModel], attribute: str - ): - """ - Compute the slugs based on the given attribute - and replace the duplicate slugs with incrementing - numbers at the end. - - Returns: dict[code, slug] - """ - object_code_to_slug = {} - for obj in objects.values(): - object_code_to_slug[obj.code] = slugify(getattr(obj, attribute)) - - return Utils.replace_duplicate_slugs(object_code_to_slug) - - @staticmethod - def write_to_file(output_file: str, data: dict[str, BaseModel]): - with open(output_file, "w") as fd: - json.dump( - {k: json.loads(v.model_dump_json()) for k, v in data.items()}, - fd, - indent=2, - ) - - -class Transform: - @staticmethod - def pretalx_submissions_to_europython_sessions( - submissions: dict[str, PretalxSubmission], - ) -> dict[str, EuroPythonSession]: - """ - Transforms the given Pretalx submissions to EuroPython sessions - """ - # Sort the submissions based on start time for deterministic slug computation - submissions = { - k: v - for k, v in sorted( - submissions.items(), - key=lambda item: (item[1].start is None, item[1].start), - ) - } - - session_code_to_slug = Utils.compute_unique_slugs_by_attribute( - submissions, "title" - ) - - sessions = {} - for code, submission in submissions.items(): - session = EuroPythonSession( - code=submission.code, - title=submission.title, - speakers=submission.speakers, - session_type=submission.submission_type, - slug=session_code_to_slug[submission.code], - track=submission.track, - abstract=submission.abstract, - duration=submission.duration, - resources=submission.resources, - room=submission.room, - start=submission.start, - end=submission.end, - answers=submission.answers, - sessions_in_parallel=TimingRelationships.get_sessions_in_parallel( - submission.code - ), - sessions_after=TimingRelationships.get_sessions_after(submission.code), - sessions_before=TimingRelationships.get_sessions_before( - submission.code - ), - next_session=TimingRelationships.get_next_session(submission.code), - prev_session=TimingRelationships.get_prev_session(submission.code), - ) - sessions[code] = session - - return sessions - - @staticmethod - def pretalx_speakers_to_europython_speakers( - speakers: dict[str, PretalxSpeaker], - ) -> dict[str, EuroPythonSpeaker]: - """ - Transforms the given Pretalx speakers to EuroPython speakers - """ - # Sort the speakers based on code for deterministic slug computation - speakers = {k: v for k, v in sorted(speakers.items(), key=lambda item: item[0])} - - speaker_code_to_slug = Utils.compute_unique_slugs_by_attribute(speakers, "name") - - euro_python_speakers = {} - for code, speaker in speakers.items(): - euro_python_speaker = EuroPythonSpeaker( - code=speaker.code, - name=speaker.name, - biography=speaker.biography, - avatar=speaker.avatar, - slug=speaker_code_to_slug[speaker.code], - answers=speaker.answers, - submissions=speaker.submissions, - ) - euro_python_speakers[code] = euro_python_speaker - - return euro_python_speakers - +from src.utils.parse import Parse +from src.utils.timing_relationships import TimingRelationships +from src.utils.transform import Transform +from src.utils.utils import Utils if __name__ == "__main__": print(f"Parsing the data from {Config.raw_path}...") @@ -722,8 +27,8 @@ def pretalx_speakers_to_europython_speakers( # Warn about duplicates if the flag is set if len(sys.argv) > 1 and sys.argv[1] == "--warn-dupes": Utils.warn_duplicates( - session_attributes_to_check=["slug"], - speaker_attributes_to_check=["slug"], + session_attributes_to_check=["title"], + speaker_attributes_to_check=["name"], sessions_to_check=ep_sessions, speakers_to_check=ep_speakers, ) diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/parse.py b/src/utils/parse.py new file mode 100644 index 0000000..f1cce05 --- /dev/null +++ b/src/utils/parse.py @@ -0,0 +1,45 @@ +import json +from collections.abc import KeysView +from pathlib import Path + +from src.models.pretalx import PretalxSpeaker, PretalxSubmission +from src.utils.utils import Utils + + +class Parse: + @staticmethod + def publishable_submissions(input_file: Path | str) -> dict[str, PretalxSubmission]: + """ + Returns only publishable submissions + """ + with open(input_file) as fd: + js = json.load(fd) + all_submissions = [PretalxSubmission.model_validate(s) for s in js] + publishable_submissions = [s for s in all_submissions if s.is_publishable] + publishable_submissions_by_code = { + s.code: s for s in publishable_submissions + } + + return publishable_submissions_by_code + + @staticmethod + def publishable_speakers( + input_file: Path | str, + publishable_sessions_keys: KeysView[str], + ) -> dict[str, PretalxSpeaker]: + """ + Returns only speakers with publishable sessions + """ + with open(input_file) as fd: + js = json.load(fd) + all_speakers = [PretalxSpeaker.model_validate(s) for s in js] + speakers_with_publishable_sessions = [ + s + for s in all_speakers + if Utils.publishable_sessions_of_speaker(s, publishable_sessions_keys) + ] + publishable_speakers_by_code = { + s.code: s for s in speakers_with_publishable_sessions + } + + return publishable_speakers_by_code diff --git a/src/utils/timing_relationships.py b/src/utils/timing_relationships.py new file mode 100644 index 0000000..ac6add5 --- /dev/null +++ b/src/utils/timing_relationships.py @@ -0,0 +1,193 @@ +from collections.abc import ValuesView + +from src.models.pretalx import PretalxSubmission + + +class TimingRelationships: + all_sessions_in_parallel: dict[str, list[str]] = {} + all_sessions_after: dict[str, list[str]] = {} + all_sessions_before: dict[str, list[str]] = {} + all_next_session: dict[str, str | None] = {} + all_prev_session: dict[str, str | None] = {} + + @classmethod + def compute( + cls, all_sessions: ValuesView[PretalxSubmission] | list[PretalxSubmission] + ) -> None: + for session in all_sessions: + if not session.start or not session.end: + continue + + sessions_in_parallel = cls.compute_sessions_in_parallel( + session, all_sessions + ) + sessions_after = cls.compute_sessions_after( + session, all_sessions, sessions_in_parallel + ) + sessions_before = cls.compute_sessions_before( + session, all_sessions, sessions_in_parallel + ) + + cls.all_sessions_in_parallel[session.code] = sessions_in_parallel + cls.all_sessions_after[session.code] = sessions_after + cls.all_sessions_before[session.code] = sessions_before + cls.all_next_session[session.code] = cls.compute_prev_or_next_session( + session, sessions_after, all_sessions + ) + cls.all_prev_session[session.code] = cls.compute_prev_or_next_session( + session, sessions_before, all_sessions + ) + + @classmethod + def get_sessions_in_parallel( + cls, session_code: str | None = None + ) -> list[str] | None: + return cls.all_sessions_in_parallel.get(session_code) + + @classmethod + def get_sessions_after(cls, session_code: str | None = None) -> list[str] | None: + return cls.all_sessions_after.get(session_code) + + @classmethod + def get_sessions_before(cls, session_code: str | None = None) -> list[str] | None: + return cls.all_sessions_before.get(session_code) + + @classmethod + def get_next_session(cls, session_code: str | None = None) -> str | None: + return cls.all_next_session.get(session_code) + + @classmethod + def get_prev_session(cls, session_code: str | None = None) -> str | None: + return cls.all_prev_session.get(session_code) + + @staticmethod + def compute_sessions_in_parallel( + session: PretalxSubmission, + all_sessions: ValuesView[PretalxSubmission] | list[PretalxSubmission], + ) -> list[str]: + sessions_parallel = [] + for other_session in all_sessions: + if ( + other_session.code == session.code + or other_session.start is None + or session.start is None + ): + continue + + # If they intersect, they are in parallel + if other_session.start < session.end and other_session.end > session.start: + sessions_parallel.append(other_session.code) + + return sessions_parallel + + @staticmethod + def compute_sessions_after( + session: PretalxSubmission, + all_sessions: ValuesView[PretalxSubmission] | list[PretalxSubmission], + sessions_in_parallel: list[str], + ) -> list[str]: + # Sort sessions based on start time, early first + all_sessions_sorted = sorted( + all_sessions, key=lambda x: (x.start is None, x.start) + ) + + # Filter out sessions + remaining_sessions = [ + other_session + for other_session in all_sessions_sorted + if other_session.start is not None + and other_session.start >= session.end + and other_session.code not in sessions_in_parallel + and other_session.code != session.code + and other_session.start.day == session.start.day + and not other_session.submission_type + == session.submission_type + == "Announcements" + ] + + # Add sessions to the list if they are in different rooms + seen_rooms = set() + unique_sessions: list[PretalxSubmission] = [] + + for other_session in remaining_sessions: + if other_session.room not in seen_rooms: + unique_sessions.append(other_session) + seen_rooms.add(other_session.room) + + # If there is a keynote next, only show that + if any(s.submission_type == "Keynote" for s in unique_sessions): + unique_sessions = [ + s for s in unique_sessions if s.submission_type == "Keynote" + ] + + # Set the next sessions in all rooms + sessions_after = [s.code for s in unique_sessions] + + return sessions_after + + @staticmethod + def compute_sessions_before( + session: PretalxSubmission, + all_sessions: ValuesView[PretalxSubmission] | list[PretalxSubmission], + sessions_in_parallel: list[str], + ) -> list[str]: + # Sort sessions based on start time, late first + all_sessions_sorted = sorted( + all_sessions, + key=lambda x: (x.start is None, x.start), + reverse=True, + ) + + remaining_sessions = [ + other_session + for other_session in all_sessions_sorted + if other_session.start is not None + and other_session.code not in sessions_in_parallel + and other_session.start <= session.start + and other_session.code != session.code + and other_session.start.day == session.start.day + and other_session.submission_type != "Announcements" + ] + + seen_rooms = set() + unique_sessions: list[PretalxSubmission] = [] + + for other_session in remaining_sessions: + if other_session.room not in seen_rooms: + unique_sessions.append(other_session) + seen_rooms.add(other_session.room) + + sessions_before = [session.code for session in unique_sessions] + + return sessions_before + + @staticmethod + def compute_prev_or_next_session( + session: PretalxSubmission, + sessions_before_or_after: list[str], + all_sessions: ValuesView[PretalxSubmission] | list[PretalxSubmission], + ) -> str | None: + """ + Compute next_session or prev_session based on the given sessions_before_or_after. + If passed sessions_before, it will return prev_session. + If passed sessions_after, it will return next_session. + + Returns the previous or next session in the same room or a keynote. + """ + if not sessions_before_or_after: + return None + + sessions_before_or_after_object = [ + s for s in all_sessions if s.code in sessions_before_or_after + ] + + session_in_same_room = None + for other_session in sessions_before_or_after_object: + if ( + other_session.room == session.room + or other_session.submission_type == "Keynote" + ): + session_in_same_room = other_session.code + break + + return session_in_same_room diff --git a/src/utils/transform.py b/src/utils/transform.py new file mode 100644 index 0000000..178e7cc --- /dev/null +++ b/src/utils/transform.py @@ -0,0 +1,83 @@ +from src.models.europython import EuroPythonSession, EuroPythonSpeaker +from src.models.pretalx import PretalxSpeaker, PretalxSubmission +from src.utils.timing_relationships import TimingRelationships +from src.utils.utils import Utils + + +class Transform: + @staticmethod + def pretalx_submissions_to_europython_sessions( + submissions: dict[str, PretalxSubmission], + ) -> dict[str, EuroPythonSession]: + """ + Transforms the given Pretalx submissions to EuroPython sessions + """ + # Sort the submissions based on start time for deterministic slug computation + submissions = { + k: v + for k, v in sorted( + submissions.items(), + key=lambda item: (item[1].start is None, item[1].start), + ) + } + + session_code_to_slug = Utils.compute_unique_slugs_by_attribute( + submissions, "title" + ) + + sessions = {} + for code, submission in submissions.items(): + session = EuroPythonSession( + code=submission.code, + title=submission.title, + speakers=submission.speakers, + session_type=submission.submission_type, + slug=session_code_to_slug[submission.code], + track=submission.track, + abstract=submission.abstract, + duration=submission.duration, + resources=submission.resources, + room=submission.room, + start=submission.start, + end=submission.end, + answers=submission.answers, + sessions_in_parallel=TimingRelationships.get_sessions_in_parallel( + submission.code + ), + sessions_after=TimingRelationships.get_sessions_after(submission.code), + sessions_before=TimingRelationships.get_sessions_before( + submission.code + ), + next_session=TimingRelationships.get_next_session(submission.code), + prev_session=TimingRelationships.get_prev_session(submission.code), + ) + sessions[code] = session + + return sessions + + @staticmethod + def pretalx_speakers_to_europython_speakers( + speakers: dict[str, PretalxSpeaker], + ) -> dict[str, EuroPythonSpeaker]: + """ + Transforms the given Pretalx speakers to EuroPython speakers + """ + # Sort the speakers based on code for deterministic slug computation + speakers = {k: v for k, v in sorted(speakers.items(), key=lambda item: item[0])} + + speaker_code_to_slug = Utils.compute_unique_slugs_by_attribute(speakers, "name") + + euro_python_speakers = {} + for code, speaker in speakers.items(): + euro_python_speaker = EuroPythonSpeaker( + code=speaker.code, + name=speaker.name, + biography=speaker.biography, + avatar=speaker.avatar, + slug=speaker_code_to_slug[speaker.code], + answers=speaker.answers, + submissions=speaker.submissions, + ) + euro_python_speakers[code] = euro_python_speaker + + return euro_python_speakers diff --git a/src/utils/utils.py b/src/utils/utils.py new file mode 100644 index 0000000..37838cf --- /dev/null +++ b/src/utils/utils.py @@ -0,0 +1,123 @@ +import json +from collections.abc import KeysView +from pathlib import Path + +from slugify import slugify + +from src.models.europython import EuroPythonSession, EuroPythonSpeaker +from src.models.pretalx import PretalxSpeaker, PretalxSubmission + + +class Utils: + @staticmethod + def publishable_sessions_of_speaker( + speaker: PretalxSpeaker, accepted_proposals: KeysView[str] + ) -> set[str]: + return set(speaker.submissions) & accepted_proposals + + @staticmethod + def find_duplicate_attributes( + objects: ( + dict[str, EuroPythonSession] + | dict[str, EuroPythonSpeaker] + | dict[str, PretalxSubmission] + | dict[str, PretalxSpeaker] + ), + attributes: list[str], + ) -> dict[str, list[str]]: + """ + Find duplicates in the given objects based on the given attributes + + Returns: dict[attribute_value, list[object_code]] + """ + duplicates: dict[str, list[str]] = {} + for obj in objects.values(): + for attribute in attributes: + value = getattr(obj, attribute) + if value in duplicates: + duplicates[value].append(obj.code) + else: + duplicates[value] = [obj.code] + + return duplicates + + @staticmethod + def replace_duplicate_slugs(code_to_slug: dict[str, str]) -> dict[str, str]: + slug_count: dict[str, int] = {} + seen_slugs: set[str] = set() + + for code, slug in code_to_slug.items(): + original_slug = slug + + if original_slug in seen_slugs: + if original_slug in slug_count: + slug_count[original_slug] += 1 + else: + slug_count[original_slug] = 1 + code_to_slug[code] = f"{original_slug}-{slug_count[original_slug]}" + else: + seen_slugs.add(original_slug) + + return code_to_slug + + @staticmethod + def warn_duplicates( + session_attributes_to_check: list[str], + speaker_attributes_to_check: list[str], + sessions_to_check: dict[str, EuroPythonSession] | dict[str, PretalxSubmission], + speakers_to_check: dict[str, EuroPythonSpeaker] | dict[str, PretalxSpeaker], + ) -> None: + """ + Warns about duplicate attributes in the given objects + """ + print( + f"Checking for duplicate {'s, '.join(session_attributes_to_check)}s in sessions..." + ) + duplicate_sessions = Utils.find_duplicate_attributes( + sessions_to_check, session_attributes_to_check + ) + + for attribute, codes in duplicate_sessions.items(): + if len(codes) > 1: + print(f"Duplicate ``{attribute}`` in sessions: {codes}") + + print( + f"Checking for duplicate {'s, '.join(speaker_attributes_to_check)}s in speakers..." + ) + duplicate_speakers = Utils.find_duplicate_attributes( + speakers_to_check, speaker_attributes_to_check + ) + + for attribute, codes in duplicate_speakers.items(): + if len(codes) > 1: + print(f"Duplicate ``{attribute}`` in speakers: {codes}") + + @staticmethod + def compute_unique_slugs_by_attribute( + objects: dict[str, PretalxSubmission] | dict[str, PretalxSpeaker], + attribute: str, + ) -> dict[str, str]: + """ + Compute the slugs based on the given attribute + and replace the duplicate slugs with incrementing + numbers at the end. + + Returns: dict[code, slug] + """ + object_code_to_slug = {} + for obj in objects.values(): + object_code_to_slug[obj.code] = slugify(getattr(obj, attribute)) + + return Utils.replace_duplicate_slugs(object_code_to_slug) + + @staticmethod + def write_to_file( + output_file: Path | str, + data: dict[str, EuroPythonSession] | dict[str, EuroPythonSpeaker], + ) -> None: + with open(output_file, "w") as fd: + json.dump( + {k: json.loads(v.model_dump_json()) for k, v in data.items()}, + fd, + indent=2, + ) diff --git a/tests/test_social_media_extraction.py b/tests/test_social_media_extraction.py index 64758cc..48e2548 100644 --- a/tests/test_social_media_extraction.py +++ b/tests/test_social_media_extraction.py @@ -1,6 +1,6 @@ import pytest -from src.transform import EuroPythonSpeaker +from src.models.europython import EuroPythonSpeaker @pytest.mark.parametrize( diff --git a/tests/test_transform_end_to_end.py b/tests/test_transform_end_to_end.py index db819a3..e8315db 100644 --- a/tests/test_transform_end_to_end.py +++ b/tests/test_transform_end_to_end.py @@ -1,13 +1,15 @@ import json -from src.transform import Parse, TimingRelationships, Transform +from src.utils.parse import Parse +from src.utils.timing_relationships import TimingRelationships +from src.utils.transform import Transform pretalx_submissions = Parse.publishable_submissions( "./data/examples/pretalx/submissions.json" ) -def test_e2e_sessions(): +def test_e2e_sessions() -> None: TimingRelationships.compute(pretalx_submissions.values()) ep_sessions = Transform.pretalx_submissions_to_europython_sessions( @@ -23,7 +25,7 @@ def test_e2e_sessions(): assert ep_sessions_dump == ep_sessions_expected -def test_e2e_speakers(): +def test_e2e_speakers() -> None: pretalx_speakers = Parse.publishable_speakers( "./data/examples/pretalx/speakers.json", pretalx_submissions.keys() ) From d87505279c2d4df3a2503bd1b2111c16a454829c Mon Sep 17 00:00:00 2001 From: egeakman Date: Tue, 4 Jun 2024 17:44:29 +0300 Subject: [PATCH 26/27] naming --- src/utils/transform.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/utils/transform.py b/src/utils/transform.py index 178e7cc..34b26bc 100644 --- a/src/utils/transform.py +++ b/src/utils/transform.py @@ -25,9 +25,9 @@ def pretalx_submissions_to_europython_sessions( submissions, "title" ) - sessions = {} + ep_sessions = {} for code, submission in submissions.items(): - session = EuroPythonSession( + ep_session = EuroPythonSession( code=submission.code, title=submission.title, speakers=submission.speakers, @@ -51,9 +51,9 @@ def pretalx_submissions_to_europython_sessions( next_session=TimingRelationships.get_next_session(submission.code), prev_session=TimingRelationships.get_prev_session(submission.code), ) - sessions[code] = session + ep_sessions[code] = ep_session - return sessions + return ep_sessions @staticmethod def pretalx_speakers_to_europython_speakers( @@ -67,9 +67,9 @@ def pretalx_speakers_to_europython_speakers( speaker_code_to_slug = Utils.compute_unique_slugs_by_attribute(speakers, "name") - euro_python_speakers = {} + ep_speakers = {} for code, speaker in speakers.items(): - euro_python_speaker = EuroPythonSpeaker( + ep_speaker = EuroPythonSpeaker( code=speaker.code, name=speaker.name, biography=speaker.biography, @@ -78,6 +78,6 @@ def pretalx_speakers_to_europython_speakers( answers=speaker.answers, submissions=speaker.submissions, ) - euro_python_speakers[code] = euro_python_speaker + ep_speakers[code] = ep_speaker - return euro_python_speakers + return ep_speakers From 42aba10bab0b681988530ae894331818e364a121 Mon Sep 17 00:00:00 2001 From: egeakman Date: Tue, 4 Jun 2024 18:09:44 +0300 Subject: [PATCH 27/27] speaker website_url --- data/examples/README.md | 1 + data/examples/europython/speakers.json | 3 ++- src/models/europython.py | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/data/examples/README.md b/data/examples/README.md index b0d440c..3ed1383 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -136,3 +136,4 @@ The fields are as follows: | `linkedin_url` | `string` \| `null` | URL of the speaker's LinkedIn profile | | `twitter_url` | `string` \| `null` | URL of the speaker's Twitter profile | | `mastodon_url` | `string` \| `null` | URL of the speaker's Mastodon profile | +| `website_url` | `string` | URL of the speaker's profile on the conference website | diff --git a/data/examples/europython/speakers.json b/data/examples/europython/speakers.json index 885f3d6..178299a 100644 --- a/data/examples/europython/speakers.json +++ b/data/examples/europython/speakers.json @@ -11,6 +11,7 @@ "gitx": "https://github.com/F3DC8A", "linkedin_url": "https://www.linkedin.com/in/F3DC8A", "mastodon_url": null, - "twitter_url": null + "twitter_url": null, + "website_url": "https://ep2024.europython.eu/speaker/a-speaker" } } diff --git a/src/models/europython.py b/src/models/europython.py index 3fbe282..ba218de 100644 --- a/src/models/europython.py +++ b/src/models/europython.py @@ -28,6 +28,12 @@ class EuroPythonSpeaker(BaseModel): linkedin_url: str | None = None gitx: str | None = None + @computed_field + def website_url(self) -> str: + return ( + f"https://ep{Config.event.split('-')[1]}.europython.eu/speaker/{self.slug}" + ) + @model_validator(mode="before") @classmethod def extract_answers(cls, values) -> dict: