diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index f1a9cba..dbdb811 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -21,6 +21,25 @@ jobs: - name: Check code formatting run: black --check . + mypy: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11", "3.12"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install . + pip install mypy types-beautifulsoup4 types-requests + - name: Check code formatting + run: mypy -p language_practice + pylint: runs-on: ubuntu-latest strategy: @@ -34,6 +53,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | + sudo apt-get update + sudo apt-get install -y libcairo-dev libgtk-3-dev cmake gobject-introspection libgirepository1.0-dev python -m pip install --upgrade pip pip install . pip install pylint diff --git a/.gitignore b/.gitignore index c4de858..371a485 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ *egg-info build dist +*.db diff --git a/language-practice b/language-practice index d08ebfe..60f4ebb 100755 --- a/language-practice +++ b/language-practice @@ -9,10 +9,11 @@ Inflection charts are pulled from wiktionary. 
""" import argparse -import asyncio import sys -from language_practice.terminal import Application +from language_practice.gui import GuiApplication +from language_practice.sqlite import SqliteHandle +from language_practice.terminal import TerminalApplication class Once(argparse.Action): @@ -37,27 +38,38 @@ def main(): prog="language-practice", description="Flashcard app" ) parse.add_argument("-t", "--traceback", action="store_true") - parse.add_argument("-r", "--reset", action="store_true") - parse.add_argument("-f", "--file", action=Once, required=True) - parse.add_argument("-d", "--dir", action="store_true") + parse.add_argument("-g", "--gui", action="store_true") + parse.add_argument("-d", "--db", action="store", required=True) args = parse.parse_args() try: - app = Application( - args.file, - args.dir, - args.reset, - ) - asyncio.run(app.startup()) - app.run() + handle = SqliteHandle(args.db) + except Exception as err: # pylint: disable=broad-exception-caught + if args.traceback: + raise err + print(f"{err}") + sys.exit(1) + + try: + all_sets = handle.get_all_sets() + if args.gui: + gui = GuiApplication(handle, all_sets) + gui.run() + else: + tui = TerminalApplication(handle, all_sets) + tui.run() except Exception as err: # pylint: disable=broad-exception-caught if args.traceback: raise err print(f"{err}") + handle.close() sys.exit(1) except KeyboardInterrupt: print("Exiting...") + handle.close() + else: + handle.close() if __name__ == "__main__": diff --git a/language_practice/cache.py b/language_practice/cache.py deleted file mode 100644 index 5fb20ff..0000000 --- a/language_practice/cache.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Handles caching for web scraped data. -""" - -import json -from json.decoder import JSONDecodeError - - -class Cache: - """ - Cache for web scraped data. 
- """ - - def __init__(self, cache_path): - self.cache_path = cache_path - try: - with open(cache_path, "r", encoding="utf-8") as file_handle: - self.cache = json.loads(file_handle.read()) - except IOError: - self.cache = {} - except JSONDecodeError: - self.cache = {} - - def __contains__(self, value): - return value in self.cache - - def __setitem__(self, key, value): - self.cache[key] = value - - def __getitem__(self, key): - return self.cache[key] - - def save(self): - """ - Save cache. - """ - with open(self.cache_path, "w", encoding="utf-8") as file_handle: - file_handle.write(json.dumps(self.cache)) diff --git a/language_practice/config.py b/language_practice/config.py new file mode 100644 index 0000000..c4a3523 --- /dev/null +++ b/language_practice/config.py @@ -0,0 +1,184 @@ +""" +Handles TOML parsing from the configuration file. +""" + +from datetime import date +from tomllib import load +from typing import Any, Self + +from language_practice.repetition import WordRepetition + + +# pylint: disable=too-many-instance-attributes +class Entry: + """ + A single entry in the TOML file. + """ + + # pylint: disable=too-many-arguments + def __init__( + self, + word: str, + definition: str, + gender: str | None, + aspect: str | None, + usage: str | None, + part_of_speech: str | None, + charts: list[list[list[str]]] | None, + repetition: WordRepetition, + ): + self.word = word + self.definition = definition + self.gender = gender + self.aspect = aspect + self.usage = usage + self.part_of_speech = part_of_speech + self.charts = charts + self.repetition = repetition + + def get_word(self) -> str: + """ + Get word. + """ + return self.word + + def get_definition(self) -> str: + """ + Get definition. + """ + return self.definition + + def get_gender(self) -> str | None: + """ + Get gender. + """ + return self.gender + + def get_aspect(self) -> str | None: + """ + Get aspect. + """ + return self.aspect + + def get_usage(self) -> str | None: + """ + Get usage. 
+ """ + return self.usage + + def get_part_of_speech(self) -> str | None: + """ + Get part of speech. + """ + return self.part_of_speech + + def get_charts(self) -> list[list[list[str]]] | None: + """ + Get charts. + """ + return self.charts + + def get_repetition(self) -> WordRepetition: + """ + Get repetition data structure. + """ + return self.repetition + + +class Config: + """ + Generic config data structure. + """ + + def __init__(self, lang: str | None, entries: list[Entry]): + self.lang = lang + self.words = entries + + def __iter__(self): + return iter(self.words) + + def __len__(self) -> int: + return len(self.words) + + def get_lang(self) -> str | None: + """ + Get the language associated with this word file, if any. + """ + return self.lang + + def get_words(self) -> list[Entry]: + """ + Get a list of all words in the TOML file. + """ + return self.words + + def extend(self, config: Self): + """ + Extend a TOML config with another TOML config. + """ + if self.lang != config.lang: + raise RuntimeError( + f"Attempted to join a TOML config with lang {self.lang} with \ + one with lang {config.lang}" + ) + + self.words += config.words + return self + + +class GraphicalConfig(Config): + """ + All entries in the graphical config. + """ + + def __init__(self, lang: str | None, dcts: list[dict[str, Any]]): + try: + words = [ + Entry( + dct["word"], + dct["definition"], + dct.get("gender", None), + dct.get("aspect", None), + dct.get("usage", None), + dct.get("part_of_speech", None), + dct.get("charts", None), + WordRepetition(2.5, 0, 0, date.today(), False), + ) + for dct in dcts + ] + super().__init__(lang, words) + except KeyError as err: + raise RuntimeError(f"Key {err} not found") from err + + +class TomlConfig(Config): + """ + All entries in the TOML file. 
+ """ + + def __init__(self, file_path: str): + try: + with open(file_path, "rb") as file_handle: + toml = load(file_handle) + lang = toml.get("lang", None) + if lang is not None and lang not in ["fr", "uk", "ru"]: + raise RuntimeError( + f"Language {lang} is not supported; if you would like it to " + "be, please open a feature request!" + ) + words = [ + Entry( + dct["word"], + dct["definition"], + dct.get("gender", None), + dct.get("aspect", None), + dct.get("usage", None), + dct.get("part_of_speech", None), + dct.get("charts", None), + WordRepetition(2.5, 0, 0, date.today(), False), + ) + for dct in toml["words"] + ] + super().__init__(lang, words) + except KeyError as err: + raise RuntimeError(f"Key {err} not found") from err diff --git a/language_practice/flashcard.py b/language_practice/flashcard.py new file mode 100644 index 0000000..6aabe27 --- /dev/null +++ b/language_practice/flashcard.py @@ -0,0 +1,78 @@ +""" +Flashcard handling code. +""" + +from collections import deque +from datetime import date +from random import shuffle + +from language_practice.config import Entry +from language_practice.sqlite import SqliteHandle + + +class Flashcard: + """ + Handler for studying flashcards. + """ + + def __init__(self, handle: SqliteHandle, words: list[Entry]): + self.handle = handle + + scheduled: list[Entry] = [] + review: list[Entry] = [] + for entry in words: + repetition = entry.get_repetition() + if repetition.get_review(): + review.append(entry) + if repetition.get_date_of_next() <= date.today(): + scheduled.append(entry) + + shuffle(scheduled) + self.scheduled = deque(scheduled) + shuffle(review) + self.review = deque(review) + self.complete: list[Entry] = [] + + def current(self) -> tuple[Entry | None, bool | None]: + """ + Get current flashcard. 
class Flashcard:
    """
    Handler for studying flashcards.

    Entries whose next repetition date has arrived are queued in
    ``scheduled``; entries flagged for review are queued in ``review``.
    Scheduled cards are always presented before review cards, and cards that
    need no further work end up in ``complete``.
    """

    def __init__(self, handle: SqliteHandle, words: list[Entry]):
        self.handle = handle

        today = date.today()
        scheduled: list[Entry] = []
        review: list[Entry] = []
        for entry in words:
            repetition = entry.get_repetition()
            # An entry can be both due today and flagged for review, in which
            # case it sits in both queues.
            if repetition.get_review():
                review.append(entry)
            if repetition.get_date_of_next() <= today:
                scheduled.append(entry)

        shuffle(scheduled)
        self.scheduled = deque(scheduled)
        shuffle(review)
        self.review = deque(review)
        self.complete: list[Entry] = []

    def current(self) -> tuple[Entry | None, bool | None]:
        """
        Get current flashcard.

        Returns ``(entry, is_review)``, or ``(None, None)`` when nothing is
        left to study.
        """
        if self.scheduled:
            return (self.scheduled[0], False)
        if self.review:
            return (self.review[0], True)
        return (None, None)

    def post_grade(self):
        """
        Handle changing to a new flashcard after grading has been completed.

        The card that was just graded is re-queued for review if it is still
        flagged for review, otherwise it is recorded as complete. Does nothing
        when no card is left.
        """
        if self.scheduled:
            graded = self.scheduled.popleft()
        elif self.review:
            graded = self.review.popleft()
        else:
            return

        if not graded.get_repetition().get_review():
            self.complete.append(graded)
        elif not any(queued is graded for queued in self.review):
            # A card that was both due and flagged is already waiting in the
            # review queue; appending it again would show it twice.
            self.review.append(graded)

    def get_all_entries(self) -> list[Entry]:
        """
        Get all flashcard entries, each one exactly once.
        """
        seen: set[int] = set()
        unique: list[Entry] = []
        for entry in list(self.review) + list(self.scheduled) + self.complete:
            if id(entry) not in seen:
                seen.add(id(entry))
                unique.append(entry)
        return unique

    def save(self):
        """
        Save updates to the flashcards.
        """
        for entry in self.get_all_entries():
            self.handle.update_config(entry.get_word(), entry.get_repetition())
class GuiApplication(Adw.Application):
    """
    Graphical application.
    """

    def __init__(self, handle: SqliteHandle, flashcard_sets: list[str], **kwargs):
        super().__init__(**kwargs)

        self.handle = handle
        self.flashcard_sets = flashcard_sets
        self.win: None | MainWindow = None

        self.connect("activate", self.on_activate)

    def on_activate(self, app: Self):
        """
        Handle window setup on activation of application.
        """
        self.win = MainWindow(self.handle, self.flashcard_sets, application=app)
        self.win.set_title("Language Practice")
        self.win.set_default_size(700, 700)
        self.win.present()


class MainWindow(Gtk.ApplicationWindow):
    """
    Main window for GUI application.

    Lists the flashcard sets stored in the database and lets the user import
    new sets, delete sets and start a study session on the selected ones.
    """

    def __init__(
        self, handle: SqliteHandle, flashcard_sets: list[str], *args, **kwargs
    ):
        super().__init__(*args, **kwargs)

        self.handle = handle
        self.imports: list[str] = []
        self.flashcard: Flashcard | None = None

        vbox = Gtk.Box(spacing=6, orientation=Gtk.Orientation.VERTICAL)

        self.flashcard_set_grid = FlashcardSetGrid()
        for flashcard_set in flashcard_sets:
            self._add_flashcard_set_row(flashcard_set)
        scrollable = Gtk.ScrolledWindow()
        scrollable.set_size_request(700, 600)
        scrollable.set_child(self.flashcard_set_grid)

        button_hbox = Gtk.Box(spacing=6)
        import_button = Gtk.Button(label="Import")
        import_button.connect("clicked", self.import_button)
        button_hbox.append(import_button)
        select_all_button = Gtk.Button(label="Select all")
        select_all_button.connect("clicked", self.flashcard_set_grid.select_all)
        button_hbox.append(select_all_button)
        start_button = Gtk.Button(label="Start")
        start_button.connect("clicked", self.handle_start)
        button_hbox.append(start_button)
        button_hbox.set_halign(Gtk.Align.CENTER)

        vbox.append(scrollable)
        vbox.append(button_hbox)

        self.set_child(vbox)

    def _add_flashcard_set_row(self, flashcard_set: str):
        """
        Append a checkbox / name / delete-button row for one flashcard set.
        """
        delete_button = Gtk.Button(label="Delete")
        delete_button.connect("clicked", self.delete_flashcard_set)
        self.flashcard_set_grid.add_row(
            Gtk.CheckButton(), Gtk.Label.new(flashcard_set), delete_button
        )

    # pylint: disable=unused-argument
    def import_button(self, button: Gtk.Button):
        """
        Handle import button action.
        """
        file_dialog = Gtk.FileDialog()
        file_dialog.open_multiple(callback=self.handle_files)

    def delete_flashcard_set(self, button: Gtk.Button):
        """
        Handle deleting flashcard set on button press.
        """
        # The label holding the set name is the widget just before the delete
        # button in its row.
        self.handle.delete_set(button.get_prev_sibling().get_text())
        self.flashcard_set_grid.delete_row(button)

    def handle_files(self, dialog: Gtk.FileDialog, task: Gio.Task):
        """
        Handle importing files once the file dialog has finished.

        Files that cannot be read or parsed are reported and skipped.
        """
        # pylint: disable=import-outside-toplevel
        from gi.repository import GLib  # type: ignore

        try:
            selected = dialog.open_multiple_finish(task)
        except GLib.Error:
            # Raised when the user dismisses the dialog; nothing to import.
            return

        self.imports = [entry.get_path() for entry in selected]
        for current_import in self.imports:
            try:
                toml = TomlConfig(current_import)
            except (tomllib.TOMLDecodeError, RuntimeError, OSError) as err:
                print(f"Skipping {current_import}: {err}")
                continue

            self.handle.import_set(
                current_import,
                toml,
                asyncio.run(scrape(toml.get_words(), toml.get_lang())),
            )
            self._add_flashcard_set_row(current_import)
        self.imports = []

    # pylint: disable=unused-argument
    def handle_start(self, button: Gtk.Button):
        """
        Handle starting flashcard study
        """
        config = None
        for file in self.flashcard_set_grid.get_selected():
            loaded = self.handle.load_config(file)
            config = loaded if config is None else config.extend(loaded)

        if config is not None:
            self.flashcard = Flashcard(self.handle, config.get_words())
            win = StudyWindow(self.flashcard)
            win.connect("destroy", self.handle_study_exit)
            win.set_default_size(700, 300)
            win.present()

    # pylint: disable=unused-argument
    def handle_study_exit(self, window=None):
        """
        Handle exit of study window.

        ``window`` is the widget emitting the ``destroy`` signal; it is
        accepted because signal handlers receive the emitter as an argument.
        """
        if self.flashcard is not None:
            self.flashcard.save()
            self.flashcard = None
class FlashcardSetGrid(Gtk.Grid):
    """
    Grid used for flashcard sets.

    Each row holds a checkbox (column 0), the set name (column 1) and a
    delete button (column 2). ``num_rows`` tracks how many rows are attached.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.set_column_spacing(10)
        self.num_rows = 0

    def add_row(
        self, checkbox: Gtk.CheckButton, label: Gtk.Label, delete_button: Gtk.Button
    ):
        """
        Add a row to the grid.
        """
        self.attach(checkbox, 0, self.num_rows, 1, 1)
        self.attach(label, 1, self.num_rows, 1, 1)
        self.attach(delete_button, 2, self.num_rows, 1, 1)
        self.num_rows += 1

    def delete_row(self, contains_child: Gtk.Button):
        """
        Delete the row containing the given widget from the grid.
        """
        info = self.query_child(contains_child)
        self.remove_row(info.row)
        # Rows below the removed one move up, so the row count must shrink
        # with it; otherwise the last index would point at an empty row.
        self.num_rows -= 1

    # pylint: disable=unused-argument
    def select_all(self, button: Gtk.Button):
        """
        Mark all checkboxes as selected.
        """
        for row in range(self.num_rows):
            self.get_child_at(0, row).set_active(True)

    def get_selected(self) -> list[str]:
        """
        Get the names of all selected flashcard sets.
        """
        return [
            self.get_child_at(1, row).get_text()
            for row in range(self.num_rows)
            if self.get_child_at(0, row).get_active()
        ]


class StudyWindow(Gtk.ApplicationWindow):
    """
    Window for studying flashcards.

    Shows the current card in a scrollable area above a row of grading
    buttons and a row of buttons switching between the card's faces.
    """

    def __init__(self, flashcard: Flashcard, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.flashcard = flashcard

        (self.peek, self.is_review) = self.flashcard.current()
        if self.peek is not None:
            button_hbox_1 = self.grade_button_box()
            button_hbox_2 = self.navigation_button_box()

            self.display_box = Gtk.ScrolledWindow()
            self.display_box.set_halign(Gtk.Align.CENTER)
            self.display_box.set_size_request(700, 200)
            self.initial_display()

            vbox = Gtk.Box(spacing=6, orientation=Gtk.Orientation.VERTICAL)
            vbox.append(self.display_box)
            vbox.append(button_hbox_1)
            vbox.append(button_hbox_2)

            self.set_child(vbox)

    def grade_button_box(self) -> Gtk.Box:
        """
        Set up button box for grading.

        The position of each label is the grade (0-5) its button submits.
        """
        labels = (
            "No recall",
            "Wrong, familiar",
            "Wrong, easy to remember",
            "Correct, hard",
            "Correct, medium",
            "Correct, easy",
        )
        button_hbox_1 = Gtk.Box(spacing=6)
        for value, label in enumerate(labels):
            button = Gtk.Button(label=label)
            # Bind ``value`` as a default so each button keeps its own grade.
            button.connect("clicked", lambda _button, value=value: self.grade(value))
            button_hbox_1.append(button)
        button_hbox_1.set_halign(Gtk.Align.CENTER)

        return button_hbox_1

    def navigation_button_box(self) -> Gtk.Box:
        """
        Set up button box for navigation.
        """
        actions = (
            ("Flashcard front", self.initial_display),
            ("Flashcard back", self.on_flashcard_back),
            ("Usage", self.on_usage),
            ("Charts", self.on_charts),
        )
        button_hbox_2 = Gtk.Box(spacing=6)
        for label, handler in actions:
            button = Gtk.Button(label=label)
            button.connect("clicked", lambda _button, handler=handler: handler())
            button_hbox_2.append(button)
        button_hbox_2.set_halign(Gtk.Align.CENTER)

        return button_hbox_2

    def next(self):
        """
        Select next flashcard.
        """
        self.flashcard.post_grade()
        (self.peek, self.is_review) = self.flashcard.current()

    def grade(self, grade: int):
        """
        Grade the current flashcard and move on to the next one.
        """
        if self.peek is not None:
            repetition = self.peek.get_repetition()
            if self.is_review:
                repetition.review(grade)
            else:
                repetition.grade(grade)
            self.next()
            self.initial_display()

    def initial_display(self):
        """
        Initial display for a flashcard: aspect, part of speech, definition.
        """
        if self.peek is not None:
            part_of_speech = self.peek.get_part_of_speech()
            aspect = self.peek.get_aspect()
            box = Gtk.Box(spacing=10)
            # Each prepend goes in front of what is already in the box.
            box.prepend(Gtk.Label.new(self.peek.get_definition()))
            if part_of_speech is not None:
                box.prepend(Gtk.Label.new(part_of_speech))
            if aspect is not None:
                box.prepend(Gtk.Label.new(aspect))
            self.display_box.set_child(box)

    def on_flashcard_back(self):
        """
        Handle flashcard back button press: show the word and its gender.
        """
        if self.peek is not None:
            gender = self.peek.get_gender()
            box = Gtk.Box(spacing=10)
            box.prepend(Gtk.Label.new(self.peek.get_word()))
            if gender is not None:
                box.prepend(Gtk.Label.new(gender))
            self.display_box.set_child(box)

    def on_usage(self):
        """
        Handle usage button press.
        """
        if self.peek is not None:
            usage = self.peek.get_usage()
            if usage is not None:
                self.display_box.set_child(Gtk.Label.new(usage))

    def on_charts(self):
        """
        Handle charts button press: show every chart as a grid of labels.
        """
        if self.peek is not None:
            vbox = Gtk.Box(spacing=6, orientation=Gtk.Orientation.VERTICAL)
            charts = self.peek.get_charts()
            if charts is not None:
                for chart in charts:
                    grid = Gtk.Grid()
                    grid.set_column_spacing(10)
                    grid.set_row_spacing(10)
                    for i, row in enumerate(chart):
                        for j, col_val in enumerate(row):
                            grid.attach(Gtk.Label.new(col_val), j, i, 1, 1)

                    vbox.append(grid)

            self.display_box.set_child(vbox)
class WordRepetition:
    """
    Information on a single word's repetition frequency.

    Holds the easiness factor, the current streak of correct answers, the
    current interval in days, the date of the next repetition and whether the
    word still has to be reviewed.
    """

    DEFAULT_EASYNESS_FACTOR = 2.5

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        easiness_factor: float,
        num_correct: int,
        in_n_days: int,
        date_of_next: date,
        should_review: bool,
    ):
        self.easiness_factor = easiness_factor
        self.num_correct = num_correct
        self.in_n_days = in_n_days
        self.date_of_next = date_of_next
        self.should_review = should_review

    def grade(self, grade: int):
        """
        Grade workflow.

        A grade of 3 or more counts as correct and lengthens the interval
        (1 day, then 6 days, then the previous interval times the easiness
        factor, rounded up). A lower grade resets the streak and the interval.
        Any grade below 4 flags the word for review. The easiness factor is
        adjusted last and never drops below 1.3.
        """
        if grade >= 3:
            if self.num_correct == 0:
                interval = 1
            elif self.num_correct == 1:
                interval = 6
            else:
                interval = math.ceil(self.in_n_days * self.easiness_factor)
            self.num_correct += 1
        else:
            interval = 1
            self.num_correct = 0

        self.in_n_days = interval
        self.date_of_next = date.today() + timedelta(days=interval)

        if grade < 4:
            self.should_review = True

        shortfall = 5 - grade
        adjusted = self.easiness_factor + (0.1 - shortfall * (0.08 + shortfall * 0.02))
        self.easiness_factor = max(1.3, adjusted)

    def review(self, grade: int):
        """
        Review workflow: a grade of 4 or more clears the review flag.
        """
        if grade >= 4:
            self.should_review = False

    def get_easiness_factor(self) -> float:
        """
        Get easiness factor.
        """
        return self.easiness_factor

    def get_num_correct(self) -> int:
        """
        Get number correct.
        """
        return self.num_correct

    def get_in_n_days(self) -> int:
        """
        Get the number of days in which a card should repeat.
        """
        return self.in_n_days

    def get_date_of_next(self) -> date:
        """
        Get the date on which a card should repeat.
        """
        return self.date_of_next

    def get_review(self) -> bool:
        """
        Get whether card should be reviewed.
        """
        return self.should_review
class SqliteHandle:
    """
    Handler for sqlite operations.

    Values are bound through ``?`` placeholders and table / column names are
    double-quoted, so words, definitions and file names may contain quote
    characters. Each chart of a word is stored in its own table named
    ``<word>-<index>``.
    """

    FLASHCARDS_SCHEMA = "file_name TEXT PRIMARY KEY, lang TEXT"
    WORD_SCHEMA = (
        "word TEXT PRIMARY KEY, definition TEXT, gender TEXT, aspect TEXT, "
        "usage TEXT, part_of_speech TEXT, easiness_factor REAL, num_correct INTEGER, "
        "in_n_days INTEGER, date_of_next TEXT, review NUMERIC, file_name TEXT"
    )

    def __init__(self, db: str):
        self.conn = sqlite3.connect(db)
        self.cursor = self.conn.cursor()

        self.create_table_idempotent("flashcard_sets", SqliteHandle.FLASHCARDS_SCHEMA)
        # Created up front so deleting or loading a set never hits a missing
        # table on a database that has not seen an import yet.
        self.create_table_idempotent("words", SqliteHandle.WORD_SCHEMA)

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """
        Quote a table or column name for direct inclusion in a statement.
        """
        return '"' + name.replace('"', '""') + '"'

    def _table_names(self) -> list[str]:
        """
        List the names of all tables in the database.
        """
        res = self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        return [row[0] for row in res.fetchall()]

    @staticmethod
    def _chart_tables(word: str, table_names: list[str]) -> list[str]:
        """
        Pick the chart tables of exactly ``word``, ordered by chart index.

        Only names of the form ``<word>-<digits>`` match, so a word that is a
        prefix of another word (or of ``words``) never claims foreign tables.
        """
        prefix = f"{word}-"
        indexed = []
        for name in table_names:
            suffix = name[len(prefix):]
            if name.startswith(prefix) and suffix.isascii() and suffix.isdigit():
                indexed.append((int(suffix), name))
        return [name for _, name in sorted(indexed)]

    def create_table_idempotent(self, name: str, schema: str):
        """
        Create a table only if it doesn't exist
        """
        self.cursor.execute(
            f"CREATE TABLE IF NOT EXISTS {self._quote_identifier(name)} ({schema});"
        )

    def recreate_table(self, name: str, schema: str):
        """
        Recreate a table even if it exists
        """
        self.drop_table(name)
        self.cursor.execute(f"CREATE TABLE {self._quote_identifier(name)} ({schema});")

    def delete(self, name: str, search: str, params: tuple = ()):
        """
        Delete the rows of table ``name`` matching the ``search`` condition.

        ``search`` may contain ``?`` placeholders filled from ``params``.
        """
        self.cursor.execute(
            f"DELETE FROM {self._quote_identifier(name)} WHERE {search};", params
        )

    def drop_table(self, name: str):
        """
        Drop table if it exists.
        """
        self.cursor.execute(f"DROP TABLE IF EXISTS {self._quote_identifier(name)};")

    def insert_into(self, name: str, columns: str, values: str, params: tuple = ()):
        """
        Insert one row into table ``name``.

        ``columns`` and ``values`` are comma-separated lists; ``values`` may
        contain ``?`` placeholders filled from ``params``.
        """
        self.cursor.execute(
            f"INSERT INTO {self._quote_identifier(name)} ({columns}) VALUES({values});",
            params,
        )

    def _store_chart(self, table: str, chart: list[list[str]]):
        """
        Replace table ``table`` with the rows of one chart.

        Columns are named ``a``, ``b``, ... and rows shorter than the widest
        row are padded with NULL.
        """
        width = max(map(len, chart), default=0)
        if width == 0:
            # Nothing to store; only make sure no stale chart is left behind.
            self.drop_table(table)
            return

        columns = [self._quote_identifier(chr(index + 97)) for index in range(width)]
        self.recreate_table(table, ", ".join(f"{column} TEXT" for column in columns))
        placeholders = ", ".join("?" for _ in columns)
        for row in chart:
            padded = list(row) + [None] * (width - len(row))
            self.insert_into(table, ", ".join(columns), placeholders, tuple(padded))

    def import_set(
        self, file_name: str, config: Config, scraped: dict[str, list[list[list[str]]]]
    ):
        """
        Import set into database.

        Registers ``file_name`` as a flashcard set, stores every entry of
        ``config`` in the ``words`` table and stores the entry's charts
        (its own, or else the ones in ``scraped`` under its word).
        """
        self.insert_into(
            "flashcard_sets", "file_name, lang", "?, ?", (file_name, config.get_lang())
        )
        self.create_table_idempotent("words", SqliteHandle.WORD_SCHEMA)

        for entry in config:
            word = entry.get_word()
            charts = entry.get_charts()
            if charts is None:
                charts = scraped.get(word, None)
            else:
                # NOTE(review): kept from the original; this treats charts
                # given in the config as ONE chart although the annotation
                # says a list of charts - confirm against the TOML format.
                charts = [charts]

            for index, chart in enumerate(charts or []):
                self._store_chart(f"{word}-{index}", chart)

            repetition = entry.get_repetition()
            self.insert_into(
                "words",
                "word, definition, gender, aspect, usage, part_of_speech, "
                "easiness_factor, num_correct, in_n_days, date_of_next, review, "
                "file_name",
                ", ".join("?" for _ in range(12)),
                (
                    word,
                    entry.get_definition(),
                    entry.get_gender(),
                    entry.get_aspect(),
                    entry.get_usage(),
                    entry.get_part_of_speech(),
                    repetition.get_easiness_factor(),
                    repetition.get_num_correct(),
                    repetition.get_in_n_days(),
                    str(repetition.get_date_of_next()),
                    1 if repetition.get_review() else 0,
                    file_name,
                ),
            )

    def delete_set(self, file_name: str):
        """
        Delete a set, its words and their chart tables from the database.
        """
        rows = self.cursor.execute(
            "SELECT word FROM words WHERE file_name = ?;", (file_name,)
        ).fetchall()
        table_names = self._table_names()
        for (word,) in rows:
            for table in self._chart_tables(word, table_names):
                self.drop_table(table)
        # Kept from the original: drops a table named after the set if one
        # exists (presumably left over from an earlier layout - TODO confirm).
        self.drop_table(file_name)
        self.delete("words", "file_name = ?", (file_name,))
        self.delete("flashcard_sets", "file_name = ?", (file_name,))

    def load_config(self, file_name: str) -> Config:
        """
        Load config from database.

        Only entries that are due today (or earlier) or flagged for review
        are returned. Raises ``RuntimeError`` when the set is unknown.
        """
        row = self.cursor.execute(
            "SELECT lang FROM flashcard_sets WHERE file_name = ?;", (file_name,)
        ).fetchone()
        if row is None:
            raise RuntimeError(f"Flashcard set {file_name} not found")
        lang = row[0]

        entries = self.cursor.execute(
            "SELECT word, definition, gender, aspect, usage, part_of_speech, "
            "easiness_factor, num_correct, in_n_days, date_of_next, review "
            "FROM words WHERE file_name = ?;",
            (file_name,),
        ).fetchall()
        table_names = self._table_names()
        today = date.today()

        loaded_entries = []
        for (
            word,
            definition,
            gender,
            aspect,
            usage,
            part_of_speech,
            easiness_factor,
            num_correct,
            in_n_days,
            date_of_next,
            review,
        ) in entries:
            date_of_next = date.fromisoformat(date_of_next)
            review = review != 0
            if today < date_of_next and not review:
                continue

            charts = []
            for name in self._chart_tables(word, table_names):
                res = self.cursor.execute(
                    f"SELECT * FROM {self._quote_identifier(name)};"
                )
                charts.append([list(chart_row) for chart_row in res.fetchall()])

            loaded_entries.append(
                Entry(
                    word,
                    definition,
                    gender,
                    aspect,
                    usage,
                    part_of_speech,
                    charts,
                    WordRepetition(
                        easiness_factor,
                        num_correct,
                        in_n_days,
                        date_of_next,
                        review,
                    ),
                )
            )

        return Config(lang, loaded_entries)

    def update_config(self, word: str, repetition: WordRepetition):
        """
        Store the repetition state of ``word``.
        """
        self.cursor.execute(
            "UPDATE words SET easiness_factor = ?, num_correct = ?, in_n_days = ?, "
            "date_of_next = ?, review = ? WHERE word = ?;",
            (
                repetition.get_easiness_factor(),
                repetition.get_num_correct(),
                repetition.get_in_n_days(),
                str(repetition.get_date_of_next()),
                1 if repetition.get_review() else 0,
                word,
            ),
        )

    def get_all_sets(self) -> list[str]:
        """
        Get the names of all flashcard sets from database.
        """
        res = self.cursor.execute("SELECT file_name FROM flashcard_sets;")
        return [entry[0] for entry in res.fetchall()]

    def close(self):
        """
        Commit pending changes and close the connection to the database.
        """
        self.conn.commit()
        self.cursor.close()
        self.conn.close()
""" - def __init__(self, word_path, is_dir, reset): - if is_dir: - (directory, _, files) = next(os.walk(word_path)) - self.words = reduce( - lambda toml1, toml2: toml1.extend(toml2), - [ - TomlConfig(f"{directory}/{file}") - for file in files - if file.endswith(".toml") - ], - ) - name = "directory" - else: - if not word_path.endswith(".toml"): - raise RuntimeError("Word file needs to be a TOML file") + CSS = """ + .bottom-buttons-one { + height: 3; + } - self.words = TomlConfig(word_path) - (name, _) = os.path.splitext(os.path.basename(word_path)) - directory = os.path.dirname(word_path) + .bottom-buttons-two { + height: 6; + } - repetition_path = f"{directory}/{name}-repetition.json" - cache_path = f"{directory}/{name}-cache.json" + .flashcard-set { + height: 3; + } + """ - if reset: - try: - os.remove(repetition_path) - os.remove(cache_path) - except FileNotFoundError: - pass + def __init__( + self, handle: SqliteHandle, flashcard_sets: list[str], *args, **kwargs + ): + self.handle = handle + self.flashcard_sets = flashcard_sets + self.file_path = None + self.flashcard: Flashcard | None = None + self.imports: dict[str, str] = {} + super().__init__(*args, **kwargs) - self.settings = termios.tcgetattr(sys.stdin.fileno()) + def compose(self): + yield Header() + yield Footer() + args = [] + for flashcard_set in self.flashcard_sets: + hex_string = flashcard_set.encode("utf-8").hex() + args.append( + Horizontal( + Checkbox(id=f"select-{hex_string}"), + Container(), + Label(flashcard_set), + Container(), + Button("Delete", id=f"delete-{hex_string}"), + classes="flashcard-set", + ) + ) + yield ScrollableContainer(*args, id="scrollable") + yield Horizontal( + Container(), + Button("Import", id="import"), + Button("Select all", id="select_all"), + Button("Start", id="start"), + Button("Exit", id="exit"), + Container(), + classes="bottom-buttons-one", + ) - self.cache = Cache(cache_path) - self.lang = self.words.get_lang() - self.repetition = Repetition(repetition_path, 
self.words.get_words()) + def on_start(self): + """ + On start button press. + """ + scrollable = self.query_one("#scrollable") + checkboxes = map(lambda child: child.query_one(Checkbox), scrollable.children) + config = None + for checkbox in checkboxes: + if checkbox.value: + checkbox_id = checkbox.id + if checkbox_id is not None: + name = bytes.fromhex(checkbox_id.split("select-")[1]).decode( + "utf-8" + ) + if config is None: + config = self.handle.load_config(name) + else: + config = config.extend(self.handle.load_config(name)) + if config is not None: + self.flashcard = Flashcard(self.handle, config.get_words()) + self.push_screen(StudyScreen(self.flashcard)) - current_word = self.repetition.peek() - self.current_entry = self.words[current_word] + def on_delete(self, button_id: str, parent: DOMNode | None): + """ + On delete button press. + """ + name = bytes.fromhex(button_id.split("delete-")[1]).decode("utf-8") + self.handle.delete_set(name) + if parent is not None: + parent.remove() # type: ignore - async def startup(self): + def on_exit_study(self): """ - Start up application. + On exit study button press. """ - await scrape(self.words.get_words(), self.cache, self.lang) + if self.flashcard is not None: + self.flashcard.save() + self.flashcard = None + self.pop_screen() - def run(self): + async def on_complete_import(self): """ - Run application. + On complete import button press. 
""" - try: - tty.setraw(sys.stdin.fileno()) + toml = None + for import_file in self.imports: + try: + toml = TomlConfig(import_file) + except tomllib.TOMLDecodeError: + self.pop_screen() - self.definition() + if toml is not None: + self.handle.import_set( + import_file, + toml, + await scrape(toml.get_words(), toml.get_lang()), + ) - cont = True - while cont: - code = sys.stdin.read(1) - cont = self.handle_code(code) + hex_string = import_file.encode("utf-8").hex() + scrollable = self.query_one("#scrollable") + try: + scrollable.query_one(f"#delete-{hex_string}") + except NoMatches: + scrollable.mount( + Horizontal( + Checkbox(id=f"select-{hex_string}"), + Container(), + Label(import_file), + Container(), + Button("Delete", id=f"delete-{hex_string}"), + ) + ) - except: - self.shutdown() - raise + self.pop_screen() - self.shutdown() + self.imports = {} - def shutdown(self): + # pylint: disable=too-many-branches + async def on_button_pressed(self, event: Button.Pressed): """ - Shutdown application. + Handle button presses in application. """ - if hasattr(self, "settings"): - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, self.settings) - if hasattr(self, "repetition"): - self.repetition.save() - if hasattr(self, "cache"): - self.cache.save() + button_id = event.button.id + if button_id is not None: + if button_id == "import": + self.push_screen(ImportPopup()) + elif button_id == "start": + self.on_start() + elif button_id.startswith("delete"): + self.on_delete(button_id, event.button.parent) + elif button_id == "exit": + self.exit() + elif button_id == "exit_study": + self.on_exit_study() + elif button_id == "complete_import": + await self.on_complete_import() + elif button_id == "select_all": + for checkbox in self.query(Checkbox): + checkbox.value = True - def handle_code(self, code): + async def on_directory_tree_file_selected(self, event): """ - Handle user input. + Handle directory tree selection in application. 
""" - if code == "\x03": - raise KeyboardInterrupt + table = self.query_one("#selected") + path = event.path + realpath = os.path.realpath(path) + if realpath in self.imports: + this_uuid = self.imports.pop(realpath) + table.remove_row(this_uuid) + else: + this_uuid = str(uuid4()) + self.imports[realpath] = this_uuid + table.add_row(realpath, key=this_uuid) + + +class ImportPopup(ModalScreen): + """ + Popup for importing TOML files. + """ + + def compose(self): + table = DataTable(id="selected") + table.add_columns("Files") + yield Vertical( + HorizontalScroll( + DirectoryTree(os.environ["HOME"]), + table, + ), + Horizontal( + Container(), + Button("Complete import", id="complete_import"), + Container(), + classes="bottom-buttons-one", + ), + ) - if code == "e": - self.definition() - elif code == "d": - self.show_word() - elif code == "c": - self.chart() - elif code == "u": - self.usage() - elif code == "r": - self.refresh_cache() - elif code == "n": - self.repetition.incorrect() - current_word = self.repetition.peek() - self.current_entry = self.words[current_word] - self.definition() - elif code == "y": - self.repetition.correct() - current_word = self.repetition.peek() - self.current_entry = self.words[current_word] - self.definition() - return True +class StudyScreen(ModalScreen): + """ + Study screen where flashcards can be reviewed. 
+ """ - def definition(self): + def __init__(self, flashcard, *args, **kwargs): + super().__init__(*args, **kwargs) + self.flashcard = flashcard + self.peek = None + self.is_review = None + + def compose(self): + (self.peek, self.is_review) = self.flashcard.current() + if self.peek is not None: + part_of_speech = self.peek.get_part_of_speech() + aspect = self.peek.get_aspect() + definition = self.peek.get_definition() + if part_of_speech is not None: + definition = r"\[" + f"{part_of_speech}] {definition}" + if aspect is not None: + definition = r"\[" + f"{aspect}] {definition}" + yield Horizontal( + Container(), + Label(definition, id="display"), + Container(id="post_display"), + ) + yield Vertical( + Horizontal( + Container(), + Button("No recall", id="zero"), + Button("Wrong, familiar", id="one"), + Button("Wrong, easy to remember", id="two"), + Button("Correct, hard", id="three"), + Button("Correct, medium", id="four"), + Button("Correct, easy", id="five"), + Container(), + id="grade", + ), + Horizontal( + Container(), + Button("Flashcard front", id="definition"), + Button("Flashcard back", id="word"), + Button("Usage", id="usage"), + Button("Charts", id="charts"), + Button("Exit", id="exit_study"), + Container(), + ), + classes="bottom-buttons-two", + ) + else: + yield Label("Nothing to study") + yield Button("Exit", id="exit_study") + + def initial_display(self): + """ + Initial display for a flashcard. + """ + part_of_speech = self.peek.get_part_of_speech() + aspect = self.peek.get_aspect() + definition = self.peek.get_definition() + if part_of_speech is not None: + definition = r"\[" + f"{part_of_speech}] {definition}" + if aspect is not None: + definition = r"\[" + f"{aspect}] {definition}" + self.mount(Label(definition, id="display"), before="#post_display") + + def next(self): + """ + Select next flashcard. 
+ """ + self.flashcard.post_grade() + (self.peek, self.is_review) = self.flashcard.current() + + async def at_end(self): """ - Display the definition for the current word. + Handle condition at end of flashcard set. """ - definition = self.current_entry.show_definition() - if definition is None: - return - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, self.settings) - print("\033c", end="") - print(definition) - tty.setraw(sys.stdin) + self.mount(Label("All done!", id="display"), before="#post_display") + await self.query_one("#grade").remove() + await self.query_one("#definition").remove() + await self.query_one("#word").remove() + await self.query_one("#usage").remove() + await self.query_one("#charts").remove() - def usage(self): + async def grade(self, grade): """ - Display the usage for the current word. + Grade a flashcard. """ - usage = self.current_entry.show_usage() - if usage is None: - return - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, self.settings) - print("\033c", end="") - print(usage) - tty.setraw(sys.stdin) + if self.is_review: + self.peek.get_repetition().review(grade) + else: + self.peek.get_repetition().grade(grade) + self.next() + await self.query_one("#display").remove() + if self.peek is None: + await self.at_end() + else: + self.initial_display() - def chart(self): + async def on_word(self): """ - Display the chart for the current word. + Handle flashcard back button. 
""" - charts = self.current_entry.get_charts() - if charts is None: - cache = self.cache[self.current_entry.get_word()] - if "charts" not in cache: - return - charts = cache["charts"] + word = self.peek.get_word() + gender = self.peek.get_gender() + if gender is None: + string = f"{word}" else: - charts = [charts] + string = r"\[" + f"{gender}] {word}" + await self.query_one("#display").remove() + self.mount(Label(string, id="display"), before="#post_display") - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, self.settings) - print("\033c", end="") - for chart in charts: - print(tabulate(chart, tablefmt="pretty"), end="\n\n") - tty.setraw(sys.stdin) + async def on_usage(self): + """ + Handle usage button. + """ + if self.peek.get_usage() is not None: + await self.query_one("#display").remove() + self.mount( + Label(self.peek.get_usage(), id="display"), before="#post_display" + ) - def refresh_cache(self): + async def on_charts(self): """ - Refresh the cache for the current word. + Handle charts button. """ - self.cache[self.current_entry.show_word()] = refresh( - self.current_entry.get_word(), self.lang - ) + await self.query_one("#display").remove() + tables = [] + for chart in self.peek.get_charts(): + table = DataTable(show_header=False) + max_cols = max(map(len, chart)) + cols = [chr(i + 97) for i in range(0, max_cols)] + table.add_columns(*cols) + for row in chart: + table.add_row(*row) + tables.append(table) + + vert = Vertical(*tables, id="display") + self.mount(vert, before="#post_display") - def show_word(self): - """ - Show the current word. 
- """ - word = self.current_entry.show_word() - if word is None: - return - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, self.settings) - print("\033c", end="") - print(word) - cache = self.cache[self.current_entry.get_word()] - if "comparative" in cache: - comparative = ", ".join(cache["comparative"]) - print(f"\n\n{comparative}", end="") - if "superlative" in cache: - superlative = ", ".join(cache["superlative"]) - print(f"\n\n{superlative}", end="") - if "adjective_forms" in cache: - adj_forms = ", ".join(cache["adjective_forms"]) - print(f"\n\n{adj_forms}", end="") - tty.setraw(sys.stdin) + # pylint: disable=too-many-branches + # pylint: disable=too-many-statements + async def on_button_pressed(self, event): + """ + Handle button presses in study screen. + """ + if event.button.id == "definition": + await self.query_one("#display").remove() + self.initial_display() + elif event.button.id == "word": + await self.on_word() + elif event.button.id == "usage": + await self.on_usage() + elif event.button.id == "charts": + await self.on_charts() + elif event.button.id == "zero": + await self.grade(0) + elif event.button.id == "one": + await self.grade(1) + elif event.button.id == "two": + await self.grade(2) + elif event.button.id == "three": + await self.grade(3) + elif event.button.id == "four": + await self.grade(4) + elif event.button.id == "five": + await self.grade(5) diff --git a/language_practice/toml.py b/language_practice/toml.py deleted file mode 100644 index 701084b..0000000 --- a/language_practice/toml.py +++ /dev/null @@ -1,121 +0,0 @@ -""" -Handles TOML parsing from the configuration file. -""" - -from tomllib import load - - -class TomlEntry: - """ - A single entry in the TOML file. 
- """ - - def __init__(self, dct): - try: - self.word = dct["word"] - self.definition = dct["definition"] - self.gender = dct.get("gender") - self.aspect = dct.get("aspect") - self.usage = dct.get("usage") - self.part_of_speech = dct.get("part_of_speech") - self.charts = dct.get("charts") - except KeyError as err: - error = f"Key {err} not found" - if hasattr(self, "word"): - error += f" for entry {self.word}" - raise RuntimeError(error) from err - - def show_definition(self): - """ - Show the definition as a user readable string. - """ - ret = self.definition - if self.aspect is not None: - ret = f"[{self.aspect}] " + ret - if self.part_of_speech is not None: - ret = f"[{self.part_of_speech}] " + ret - - return ret - - def show_usage(self): - """ - Show the usage as a user readable string. - """ - return self.usage - - def get_word(self): - """ - Return the word to be used programmatically. - """ - return self.word - - def show_word(self): - """ - Show the word as a user readable string. - """ - ret = self.word - if self.gender is not None: - ret = f"[{self.gender}] " + ret - - return ret - - def get_charts(self): - """ - Get the inflection charts as a list of lists. - """ - return self.charts - - -class TomlConfig: - """ - All entries in the TOML file. - """ - - def __init__(self, file_path): - try: - with open(file_path, "rb") as file_handle: - toml = load(file_handle) - lang = toml["lang"] - if lang is not None and lang not in ["fr", "uk", "ru"]: - raise RuntimeError( - f"Language {lang} is not supported; if you would like it to " - "be, please open a feature request!" 
- ) - self.lang = lang - self.words = {dct["word"]: TomlEntry(dct) for dct in toml["words"]} - except KeyError as err: - raise RuntimeError(f"Key {err} not found") from err - - def __iter__(self): - return iter(self.words) - - def __len__(self): - return len(self.words) - - def __getitem__(self, item): - return self.words[item] - - def get_lang(self): - """ - Get the language associated with this word file, if any. - """ - return self.lang - - def get_words(self): - """ - Get a list of all words in the TOML file. - """ - return list(self.words.keys()) - - def extend(self, toml): - """ - Extend a TOML config with another TOML config. - """ - if self.lang != toml.lang: - raise RuntimeError( - f"Attempted to join a TOML config with lang {self.lang} with \ - one with lang {toml.lang}" - ) - - self.words.update(toml.words) - return self diff --git a/language_practice/web/__init__.py b/language_practice/web/__init__.py index 19fee09..88bf869 100644 --- a/language_practice/web/__init__.py +++ b/language_practice/web/__init__.py @@ -3,27 +3,29 @@ """ import asyncio +from typing import Any import aiohttp from bs4 import BeautifulSoup from requests import get +from language_practice.config import Entry from language_practice.web import fr, ru, uk URL = "https://en.wiktionary.org/wiki/" -def refresh(word, lang): +def refresh(word: str, lang: str | None) -> list[list[list[str]]]: """ Refresh individual cache entry. """ if lang is None: - return {} + return [] try: response = get(URL + word.replace("\u0301", ""), timeout=5) if response.status_code == 404: - return {} + return [] html = BeautifulSoup(response.text, "html.parser") if lang == "fr": @@ -39,17 +41,19 @@ def refresh(word, lang): raise RuntimeError(f"Error fetching word {word}") from err -async def fetch(session, word, lang): +async def fetch( + session: aiohttp.ClientSession, word: str, lang: str | None +) -> tuple[str, list[list[list[str]]]]: """ Fetch individual word asynchronously. 
""" if lang is None: - return (word, {}) + return (word, []) try: async with session.get(URL + word.replace("\u0301", "")) as response: if response.status == 404: - return (word, {}) + return (word, []) text = await response.text() html = BeautifulSoup(text, "html.parser") @@ -66,14 +70,18 @@ async def fetch(session, word, lang): raise RuntimeError(f"Error fetching word {word}") from err -async def scrape(words, cache, lang): +async def scrape( + words: list[Entry], lang: str | None +) -> dict[str, list[list[list[str]]]]: """ Fetch all words asynchronously. """ async with aiohttp.ClientSession() as session: - words_not_in_cache = [word for word in words if word not in cache] ret = await asyncio.gather( - *[fetch(session, word, lang) for word in words_not_in_cache] + *[fetch(session, word.get_word(), lang) for word in words] ) + scraped_info = {} for word, info in ret: - cache[word] = info + scraped_info[word] = info + + return scraped_info diff --git a/language_practice/web/fr.py b/language_practice/web/fr.py index 07eb356..b55fe2c 100644 --- a/language_practice/web/fr.py +++ b/language_practice/web/fr.py @@ -2,13 +2,13 @@ Parsing for French language grammar charts. """ +from bs4 import BeautifulSoup -def parse(html): + +def parse(html: BeautifulSoup) -> list[list[list[str]]]: """ Parse HTML returned from web request for a French word. 
""" - cache = {} - all_tables = html.find_all("table", {"class": "inflection-table"}) tables = [table for table in all_tables if table.select(".lang-fr") != []] @@ -26,11 +26,9 @@ def parse(html): for tr in table.find_all("tr") ] charts.append(chart) - if charts: - cache["charts"] = charts adj_forms = html.select(".form-of.lang-fr") if adj_forms: - cache["adjective_forms"] = [adj.text for adj in adj_forms] + charts.append([[adj.text for adj in adj_forms]]) - return cache + return charts diff --git a/language_practice/web/ru.py b/language_practice/web/ru.py index ac3561c..87019f3 100644 --- a/language_practice/web/ru.py +++ b/language_practice/web/ru.py @@ -2,26 +2,31 @@ Parsing for Russian language grammar charts. """ +from bs4 import BeautifulSoup + from language_practice.web.shared import uk_ru_tables -def parse(html): +def parse(html: BeautifulSoup) -> list[list[list[str]]]: """ Parse HTML returned from web request for a Russian word. """ - cache = {} - all_tables = html.find_all("table", {"class": "inflection-table"}) tables = [table for table in all_tables if table.select(".lang-ru") != []] - uk_ru_tables(cache, tables, "ru-Latn") + charts = uk_ru_tables(tables, "ru-Latn") + + rows = [] comparative = html.find_all("b", {"class": "comparative-form-of"}) if comparative: - cache["comparative"] = [comp.text for comp in comparative] + rows.append([comp.text for comp in comparative]) superlative = html.find_all("b", {"class": "superlative-form-of"}) if superlative: - cache["superlative"] = [sup.text for sup in superlative] + rows.append([sup.text for sup in superlative]) + + if rows: + charts.append(rows) - return cache + return charts diff --git a/language_practice/web/shared.py b/language_practice/web/shared.py index 9213022..481e07c 100644 --- a/language_practice/web/shared.py +++ b/language_practice/web/shared.py @@ -2,8 +2,10 @@ Shared code across parsers """ +from typing import Any -def uk_ru_tables(cache, tables, remove): + +def uk_ru_tables(tables: 
list[Any], remove: str) -> list[list[list[str]]]: """ Shared code between Russian and Ukrainian table parsing. """ @@ -16,11 +18,7 @@ def uk_ru_tables(cache, tables, remove): for tr in table.find_all("tr") if [entry.text.strip() for entry in tr.find_all("td")] != [] ] - max_len = max(len(line) for line in chart) - for line in chart: - len_of_line = len(line) - for i in range(max_len - len_of_line): - line.insert(1 + i, "") + charts.append(chart) - if charts: - cache["charts"] = charts + + return charts diff --git a/language_practice/web/uk.py b/language_practice/web/uk.py index efd5169..5d3eb2b 100644 --- a/language_practice/web/uk.py +++ b/language_practice/web/uk.py @@ -2,18 +2,18 @@ Parsing for Ukrainian language grammar charts. """ +from bs4 import BeautifulSoup + from language_practice.web.shared import uk_ru_tables -def parse(html): +def parse(html: BeautifulSoup) -> list[list[list[str]]]: """ Parse HTML returned from web request for a Russian word. """ - cache = {} - all_tables = html.find_all("table", {"class": "inflection-table"}) tables = [table for table in all_tables if table.select(".lang-uk") != []] - uk_ru_tables(cache, tables, "uk-Latn") + charts = uk_ru_tables(tables, "uk-Latn") - return cache + return charts diff --git a/setup.cfg b/setup.cfg index 7bfef48..a6337e0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,14 +11,15 @@ version = 0.2.1 [options] install_requires = - aiohttp - beautifulsoup4 - requests - tabulate + aiohttp + beautifulsoup4 + pygobject + requests + textual packages = - language_practice - language_practice.web + language_practice + language_practice.web scripts = - language-practice + language-practice