From 50aeba7f6000559a59e73de13d42f12150b506bb Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 2 Oct 2024 15:53:12 +0200 Subject: [PATCH 01/17] remove updated_highlighter setting for main_content_mapping - it is now standard --- backend/addcorpus/es_mappings.py | 14 +++++--------- backend/corpora/parliament/utils/field_defaults.py | 3 +-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/backend/addcorpus/es_mappings.py b/backend/addcorpus/es_mappings.py index 921870a65..f76021ef1 100644 --- a/backend/addcorpus/es_mappings.py +++ b/backend/addcorpus/es_mappings.py @@ -4,7 +4,10 @@ def primary_mapping_type(es_mapping: Dict) -> str: return es_mapping.get('type', None) -def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None, updated_highlighting=True): + +def main_content_mapping( + token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None +): ''' Mapping for the main content field. Options: @@ -14,14 +17,7 @@ def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_an - `updated_highlighting`: enables the new highlighter, which only works for fields that are indexed with the term vector set to 'with_positions_offsets'. 
''' - mapping = { - 'type': 'text' - } - - if updated_highlighting: - mapping.update({ - 'term_vector': 'with_positions_offsets' # include char positions on _source (in addition to the multifields) for highlighting - }) + mapping = {"type": "text", "term_vector": "with_positions_offsets"} if any([token_counts, stopword_analysis, stemming_analysis]): multifields = {} diff --git a/backend/corpora/parliament/utils/field_defaults.py b/backend/corpora/parliament/utils/field_defaults.py index 35dc4c651..8861db5f7 100644 --- a/backend/corpora/parliament/utils/field_defaults.py +++ b/backend/corpora/parliament/utils/field_defaults.py @@ -1,4 +1,4 @@ -from datetime import datetime + from datetime import datetime from addcorpus.python_corpora.corpus import FieldDefinition from addcorpus.python_corpora.filters import DateFilter, MultipleChoiceFilter @@ -289,7 +289,6 @@ def speech(language=None): stopword_analysis=has_language, stemming_analysis=has_language, language=language, - updated_highlighting=True ), results_overview=True, search_field_core=True, From 1773e88d3269d956c9ff2d5ab704729beba45460 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Oct 2024 21:31:22 +0000 Subject: [PATCH 02/17] Bump django from 4.2.15 to 4.2.16 in /backend Bumps [django](https://github.com/django/django) from 4.2.15 to 4.2.16. - [Commits](https://github.com/django/django/compare/4.2.15...4.2.16) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- backend/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index d2743de8b..6452878f8 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -73,9 +73,9 @@ defusedxml==0.7.1 # djangosaml2 # pysaml2 # python3-openid -dj-rest-auth[with_social]==4.0.1 +dj-rest-auth[with-social,with_social]==4.0.1 # via -r requirements.in -django==4.2.15 +django==4.2.16 # via # -r requirements.in # dj-rest-auth From ad19358fd5047a4a02700d84151974b113eabf5b Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 9 Oct 2024 12:40:49 +0200 Subject: [PATCH 03/17] fix: add language specifiers for parliament-finland and parliament-ireland --- backend/corpora/parliament/finland.py | 2 +- backend/corpora/parliament/ireland.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/corpora/parliament/finland.py b/backend/corpora/parliament/finland.py index 8be053707..0d99aed0d 100644 --- a/backend/corpora/parliament/finland.py +++ b/backend/corpora/parliament/finland.py @@ -108,7 +108,7 @@ def sources(self, start, end): speaker_birth_year = field_defaults.speaker_birth_year() speaker_birth_year.extractor = person_attribute_extractor('birth_year') - speech = field_defaults.speech() + speech = field_defaults.speech(language="fi") speech.extractor = XML(transform = clean_value) speech_id = field_defaults.speech_id() diff --git a/backend/corpora/parliament/ireland.py b/backend/corpora/parliament/ireland.py index 3c06238a4..d7671abcd 100644 --- a/backend/corpora/parliament/ireland.py +++ b/backend/corpora/parliament/ireland.py @@ -127,7 +127,7 @@ def sources(self, start, end): speaker_constituency = field_defaults.speaker_constituency() speaker_constituency.extractor = CSV('const_name') - speech = field_defaults.speech() + speech = field_defaults.speech(language="en") speech.extractor = CSV( 'speech', multiple=True, @@ -149,7 +149,6 @@ def 
sources(self, start, end): source_archive = field_defaults.source_archive() source_archive.extractor = Constant('1919-2013') - fields = [ date, country, From 946e660cb07c65b6b3588f364d9bee0f9180e327 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 9 Oct 2024 12:43:11 +0200 Subject: [PATCH 04/17] fix indent --- backend/corpora/parliament/utils/field_defaults.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/corpora/parliament/utils/field_defaults.py b/backend/corpora/parliament/utils/field_defaults.py index 8861db5f7..9ee54d2ef 100644 --- a/backend/corpora/parliament/utils/field_defaults.py +++ b/backend/corpora/parliament/utils/field_defaults.py @@ -1,4 +1,4 @@ - from datetime import datetime +from datetime import datetime from addcorpus.python_corpora.corpus import FieldDefinition from addcorpus.python_corpora.filters import DateFilter, MultipleChoiceFilter From 8bc5f3af9b6b41ccc8cfc337e035a96e575f14bb Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 9 Oct 2024 14:01:28 +0200 Subject: [PATCH 05/17] remove speech.es_mappings override of parliament-ireland --- backend/corpora/parliament/ireland.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/backend/corpora/parliament/ireland.py b/backend/corpora/parliament/ireland.py index d7671abcd..a76a47fd3 100644 --- a/backend/corpora/parliament/ireland.py +++ b/backend/corpora/parliament/ireland.py @@ -10,6 +10,7 @@ from addcorpus.python_corpora.corpus import CorpusDefinition, CSVCorpusDefinition, XMLCorpusDefinition from addcorpus.python_corpora.extract import Constant, CSV, XML, Metadata, Combined, Backup +from addcorpus.es_mappings import main_content_mapping from corpora.parliament.parliament import Parliament import corpora.parliament.utils.field_defaults as field_defaults import corpora.utils.formatting as formatting @@ -127,7 +128,7 @@ def sources(self, start, end): speaker_constituency = field_defaults.speaker_constituency() 
speaker_constituency.extractor = CSV('const_name') - speech = field_defaults.speech(language="en") + speech = field_defaults.speech() speech.extractor = CSV( 'speech', multiple=True, @@ -494,17 +495,8 @@ def source2dicts(self, source): speaker_id = field_defaults.speaker_id() speaker_constituency = field_defaults.speaker_constituency() - speech = field_defaults.speech() # no language-specific analysers since the corpus is mixed-language - speech.es_mapping = { - "type" : "text", - "fields": { - "length": { - "type": "token_count", - "analyzer": "standard" - } - } - } + speech = field_defaults.speech() speech_id = field_defaults.speech_id() topic = field_defaults.topic() From 19bb6351358a8b989b9a7b16c724e1d18b12cce9 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 10 Oct 2024 16:27:45 +0200 Subject: [PATCH 06/17] roll back push / pull from registry --- .github/workflows/backend-test.yml | 26 -------------------------- .github/workflows/frontend-test.yml | 17 ----------------- 2 files changed, 43 deletions(-) diff --git a/.github/workflows/backend-test.yml b/.github/workflows/backend-test.yml index 15b83ca73..4f94bf484 100644 --- a/.github/workflows/backend-test.yml +++ b/.github/workflows/backend-test.yml @@ -24,34 +24,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build and push Elasticsearch image - uses: docker/build-push-action@v6 - with: - context: . - file: DockerfileElastic - push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest - cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest - cache-to: type=inline - - name: Build and push Backend - uses: docker/build-push-action@v6 - with: - context: backend/. 
- push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest - cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest - cache-to: type=inline - name: Run backend tests run: | sudo mkdir -p /ci-data docker compose pull elasticsearch - docker compose pull backend docker compose --env-file .env-ci run --rm backend pytest diff --git a/.github/workflows/frontend-test.yml b/.github/workflows/frontend-test.yml index 0e19cb73a..7fd78d7de 100644 --- a/.github/workflows/frontend-test.yml +++ b/.github/workflows/frontend-test.yml @@ -24,23 +24,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build frontend image, using cache from Github registry - uses: docker/build-push-action@v6 - with: - context: frontend/. 
- push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest - cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest - cache-to: type=inline - name: Run frontend unit tests run: | - docker compose pull frontend docker compose --env-file .env-ci run --rm frontend yarn test From 9a3f94711f2c9a612277f084ab675a3cd2a7da81 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 10 Oct 2024 16:46:08 +0200 Subject: [PATCH 07/17] add build and push actions --- .github/workflows/backend-build-and-push.yml | 32 +++++++++++++++++ .github/workflows/backend-test.yml | 5 ++- .github/workflows/frontend-build-and-push.yml | 34 +++++++++++++++++++ .github/workflows/frontend-test.yml | 4 +-- 4 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/backend-build-and-push.yml create mode 100644 .github/workflows/frontend-build-and-push.yml diff --git a/.github/workflows/backend-build-and-push.yml b/.github/workflows/backend-build-and-push.yml new file mode 100644 index 000000000..87db5faa6 --- /dev/null +++ b/.github/workflows/backend-build-and-push.yml @@ -0,0 +1,32 @@ +name: Backend build and push after merge of requirements.txt + +on: + pull_request: + branches: + - develop + types: + - closed + paths: + - backend/requirements.txt + +jobs: + if_merged: + name: Build and push backend image + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build and push Backend + uses: docker/build-push-action@v6 + with: + context: backend/. 
+ push: true + tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest diff --git a/.github/workflows/backend-test.yml b/.github/workflows/backend-test.yml index 4f94bf484..35e39b4a9 100644 --- a/.github/workflows/backend-test.yml +++ b/.github/workflows/backend-test.yml @@ -15,7 +15,7 @@ on: - 'dependabot/**' paths: - 'backend/**' - - '.github/workflows/backend*' + - '.github/workflows/backend-test.yml' - 'docker-compose.yaml' jobs: @@ -27,5 +27,4 @@ jobs: - name: Run backend tests run: | sudo mkdir -p /ci-data - docker compose pull elasticsearch - docker compose --env-file .env-ci run --rm backend pytest + docker compose --env-file .env-ci run --build backend pytest diff --git a/.github/workflows/frontend-build-and-push.yml b/.github/workflows/frontend-build-and-push.yml new file mode 100644 index 000000000..9eb0dab25 --- /dev/null +++ b/.github/workflows/frontend-build-and-push.yml @@ -0,0 +1,34 @@ +name: Frontend build and push after merge of yarn.lock + +on: + pull_request: + branches: + - develop + types: + - closed + paths: + - frontend/yarn.lock + +jobs: + if_merged: + name: Build and push frontend image + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build frontend image, using cache from Github registry + uses: docker/build-push-action@v6 + with: + context: frontend/. 
+ push: true + tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest + cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest + cache-to: type=inline diff --git a/.github/workflows/frontend-test.yml b/.github/workflows/frontend-test.yml index 7fd78d7de..2ed4627fb 100644 --- a/.github/workflows/frontend-test.yml +++ b/.github/workflows/frontend-test.yml @@ -15,7 +15,7 @@ on: - 'dependabot/**' paths: - 'frontend/**' - - '.github/workflows/frontend*' + - '.github/workflows/frontend-test.yml' - 'docker-compose.yaml' jobs: @@ -26,4 +26,4 @@ jobs: - uses: actions/checkout@v4 - name: Run frontend unit tests run: | - docker compose --env-file .env-ci run --rm frontend yarn test + docker compose --env-file .env-ci run --build frontend yarn test From 064a443d0a9fea7f39283b6537a7b942cefe9318 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Mon, 14 Oct 2024 11:55:03 +0200 Subject: [PATCH 08/17] create separate action to run when requirements or yarn.lock are changed --- .github/workflows/backend-build-and-push.yml | 1 + .github/workflows/backend-build-and-test.yml | 26 ++++++++++ .github/workflows/backend-test.yml | 5 +- .github/workflows/frontend-build-and-push.yml | 1 + .github/workflows/frontend-build-and-test.yml | 25 ++++++++++ .github/workflows/frontend-test.yml | 2 +- .../workflows/scheduled-build-and-push.yml | 47 +++++++++++++++++++ 7 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/backend-build-and-test.yml create mode 100644 .github/workflows/frontend-build-and-test.yml create mode 100644 .github/workflows/scheduled-build-and-push.yml diff --git a/.github/workflows/backend-build-and-push.yml b/.github/workflows/backend-build-and-push.yml index 87db5faa6..5e80c1318 100644 --- a/.github/workflows/backend-build-and-push.yml +++ b/.github/workflows/backend-build-and-push.yml @@ -8,6 +8,7 @@ on: - closed paths: - backend/requirements.txt + - 'docker-compose.yaml' jobs: if_merged: diff --git 
a/.github/workflows/backend-build-and-test.yml b/.github/workflows/backend-build-and-test.yml new file mode 100644 index 000000000..ae3b93249 --- /dev/null +++ b/.github/workflows/backend-build-and-test.yml @@ -0,0 +1,26 @@ +# This workflow will build the backend container and then run tests; it will only be triggered when requirements change + +name: Build backend and run unit tests + +on: + workflow_dispatch: + push: + branches: + - 'feature/**' + - 'bugfix/**' + - 'hotfix/**' + - 'dependabot/**' + paths: + - 'backend/requirements.txt' + - 'docker-compose.yaml' + +jobs: + backend-test: + name: Test Backend + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run backend tests + run: | + sudo mkdir -p /ci-data + docker compose --env-file .env-ci run --build backend pytest diff --git a/.github/workflows/backend-test.yml b/.github/workflows/backend-test.yml index 35e39b4a9..ab369ffc9 100644 --- a/.github/workflows/backend-test.yml +++ b/.github/workflows/backend-test.yml @@ -1,4 +1,4 @@ -# This workflow will run backend tests on the Python version defined in the backend/Dockerfile +# This workflow will run backend tests using the `ianalyzer-backend:latest` image name: Backend unit tests @@ -12,7 +12,6 @@ on: - 'bugfix/**' - 'hotfix/**' - 'release/**' - - 'dependabot/**' paths: - 'backend/**' - '.github/workflows/backend-test.yml' @@ -27,4 +26,4 @@ jobs: - name: Run backend tests run: | sudo mkdir -p /ci-data - docker compose --env-file .env-ci run --build backend pytest + docker compose --env-file .env-ci run backend pytest diff --git a/.github/workflows/frontend-build-and-push.yml b/.github/workflows/frontend-build-and-push.yml index 9eb0dab25..52c70f95c 100644 --- a/.github/workflows/frontend-build-and-push.yml +++ b/.github/workflows/frontend-build-and-push.yml @@ -8,6 +8,7 @@ on: - closed paths: - frontend/yarn.lock + - 'docker-compose.yaml' jobs: if_merged: diff --git a/.github/workflows/frontend-build-and-test.yml 
b/.github/workflows/frontend-build-and-test.yml new file mode 100644 index 000000000..535db812c --- /dev/null +++ b/.github/workflows/frontend-build-and-test.yml @@ -0,0 +1,25 @@ +# This workflow will build the frontend container and then run tests; it will only be triggered when yarn.lock changes + +name: Frontend unit tests + +on: + workflow_dispatch: + push: + branches: + - 'feature/**' + - 'bugfix/**' + - 'hotfix/**' + - 'dependabot/**' + paths: + - frontend/yarn.lock + - 'docker-compose.yaml' + +jobs: + frontend-test: + name: Test Frontend + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run frontend unit tests + run: | + docker compose --env-file .env-ci run --build frontend yarn test diff --git a/.github/workflows/frontend-test.yml b/.github/workflows/frontend-test.yml index 2ed4627fb..46970db62 100644 --- a/.github/workflows/frontend-test.yml +++ b/.github/workflows/frontend-test.yml @@ -1,4 +1,4 @@ -# This workflow will run frontend tests on the Node version defined in the Dockerfiles +# This workflow will run frontend tests on the `ianalyzer-frontend:latest` image name: Frontend unit tests diff --git a/.github/workflows/scheduled-build-and-push.yml b/.github/workflows/scheduled-build-and-push.yml new file mode 100644 index 000000000..4d5961913 --- /dev/null +++ b/.github/workflows/scheduled-build-and-push.yml @@ -0,0 +1,47 @@ +# This workflow will run every first of the month, to make sure we update the underlying images and libraries + +name: Scheduled build and push of all images + +on: + schedule: + - cron: "0 0 1 * *" + +jobs: + rebuild-scheduled: + name: Rebuild images + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build frontend image, using cache from 
Github registry + uses: docker/build-push-action@v6 + with: + context: frontend/. + push: true + tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest + cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest + cache-to: type=inline + - name: Build backend image, using cache from Github registry + uses: docker/build-push-action@v6 + with: + context: backend/. + push: true + tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest + cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest + cache-to: type=inline + - name: Build Elasticsearch image, using cache from Github registry + uses: docker/build-push-action@v6 + with: + context: . + dockerfile: DockerfileElastic + push: true + tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elasticsearch:latest + cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elasticsearch:latest + cache-to: type=inline From 6c4fd0648f4be66caab5cc70fecf6abeb891e2ca Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 16 Oct 2024 16:33:48 +0200 Subject: [PATCH 09/17] move package registry to cdh organization --- .github/workflows/backend-build-and-push.yml | 2 +- .github/workflows/frontend-build-and-push.yml | 4 ++-- .github/workflows/scheduled-build-and-push.yml | 12 ++++++------ docker-compose.yaml | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/backend-build-and-push.yml b/.github/workflows/backend-build-and-push.yml index 5e80c1318..86a98645f 100644 --- a/.github/workflows/backend-build-and-push.yml +++ b/.github/workflows/backend-build-and-push.yml @@ -30,4 +30,4 @@ jobs: with: context: backend/. 
push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest + tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest diff --git a/.github/workflows/frontend-build-and-push.yml b/.github/workflows/frontend-build-and-push.yml index 52c70f95c..1e91e822b 100644 --- a/.github/workflows/frontend-build-and-push.yml +++ b/.github/workflows/frontend-build-and-push.yml @@ -30,6 +30,6 @@ jobs: with: context: frontend/. push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest - cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest + tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest + cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest cache-to: type=inline diff --git a/.github/workflows/scheduled-build-and-push.yml b/.github/workflows/scheduled-build-and-push.yml index 4d5961913..487afab25 100644 --- a/.github/workflows/scheduled-build-and-push.yml +++ b/.github/workflows/scheduled-build-and-push.yml @@ -25,16 +25,16 @@ jobs: with: context: frontend/. push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest - cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest + tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest + cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest cache-to: type=inline - name: Build backend image, using cache from Github registry uses: docker/build-push-action@v6 with: context: backend/. 
push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest - cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest + tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest + cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest cache-to: type=inline - name: Build Elasticsearch image, using cache from Github registry uses: docker/build-push-action@v6 @@ -42,6 +42,6 @@ jobs: context: . dockerfile: DockerfileElastic push: true - tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elasticsearch:latest - cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elasticsearch:latest + tags: ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest + cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest cache-to: type=inline diff --git a/docker-compose.yaml b/docker-compose.yaml index 90f5481c7..e5cc9a35c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -13,7 +13,7 @@ services: volumes: - ianalyzer-db:/var/lib/postgresql/data/ backend: - image: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest + image: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest build: context: ./backend depends_on: @@ -40,7 +40,7 @@ services: target: /corpora command: bash -c "python manage.py migrate && python manage.py loadcorpora && python manage.py runserver 0.0.0.0:8000" frontend: - image: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest + image: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest build: context: ./frontend ports: @@ -54,7 +54,7 @@ services: target: /frontend/build command: sh -c "yarn prebuild && yarn start-docker" elasticsearch: - image: ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest + image: ghcr.io/centrefordigitalhumanities/ianalyzer-elastic:latest build: context: . 
dockerfile: DockerfileElastic @@ -82,7 +82,7 @@ services: image: redis:latest restart: unless-stopped celery: - image: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest + image: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest environment: CELERY_BROKER: $CELERY_BROKER SQL_DATABASE: $SQL_DATABASE From 3807c6d18e4dee0c337c4f24b958b66b81afa64c Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 16 Oct 2024 16:35:33 +0200 Subject: [PATCH 10/17] allow manual trigger of build and push --- .github/workflows/backend-build-and-push.yml | 1 + .github/workflows/frontend-build-and-push.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/backend-build-and-push.yml b/.github/workflows/backend-build-and-push.yml index 86a98645f..a93cabedc 100644 --- a/.github/workflows/backend-build-and-push.yml +++ b/.github/workflows/backend-build-and-push.yml @@ -1,6 +1,7 @@ name: Backend build and push after merge of requirements.txt on: + workflow_dispatch: pull_request: branches: - develop diff --git a/.github/workflows/frontend-build-and-push.yml b/.github/workflows/frontend-build-and-push.yml index 1e91e822b..42273f9af 100644 --- a/.github/workflows/frontend-build-and-push.yml +++ b/.github/workflows/frontend-build-and-push.yml @@ -1,6 +1,7 @@ name: Frontend build and push after merge of yarn.lock on: + workflow_dispatch: pull_request: branches: - develop From 6ae9dfab2ae90312faa654a0f45d7fd1c0c6c5dc Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 16 Oct 2024 16:36:34 +0200 Subject: [PATCH 11/17] move manual trigger to scheduled build and push --- .github/workflows/backend-build-and-push.yml | 1 - .github/workflows/frontend-build-and-push.yml | 1 - .github/workflows/scheduled-build-and-push.yml | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/backend-build-and-push.yml b/.github/workflows/backend-build-and-push.yml index a93cabedc..86a98645f 100644 --- a/.github/workflows/backend-build-and-push.yml +++ 
b/.github/workflows/backend-build-and-push.yml @@ -1,7 +1,6 @@ name: Backend build and push after merge of requirements.txt on: - workflow_dispatch: pull_request: branches: - develop diff --git a/.github/workflows/frontend-build-and-push.yml b/.github/workflows/frontend-build-and-push.yml index 42273f9af..1e91e822b 100644 --- a/.github/workflows/frontend-build-and-push.yml +++ b/.github/workflows/frontend-build-and-push.yml @@ -1,7 +1,6 @@ name: Frontend build and push after merge of yarn.lock on: - workflow_dispatch: pull_request: branches: - develop diff --git a/.github/workflows/scheduled-build-and-push.yml b/.github/workflows/scheduled-build-and-push.yml index 487afab25..f854525d9 100644 --- a/.github/workflows/scheduled-build-and-push.yml +++ b/.github/workflows/scheduled-build-and-push.yml @@ -3,6 +3,7 @@ name: Scheduled build and push of all images on: + workflow_dispatch: schedule: - cron: "0 0 1 * *" From 10db306df5154162c3cb44f0263ca2717147879c Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 16 Oct 2024 16:50:15 +0200 Subject: [PATCH 12/17] correct reference to Dockerfile Elasticsearch --- .github/workflows/scheduled-build-and-push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scheduled-build-and-push.yml b/.github/workflows/scheduled-build-and-push.yml index f854525d9..62712487b 100644 --- a/.github/workflows/scheduled-build-and-push.yml +++ b/.github/workflows/scheduled-build-and-push.yml @@ -41,7 +41,7 @@ jobs: uses: docker/build-push-action@v6 with: context: . 
- dockerfile: DockerfileElastic + file: DockerfileElastic push: true tags: ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest From 6e7c893cff083e225abbe64a9da4a97ab6048caf Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 31 Oct 2024 14:26:19 +0100 Subject: [PATCH 13/17] add survey message --- frontend/src/app/home/home.component.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/frontend/src/app/home/home.component.html b/frontend/src/app/home/home.component.html index 968439281..1528d33e9 100644 --- a/frontend/src/app/home/home.component.html +++ b/frontend/src/app/home/home.component.html @@ -1,3 +1,9 @@ +
+

+ Participate in the I-analyzer user survey! +

+
+
From 315d2ee1ae32473668e976e453ab5f88ef7ea94d Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 31 Oct 2024 17:00:43 +0100 Subject: [PATCH 14/17] set minimum size for terms aggregation in visualisation close #1683 --- .../app/visualization/barchart/histogram.component.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/frontend/src/app/visualization/barchart/histogram.component.ts b/frontend/src/app/visualization/barchart/histogram.component.ts index 9f32f05aa..f67d052aa 100644 --- a/frontend/src/app/visualization/barchart/histogram.component.ts +++ b/frontend/src/app/visualization/barchart/histogram.component.ts @@ -41,19 +41,16 @@ export class HistogramComponent * used in document requests. */ getAggregator(): TermsAggregator { - let size = 0; - - if (!this.visualizedField.filterOptions) { - return new TermsAggregator(this.visualizedField, 100); - } + let size = 100; const filterOptions = this.visualizedField.filterOptions; if (filterOptions.name === 'MultipleChoiceFilter') { size = (filterOptions as MultipleChoiceFilterOptions).option_count; } else if (filterOptions.name === 'RangeFilter') { - size = + const filterRange = (filterOptions as RangeFilterOptions).upper - (filterOptions as RangeFilterOptions).lower; + size = _.max([size, filterRange]) } return new TermsAggregator(this.visualizedField, size); } From 1c3ca9a8277082256624ad9833e6167bbea9e0e5 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 31 Oct 2024 17:42:55 +0100 Subject: [PATCH 15/17] replace organisation in urls --- CITATION.cff | 2 +- README.md | 4 ++-- backend/addcorpus/schemas/corpus.schema.json | 2 +- documentation/Authentication-and-authorization.md | 8 ++++---- documentation/Local-Debian-I-Analyzer-setup.md | 2 +- documentation/Making-a-release.md | 4 ++-- frontend/src/assets/about/en-GB/ianalyzer.md | 4 ++-- frontend/src/environments/environment.git.ts | 2 +- frontend/src/environments/environment.ts | 2 +- 9 files changed, 15 insertions(+), 15 
deletions(-) diff --git a/CITATION.cff b/CITATION.cff index ee333f972..dce5aa248 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -15,7 +15,7 @@ authors: identifiers: - type: doi value: 10.5281/zenodo.8064133 -repository-code: 'https://github.com/UUDigitalHumanitieslab/I-analyzer' +repository-code: 'https://github.com/CentreForDigitalHumanities/I-analyzer' url: 'https://ianalyzer.hum.uu.nl' abstract: >- I-analyzer is a tool for exploring corpora (large diff --git a/README.md b/README.md index 8fd6080d8..e71fa0a1a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # I-analyzer [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8064133.svg)](https://doi.org/10.5281/zenodo.8064133) -[![Actions Status](https://github.com/UUDigitalHumanitiesLab/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/UUDigitalHumanitiesLab/I-analyzer/actions) +[![Actions Status](https://github.com/CentreForDigitalHumanities/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/CentreForDigitalHumanities/I-analyzer/actions) > "The great text mining tool that obviates all others." > — Julian Gonggrijp @@ -41,7 +41,7 @@ If you wish to cite material that you accessed through I-analyzer, or you are no ## Contact -For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/UUDigitalHumanitieslab/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/). +For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/CentreForDigitalHumanities/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/). 
If you want to add a new corpus to I-analyzer, or have an idea for a project, please [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/) rather than making an issue, so we can discuss the possibilities with you. diff --git a/backend/addcorpus/schemas/corpus.schema.json b/backend/addcorpus/schemas/corpus.schema.json index b170ce566..a3dc405b7 100644 --- a/backend/addcorpus/schemas/corpus.schema.json +++ b/backend/addcorpus/schemas/corpus.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json", + "$id": "https://github.com/CentreForDigitalHumanities/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json", "title": "Corpus", "description": "Definition of a corpus in I-analyzer", "type": "object", diff --git a/documentation/Authentication-and-authorization.md b/documentation/Authentication-and-authorization.md index 9cf811b36..cbf893dbd 100644 --- a/documentation/Authentication-and-authorization.md +++ b/documentation/Authentication-and-authorization.md @@ -2,7 +2,7 @@ > **This documentation is not up to date with version 4.x** -**Authentication** is the process of logging in the user, this can be done by logging in directly on the user database or using a Solis account. **Authorization** is the process of determining what that logged in user is allowed to do: e.g. go to the admin environment and search certain corpora. Both of these are modeled on the front end by the [`User class`](https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/models/user.ts) and handled by the [`User service`](https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/services/user.service.ts). 
+**Authentication** is the process of logging in the user, this can be done by logging in directly on the user database or using a Solis account. **Authorization** is the process of determining what that logged in user is allowed to do: e.g. go to the admin environment and search certain corpora. Both of these are modeled on the front end by the [`User class`](https://github.com/CentreForDigitalHumanities/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/models/user.ts) and handled by the [`User service`](https://github.com/CentreForDigitalHumanities/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/services/user.service.ts). A user needs to be logged into both the Flask server (back end) and the Angular user interface (front end). The back end authentication and authorization is essential for the actual security: relying only on front end security would allow accessing the data through manually sending requests. Providing this security on the user interface is mostly for the usability of the application: show only corpora which can actually be queried, display the currently logged on user (or that the user isn't logged on yet) and what other parts of the application might actually be accessible. Ideally both the back end and front end would be in perfect harmony about the user's session status. This is however complicated because sessions are temporary: both the front-end and back-end can separately decide to cancel sessions. Generally because they expire, but it could also happen if a server is reset, the user logs off or the user decides to throw away cookies. @@ -24,9 +24,9 @@ It is possible to add a "guest" user in the admin without a password. If this is Flask, like most back end frameworks, will expire a session after a certain period of inactivity. To prevent this from happening when the interface is open the `UserService` will periodically check the session on the server. 
If it is expired it will fallback to guest or be redirected to the login page. This can also happen if the user logged of from another tab. -## Check before querying +## Check before querying -The [`ApiRetryService.requireLogin`](https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/services/api-retry.service.ts#L18) method is used by the query service to confirm that the session is still active before querying the server. If the user isn't logged on, it will fallback to "guest" or mark the session as expired. +The [`ApiRetryService.requireLogin`](https://github.com/CentreForDigitalHumanities/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/services/api-retry.service.ts#L18) method is used by the query service to confirm that the session is still active before querying the server. If the user isn't logged on, it will fallback to "guest" or mark the session as expired. ## Page opened in new tab/page @@ -34,4 +34,4 @@ When opening the front end on a new page, nothing is known yet about any active ## Navigating -To check the authorization on (manual) navigation a [`LoggedOnGuard`](https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/logged-on.guard.ts) and [`CorpusGuard`](https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/corpus.guard.ts) exist. Both can (depending on the route) check the rights and if necessary redirect to the log on page detailing the lack of authorization. +To check the authorization on (manual) navigation a [`LoggedOnGuard`](https://github.com/CentreForDigitalHumanities/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/logged-on.guard.ts) and [`CorpusGuard`](https://github.com/CentreForDigitalHumanities/I-analyzer/blob/aee207f1a4e1a6fd2521f05f3f34839ab902247c/web-ui/src/app/corpus.guard.ts) exist. 
Both can (depending on the route) check the rights and if necessary redirect to the log on page detailing the lack of authorization. diff --git a/documentation/Local-Debian-I-Analyzer-setup.md b/documentation/Local-Debian-I-Analyzer-setup.md index 1ba72adcd..311acd6dc 100644 --- a/documentation/Local-Debian-I-Analyzer-setup.md +++ b/documentation/Local-Debian-I-Analyzer-setup.md @@ -110,4 +110,4 @@ Chrome / chromedriver seems to be needed as well? If using LXD, login as the user you created: `lxc exec IAnalyzer -- su --login yourname` -Now follow the installation as described in the [README](https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/develop/README.md). +Now follow the installation as described in the [README](https://github.com/CentreForDigitalHumanities/I-analyzer/blob/develop/README.md). diff --git a/documentation/Making-a-release.md b/documentation/Making-a-release.md index e06d0a1dc..b1d0aa943 100644 --- a/documentation/Making-a-release.md +++ b/documentation/Making-a-release.md @@ -11,8 +11,8 @@ It's recommended that you use [git-flow to make releases](https://danielkummer.g Check if anything ought to be included with the new release: -- Check [open pull requests](https://github.com/UUDigitalHumanitieslab/I-analyzer/pulls) -- Check [issues labelled "bug"](https://github.com/UUDigitalHumanitieslab/I-analyzer/issues?q=is%3Aissue+is%3Aopen+label%3Abug) +- Check [open pull requests](https://github.com/CentreForDigitalHumanities/I-analyzer/pulls) +- Check [issues labelled "bug"](https://github.com/CentreForDigitalHumanities/I-analyzer/issues?q=is%3Aissue+is%3Aopen+label%3Abug) - Check project boards that keep track of a release cycle. If issues or PRs are scheduled for this release, wait until they are closed or move them to the next release cycle. Discuss open pull requests, known bugs, and scheduled issues with your fellow developers. If you agree that the develop branch is release-ready, move on to the next step. 
diff --git a/frontend/src/assets/about/en-GB/ianalyzer.md b/frontend/src/assets/about/en-GB/ianalyzer.md index 1f7868ce8..1bc534d45 100644 --- a/frontend/src/assets/about/en-GB/ianalyzer.md +++ b/frontend/src/assets/about/en-GB/ianalyzer.md @@ -14,7 +14,7 @@ This way, whenever researchers have data that they want to add, we don't need to As I-analyzer is designed to be flexible, we have worked with different research projects over time to add corpora and develop the application. You can find more information about some of these projects in our [portfolio](https://cdh.uu.nl/portfolio/). -The [source code of I-analyzer](https://github.com/UUDigitalHumanitieslab/I-analyzer) is shared under an MIT license. +The [source code of I-analyzer](https://github.com/CentreForDigitalHumanities/I-analyzer) is shared under an MIT license. ## Research using I-analyzer @@ -45,4 +45,4 @@ Do you think that some of these may apply to you? We are still interested in hea For questions, suggestions, or adding new data: contact us via [cdh@uu.nl](mailto:cdh@uu.nl). -For small suggestions, feedback, or bug reports, you can also make an issue on the [I-analyzer github repository](https://github.com/UUDigitalHumanitieslab/I-analyzer/issues). +For small suggestions, feedback, or bug reports, you can also make an issue on the [I-analyzer github repository](https://github.com/CentreForDigitalHumanities/I-analyzer/issues). 
diff --git a/frontend/src/environments/environment.git.ts b/frontend/src/environments/environment.git.ts index ea5ba0534..eec842c7e 100644 --- a/frontend/src/environments/environment.git.ts +++ b/frontend/src/environments/environment.git.ts @@ -11,6 +11,6 @@ export const environment = { runInIFrame: false, directDownloadLimit: 1000, version, - sourceUrl: 'https://github.com/UUDigitalHumanitieslab/I-analyzer/', + sourceUrl: 'https://github.com/CentreForDigitalHumanities/I-analyzer/', logos: undefined, }; diff --git a/frontend/src/environments/environment.ts b/frontend/src/environments/environment.ts index ae3b9eb89..bdd3770b3 100644 --- a/frontend/src/environments/environment.ts +++ b/frontend/src/environments/environment.ts @@ -15,6 +15,6 @@ export const environment = { runInIFrame: false, directDownloadLimit: 1000, version, - sourceUrl: 'https://github.com/UUDigitalHumanitieslab/I-analyzer/', + sourceUrl: 'https://github.com/CentreForDigitalHumanities/I-analyzer/', logos: undefined, }; From 55a38531b77fbbc7860d3b9b86bcee16c2101638 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 31 Oct 2024 17:58:38 +0100 Subject: [PATCH 16/17] move sortable property on times fields --- backend/corpora/times/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/corpora/times/times.py b/backend/corpora/times/times.py index 65fbcbf09..85d1a7853 100644 --- a/backend/corpora/times/times.py +++ b/backend/corpora/times/times.py @@ -102,6 +102,7 @@ def sources(self, start=datetime.min, end=datetime.max): es_mapping={"type": "date", "format": "yyyy-MM-dd"}, hidden=True, visualizations=["resultscount", "termfrequency"], + sortable=True, search_filter=filters.DateFilter( min_date, max_date, @@ -165,7 +166,6 @@ def sources(self, start=datetime.min, end=datetime.max): es_mapping=keyword_mapping(), csv_core=True, results_overview=True, - sortable=True, description="Publication date as full string, as found in source file", 
extractor=extract.XML(Tag("da"), toplevel=True), ), From 3455c811b701a567e1a0ecf1634391e7b4df024d Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Wed, 6 Nov 2024 16:05:01 +0100 Subject: [PATCH 17/17] update version --- CITATION.cff | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index dce5aa248..adc9d8596 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -35,5 +35,5 @@ keywords: - elasticsearch - natural language processing license: MIT -version: 5.13.0 -date-released: '2024-08-30' +version: 5.14.0 +date-released: '2024-11-06' diff --git a/package.json b/package.json index 14787fa77..ebbdffdcf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "i-analyzer", - "version": "5.13.0", + "version": "5.14.0", "license": "MIT", "scripts": { "postinstall": "yarn install-back && yarn install-front",