Skip to content

Commit

Permalink
Merge branch 'release/5.14.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
lukavdplas committed Nov 6, 2024
2 parents b0c8002 + 3455c81 commit 72b31c1
Show file tree
Hide file tree
Showing 26 changed files with 215 additions and 104 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/backend-build-and-push.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Build and push the backend image after a PR that changes the Python
# requirements (or the compose file) is merged into develop.
name: Backend build and push after merge of requirements.txt

on:
  pull_request:
    branches:
      - develop
    types:
      - closed
    paths:
      - 'backend/requirements.txt'
      - 'docker-compose.yaml'

jobs:
  if_merged:
    name: Build and push backend image
    # "closed" also fires when a PR is closed without merging; only run on merges
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push Backend
        uses: docker/build-push-action@v6
        with:
          context: backend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          # use the registry copy as a build cache — consistent with the
          # frontend-build-and-push and scheduled-build-and-push workflows
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-to: type=inline
26 changes: 26 additions & 0 deletions .github/workflows/backend-build-and-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Build the backend container and run its test suite.
# Only triggered when the Python requirements (or the compose file) change.

name: Build backend and run unit tests

on:
  workflow_dispatch:
  push:
    branches:
      - 'feature/**'
      - 'bugfix/**'
      - 'hotfix/**'
      - 'dependabot/**'
    paths:
      - 'backend/requirements.txt'
      - 'docker-compose.yaml'

jobs:
  backend-test:
    name: Test Backend
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run backend tests
        # --build forces a rebuild of the backend image before running pytest;
        # /ci-data is the host directory mounted by the compose setup
        run: |
          sudo mkdir -p /ci-data
          docker compose --env-file .env-ci run --build backend pytest
34 changes: 3 additions & 31 deletions .github/workflows/backend-test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This workflow will run backend tests on the Python version defined in the backend/Dockerfile
# This workflow will run backend tests using the `ianalyzer-backend:latest` image

name: Backend unit tests

Expand All @@ -12,10 +12,9 @@ on:
- 'bugfix/**'
- 'hotfix/**'
- 'release/**'
- 'dependabot/**'
paths:
- 'backend/**'
- '.github/workflows/backend*'
- '.github/workflows/backend-test.yml'
- 'docker-compose.yaml'

jobs:
Expand All @@ -24,34 +23,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Elasticsearch image
uses: docker/build-push-action@v6
with:
context: .
file: DockerfileElastic
push: true
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
cache-to: type=inline
- name: Build and push Backend
uses: docker/build-push-action@v6
with:
context: backend/.
push: true
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
cache-to: type=inline
- name: Run backend tests
run: |
sudo mkdir -p /ci-data
docker compose pull elasticsearch
docker compose pull backend
docker compose --env-file .env-ci run --rm backend pytest
docker compose --env-file .env-ci run backend pytest
35 changes: 35 additions & 0 deletions .github/workflows/frontend-build-and-push.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Build and push the frontend image after a PR that changes yarn.lock
# (or the compose file) is merged into develop.
name: Frontend build and push after merge of yarn.lock

on:
  pull_request:
    branches:
      - develop
    types:
      - closed
    paths:
      - 'frontend/yarn.lock'
      - 'docker-compose.yaml'

jobs:
  if_merged:
    name: Build and push frontend image
    # "closed" also fires when a PR is closed without merging; only run on merges
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build frontend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: frontend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-to: type=inline
25 changes: 25 additions & 0 deletions .github/workflows/frontend-build-and-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Build the frontend container and run its test suite.
# Only triggered when yarn.lock (or the compose file) changes.

# NOTE: renamed from "Frontend unit tests" — that name collides with the
# existing frontend-test.yml workflow, making the two indistinguishable in
# the Actions UI; this matches the backend counterpart's naming.
name: Build frontend and run unit tests

on:
  workflow_dispatch:
  push:
    branches:
      - 'feature/**'
      - 'bugfix/**'
      - 'hotfix/**'
      - 'dependabot/**'
    paths:
      - 'frontend/yarn.lock'
      - 'docker-compose.yaml'

jobs:
  frontend-test:
    name: Test Frontend
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run frontend unit tests
        # --build forces a rebuild of the frontend image before running tests
        run: |
          docker compose --env-file .env-ci run --build frontend yarn test
23 changes: 3 additions & 20 deletions .github/workflows/frontend-test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This workflow will run frontend tests on the Node version defined in the Dockerfiles
# This workflow will run frontend tests on the `ianalyzer-frontend:latest` image

name: Frontend unit tests

Expand All @@ -15,7 +15,7 @@ on:
- 'dependabot/**'
paths:
- 'frontend/**'
- '.github/workflows/frontend*'
- '.github/workflows/frontend-test.yml'
- 'docker-compose.yaml'

jobs:
Expand All @@ -24,23 +24,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build frontend image, using cache from Github registry
uses: docker/build-push-action@v6
with:
context: frontend/.
push: true
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
cache-to: type=inline
- name: Run frontend unit tests
run: |
docker compose pull frontend
docker compose --env-file .env-ci run --rm frontend yarn test
docker compose --env-file .env-ci run --build frontend yarn test
48 changes: 48 additions & 0 deletions .github/workflows/scheduled-build-and-push.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Runs on the first of every month to rebuild all images, so that the
# underlying base images and libraries stay up to date.

name: Scheduled build and push of all images

on:
  workflow_dispatch:
  schedule:
    # midnight UTC on the first day of each month
    - cron: "0 0 1 * *"

jobs:
  rebuild-scheduled:
    name: Rebuild images
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build frontend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: frontend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-frontend:latest
          cache-to: type=inline
      - name: Build backend image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: backend/.
          push: true
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-backend:latest
          cache-to: type=inline
      - name: Build Elasticsearch image, using cache from Github registry
        uses: docker/build-push-action@v6
        with:
          context: .
          file: DockerfileElastic
          push: true
          # NOTE(review): older workflows pushed "ianalyzer-elastic:latest" —
          # confirm docker-compose.yaml pulls "ianalyzer-elasticsearch"
          tags: ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest
          cache-from: type=registry,ref=ghcr.io/centrefordigitalhumanities/ianalyzer-elasticsearch:latest
          cache-to: type=inline
6 changes: 3 additions & 3 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ authors:
identifiers:
- type: doi
value: 10.5281/zenodo.8064133
repository-code: 'https://github.com/UUDigitalHumanitieslab/I-analyzer'
repository-code: 'https://github.com/CentreForDigitalHumanities/I-analyzer'
url: 'https://ianalyzer.hum.uu.nl'
abstract: >-
I-analyzer is a tool for exploring corpora (large
Expand All @@ -35,5 +35,5 @@ keywords:
- elasticsearch
- natural language processing
license: MIT
version: 5.13.0
date-released: '2024-08-30'
version: 5.14.0
date-released: '2024-11-06'
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# I-analyzer

[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8064133.svg)](https://doi.org/10.5281/zenodo.8064133)
[![Actions Status](https://github.com/UUDigitalHumanitiesLab/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/UUDigitalHumanitiesLab/I-analyzer/actions)
[![Actions Status](https://github.com/CentreForDigitalHumanities/I-analyzer/workflows/Unit%20tests/badge.svg)](https://github.com/CentreForDigitalHumanities/I-analyzer/actions)

> "The great text mining tool that obviates all others."
> — Julian Gonggrijp
Expand Down Expand Up @@ -41,7 +41,7 @@ If you wish to cite material that you accessed through I-analyzer, or you are no

## Contact

For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/UUDigitalHumanitieslab/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/).
For questions, small feature suggestions, and bug reports, feel free to [create an issue](https://github.com/CentreForDigitalHumanities/I-analyzer/issues/new/choose). If you don't have a Github account, you can also [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/).

If you want to add a new corpus to I-analyzer, or have an idea for a project, please [contact the Centre for Digital Humanities](https://cdh.uu.nl/contact/) rather than making an issue, so we can discuss the possibilities with you.

14 changes: 5 additions & 9 deletions backend/addcorpus/es_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
def primary_mapping_type(es_mapping: Dict) -> "Optional[str]":
    '''
    Return the top-level `type` of an Elasticsearch field mapping.

    Returns None when the mapping does not declare a `type` key, so the
    return annotation is Optional[str] — the previous `-> str` annotation
    did not account for the None default.
    (Quoted forward reference keeps this runtime-safe; add `Optional` to the
    module's typing import to satisfy static checkers.)
    '''
    return es_mapping.get('type')

def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None, updated_highlighting=True):

def main_content_mapping(
token_counts=True, stopword_analysis=False, stemming_analysis=False, language=None
):
'''
Mapping for the main content field. Options:
Expand All @@ -14,14 +17,7 @@ def main_content_mapping(token_counts=True, stopword_analysis=False, stemming_an
- `updated_highlighting`: enables the new highlighter, which only works for fields that are indexed with the term vector set to 'with_positions_offsets'.
'''

mapping = {
'type': 'text'
}

if updated_highlighting:
mapping.update({
'term_vector': 'with_positions_offsets' # include char positions on _source (in addition to the multifields) for highlighting
})
mapping = {"type": "text", "term_vector": "with_positions_offsets"}

if any([token_counts, stopword_analysis, stemming_analysis]):
multifields = {}
Expand Down
2 changes: 1 addition & 1 deletion backend/addcorpus/schemas/corpus.schema.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://github.com/UUDigitalHumanitieslab/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json",
"$id": "https://github.com/CentreForDigitalHumanities/I-analyzer/blob/develop/backend/addcorpus/schemas/corpus.schema.json",
"title": "Corpus",
"description": "Definition of a corpus in I-analyzer",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion backend/corpora/parliament/finland.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def sources(self, start, end):
speaker_birth_year = field_defaults.speaker_birth_year()
speaker_birth_year.extractor = person_attribute_extractor('birth_year')

speech = field_defaults.speech()
speech = field_defaults.speech(language="fi")
speech.extractor = XML(transform = clean_value)

speech_id = field_defaults.speech_id()
Expand Down
13 changes: 2 additions & 11 deletions backend/corpora/parliament/ireland.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from addcorpus.python_corpora.corpus import CorpusDefinition, CSVCorpusDefinition, XMLCorpusDefinition
from addcorpus.python_corpora.extract import Constant, CSV, XML, Metadata, Combined, Backup
from addcorpus.es_mappings import main_content_mapping
from corpora.parliament.parliament import Parliament
import corpora.parliament.utils.field_defaults as field_defaults
import corpora.utils.formatting as formatting
Expand Down Expand Up @@ -149,7 +150,6 @@ def sources(self, start, end):
source_archive = field_defaults.source_archive()
source_archive.extractor = Constant('1919-2013')


fields = [
date,
country,
Expand Down Expand Up @@ -495,17 +495,8 @@ def source2dicts(self, source):
speaker_id = field_defaults.speaker_id()
speaker_constituency = field_defaults.speaker_constituency()

speech = field_defaults.speech()
# no language-specific analysers since the corpus is mixed-language
speech.es_mapping = {
"type" : "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "standard"
}
}
}
speech = field_defaults.speech()

speech_id = field_defaults.speech_id()
topic = field_defaults.topic()
Expand Down
1 change: 0 additions & 1 deletion backend/corpora/parliament/utils/field_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,6 @@ def speech(language=None):
stopword_analysis=has_language,
stemming_analysis=has_language,
language=language,
updated_highlighting=True
),
results_overview=True,
search_field_core=True,
Expand Down
Loading

0 comments on commit 72b31c1

Please sign in to comment.