From 8e8c3a08e645a8a8866ab9b81218c5ede10906de Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sun, 20 Oct 2024 07:25:36 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Bump=20v2024.10.20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 12 ++--- jurisprudence/settings.py | 2 +- release_notes/v2024.10.20.md | 90 ++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 release_notes/v2024.10.20.md diff --git a/README.md b/README.md index 6788c70..d7228cb 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/antoinejeannot/jurisprudence) [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/antoinejeannot/jurisprudence) -# ✨ Jurisprudence, release v2024.10.16 🏛️ +# ✨ Jurisprudence, release v2024.10.20 🏛️ Jurisprudence is an open-source project that automates the collection and distribution of French legal decisions. 
It leverages the Judilibre API provided by the Cour de Cassation to: @@ -17,12 +17,12 @@ Whether you're conducting legal research, developing AI models, or simply intere | Jurisdiction | Jurisprudences | Oldest | Latest | Tokens | JSONL (gzipped) | Parquet | |--------------|----------------|--------|--------|--------|-----------------|---------| -| Cour d'Appel | 393,027 | 1996-03-25 | 2024-10-10 | 1,966,344,301 | [Download (1.72 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.jsonl.gz?download=true) | [Download (2.87 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.parquet?download=true) | -| Tribunal Judiciaire | 77,347 | 2023-12-14 | 2024-10-10 | 275,740,841 | [Download (248.46 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.jsonl.gz?download=true) | [Download (413.09 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.parquet?download=true) | -| Cour de Cassation | 536,658 | 1860-08-01 | 2024-10-15 | 1,106,970,947 | [Download (931.31 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.jsonl.gz?download=true) | [Download (1.58 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet?download=true) | -| **Total** | **1,007,032** | **1860-08-01** | **2024-10-15** | **3,349,056,089** | **2.87 GB** | **4.86 GB** | +| Cour d'Appel | 393,528 | 1996-03-25 | 2024-10-17 | 1,969,498,268 | [Download (1.72 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.jsonl.gz?download=true) | [Download (2.88 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.parquet?download=true) | +| Tribunal Judiciaire | 78,227 | 2023-12-14 | 2024-10-14 | 278,779,170 | [Download (251.72 
MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.jsonl.gz?download=true) | [Download (417.80 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.parquet?download=true) | +| Cour de Cassation | 536,698 | 1860-08-01 | 2024-10-17 | 1,107,052,096 | [Download (931.72 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.jsonl.gz?download=true) | [Download (1.58 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet?download=true) | +| **Total** | **1,008,453** | **1860-08-01** | **2024-10-17** | **3,355,329,534** | **2.88 GB** | **4.86 GB** | -Latest update date: 2024-10-16 +Latest update date: 2024-10-20 # Tokens are computed using GPT-4 tiktoken and the `text` column. diff --git a/jurisprudence/settings.py b/jurisprudence/settings.py index be76388..66e14e8 100644 --- a/jurisprudence/settings.py +++ b/jurisprudence/settings.py @@ -1 +1 @@ -JURISPRUDENCE_LAST_EXPORT_DATETIME = "2024-10-16 01:22:05" +JURISPRUDENCE_LAST_EXPORT_DATETIME = "2024-10-20 06:52:42" diff --git a/release_notes/v2024.10.20.md b/release_notes/v2024.10.20.md new file mode 100644 index 0000000..d7228cb --- /dev/null +++ b/release_notes/v2024.10.20.md @@ -0,0 +1,90 @@ +

+ +[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/antoinejeannot/jurisprudence) [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/antoinejeannot/jurisprudence) + +# ✨ Jurisprudence, release v2024.10.20 🏛️ + +Jurisprudence is an open-source project that automates the collection and distribution of French legal decisions. It leverages the Judilibre API provided by the Cour de Cassation to: + +- Fetch rulings from major French courts (Cour de Cassation, Cour d'Appel, Tribunal Judiciaire) +- Process and convert the data into easily accessible formats +- Publish & version updated datasets on Hugging Face every few days. + +It aims to democratize access to legal information, enabling researchers, legal professionals and the public to easily access and analyze French court decisions. +Whether you're conducting legal research, developing AI models, or simply interested in French jurisprudence, this project might provide a valuable, open resource for exploring the French legal landscape. 
+ +## 📊 Exported Data + +| Jurisdiction | Jurisprudences | Oldest | Latest | Tokens | JSONL (gzipped) | Parquet | +|--------------|----------------|--------|--------|--------|-----------------|---------| +| Cour d'Appel | 393,528 | 1996-03-25 | 2024-10-17 | 1,969,498,268 | [Download (1.72 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.jsonl.gz?download=true) | [Download (2.88 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.parquet?download=true) | +| Tribunal Judiciaire | 78,227 | 2023-12-14 | 2024-10-14 | 278,779,170 | [Download (251.72 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.jsonl.gz?download=true) | [Download (417.80 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.parquet?download=true) | +| Cour de Cassation | 536,698 | 1860-08-01 | 2024-10-17 | 1,107,052,096 | [Download (931.72 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.jsonl.gz?download=true) | [Download (1.58 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet?download=true) | +| **Total** | **1,008,453** | **1860-08-01** | **2024-10-17** | **3,355,329,534** | **2.88 GB** | **4.86 GB** | + +Latest update date: 2024-10-20 + +# Tokens are computed using GPT-4 tiktoken and the `text` column. + +## 🤗 Hugging Face Dataset + +The up-to-date jurisprudences dataset is available at: https://huggingface.co/datasets/antoinejeannot/jurisprudence in JSONL (gzipped) and parquet formats. + +This allows you to easily fetch, query, process and index all jurisprudences in the blink of an eye! 
+ +### Usage Examples +#### HuggingFace Datasets +```python +# pip install datasets +from datasets import load_dataset + +dataset = load_dataset("antoinejeannot/jurisprudence") +dataset.shape +>> {'tribunal_judiciaire': (58986, 33), +'cour_d_appel': (378392, 33), +'cour_de_cassation': (534258, 33)} + +# alternatively, you can load each jurisdiction separately +cour_d_appel = load_dataset("antoinejeannot/jurisprudence", "cour_d_appel") +tribunal_judiciaire = load_dataset("antoinejeannot/jurisprudence", "tribunal_judiciaire") +cour_de_cassation = load_dataset("antoinejeannot/jurisprudence", "cour_de_cassation") +``` + +Leveraging datasets allows you to easily ingest data to [PyTorch](https://huggingface.co/docs/datasets/use_with_pytorch), [TensorFlow](https://huggingface.co/docs/datasets/use_with_tensorflow), [JAX](https://huggingface.co/docs/datasets/use_with_jax) etc. + +#### BYOL: Bring Your Own Lib +For analysis, using polars, pandas or duckdb is quite common and also possible: +```python +url = "https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet" # or tribunal_judiciaire.parquet, cour_d_appel.parquet + +# pip install polars +import polars as pl +df = pl.scan_parquet(url) + +# pip install pandas +import pandas as pd +df = pd.read_parquet(url) + +# pip install duckdb +import duckdb +table = duckdb.read_parquet(url) +``` + +## 🪪 Citing & Authors + +If you use this code in your research, please use the following BibTeX entry: +```bibtex +@misc{antoinejeannot2024, +author = {Jeannot Antoine and {Cour de Cassation}}, +title = {Jurisprudence}, +year = {2024}, +howpublished = {\url{https://github.com/antoinejeannot/jurisprudence}}, +note = {Data source: API Judilibre, \url{https://www.data.gouv.fr/en/datasets/api-judilibre/}} +} +``` + +This project relies on the [Judilibre API par la Cour de Cassation](https://www.data.gouv.fr/en/datasets/api-judilibre/), which is made available under the Open License 2.0 (Licence Ouverte 2.0) + +It scans 
the API every 3 days at midnight UTC and exports its data in various formats to Hugging Face, without any fundamental transformation other than format conversions. + +

license ouverte / open license

\ No newline at end of file