From 3a56ab89d29edb816fe0d25580daf40fcc17018d Mon Sep 17 00:00:00 2001 From: Ian Cornelius Date: Sat, 14 Dec 2024 11:06:52 -0600 Subject: [PATCH] initial commit --- .github/workflows/action.yaml | 47 ++++ .gitignore | 2 + README.md | 11 + docs/bibliography/items.yaml | 318 ++++++++++++++++++++++ docs/config/default.yaml | 10 + docs/documentation.md | 490 ++++++++++++++++++++++++++++++++++ schemas/bibliography.xsd | 69 +++++ schemas/glossary.xsd | 51 ++++ schemas/inscriptions.xsd | 25 ++ schemas/manuscripts.xsd | 75 ++++++ schemas/mssindex.xsd | 88 ++++++ schemas/printedbooks.xsd | 66 +++++ schemas/records.xsd | 237 ++++++++++++++++ scripts/build-docs.sh | 13 + scripts/validator.py | 35 +++ 15 files changed, 1537 insertions(+) create mode 100644 .github/workflows/action.yaml create mode 100644 .gitignore create mode 100644 README.md create mode 100644 docs/bibliography/items.yaml create mode 100644 docs/config/default.yaml create mode 100644 docs/documentation.md create mode 100644 schemas/bibliography.xsd create mode 100644 schemas/glossary.xsd create mode 100644 schemas/inscriptions.xsd create mode 100644 schemas/manuscripts.xsd create mode 100644 schemas/mssindex.xsd create mode 100644 schemas/printedbooks.xsd create mode 100644 schemas/records.xsd create mode 100755 scripts/build-docs.sh create mode 100644 scripts/validator.py diff --git a/.github/workflows/action.yaml b/.github/workflows/action.yaml new file mode 100644 index 0000000..ffa6689 --- /dev/null +++ b/.github/workflows/action.yaml @@ -0,0 +1,47 @@ +name: Release documentation + +on: + push: + branches: main + tags: + - v* +jobs: + deploy: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Working directory check + run: | + pwd + ls + - name: Install Pandoc + uses: r-lib/actions/setup-pandoc@v2 + - name: Install XeLaTeX + run: sudo apt-get install -y texlive-xetex + - name: Download and install fonts + run: | + wget -O 
Junicode.zip https://github.com/psb1558/Junicode-font/releases/download/v2.209/Junicode_2.209.zip + unzip Junicode.zip + mv Junicode/ /usr/share/fonts + fc-cache -fv + - name: Verify dependencies + run: | + pandoc --version + xetex --version + - name: Build + run: | + pushd ./docs/ + echo "Building the PDF..." + pandoc --metadata-file ./config/default.yaml --citeproc --number-sections --toc --pdf-engine xelatex documentation.md -o ../documentation.pdf + popd + - name: Verify build + run: ls documentation.pdf + - name: Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + documentation.pdf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..61704e5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +temp.* +*.pdf diff --git a/README.md b/README.md new file mode 100644 index 0000000..1a5caf5 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +This is the principal repository of the *Digital Index of Middle English Verse* (DIMEV). +Repository contents as follows: + +- `data/` DIMEV data files +- `docs/` Components for the documentation +- `schemas/` XSD files for validation of data files +- `scripts/` Python and Shell scripts + +For additional detail see the documentation. +Documentation is built and released with GitHub Actions. +For local builds an equivalent build script is supplied in the directory `scripts/`. diff --git a/docs/bibliography/items.yaml b/docs/bibliography/items.yaml new file mode 100644 index 0000000..7a3e604 --- /dev/null +++ b/docs/bibliography/items.yaml @@ -0,0 +1,318 @@ +--- +references: +- id: BenskinElectronicVersionLinguistic2013 + author: + - family: Benskin + given: M. + - family: Laing + given: M. + - family: Karaiskos + given: V. + - family: Williamson + given: K. 
+ citation-key: BenskinElectronicVersionLinguistic2013 + edition: version 1.1 + event-place: Edinburgh + issued: + - year: 2013 + - year: 0 + publisher: The Authors and The University of Edinburgh + publisher-place: Edinburgh + title: An electronic version of A Linguistic Atlas of Late Mediaeval English + type: book + URL: http://www.lel.ed.ac.uk/ihd/elalme/elalme.html + +- id: BoffeyNewIndexMiddle2005 + author: + - family: Boffey + given: Julia + - family: Edwards + given: A. S. G. + call-number: Z2014.P7 B655 2005 + citation-key: BoffeyNewIndexMiddle2005 + event-place: London + ISBN: 0-7123-4831-X + issued: + - year: 2005 + number-of-pages: '344' + publisher: British Library + publisher-place: London + source: www.franklin.library.upenn.edu Library Catalog + title: A new index of Middle English verse + type: book + +- id: BrownIndexMiddleEnglish1943 + citation-key: BrownIndexMiddleEnglish1943 + editor: + - family: Brown + given: Carleton + - family: Robbins + given: Rossell Hope + event-place: New York + issued: + - year: 1943 + number-of-pages: '785' + publisher: Columbia University Press + publisher-place: New York + source: orbis.library.yale.edu Library Catalog + title: The index of Middle English verse + type: book + +- id: BrownRegisterMiddleEnglish1916 + author: + - family: Brown + given: Carleton + citation-key: BrownRegisterMiddleEnglish1916 + event-place: Oxford + issued: + - year: 1916 + - year: 1920 + number-of-volumes: '2' + publisher: Oxford University Press + publisher-place: Oxford + title: A register of Middle English religious & didactic verse + type: book + +- id: DeanAngloNormanLiteratureGuide1999 + author: + - family: Dean + given: Ruth J. + - family: Boulton + given: Maureen B. M. 
+ call-number: PC2942 .D43 1999 + citation-key: DeanAngloNormanLiteratureGuide1999 + event-place: London + ISBN: 0-905474-38-4 + issued: + - year: 1999 + number-of-pages: '553' + publisher: Anglo-Norman Text Society + publisher-place: London + source: www.franklin.library.upenn.edu Library Catalog + title: 'Anglo-Norman literature: a guide to texts and manuscripts' + title-short: Anglo-Norman Literature + type: book + +- id: HamerManuscriptIndexIndex1995 + call-number: PR1203 H33X 1995 + citation-key: HamerManuscriptIndexIndex1995 + editor: + - family: Hamer + given: R. F. S. + event-place: London + ISBN: 0-7123-0387-1 + issued: + - year: 1995 + number-of-pages: '62' + publisher: British Library + publisher-place: London + source: orbis.library.yale.edu Library Catalog + title: A manuscript index to the "Index of Middle English Verse" + type: book + +- id: HealeyDictionaryOldEnglish2009 + citation-key: HealeyDictionaryOldEnglish2009 + editor: + - family: Healey + given: Antonette + dropping-particle: diPaulo + - family: Wilkin + given: John Price + - family: Xiang + given: Xin + event-place: Toronto + issued: + - year: 2009 + publisher: Dictionary of Old English Project + publisher-place: Toronto + title: Dictionary of Old English web corpus + type: book + URL: http://tapor.library.utoronto.ca/doecorpus/ + +- id: LaingLinguisticAtlasEarly2013 + author: + - family: Laing + given: Margaret + citation-key: LaingLinguisticAtlasEarly2013 + edition: version 3.2 + event-place: Edinburgh + issued: + - year: 2013 + - year: 0 + publisher: The University of Edinburgh + publisher-place: Edinburgh + title: A linguistic atlas of early Middle English, 1150–1325 + type: book + URL: http://www.lel.ed.ac.uk/ihd/laeme2/laeme2.html + +- id: LewisIndexPrintedMiddle1985 + author: + - family: Lewis + given: Robert E. + - family: Blake + given: N. F. + - family: Edwards + given: A. S. G. 
+ call-number: Z2014 P795 L49 1985 (LC) + citation-key: LewisIndexPrintedMiddle1985 + event-place: New York + ISBN: 0-8240-8839-5 + issued: + - year: 1985 + number-of-pages: '362' + publisher: Garland + publisher-place: New York + source: orbis.library.yale.edu Library Catalog + title: Index of printed Middle English prose + type: book + +- id: MayElizabethanPoetryBibliography2004 + author: + - family: May + given: Steven W. + - family: Ringler + given: William A. + citation-key: MayElizabethanPoetryBibliography2004 + event-place: London + ISBN: 978-0-8264-7278-6 + issued: + - year: 2004 + language: eng + number-of-pages: xx+2337 + publisher: Thoemmes Continuum + publisher-place: London + source: i-share-nby.primo.exlibrisgroup.com + title: >- + Elizabethan poetry: a bibliography and first-line index of English verse, + 1559-1603 + title-short: Elizabethan poetry + type: book + +- id: PollardShorttitleCatalogueBooks1950 + author: + - family: Pollard + given: Alfred W. + - family: Redgrave + given: G. R. + - family: Haller + given: William + call-number: Z2002 .P7 1950 copy1 + citation-key: PollardShorttitleCatalogueBooks1950 + event-place: London + issued: + - year: 1950 + number-of-pages: '609' + publisher: Bibliographical Society + publisher-place: London + source: Catalog of the Folger Shakespeare Library + title: >- + A short-title catalogue of books printed in England, Scotland, & Ireland and + of English books printed abroad, 1475-1640 + type: book + +- id: RandIndexMiddleEnglish2014 + author: + - family: Rand + given: Kari Anne + call-number: PR281 .I52 Index 1-20 + citation-key: RandIndexMiddleEnglish2014 + event-place: Woodbridge + ISBN: 978-1-84384-383-2 + issued: + - year: 2014 + note: 'OCLC: ocn903949216' + number-of-pages: '603' + publisher: D.S. 
Brewer + publisher-place: Woodbridge + source: orbis.library.yale.edu Library Catalog + title: 'The index of Middle English prose: index to volumes I-XX' + type: book + +- id: RinglerBibliographyIndexEnglish1988 + author: + - family: Ringler + given: William A. + call-number: Reference ; PR531 .R56 1988 + citation-key: RinglerBibliographyIndexEnglish1988 + event-place: London + ISBN: 978-0-7201-1892-6 + issued: + - year: 1988 + language: en + number-of-pages: viii+440 + publisher: Mansell + publisher-place: London + source: luc.primo.exlibrisgroup.com + title: Bibliography and index of English verse printed 1476-1558 + type: book + +- id: RinglerBibliographyIndexEnglish1992 + author: + - family: Ringler + given: William A. + - family: Rudick + given: Michael + - family: Ringler + given: Susan J. + citation-key: RinglerBibliographyIndexEnglish1992 + event-place: London + ISBN: 978-0-7201-2099-8 + issued: + - year: 1992 + language: en + note: 'OCLC: 26162572' + number-of-pages: '315' + publisher: Mansell + publisher-place: London + source: Open WorldCat + title: Bibliography and index of English verse in manuscript, 1501-1558 + type: book + +- id: RobbinsSupplementIndexMiddle1965 + author: + - family: Robbins + given: Rossell Hope + - family: Cutler + given: John L. 
+ citation-key: RobbinsSupplementIndexMiddle1965 + event-place: Lexington + issued: + - year: 1965 + number-of-pages: '551' + publisher: University of Kentucky Press + publisher-place: Lexington + source: orbis.library.yale.edu Library Catalog + title: Supplement to the "Index of Middle English Verse" + type: book + +- id: SharpeHandlistLatinWriters1997 + author: + - family: Sharpe + given: Richard + call-number: Z6605.L3 + citation-key: SharpeHandlistLatinWriters1997 + collection-number: '1' + collection-title: Publications of the Journal of medieval Latin + event-place: Turnhout + ISBN: 2-503-50575-9 + issued: + - year: 1997 + number-of-pages: '912' + publisher: Brepols + publisher-place: Turnhout + source: orbis.library.yale.edu Library Catalog + title: A handlist of the Latin writers of Great Britain and Ireland before 1540 + type: book + +- id: TEIConsortiumTEIP5Guidelines2024 + citation-key: TEIConsortiumTEIP5Guidelines2024 + edition: Version 4.8.0. Last updated on 2nd September 2024 + editor: + - literal: TEI Consortium + issued: + - year: 2024 + title: 'TEI P5: Guidelines for Electronic Text Encoding and Interchange' + title-short: TEI P5 + type: book + URL: http://www.tei-c.org/Guidelines/P5/ +... diff --git a/docs/config/default.yaml b/docs/config/default.yaml new file mode 100644 index 0000000..a6918f8 --- /dev/null +++ b/docs/config/default.yaml @@ -0,0 +1,10 @@ +--- +fontsize: 12pt +geometry: margin=1in +mainfont: Junicode +colorlinks: True +linkcolor: blue +secnumdepth: 4 +link-citations: True +bibliography: ./bibliography/items.yaml +--- diff --git a/docs/documentation.md b/docs/documentation.md new file mode 100644 index 0000000..377c1da --- /dev/null +++ b/docs/documentation.md @@ -0,0 +1,490 @@ +--- +title: The Digital Index of Middle English Verse +subtitle: A Technical Introduction +author: +- Ian Cornelius +- Michael Johnston +- Linne R. Mooney +- Daniel W. 
Mosser +date: \today +--- + +\newpage + +[dimev.net]: http://www.dimev.net +[manuscript description module]: https://tei-c.org/release/doc/tei-p5-doc/en/html/MS.html +[git]: https://git-scm.com/ + +# General Introduction + +The *Digital Index of Middle English Verse* (DIMEV) is a comprehensive, open-access bibliographic database of the surviving English-language verse from the period 1200--1525. +The aim is to record all original witnesses to all surviving Middle English verse. +Witnesses after 1525 are recorded only if derived from an exemplar that no longer survives. +For each witness, DIMEV aims to provide transcriptions of at least the first two lines and last two lines. +References to modern scholarly transcriptions and critical editions are supplied where available; references to digital facsimiles of witnesses have been added for some items. +Other recorded metadata include: author, scribe, verse form, subject-matter, standard bibliographic reference numbers, and linguistic descriptions of manuscript witness. + +Like any finding aid or bibliographic register, DIMEV aims (1) to enable students and researchers to identify, locate, and retrieve texts relevant to their research questions or areas of inquiry, and (2) to keep the surviving historical record visible, verifiable, and accessible. +The importance of DIMEV to its scholarly community is illustrated by a search for "DIMEV" on [Google Books](https://www.google.com/search?tbm=bks&q=dimev). +Search results show that DIMEV is employed in scholarship on Middle English literature and culture as a standard way of identifying texts under discussion and as a launching point for deeper study. + +DIMEV has been live online at [dimev.net] since 2011, updated regularly to supply omissions, record newly identified items and witnesses, record newly published transcriptions, editions, and facsimiles, and correct error. 
+**All such updates will cease after 31 December 2024**, to accommodate modernization of source data and maintenance practices. +In due course, the current DIMEV website will be replaced by a new one. +Until such time, users who seek the most current authoritative record of Middle English verse should consult DIMEV's source data directly. +This documentation serves as a guide to the source data. +Readers new to XML should consult an introductory grammar, for instance @TEIConsortiumTEIP5Guidelines2024, [A Gentle Introduction to XML](https://tei-c.org/release/doc/tei-p5-doc/en/html/SG.html). + +# Data files + +## Overview + +DIMEV data are serialized in seven XML files: + +- `Records.xml`: The principal file, collecting metadata on verse items, transcriptions of witnesses to verse items, and cross-references for acephalous or fragmentary texts +- `Manuscripts.xml`, `PrintedBooks.xml`, `Inscriptions.xml`: Bibliographic details for witnesses cited in `Records.xml` +- `MSSIndex.xml`: Another presentation of bibliographic details for manuscript witnesses cited in `Records.xml`, duplicating `Manuscripts.xml` in part, but including (1) some manuscripts not recorded there, and (2) hard-coded counts of the number of DIMEV items transmitted in each manuscript +- `Bibliography.xml`: Bibliographic details for modern transcriptions, editions, and facsimiles cited in DIMEV entries +- `Glossary.xml`: Definitions and dictionary references for some lexical items that appear in DIMEV transcriptions + +These XML files are located within the directory `data/` in DIMEV's principal GitHub repository. +(Note to co-authors: URL to be supplied in the published version, after I push my local files to GitHub.) + +Each file employs a custom structure, documented in two forms: + +1. A human-readable specification, provided in the following subsections of this documentation +1. 
Machine-readable schemas, written as XSD files and located in the directory `schemas/` + +The Python script `validator.py` can be used to validate XML files against the corresponding XSD file. +For additional instructions see the comment at the head of `validator.py`. + +## `Records.xml` + +### Overview {#overview-records} + +This XML file stores information on verse items, including extracts and acephalous texts. +Each verse item is represented within a `record` element. +`record` elements are serialized as children of the root element `records`. + +The XML structure makes an implicit distinction between two types of `record` elements: + +1. **Full `record` elements**. + These represent items with the bibliographic status of *works*. + Each has a unique identifier (recorded as the attribute `xml:id`) and a child element `witnesses`, listing one or more documentary witnesses to the item. +1. **Partial `record` elements**. + These represent independently circulated extracts, fragments, and acephalous texts of *works*. + Each supplies a cross reference to the relevant full `record` element. + Cross references are supplied in the child element `description`. + Partial `record` elements usually lack a unique identifier and most other child elements of full `record` elements, including `witnesses`. + +### Document structure {#doc-struct-records} + +#### Root element {#root-records} + +- Name: `records` +- Description: contains a collection of bibliographic entries for Middle English verse items +- Content: `record` elements + +#### Child elements of `records` + +- Name: `record` +- Description: contains a bibliographic entry for a single Middle English verse item +- Content and Attributes: as detailed in the following sections. + **Note**: the specification in the following sections applies to full `record` elements, as defined above in the [Overview](#overview-records) to `Records.xml`. 
+ +#### Attributes of `record` + +Each full `record` element carries attributes that supply (1) a unique identifier for the item and (2) cross-reference to corresponding item records in prior bibliographic surveys of Middle English verse. +The attributes are: + +- `xml:id`: This is a unique identifier, assigned by DIMEV editors. + The unique identifier has two hyphen-delimited components: (1) the invariant string `record`; (2) a number, usually integer but sometimes with one or two decimal places. + The unique identifier allows for cross-reference within `Records.xml` and from DIMEV's other XML files. + The numerical component of the unique identifier is the "DIMEV number" for the item; it may be used in scholarship as a persistent bibliographic pointer to the item. +- `imev`: The number assigned to the corresponding item in @BrownIndexMiddleEnglish1943 or @RobbinsSupplementIndexMiddle1965. +- `nimev`: The number assigned to the corresponding item in @BoffeyNewIndexMiddle2005. + Alternatively (when the value is prefaced by "TM"), the reference is to the corresponding item in @RinglerBibliographyIndexEnglish1992. + +#### Child elements of `record` + +Each full `record` element may have the following child elements. +Except `name` and `alpha`, which are required, all child elements (and their children, recursively) are optional. + +- `name`. The *incipit*, or first line of the verse item, employed for purposes of identification in a textual tradition in which most items are anonymous and untitled. + Spellings are standardized. + Data type: usually string; may contain inline formatting or `gloss` elements. +- `alpha`. Another standardized first line, manually forced to downcase string and sometimes truncated. + This is used for alphabetical sorting of items. + Data type: string +- `description`. Descriptive commentary on the item. 
+ Data type: free text with mixed content (recursively mixed where allowed), including inline formatting and references to witnesses, scholarly publications, and other item records. +- `descNote`. More descriptive commentary on the item, more limited in scope or importance. + Data type: free text with mixed content like `description`. +- `authors`. Any generally accepted author attributions for the item. + Dubious attributions may be flagged with a question mark in the child element `suffix` (see below). + Author attributions given by documentary witnesses are transcribed within the element `MSAuthor`, a child of `witness`. + See [Child elements of `witness`]. + Data type: an array of one or more child elements with the tag `author`. + Each `author` element is a structured sequence with child elements `last`, `first`, and `suffix`, each of which has string content. + There is no controlled vocabulary. +- `titles`. Any titles generally assigned to the item in modern scholarship. + Titles given by documentary witnesses are transcribed within the element `MSTitle`, a child of `witness`. + See [Child elements of `witness`]. + Data type: an array of one or more child elements with the tag `title`; alternatively, a single `title` element may appear as the direct child of `record`. + Content of the element `title` is ordinary string but may contain inline formatting. +- `subjects`. Descriptive keywords for content. + Data type: an array of one or more child elements with the tag `subject`. + Content of the element `subject` is ordinary string but may contain inline formatting. + There is no controlled vocabulary. +- `verseForms`. Descriptive keywords for prosodic characteristics. + Data type: an array of one or more child elements with the tag `verseForm`. + Content of the element `verseForm` is string. + There is no controlled vocabulary. +- `versePatterns`.
Descriptive keywords for rhyme scheme and other prosodic characteristics, sometimes overlapping with `verseForms`. + Data type: an array of one or more child elements with the tag `versePattern`. + Content of the element `versePattern` is string. + There is no controlled vocabulary. +- `languages`. Names of languages employed in the item, other than English. + Data type: an array of one or more child elements with the tag `language`. + There is no controlled vocabulary. +- `ghosts`. Any bibliographic ghosts, that is, documents which, in prior scholarly tradition, are erroneously claimed to contain a copy of this item. + Data type: an array of one or more child elements with the tag `ghost`, each of which contains free text with mixed content (recursively mixed where allowed), including inline formatting and references to witnesses, scholarly publications, and other item records. +- `witnesses`. The original witnesses to the item (usually manuscripts, sometimes inscriptions or early printed books). + Data type: an array of one or more child elements with the tag `witness`. + See [Attributes of `witness`] and [Child elements of `witness`]. + +#### Attributes of `witness` + +Each `witness` element carries attributes that (1) supply a unique identifier for the witness and (2) indicate whether the witness has accompanying music or illustration. +The attributes are: + +- `xml:id`. This is a unique identifier for the witness, assigned by DIMEV editors. +It has three hyphen-delimited components: (1) the invariant string `wit`; (2) the "DIMEV number" for the item (see [Attributes of `record`]); (3) an integer designating this witness. +The unique identifier allows for cross-reference within `Records.xml` and from DIMEV's other XML files. +- `illust`. Report of accompanying illustrations. Values are 'y' (for *yes*) and 'n' (for *no*). +- `music`. Report of accompanying music. Values are 'y' (for *yes*) and 'n' (for *no*).
+ +**Caveat lector**: The final component of DIMEV's unique identifier for a witness may differ from the positional number assigned to that witness on [dimev.net]. +The witness numbers printed on the current website are generated anew during each build, as a function of witness order, and can change without notice. +Scholars are advised to reference witnesses by manuscript shelfmarks, not the number assigned to a given witness on DIMEV's current website. + +#### Child elements of `witness` + +Each `witness` element may have the following child elements. +All child elements are optional. + +- `allLines`. A full transcription of the item as transmitted in the document. + Data type: text with inline formatting. +- `firstLines`. A transcription of the first lines of the item as transmitted in the document. + Data type: text with inline formatting. +- `lastLines`. A transcription of the last lines of the item as transmitted in the document. + Data type: text with inline formatting. + The elements `firstLines` and `lastLines` may repeat in alternation. + This is employed when a witness comprises two or more discontinuous fragments or excerpts of the item. +- `source`. Bibliographic citation for the witness. + Data type: a complex element containing attributes and child elements. + See [Attributes of `source`] and [Child elements of `source`]. +- `sourceNote`. Notes regarding the witness. + Data type: free text with mixed content (recursively mixed where allowed), including inline formatting and references to scholarly publications and other witnesses, items, or documents. +- `MSAuthor`. Any author attribution transmitted with the witness. + Data type: free text with mixed content (recursively mixed where allowed), including inline formatting and references to scholarly publications and other witnesses, items, or documents. +- `MSTitle`. Any title attribution transmitted with the witness. 
+ Data type: free text with mixed content (recursively mixed where allowed), including inline formatting and references to scholarly publications and other witnesses, items, or documents. +- `facsimiles`. References to published facsimile reproductions of the source document. + Data type: an array of one or more child elements with the tag `facsimile`. + Each child element carries an attribute `key`, which links to an entry in the XML file `Bibliography.xml`. + The content of `facsimile` is often string but may be mixed. + **Note**: facsimiles are recorded by DIMEV under individual witnesses, even when the facsimile reproduction is of the entire source document. +- `editions`. References to modern scholarly transcriptions and editions. + Data type: an array of one or more child elements with the tag `edition`. + Each child element carries an attribute `key`, which links to an entry in the XML file `Bibliography.xml`. + The content of `edition` is often string but may be mixed. + **Note**: critical editions are recorded by DIMEV under the witness employed as base text. + This confuses the categories of critical and diplomatic editions. + +#### Attributes of `source` + +Each `source` element carries two attributes, as follows: + +- `key`. This identifies the source document by linking to a unique entry in the XML files `Manuscripts.xml`, `PrintedBooks.xml`, or `Inscriptions.xml`. +- `prefix`: This indicates the unit of navigation (folios, pages, or signatures). + +#### Child elements of `source` + +Each `source` element may have the following child elements: + +- `start`. The location at which the text of the witness begins. + Data type: string. +- `end`. The location at which the text of the witness ends. + Data type: string. + +Note: + +- Folio references consisting of a number alone are rectos. + Versos are indicated by 'v' after the folio number. +- The element `start` will appear alone if the text of the witness occupies a single side of one leaf. 
+- The elements `start` and `end` may repeat in alternation to record discontinuous ranges. + +### Technical direction {#tech-dir-records} + +- Atomize, writing each full `record` element to a separate file within a new sub-directory `records/`, to enable effective use of [git] distributed file history +- Collect partial `record` elements (i.e., those `record` elements serving as cross-references to full `record` elements) in a single separate file, perhaps named `cross-references.xml` +- Delete the element `alpha`. This can be generated on demand by script +- Separate references to @BoffeyNewIndexMiddle2005 and @RinglerBibliographyIndexEnglish1992 + (Cite Ringler's bibliography directly, as an independent authority) +- Supply references to the Middle English Compendium, where available + (This is part of a general expansion of bibliographical reference, beyond the linear tradition of indexes of Middle English verse) +- Restructure repeated `firstLines` and `lastLines` elements to allow for narrower validation by the XSD file +- In the element `name`, disaggregate name suffixes and query marks indicating dubious attributions +- Allow for critical editions to be attached to the `record` element, rather than only the `witness` sub-element +- Supply controlled vocabularies for the content of `subject`, `verseForm`, `verseElement`, and `language` + +## `Manuscripts.xml` + +### Overview {#overview-manuscripts} + +This XML file stores bibliographic information on medieval manuscripts cited as witnesses in the XML file `Records.xml`. +It also includes some individual copies of early printed books, cited within `Records.xml` for manuscript additions entered into them. +Other such copies are recorded instead within `PrintedBooks.xml`. +See [`PrintedBooks.xml`], below. + +Each entry is contained within a uniquely identified `item` element. +`item` elements are serialized as children of the root element `mss`.
+ +### Document structure {#doc-struct-manuscripts} + +#### Root element {#root-manuscripts} + +- Name: `mss` +- Description: Represents a collection of bibliographic entries for medieval manuscripts (and particular copies of early printed books) +- Content: Contains `item` elements + +#### Child elements of `mss` + +- Name: `item` +- Description: Represents a single bibliographic entry for a medieval manuscript (or a particular copy of an early printed book) +- Attributes: `xml:id`. The attribute value must be unique. It allows bibliographic entries in `Manuscripts.xml` to be referenced within DIMEV's other XML files. +- Content: Child elements `loc`, `repos`, `desc`, and `lang`, as described in the following section + +#### Child elements of `item` + +- `loc`: The city or town in which the manuscript is held at present. + Equivalent to the element `settlement` in @TEIConsortiumTEIP5Guidelines2024, [manuscript description module]. + Data type: Usually string; may contain inline formatting. +- `repos`: The repository in which the manuscript is held at present. + Equivalent to the elements `institution` and `repository` in the TEI [manuscript description module]. + Data type: Usually string; may contain inline formatting. +- `desc`: The present shelfmark of the manuscript. + Equivalent to the elements `collection` and `idno` in the TEI [manuscript description module]. + Previous shelfmarks may be supplied after the current shelfmark; these are usually enclosed in square brackets and prefixed with "*olim*". +- `lang`: Localization of the language of the manuscript, with reference to @LaingLinguisticAtlasEarly2013, @BenskinElectronicVersionLinguistic2013, and subsequent scholarship. + Data type: free text with mixed content (recursively mixed where allowed), including inline formatting, inset elements `langGrid` and `place`, and references to scholarly publications and other manuscript items. 
+ +### Technical direction {#tech-dir-manuscripts} + +- De-duplicate and reconcile with the XML file `MSSIndex.xml` +- Distinguish manuscripts from (particular copies of) early printed books, perhaps by adding a new attribute `type` to `item` elements +- Atomize, writing each full `item` element to a separate file within a new sub-directory `sources/`, to enable effective use of [git] distributed file history +- Rename and restructure element names to align with guidelines of the TEI [manuscript description module] +- Extract previous shelfmarks to a new element, `altIdentifier` +- Record facsimiles, perhaps within a new element `surrogates` + +## `MSSIndex.xml` + +### Overview {#overview-mssindex} + +The XML file `MSSIndex.xml` originated as a transformation of `Manuscripts.xml`. +It was designed to store (in a structure convenient to the XSLT scripts that build the current website) hard-coded counts of DIMEV items transmitted by each manuscript. +The two files are largely redundant, yet they have separate file histories. +In recent years the usual practice has been to enter new manuscripts into `MSSIndex.xml` only; this file now has about 170 manuscripts not recorded in `Manuscripts.xml`. + +### Document structure {#doc-struct-mssindex} + +#### Root element {#root-mss-index} + +- Name: `records` +- Description: Represents a collection of bibliographic entries for medieval manuscripts, with counts of DIMEV items transmitted by each, arranged hierarchically by geographic location and repository name +- Content: Contains `loc` elements, which recursively contain elements `repository` and `item` + +#### `loc`, `repository` and `item` + +The elements `loc`, `repos` and `item` form a tree in which each `loc` element, representing a geographical location, has one or more child elements named `repos`, representing a holding institution, and each `repos` element has one or more child elements named `item`, representing manuscripts. 
+ +The elements `loc`, `repos`, and `item` are semantically equivalent to the elements with those names in the XML file `Manuscripts.xml`, but the semantics are expressed by different structures. +In `Manuscripts.xml` geographical locations and holding institutions are expressed as content of the elements `loc` and `repos`, respectively; in `MSSIndex.xml` these data are expressed as values of the attribute `key`, carried by the elements `loc` and `repos`, respectively. +`item` elements in both files have the identical attribute `xml:id`. + +#### Child elements of `item` + +The element `item` has child elements `desc`, `lang`, and `count`. +The elements `desc` and `lang` are redundant with the synonymous elements in `Manuscripts.xml`. +The element `count` stores a count of DIMEV items transmitted by the manuscript. + +### Technical direction {#tech-dir-mssindex} + +- De-duplicate and reconcile with `Manuscripts.xml` +- Generate counts by script on build; do not hard-code + +## `Inscriptions.xml` + +### Overview {#overview-inscr} + +This XML file stores bibliographic information on inscriptions and other epigraphic texts cited as witnesses in the XML file `Records.xml`. +Each entry is contained within a uniquely identified `item` element. +`item` elements are serialized as children of the root element `inscriptions`. + +### Document structure {#doc-struct-insr} + +The structure of `Inscriptions.xml` is essentially the same as `Manuscripts.xml`, except `Inscriptions.xml` has no element `lang`. + +### Technical direction {#tech-dir-inscr} + +- Add an attribute `type` with value "inscription" to `item` elements +- Atomize, writing each full `item` element to a separate file within a new sub-directory `sources/`, to enable effective use of [git] distributed file history + +## `PrintedBooks.xml` + +### Overview {#overview-printed-books} + +This XML file stores bibliographic information on early printed books cited as witnesses in the XML file `Records.xml`.
+Each entry is contained within a uniquely identified `bibl` element. +`bibl` elements are serialized as children of the root element `books`. + +An identical XML structure is employed to record two types of witness: + +1. Printed texts, transmitted in all intact copies of a given edition +2. Manuscript additions to a particular copy + +`bibl` elements of the second type are rare (perhaps only eight in all). +More commonly, references to particular copies are recorded within the files `Manuscripts.xml` and `MSSIndex.xml`. +See the [Overview to `Manuscripts.xml`](#overview-manuscripts), above. + +### Document structure {#doc-struct-printed-books} + +#### Root element {#root-printed-books} + +- Name: `books` +- Description: Represents a collection of bibliographic entries for early printed books +- Content: Contains `bibl` elements + +#### Child elements of `books` + +- Name: `bibl` +- Description: Represents a single bibliographic entry for an early printed book +- Attributes: `xml:id` and `n`. + The value of `xml:id` must be unique; it allows entries in `PrintedBooks.xml` to be referenced within DIMEV's other XML files. + The attribute `n` usually gives the corresponding item number in @PollardShorttitleCatalogueBooks1950, where available. + For non-English books and other items not recorded in the STC, the value of `n` is often a dummy string ("X"). + Sometimes another value is used. +- Content: Child elements `loc`, `DIMEVCount`, `authorstmt`, `titlestmt`, `pubstmt`, `repos`, and `desc`, as described in the following section + +#### Child elements of `bibl` + +- `loc`. This element is effectively unused. Its content is the invariant string "Printed book" in all cases +- `DIMEVCount`. A hard-coded count of DIMEV items transmitted by the item. + Data type: string. +- `authorstmt`. Parent of the element `author`, which may contain an author attribution. + Data type of `author`: string. +- `titlestmt`. Parent of the element `title`, which contains the item title. 
+ Data type of `title`: attribute and string. + The `title` element carries the attribute `level`, the value of which is always "m" (for "monograph"). +- `pubstmt`. Agent, date, and location of printing, as given in the volume or as reconstructed. + The element `pubstmt` carries a required attribute `date`, which gives the year of publication (Gregorian calendar, Common Era). + Data type: attribute and string. +- `repos`. Usually empty, except when the `bibl` element points to a particular copy, in which case the `repos` element gives the holding institution of the cited copy. +- `desc`. Content varies, depending on the reference of the `bibl` element. + When the `bibl` element points to an edition, the `desc` element usually repeats and concatenates the content of `authorstmt`, `title`, and `pubstmt`; + when the `bibl` element points to a particular copy, the `desc` element gives the shelfmark of the copy, sometimes in combination with the cataloging metadata usually given for editions. + Data type: free text with mixed content. + +### Technical direction {#tech-dir-printed-books} + +- Distinguish between references to copies and editions +- Extract the references to particular copies: re-serialize these to conform to the data structure employed for references to manuscripts, and move to the new directory `sources/` +- For references to editions: implement a standard data structure for XML serialization of bibliographic metadata for early printed books +- Disaggregate references to @PollardShorttitleCatalogueBooks1950 from other content of the attribute `n` + +## `Bibliography.xml` + +### Overview {#overview-bibliography} + +This XML file stores bibliographic information for modern scholarly publications. +Each bibliographic entry is contained within a uniquely identified `bibl` element. +`bibl` elements are serialized as children of the root element `bibliography`. 
+ +### Document structure {#doc-struct-bibliography} + +#### Root element {#root-bibliography} + +- Name: `bibliography` +- Description: Represents a collection of bibliographic entries for modern scholarly publications +- Content: Contains `bibl` elements + +#### Child elements of `bibliography` + +- Name: `bibl` +- Description: Represents a bibliographic entry for a modern scholarly publication +- Attributes: `xml:id`. The attribute value must be unique. It allows entries in `Bibliography.xml` to be referenced within DIMEV's other XML files. +- Content: Contains the elements `authorstmt`, `titlestmt`, `pubstmt`, and `index`. + The elements `authorstmt`, `titlestmt`, and `pubstmt` are based loosely on early versions of TEI elements for encoding bibliographic citations: compare @TEIConsortiumTEIP5Guidelines2024, guidelines on [Bibliographic Citations and References](https://tei-c.org/release/doc/tei-p5-doc/en/html/CO.html#COBI). + The element `index` has the child elements `person` and `topic`. + These are designed to store content keywords, loosely equivalent to the `KW` tag in [RIS](https://en.wikipedia.org/wiki/RIS_(file_format)). + +### Technical direction {#tech-dir-bibliography} + +- Migrate to a standard format for bibliographic data +- Supply, as content keywords for items in `Bibliography.xml`, the DIMEV item `id`s by which they are cited (i.e., backlink from modern bibliographic items to the verse item records that cite them) +- Extract links to on-line facsimiles of manuscripts for separate treatment, probably within the data structure for manuscripts +- Import the reformatted bibliographic data to [Zotero](https://www.zotero.org/) for distribution and curation on that platform + +## `Glossary.xml` + +To be supplied. +For now see the XSD file `glossary.xsd`. 
+ +# History and responsibility + +DIMEV incorporates and extends a bibliographic tradition whose principal earlier efforts are @BrownRegisterMiddleEnglish1916, @BrownIndexMiddleEnglish1943, @RobbinsSupplementIndexMiddle1965, and @BoffeyNewIndexMiddle2005, and it shares a point of origin with the last of these. +In 1993 Julia Boffey, A. S. G. Edwards, and Linne R. Mooney obtained funding to produce an updated index of Middle English verse, which would combine @BrownIndexMiddleEnglish1943 and @RobbinsSupplementIndexMiddle1965, correct error, and record materials that had come to light in the previous thirty years. +In the first year of collaboration the team divided; DIMEV subsequently emerged as a distinct venture, distinguished in three respects from prior efforts in its tradition: + +1. Digital medium +2. Return to primary sources +3. Transcription of opening and closing lines of witnesses + +Digital medium has shaped the project fundamentally: +it allows for incremental updates and correction of error; +it also removes constraints of space, allowing DIMEV to give much fuller attention to individual witnesses than was possible in print indexes. +Transcriptions of opening and closing lines provide a snapshot of textual and linguistic variation among witnesses. +They are a product of DIMEV's general survey method, returning to primary sources and verifying each reference anew. +This survey method serves also to remove accreted error from the bibliographic tradition. +The three distinctive features of DIMEV form a coherent working method. + +Mooney's initial work on DIMEV was assisted by Elizabeth Solopova and funded by a grant from the National Endowment for the Humanities (University of Maine, 1995--97). +In 2004 Mooney was offered a position at the University of York, a post that enabled her to resume data collection in UK libraries. +From 2007 onwards she has been aided by Daniel W. 
Mosser, who became involved while in York on a Leverhulme Visiting Professorship (2007--9). +Mosser provided the technical vision and contacts to serialize DIMEV data in XML. +With assistance from Dave Radcliffe, DIMEV went live online in 2011. + +In 2012 Mooney obtained funding from the Modern Humanities Research Association to hire a part-time research assistant. +In this position Deborah Thorpe continued DIMEV's survey of British Library manuscripts. +The survey of British Library manuscripts was completed by Mooney in 2024, supported by an Emeritus Fellowship for two years from the Leverhulme Trust (awarded 2022). + +In fall 2024 Ian Cornelius wrote XSD files to replace machine-generated DTDs as the mechanism for validation of XML. +The XSD files are constructed iteratively, from the ground up. +XML syntax errors discovered in this process are corrected one by one, by Cornelius and Mosser. +This is a first step toward modernization of DIMEV data. +Subsequent steps are projected within the section [Data Files], above (see the subsections headed "Technical direction"). +Cornelius is lead author of this documentation, bearing primary responsibility for technical components. + +Since 2007 Mooney and Mosser have served as co-editors of DIMEV, sharing responsibility for content. +Mosser has served as technical editor, maintaining data files and pushing updates to [dimev.net]. +In January 2025 primary responsibility for DIMEV will pass to Cornelius and Michael Johnston. +Mooney and Mosser will continue as advisors to the new editors. + +# Licensing + +DIMEV source data, this documentation, and other contents of the repository that contains them are released under a [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/).
+ +# Works Cited diff --git a/schemas/bibliography.xsd b/schemas/bibliography.xsd new file mode 100644 index 0000000..3cc299d --- /dev/null +++ b/schemas/bibliography.xsd @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schemas/glossary.xsd b/schemas/glossary.xsd new file mode 100644 index 0000000..1a5f699 --- /dev/null +++ b/schemas/glossary.xsd @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schemas/inscriptions.xsd b/schemas/inscriptions.xsd new file mode 100644 index 0000000..b382ce7 --- /dev/null +++ b/schemas/inscriptions.xsd @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schemas/manuscripts.xsd b/schemas/manuscripts.xsd new file mode 100644 index 0000000..640a9e1 --- /dev/null +++ b/schemas/manuscripts.xsd @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schemas/mssindex.xsd b/schemas/mssindex.xsd new file mode 100644 index 0000000..7bdf308 --- /dev/null +++ b/schemas/mssindex.xsd @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schemas/printedbooks.xsd b/schemas/printedbooks.xsd new file mode 100644 index 0000000..cb59ba7 --- /dev/null +++ b/schemas/printedbooks.xsd @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schemas/records.xsd b/schemas/records.xsd new file mode 100644 index 0000000..2c2a7c7 --- /dev/null +++ b/schemas/records.xsd @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/build-docs.sh b/scripts/build-docs.sh new file mode 100755 index 0000000..07d4c90 --- /dev/null +++ b/scripts/build-docs.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# This script builds the documentation as a PDF from components in the +# directory `doc/`. The script is intended to test local builds; it is +# equivalent to the build performed in the GitHub Action. Dependencies are +# those provided in the GitHub Action. +# +# Run the script from a child directory of root. + +pushd ../docs/ +echo "Building the PDF..." +pandoc --metadata-file ./config/default.yaml --citeproc --number-sections --toc --pdf-engine xelatex documentation.md -o documentation.pdf +popd diff --git a/scripts/validator.py b/scripts/validator.py new file mode 100644 index 0000000..42f2452 --- /dev/null +++ b/scripts/validator.py @@ -0,0 +1,35 @@ +# This Python script validates an XML file against its XSD schema. To specify +# the XML file to be validated, update the value of `xml_file`. Run the script +# from the root directory. 
import xmlschema

# Name of the data file (looked up under `data/`) to validate. Edit this value
# to validate a different file.
xml_file = 'Manuscripts.xml'

# Each data file name mapped to the XSD schema (looked up under `schemas/`)
# used to validate it.
file_pairs = {
    'Bibliography.xml': 'bibliography.xsd',
    'Glossary.xml': 'glossary.xsd',
    'Inscriptions.xml': 'inscriptions.xsd',
    'Manuscripts.xml': 'manuscripts.xsd',
    'MSSIndex.xml': 'mssindex.xsd',
    'PrintedBooks.xml': 'printedbooks.xsd',
    'Records.xml': 'records.xsd',
}

# Stop with a readable message when `xml_file` has no registered schema (for
# example, after a typo in the file name). Without this check `schema_file`
# would be left unbound and the script would die with a NameError below.
try:
    schema_file = file_pairs[xml_file]
except KeyError:
    known_files = ', '.join(sorted(file_pairs))
    raise SystemExit(
        f'No schema registered for {xml_file!r}; expected one of: {known_files}'
    ) from None

print(f'Validating XML file {xml_file} with {schema_file}')

# Load the XSD schema
schema = xmlschema.XMLSchema('schemas/' + schema_file)

# Validate the XML file. Only the call that can raise sits inside `try`; the
# success message is printed from `else`.
try:
    schema.validate('data/' + xml_file)
except xmlschema.XMLSchemaValidationError as e:
    print("XML validation failed:")
    print(e)
else:
    print("XML is valid")