-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathTCPwLigtrs.sh
executable file
·84 lines (67 loc) · 2.4 KB
/
TCPwLigtrs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/sh
### TCPwLigtrs ##############################################
#
# Written by Matt Christy
# for The Early-Modern OCR Project
#
# Copyright 2013 The Early-Modern OCR Project
#
#########################################################
# Start from an empty terminal, then show the start-up banner.
clear
# One printf call prints the whole banner: the '%s\n' format is reused for
# each argument, so every quoted string below becomes one output line.
# The trailing blanks inside the quotes are part of the banner text.
printf '%s\n' \
  " " \
  " " \
  "### TCPwLigtrs ############################################" \
  "# " \
  "# A script to put ligatures and long-ses" \
  "# back into the TCP transcripts for " \
  "# The Early-Modern OCR Project (eMOP) " \
  "# " \
  "# Copyright 2013 - eMOP " \
  "# " \
  "#######################################################" \
  " " \
  " "
#### ------------------mjc: 05212013----------------------------------------------------
# The script takes an input directory name and runs on all the text files it
# finds there, replacing each of the given letter combinations (two or three
# letters long) with the equivalent ligature. For the first half of the text
# files present it also replaces every 's' with a long-s.
# As of 5/21/13 this is only known to work with one input dir at a time; it may
# also work when several dirs are given.
#
# The command looks like this:
## ==============================================================================
## sh ./TCPwLigtrs.sh <inputdir(s)>
## ==============================================================================
#### ------------------mjc: 052113----------------------------------------------------
#Loop through the passed params and process each input directory.
#
# For every <dir> argument the script
#   1. creates <dir>-lig and copies <dir>/*.txt into it, so the transcripts in
#      <dir> itself are never modified;
#   2. swaps the listed letter combinations for ligatures in every copy;
#   3. turns each remaining 's' into a long-s, but only in the first half of
#      the copies (glob order; with an odd count the middle page is left out).
#
# Only POSIX sh features are used (no arrays, no C-style for loops), so the
# code runs under the "sh ./TCPwLigtrs.sh" invocation shown above.
# Without "local", the helper variables are globals; they all carry a lig_
# prefix to keep them out of the way.

# sed program for the ligature pass. Three-letter combinations must come
# first: once "ss", "si", "st" or "ff" has been rewritten, "ssi", "sst" and
# "ffi" can no longer match.
# NOTE(review): as this copy of the file reads, several replacement fields are
# empty (sh, si, ss, ssi, sst, ct) and others look identical to their pattern
# (ff, fi, fl, ffi) -- presumably the ligature glyphs were lost somewhere in
# transit. Confirm against the project's master copy and restore them before
# running this on real transcripts.
lig_ligatures='s/ssi//g;s/sst//g;s/ffi/ffi/g;s/sh//g;s/st/ſt/g;s/si//g;s/ss//g;s/ff/ff/g;s/fi/fi/g;s/fl/fl/g;s/ct//g;s/ae/æ/g;'

# sed program for the long-s pass.
lig_long_s='s/s/ſ/g'

#######################################
# Filter a file through a sed program and store the result back in the file.
# A temporary file is used instead of "sed -i": BSD sed wants "-i ''" while
# GNU sed reads that '' as the (empty) program, so no single spelling of -i
# works with both.
# Arguments: $1 - sed program, $2 - file to rewrite
# Returns:   0 on success, 1 if the temp file, sed or the write-back failed
#######################################
lig_rewrite() {
  lig_tmp=$(mktemp "${TMPDIR:-/tmp}/TCPwLigtrs.XXXXXX") || return 1
  lig_rc=0
  if sed -e "$1" -- "$2" > "$lig_tmp"; then
    # cat rather than mv: the target keeps its own permissions and links.
    cat -- "$lig_tmp" > "$2" || lig_rc=1
  else
    lig_rc=1
  fi
  rm -f -- "$lig_tmp"
  return "$lig_rc"
}

#######################################
# Build <dir>-lig from <dir> and add ligatures / long-ses to the copies.
# Arguments: $1 - input directory (one trailing slash is tolerated)
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   1 if <dir>-lig cannot be created
#######################################
lig_process_dir() {
  # Drop one trailing slash (as left by tab completion) so that "book/" gives
  # "book-lig" instead of a directory named "-lig" inside "book".
  case $1 in
    ?*/) lig_src=${1%/} ;;
    *) lig_src=$1 ;;
  esac
  lig_out="${lig_src}-lig"

  echo " "
  echo "#####################################"
  printf '%s\n' "processing ${lig_src}/"
  echo "#####################################"
  printf '%s\n' "create ${lig_out}"
  echo "#####################################"

  # -p: a left-over <dir>-lig from an earlier run is reused, not an error.
  if ! mkdir -p -- "$lig_out"; then
    printf '%s\n' "cannot create ${lig_out}; skipping ${lig_src}" >&2
    return 1
  fi
  # Best effort: if some pages cannot be copied, still process the others.
  if ! cp -- "$lig_src"/*.txt "$lig_out"/; then
    printf '%s\n' "warning: could not copy every .txt file from ${lig_src}" >&2
  fi

  # Count the pages by globbing instead of parsing ls, so file names that
  # contain blanks or glob characters are handled correctly.
  lig_total=0
  for lig_page in "$lig_out"/*; do
    [ -f "$lig_page" ] && lig_total=$((lig_total + 1))
  done
  printf '%s\n' "$lig_total pages"
  echo "#####################################"

  #to count how many files we've changed
  lig_done=0
  for lig_page in "$lig_out"/*; do
    # Also skips the literal pattern that is left when the glob matches nothing.
    [ -f "$lig_page" ] || continue
    lig_name=${lig_page##*/}

    printf '%s\n' "adding ligatures to $lig_name"
    lig_rewrite "$lig_ligatures" "$lig_page" \
      || printf '%s\n' "failed to add ligatures to $lig_page" >&2

    #add the long-s to only the first half of the page count
    if [ "$lig_done" -lt $((lig_total / 2)) ]; then
      printf '%s\n' "adding long-s to $lig_name"
      lig_rewrite "$lig_long_s" "$lig_page" \
        || printf '%s\n' "failed to add long-s to $lig_page" >&2
    fi

    lig_done=$((lig_done + 1))
    echo "#####################################"
  done
}

if [ "$#" -eq 0 ]; then
  echo "usage: sh ./TCPwLigtrs.sh <inputdir(s)>" >&2
fi

# "$@" keeps every argument intact, including names that contain blanks.
for lig_arg in "$@"; do
  lig_process_dir "$lig_arg"
done