-
Notifications
You must be signed in to change notification settings - Fork 7
/
08__linguist_and_cloc__02__extract.sh
executable file
·133 lines (115 loc) · 4.12 KB
/
08__linguist_and_cloc__02__extract.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env bash
# Copyright 2019-2024 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
##############################################################################################################
# Extract key results from the files generated by ...
# "GitHub Linguist" - https://github.com/github/linguist
# "Count Lines of Code" (CLOC) - https://github.com/AlDanial/cloc
##############################################################################################################
# ----- Please adjust
# (no user-adjustable settings are declared in this section)
# ------ Do not modify
# Linguist version taken from the environment; not referenced again in this file.
VERSION="${LINGUIST_VERSION}"
# get_step is not defined in this file - presumably provided by the calling environment (TODO confirm).
STEP=$(get_step)
# Directory holding the extracted results; its name is derived from REPORTS_DIR and the step.
BASE_DIR="${REPORTS_DIR}/${STEP}__LINGUIST"
# Exported log file path, located next to the report directory.
export LOG_FILE="${BASE_DIR}.log"
# Semicolon-separated inputs read by the generate_csv_data__* functions.
INPUT_CLOC_FILE="${BASE_DIR}/_CLOC_results_extracted.csv"
# NOTE(review): "LINGUSIT" is a misspelling of "LINGUIST"; main reads the variable under this exact name.
INPUT_LINGUSIT_FILE="${BASE_DIR}/_LINGUIST_results_extracted.csv"
# Comma-separated outputs written by the generate_csv_data__* functions.
OUTPUT_CLOC_FILE="${BASE_DIR}/_CLOC_results_generated.txt"
OUTPUT_LINGUIST_FILE="${BASE_DIR}/_LINGUIST_results_generated.txt"
# Splits a string into words and prints one word per line.
# Arguments:
#   $1 - string to split; words are separated according to the current IFS
# Outputs:
#   every word on its own line on stdout (nothing for an empty or blank string)
# The body is a subshell "( ... )" so that disabling pathname expansion and the
# loop variable stay confined to this call.
function tokenize() (
  # Without noglob, a word such as "*" would be replaced by matching file names.
  set -f
  # ${1} is intentionally unquoted: word splitting is the purpose of this function.
  for WORD in ${1}; do
    # printf instead of echo: words such as "-n" or "-e" must be printed verbatim.
    printf '%s\n' "${WORD}"
  done
)
# Consolidates the CLOC results and prepares them for their visualisation.
#
# Input lines are semicolon-separated: <group>;<app>;<language>;<count>;<language>;<count>;...
# The output is a comma-separated matrix: a header "App,<language>,..." listing every
# language found in the input (sorted, spaces replaced by "_"), followed by one row per
# distinct non-empty input line (in reverse sort order) holding the count of each
# language for that application, or 0 when the application does not use it.
#
# Arguments:
#   $1 - input file (semicolon-separated)
#   $2 - output file (overwritten)
function generate_csv_data__CLOC() {
  local INPUT_FILE="${1}"
  local OUTPUT_FILE="${2}"
  local INDEX_FIRST_TECH=3
  # Replaces the ';' that follows each "<language>;<count>" pair with a space.
  local PAIR_SPLITTER='s/\([^\;]*\;[^\;]*\);/\1\ /g'
  local NUMBER_REGEX='^[0-9.]+'
  local ALL_MIXED_LANGS HEADER ALL_LANG LINE APP LANGS PAIR TOTAL ENTRY
  local -a ALL_LANGS=() PAIRS=()

  log_console_info "Generate CLOC CSV file"

  # Newlines are turned into spaces (not removed) so that the last pair of a line
  # without a trailing ';' is not fused with the first pair of the next line.
  ALL_MIXED_LANGS=$(cut -d';' -f"${INDEX_FIRST_TECH}"- <"${INPUT_FILE}" | tr ' ' '_' | sed "${PAIR_SPLITTER}" | tr '\n' ' ')
  while IFS= read -r ALL_LANG; do
    if [[ -n "${ALL_LANG}" ]]; then
      ALL_LANGS+=("${ALL_LANG}")
    fi
  done < <(tokenize "${ALL_MIXED_LANGS}" | cut -d';' -f1 | sort | uniq)

  HEADER="App"
  for ALL_LANG in "${ALL_LANGS[@]}"; do
    HEADER+=",${ALL_LANG}"
  done
  echo "${HEADER}" >"${OUTPUT_FILE}"

  while read -r LINE; do
    #GROUP=$(echo $LINE | cut -d';' -f1)
    APP=$(printf '%s\n' "${LINE}" | cut -d';' -f2)
    # Same normalisation (' ' -> '_') as for the header, otherwise multi-word
    # language names could never be found in the row.
    LANGS=$(printf '%s\n' "${LINE}" | cut -d';' -f"${INDEX_FIRST_TECH}"- | tr ' ' '_' | sed "${PAIR_SPLITTER}")
    read -r -a PAIRS <<<"${LANGS}"

    ENTRY="${APP}"
    for ALL_LANG in "${ALL_LANGS[@]}"; do
      TOTAL=""
      for PAIR in "${PAIRS[@]}"; do
        # Literal, whole-name comparison: "C" must not match "Objective_C" and
        # characters such as '.' or '+' in a name are not treated as a pattern.
        if [[ "${PAIR}" == "${ALL_LANG};"* && "${PAIR#*;}" =~ ${NUMBER_REGEX} ]]; then
          # The first matching pair wins; only the leading numeric part is kept.
          TOTAL="${BASH_REMATCH[0]}"
          break
        fi
      done
      # A language that is absent (or has no numeric count) is reported as 0.
      ENTRY+=",${TOTAL:-0}"
    done
    echo "${ENTRY}" >>"${OUTPUT_FILE}"
  done < <(sort -r <"${INPUT_FILE}" | awk 'NF' | uniq)
}
# Consolidates the Linguist results and prepares them for their visualisation.
#
# Input lines are semicolon-separated; field 2 is the application name, field 6 the
# number of lines of code, and from field 7 on come "<language>;<percentage>%" pairs.
# The output is a comma-separated matrix: a header "App,<language>,...,Other" listing
# every language found in the input (sorted, spaces replaced by "_"), followed by one
# row per distinct non-empty input line (in reverse sort order). Each language cell
# holds lines_of_code * percentage / 100 rounded to an integer, or 0 when the
# application does not use the language. "Other" receives the whole lines-of-code
# value when no language matched for the application, and 0 otherwise.
#
# Arguments:
#   $1 - input file (semicolon-separated)
#   $2 - output file (overwritten)
function generate_csv_data__LINGUIST() {
  local INPUT_FILE="${1}"
  local OUTPUT_FILE="${2}"
  local INDEX_FIRST_TECH=7
  # Replaces the ';' that follows each "<language>;<percentage>%" pair with a space.
  local PAIR_SPLITTER='s/\([^\;]*\;[^\;]*\);/\1\ /g'
  # Captures the (possibly empty) numeric part directly followed by '%'.
  local PERCENT_REGEX='^([0-9.]*)%'
  local ALL_MIXED_LANGS HEADER ALL_LANG LINE APP LOCS LANGS PAIR PERCENT TOTAL ENTRY NO_MATCH
  local -a ALL_LANGS=() PAIRS=()

  log_console_info "Generate Linguist CSV file"

  # Newlines are turned into spaces (not removed) so that the last pair of a line
  # without a trailing ';' is not fused with the first pair of the next line.
  ALL_MIXED_LANGS=$(cut -d';' -f"${INDEX_FIRST_TECH}"- <"${INPUT_FILE}" | tr ' ' '_' | sed "${PAIR_SPLITTER}" | tr '\n' ' ')
  while IFS= read -r ALL_LANG; do
    if [[ -n "${ALL_LANG}" ]]; then
      ALL_LANGS+=("${ALL_LANG}")
    fi
  done < <(tokenize "${ALL_MIXED_LANGS}" | cut -d';' -f1 | sort | uniq)

  HEADER="App"
  for ALL_LANG in "${ALL_LANGS[@]}"; do
    HEADER+=",${ALL_LANG}"
  done
  echo "${HEADER},Other" >"${OUTPUT_FILE}"

  while read -r LINE; do
    #GROUP=$(echo $LINE | cut -d';' -f1)
    #STATS=$(echo "${LINE}" | cut -d';' -f3-5)
    APP=$(printf '%s\n' "${LINE}" | cut -d';' -f2)
    LOCS=$(printf '%s\n' "${LINE}" | cut -d';' -f6)
    # Same normalisation (' ' -> '_') as for the header, otherwise multi-word
    # language names could never be found in the row.
    LANGS=$(printf '%s\n' "${LINE}" | cut -d';' -f"${INDEX_FIRST_TECH}"- | tr ' ' '_' | sed "${PAIR_SPLITTER}")
    read -r -a PAIRS <<<"${LANGS}"

    ENTRY="${APP}"
    NO_MATCH="true"
    for ALL_LANG in "${ALL_LANGS[@]}"; do
      TOTAL="0"
      for PAIR in "${PAIRS[@]}"; do
        # Literal, whole-name comparison: "C" must not match "Objective_C" and
        # characters such as '.' or '+' in a name are not treated as a pattern.
        if [[ "${PAIR}" == "${ALL_LANG};"* && "${PAIR#*;}" =~ ${PERCENT_REGEX} ]]; then
          # The first matching pair wins, so awk always receives a single value.
          PERCENT="${BASH_REMATCH[1]}"
          TOTAL=$(awk -v locs="${LOCS}" -v pct="${PERCENT}" 'BEGIN{total=(locs*pct/100); printf "%.0f\n", total}')
          NO_MATCH="false"
          break
        fi
      done
      ENTRY+=",${TOTAL}"
    done

    # Adding entry for "Other" category
    if [[ "${NO_MATCH}" == "true" ]]; then
      ENTRY+=",${LOCS}"
    else
      ENTRY+=",0"
    fi
    echo "${ENTRY}" >>"${OUTPUT_FILE}"
  done < <(sort -r <"${INPUT_FILE}" | awk 'NF' | uniq)
}
# Generates the CLOC and the Linguist CSV data, each only when its extracted
# input file exists; a missing input file is reported through log_console_error.
function main() {
  # CLOC results
  if [[ ! -f "${INPUT_CLOC_FILE}" ]]; then
    log_console_error "CLOC result file missing: ${INPUT_CLOC_FILE}"
  else
    generate_csv_data__CLOC "${INPUT_CLOC_FILE}" "${OUTPUT_CLOC_FILE}"
  fi

  # Linguist results
  if [[ ! -f "${INPUT_LINGUSIT_FILE}" ]]; then
    log_console_error "Linguist result file missing: ${INPUT_LINGUSIT_FILE}"
  else
    generate_csv_data__LINGUIST "${INPUT_LINGUSIT_FILE}" "${OUTPUT_LINGUIST_FILE}"
  fi
}
# Script entry point: runs both extractions.
main