Skip to content

Commit

Permalink
#80 discover fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
yegor256 committed Sep 27, 2023
1 parent e4c5342 commit 1a2f1be
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 20 deletions.
7 changes: 5 additions & 2 deletions steps/clone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ rm -rf "${jobs}"
mkdir -p "$(dirname "${jobs}")"
touch "${jobs}"

total=$(wc -l < "${TARGET}/repositories.csv" | xargs)
# Copy everything except the first line (the CSV header) into a temporary
# plain list, so that both the total count and the read loop below see
# repository rows only.
repos="${TARGET}/temp/repositories.txt"
mkdir -p "$(dirname "${repos}")"
tail -n +2 "${TARGET}/repositories.csv" > "${repos}"
# wc counts newline characters; xargs strips the padding wc may print.
total=$(wc -l < "${repos}" | xargs)

declare -i repo=0
sh="$(dirname "$0")/clone-repo.sh"
Expand All @@ -42,7 +45,7 @@ while IFS=',' read -r r tag; do
else
printf "%s %s %s %s %s\n" "${sh@Q}" "${r@Q}" "${tag@Q}" "${repo@Q}" "${total@Q}" >> "${jobs}"
fi
done < "${TARGET}/repositories.csv"
done < "${repos}"

"${LOCAL}/help/parallel.sh" "${jobs}" 8
wait
Expand Down
34 changes: 22 additions & 12 deletions discover-repos.rb → steps/discover-repos.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
o.integer '--min-stars', 'Minimum GitHub stars in each repo', default: max
o.integer '--max-stars', 'Maximum GitHub stars in each repo', default: 100_000
o.integer '--min-size', 'Minimum size of GitHub repo, in Kb', default: 100
o.string '--path', 'The file name to save the list to', required: true
o.string '--csv', 'The file name to save the list to', required: true
o.string '--tex', 'The file name to save LaTeX summary of the operation', required: true
o.on '--help' do
puts o
Expand All @@ -54,7 +54,7 @@
github = Octokit::Client.new(access_token: opts[:token])
puts 'Accessing GitHub with personal access token!'
end
names = Set.new
found = {}
page = 0
query = [
"stars:#{opts['min-stars']}..#{opts['max-stars']}",
Expand All @@ -73,20 +73,28 @@
end
json = github.search_repositories(query, per_page: size, page: page)
json[:items].each do |i|
names << i[:full_name]
puts "Found #{i[:full_name].inspect} GitHub repo ##{names.count} \
found[i[:full_name]] = {
full_name: i[:full_name],
default_branch: i[:default_branch],
stars: i[:stargazers_count],
forks: i[:forks_count],
created_at: i[:created_at].iso8601,
size: i[:size],
open_issues_count: i[:open_issues_count]
}
puts "Found #{i[:full_name].inspect} GitHub repo ##{found.count} \
(#{i[:forks_count]} forks, #{i[:stargazers_count]} stars)"
end
puts "Found #{json[:items].count} repositories in page #{page}"
break if names.count >= opts[:total]
puts "Let\'s sleep for a few seconds to cool off GitHub API (already found #{names.count} repos)..."
break if found.count >= opts[:total]
puts "Let\'s sleep for a few seconds to cool off GitHub API (already found #{found.count} repos)..."
sleep 10
page += 1
end
puts "Found #{names.count} total repositories in GitHub"
puts "Found #{found.count} total repositories in GitHub"

if names.count > opts[:total]
names = names.first(opts[:total])
# Trim the result to the requested amount. Hash#first(n) returns an Array of
# [key, value] pairs, so it is converted back with to_h: the code that writes
# the CSV later calls found.values, which an Array does not provide.
if found.count > opts[:total]
  found = found.first(opts[:total]).to_h
  puts "We will use only the first #{opts[:total]} repositories"
end

Expand All @@ -102,7 +110,9 @@
].join
)

path = File.expand_path(opts[:path])
path = File.expand_path(opts[:csv])
FileUtils.mkdir_p(File.dirname(path))
File.write(path, "#{names.join("\n")}\n")
puts "The list of #{names.count} repos saved into #{path}"
# Write the CSV: a header row built from the keys of the first record,
# then one comma-joined row per repository. Rows are joined with "\n" and
# no trailing newline is written after the last row.
# NOTE(review): line-oriented shell readers (`while read`) skip a last line
# that has no newline terminator; confirm that consumers of this file cope.
File.write(path,
([found.first[1].keys.join(',')] + found.values.map { |m| m.values.join(',') }).join("\n")
)
puts "The list of #{found.count} repos saved into #{path}"
4 changes: 2 additions & 2 deletions steps/discover.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ elif [ -n "${REPO}" ]; then
echo "${REPO}" >> "${csv}"
elif [ -z "${REPOS}" ] || [ ! -e "${REPOS}" ]; then
echo "Using discover-repos.rb..."
ruby "${LOCAL}/discover-repos.rb" \
ruby "${LOCAL}/steps/discover-repos.rb" \
"--token=${TOKEN}" \
"--total=${TOTAL}" \
"--path=${csv}" \
"--csv=${csv}" \
"--tex=${TARGET}/temp/repo-details.tex" \
"--min-stars=400" \
"--max-stars=10000"
Expand Down
4 changes: 2 additions & 2 deletions tests/steps/test-clone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ set -o pipefail

temp=$1

echo "yegor256/jaxec" > "${TARGET}/repositories.csv"
echo -e "name\nyegor256/jaxec" > "${TARGET}/repositories.csv"
rm -rf "${TARGET}/github"
"${LOCAL}/steps/clone.sh" >/dev/null
test -e "${TARGET}/github/yegor256/jaxec/pom.xml"
echo "👍🏻 A repo cloned correctly"

TARGET="${TARGET}/another/ж\"' () привет /t"
mkdir -p "${TARGET}"
echo "yegor256/jaxec" > "${TARGET}/repositories.csv"
echo -e "name\nyegor256/jaxec" > "${TARGET}/repositories.csv"
rm -rf "${TARGET}/github"
"${LOCAL}/steps/clone.sh" >/dev/null
test -e "${TARGET}/github/yegor256/jaxec/pom.xml"
Expand Down
36 changes: 36 additions & 0 deletions tests/steps/test-discover-repos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# The MIT License (MIT)
#
# Copyright (c) 2021-2023 Yegor Bugayenko
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Integration test for steps/discover-repos.rb: requests three repositories
# and verifies the console report plus the produced CSV and TeX files.
# Reads TARGET (scratch directory) and LOCAL (repository root) from the
# environment.
set -e
set -o pipefail

csv="${TARGET}/foo.csv"
tex="${TARGET}/foo.tex"

# Remove both outputs of earlier runs, so that the existence checks below
# prove that this very run has created them.
rm -f "${csv}" "${tex}"
stdout=$("${LOCAL}/steps/discover-repos.rb" --total=3 --min-stars=100 --max-stars=1000 "--csv=${csv}" "--tex=${tex}")
echo "${stdout}" | grep "Found 3 repositories in page 0" >/dev/null
echo "${stdout}" | grep "Found 3 total repositories in GitHub" >/dev/null
test -e "${csv}"
test -e "${tex}"
# Count data rows only (the first line is the header). `grep -c ''` also
# counts a last line that has no trailing newline, so the check does not
# depend on how the file ends.
test "$(tail -n +2 "${csv}" | grep -c '')" = '3'
echo "👍🏻 A few repositories discovered correctly"
2 changes: 1 addition & 1 deletion tests/steps/test-jpeek-repo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ set -o pipefail
temp=$1

repo="yegor256/jaxec"
echo "${repo}" > "${TARGET}/repositories.csv"
echo -e "name\n${repo}" > "${TARGET}/repositories.csv"
rm -rf "${TARGET}/github"
"${LOCAL}/steps/clone.sh" >/dev/null
msg=$("${LOCAL}/steps/jpeek-repo.sh" "${repo}" 1 1)
Expand Down
2 changes: 1 addition & 1 deletion tests/steps/test-jpeek.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ set -o pipefail
temp=$1

repo="yegor256/jaxec"
echo "${repo}" > "${TARGET}/repositories.csv"
echo -e "name\n${repo}" > "${TARGET}/repositories.csv"
rm -rf "${TARGET}/github"
"${LOCAL}/steps/clone.sh" >/dev/null
msg=$("${LOCAL}/steps/jpeek.sh")
Expand Down

0 comments on commit 1a2f1be

Please sign in to comment.