Skip to content

Commit

Permalink
generate-srcinfo: use native Python and git
Browse files Browse the repository at this point in the history
Should make things a bit faster, and more reliable.
  • Loading branch information
lazka committed Sep 12, 2021
1 parent fb74887 commit 696f844
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 55 deletions.
99 changes: 48 additions & 51 deletions .ci/ci-generate-srcinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@
import argparse
import os
import json
import shutil
from collections import OrderedDict
import hashlib
import time
import shlex
import subprocess
from concurrent.futures import ThreadPoolExecutor
from functools import partial

from typing import List, Iterator, Tuple, Dict, Optional, Union, Collection
from typing import List, Iterator, Tuple, Dict, Optional, Union, Collection, Sequence, Any


# One cache entry: string keys mapped to either a string or a collection of strings.
CacheEntry = Dict[str, Union[str, Collection[str]]]
Expand All @@ -50,21 +50,35 @@ def normalize_path(path: str) -> str:
return path.replace("\\", "/")


def check_output_retry(*args, **kwargs):
    """Run ``subprocess.check_output``, retrying when it exits with code 127.

    git sometimes crashes when called concurrently, so a call that fails
    with exit status 127 is repeated up to ``max_retries`` times, waiting a
    little longer before each new attempt. Any other failure, or a 127 that
    persists after the last retry, is re-raised unchanged.

    All positional and keyword arguments are forwarded to
    ``subprocess.check_output`` and its return value is returned as is.

    Raises:
        subprocess.CalledProcessError: if the command fails with a status
            other than 127, or still fails after all retries.
    """
    max_retries = 5
    retries = 0
    while True:
        try:
            return subprocess.check_output(*args, **kwargs)
        except subprocess.CalledProcessError as e:
            if e.returncode != 127 or retries >= max_retries:
                raise
            retries += 1
            # Linear backoff; counting first means even the first retry
            # actually waits instead of sleeping for zero seconds.
            time.sleep(0.1 * retries)
def get_mingw_arch_list(msys2_root: str, dir: str, pkgbuild_path: str) -> List[str]:
    """Return the ``mingw_arch`` values a PKGBUILD declares.

    The PKGBUILD is sourced by ``<msys2_root>/usr/bin/bash.exe`` with ``dir``
    as the working directory, and the contents of the ``mingw_arch`` array
    are echoed back. ``pkgbuild_path`` must be a relative path (asserted).
    When nothing is printed, a built-in default list is returned.
    """
    assert not os.path.isabs(pkgbuild_path)

    bash_exe = os.path.join(msys2_root, 'usr', 'bin', 'bash.exe')
    script = ';'.join((
        shlex.join(['source', pkgbuild_path]),
        'echo -n "${mingw_arch[@]}"',
    ))

    # Environment overrides handed to the MSYS2 bash.
    bash_env = os.environ.copy()
    bash_env.update({
        "CHERE_INVOKING": "1",
        "MSYSTEM": "MSYS",
        "MSYS2_PATH_TYPE": "minimal",
    })

    printed = subprocess.check_output(
        [bash_exe, '-lc', script],
        universal_newlines=True, env=bash_env, cwd=dir)

    # Fall back to the default list when the script printed nothing.
    return printed.strip().split() or ["mingw32", "mingw64", "ucrt64", "clang64"]


def check_output_msys(msys2_root: str, args: Sequence[str], **kwargs: Any) -> Any:
    """Run a command through the MSYS2 bash and return its output.

    ``args`` is shell-quoted into one command line and executed with
    ``<msys2_root>/usr/bin/bash.exe -lce``.

    Args:
        msys2_root: Path to the MSYS2 installation.
        args: The command and its arguments; every item is converted with
            ``str`` before quoting.
        **kwargs: Forwarded to ``subprocess.check_output``. An ``env``
            mapping, if given, is used as the base environment instead of
            ``os.environ``; it is copied and never modified.

    Returns:
        Whatever ``subprocess.check_output`` returns for the given kwargs.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    executable = os.path.join(msys2_root, 'usr', 'bin', 'bash.exe')

    base_env = kwargs.pop("env", None)
    # Work on a copy so the overrides below never leak into the caller's
    # mapping (or into os.environ itself).
    env = dict(os.environ if base_env is None else base_env)
    env["CHERE_INVOKING"] = "1"
    env["MSYSTEM"] = "MSYS"
    env["MSYS2_PATH_TYPE"] = "minimal"

    command = shlex.join([str(a) for a in args])
    return subprocess.check_output(
        [executable, '-lce', command],
        env=env, **kwargs)


def get_cache_key(pkgbuild_path: str) -> str:
Expand All @@ -76,12 +90,12 @@ def get_cache_key(pkgbuild_path: str) -> str:
with open(pkgbuild_path, "rb") as f:
h.update(f.read())

fileinfo = check_output_retry(
fileinfo = subprocess.check_output(
["git", "ls-files", "-s", "--full-name", git_path],
cwd=git_cwd).decode("utf-8").strip()
h.update(normalize_path(fileinfo).encode("utf-8"))

repo = check_output_retry(
repo = subprocess.check_output(
["git", "ls-remote", "--get-url", "origin"],
cwd=git_cwd).decode("utf-8").strip()
repo = normalize_repo(repo)
Expand All @@ -90,63 +104,45 @@ def get_cache_key(pkgbuild_path: str) -> str:
return h.hexdigest()


def get_mingw_arch_list(pkgbuild_path: str) -> List[str]:
    """Return the ``mingw_arch`` values a PKGBUILD declares.

    The PKGBUILD is sourced by the ``bash`` found on ``PATH`` and the
    contents of the ``mingw_arch`` array are echoed back. When nothing is
    printed, a built-in default list is returned.
    """
    bash = shutil.which("bash")
    assert bash is not None

    script = ';'.join((
        shlex.join(['source', pkgbuild_path]),
        'echo -n "${mingw_arch[@]}"',
    ))
    printed = subprocess.check_output(
        [bash, '-c', script], universal_newlines=True)

    # Fall back to the default list when the script printed nothing.
    return printed.strip().split() or ["mingw32", "mingw64", "ucrt64", "clang64"]


def get_srcinfo_for_pkgbuild(args: Tuple[str, str]) -> Optional[CacheTuple]:
def get_srcinfo_for_pkgbuild(msys2_root: str, args: Tuple[str, str]) -> Optional[CacheTuple]:
pkgbuild_path, mode = args
pkgbuild_path = os.path.abspath(pkgbuild_path)
git_cwd = os.path.dirname(pkgbuild_path)
git_path = os.path.relpath(pkgbuild_path, git_cwd)
key = get_cache_key(pkgbuild_path)

bash = shutil.which("bash")
if bash is None:
print("ERROR: bash not found")
return None

print("Parsing %r" % pkgbuild_path)
try:
srcinfos = {}

if mode == "mingw":
for name in get_mingw_arch_list(pkgbuild_path):
for name in get_mingw_arch_list(msys2_root, git_cwd, git_path):
env = os.environ.copy()
env["MINGW_ARCH"] = name
srcinfos[name] = subprocess.check_output(
[bash, "/usr/bin/makepkg-mingw",
srcinfos[name] = check_output_msys(
msys2_root,
["/usr/bin/makepkg-mingw",
"--printsrcinfo", "-p", git_path],
cwd=git_cwd,
env=env).decode("utf-8")
else:
srcinfos["msys"] = subprocess.check_output(
[bash, "/usr/bin/makepkg",
srcinfos["msys"] = check_output_msys(
msys2_root,
["/usr/bin/makepkg",
"--printsrcinfo", "-p", git_path],
cwd=git_cwd).decode("utf-8")

repo = check_output_retry(
repo = subprocess.check_output(
["git", "ls-remote", "--get-url", "origin"],
cwd=git_cwd).decode("utf-8").strip()
repo = normalize_repo(repo)

relpath = check_output_retry(
relpath = subprocess.check_output(
["git", "ls-files", "--full-name", git_path],
cwd=git_cwd).decode("utf-8").strip()
relpath = normalize_path(os.path.dirname(relpath))

date = check_output_retry(
date = subprocess.check_output(
["git", "log", "-1", "--format=%aI", git_path],
cwd=git_cwd).decode("utf-8").strip()

Expand Down Expand Up @@ -179,7 +175,7 @@ def get_srcinfo_from_cache(args: Tuple[str, Cache]) -> Tuple[str, Optional[Cache
return (pkgbuild_path, None)


def iter_srcinfo(repo_path: str, mode: str, cache: Cache) -> Iterator[Optional[CacheTuple]]:
def iter_srcinfo(msys2_root: str, repo_path: str, mode: str, cache: Cache) -> Iterator[Optional[CacheTuple]]:
with ThreadPoolExecutor() as executor:
to_parse: List[Tuple[str, str]] = []
pool_iter = executor.map(
Expand All @@ -191,13 +187,14 @@ def iter_srcinfo(repo_path: str, mode: str, cache: Cache) -> Iterator[Optional[C
to_parse.append((pkgbuild_path, mode))

print("Parsing PKGBUILD files...")
for srcinfo in executor.map(get_srcinfo_for_pkgbuild, to_parse):
for srcinfo in executor.map(partial(get_srcinfo_for_pkgbuild, msys2_root), to_parse):
yield srcinfo


def main(argv: List[str]) -> Optional[Union[int, str]]:
parser = argparse.ArgumentParser(description="Create SRCINFOs for all packages in a repo", allow_abbrev=False)
parser.add_argument('mode', choices=['msys', 'mingw'], help="The type of the repo")
parser.add_argument("msys2_root", help="The path to MSYS2")
parser.add_argument("repo_path", help="The path to GIT repo")
parser.add_argument("json_cache", help="The path to the json file used to fetch/store the results")
parser.add_argument("--time-limit", action="store",
Expand All @@ -216,7 +213,7 @@ def main(argv: List[str]) -> Optional[Union[int, str]]:
pass

srcinfos = []
for entry in iter_srcinfo(args.repo_path, args.mode, cache):
for entry in iter_srcinfo(args.msys2_root, args.repo_path, args.mode, cache):
if entry is None:
continue
srcinfos.append(entry)
Expand Down
16 changes: 12 additions & 4 deletions .github/workflows/generate-srcinfo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,27 @@ jobs:
with:
fetch-depth: 0

- uses: actions/setup-python@v2
with:
python-version: '3.9'

- uses: msys2/setup-msys2@v2
with:
msystem: MSYS
install: python git binutils
install: binutils
update: true

- run: |
- shell: msys2 {0}
run: |
# XXX: linting PKGBUILDs takes a loooong time, this skips it
sed -i s/^lint_pkgbuild/#lint_pkgbuild/g /usr/bin/makepkg
# XXX: we don't need the toolchains for --printsrcinfo, but makepkg-mingw complains if gcc doesn't exist
touch /mingw64/bin/gcc.exe /mingw32/bin/gcc.exe /ucrt64/bin/gcc.exe /clang64/bin/clang.exe /clang32/bin/clang.exe
touch /mingw64/bin/gcc.exe /mingw32/bin/gcc.exe /ucrt64/bin/gcc.exe /clang64/bin/clang.exe /clang32/bin/clang.exe /clangarm64/bin/clang.exe
curl --fail -L --retry 5 -o srcinfo.json "https://github.com/$GITHUB_REPOSITORY/releases/download/srcinfo-cache/srcinfo.json"
python -u .ci/ci-generate-srcinfo.py --time-limit 19800 mingw . srcinfo.json
- run: |
$MSYS2_ROOT=(msys2 -c 'cygpath -w /')
python -u .ci/ci-generate-srcinfo.py --time-limit 19800 mingw "$MSYS2_ROOT" . srcinfo.json
- uses: actions/upload-artifact@v2
with:
Expand Down

0 comments on commit 696f844

Please sign in to comment.