Fixes to SDXL accuracy output, submission preprocessor, fixes #1938 (#1948)

* Update generate_final_report.py

* Fix sdxl (#1911)

* Fix typo in fid_score.py, fail_safe for SDXL short runs

* [Automated Commit] Format Codebase

* Fix typo in fid_score.py, fail_safe for SDXL short runs

* Fix dlrmv2 reference implementation | Update run_local.sh

* Fixes for filtering invalid results

* [Automated Commit] Format Codebase

* Update preprocess_submission.py

* Added an option to pass in sample_ids.txt for SDXL accuracy check

* [Automated Commit] Format Codebase

* Update accuracy_coco.py

* [Automated Commit] Format Codebase

* Fix typo

* Do not use a default for sample_ids.txt

* Update requirements.txt (#1907)

Updating the pip packages

* Fix a bug in preprocess_submission

* Update submission_checker.py | Removed TEST05

* Fix to SDXL accuracy output

* Added exists checks for rmtree in preprocess_submission script

* [Automated Commit] Format Codebase

* Delete .github/workflows/format.yml

* Delete .github/scripts directory

* Update build_wheels.yml | Added src distribution

* Update VERSION.txt

* Update build_wheels.yml

* Update VERSION.txt

* Update pyproject.toml

* Increment version to 4.1.26

* Update MANIFEST.in

* Increment version to 4.1.27

* Update pyproject.toml

* Increment version to 4.1.28

* Update build_wheels.yml

* Update VERSION.txt

* Update accuracy_coco.py

* Making sdxl run thread safe

* Create format.yml | Run format on push instead of PR

---------

Co-authored-by: arjunsuresh <arjunsuresh@users.noreply.github.com>
arjunsuresh and arjunsuresh authored Dec 4, 2024
1 parent feca9f6 commit b6f5a34
Showing 11 changed files with 123 additions and 126 deletions.
26 changes: 0 additions & 26 deletions .github/scripts/format-cpp.sh

This file was deleted.

26 changes: 0 additions & 26 deletions .github/scripts/format-py.sh

This file was deleted.

19 changes: 15 additions & 4 deletions .github/workflows/build_wheels.yml
@@ -74,15 +74,20 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ubuntu-latest, windows-latest, macos-latest, macos-13]

steps:
- uses: actions/checkout@v3

- uses: actions/setup-python@v3

- name: Install requirements
run: python -m pip install cibuildwheel twine
run: python -m pip install cibuildwheel twine build

- name: Build src dist
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
python -m build --sdist --outdir wheels loadgen
- name: Build wheels
run: git pull && python -m cibuildwheel loadgen/ --output-dir wheels
@@ -110,12 +115,18 @@ jobs:
with:
name: wheels-ubuntu-latest
path: wheels
# Download the built wheels from macOS
- name: Download macOS wheels
# Download the built wheels from macOS-latest
- name: Download macOS-latest wheels
uses: actions/download-artifact@v4
with:
name: wheels-macos-latest
path: wheels
# Download the built wheels from macOS-13 (x86)
- name: Download macOS-13 (x86) wheels
uses: actions/download-artifact@v4
with:
name: wheels-macos-13
path: wheels
# Download the built wheels from Windows
- name: Download Windows wheels
uses: actions/download-artifact@v4
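For context, a minimal sketch of what the new "Build src dist" step added above does, written as a Python subprocess call instead of a workflow step (the `wheels` output directory and the `loadgen` path are taken from the workflow; the `build` package is the one the updated install step now adds):

    import subprocess

    # Equivalent of the workflow's `python -m build --sdist --outdir wheels loadgen`:
    # produce a source distribution next to the wheels that cibuildwheel creates.
    subprocess.run(
        ["python", "-m", "build", "--sdist", "--outdir", "wheels", "loadgen"],
        check=True,
    )

The step is gated to ubuntu-latest only, presumably because a single sdist is platform-independent and building it once avoids uploading duplicate source archives from every matrix entry.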
58 changes: 34 additions & 24 deletions .github/workflows/format.yml
@@ -1,50 +1,60 @@
# Automatic code formatting
name: "format"
name: "Code formatting"
on:
pull_request:
branches: [ master ]
types: [opened, closed, synchronize]

push:
branches:
- "**"

env:
python_version: "3.9"
HEAD_REF: ${{ github.head_ref }}

jobs:
format-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
ssh-key: ${{ secrets.DEPLOY_KEY }}
- name: Set up Python ${{ env.python_version }}
uses: actions/setup-python@v3
with:
python-version: ${{ env.python_version }}

- name: Install dependencies
- name: Format modified python files
env:
filter: ${{ github.event.before }}
run: |
python3 -m pip install autopep8
for FILE in $(git diff --name-only $filter | grep -E '.*\.py$')
do
# Check if the file still exists in the working tree
if [ -f "$FILE" ]; then
autopep8 --in-place -a "$FILE"
git add "$FILE"
fi
done
- name: Grant permissions
run: |
chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-cpp.sh"
chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-py.sh"
- name: Format Codebase
- name: Format modified C++ files
env:
filter: ${{ github.event.before }}
run: |
git remote add upstream ${{ github.event.pull_request.base.repo.clone_url }}
git fetch upstream ${{ github.event.pull_request.base.ref }}
".github/scripts/format-cpp.sh" "upstream" "${{ github.event.pull_request.base.ref }}"
".github/scripts/format-py.sh" "upstream" "${{ github.event.pull_request.base.ref }}"
for FILE in $(git diff --name-only $filter | grep -E '.*\.(cc|cpp|h|hpp)$')
do
# Check if the file still exists in the working tree
if [ -f "$FILE" ]; then
clang-format -i -style=file $FILE
git add $FILE
fi
done
- name: Commit
- name: Commit and push changes
run: |
HAS_CHANGES=$(git diff --staged --name-only)
if [ ${#HAS_CHANGES} -gt 0 ]; then
git checkout -B "$HEAD_REF"
git config --global user.email "${{ github.actor }}@users.noreply.github.com"
git config --global user.name "${{ github.actor }}"
git config --global user.name mlcommons-bot
git config --global user.email "mlcommons-bot@users.noreply.github.com"
# Commit changes
git commit -m '[Automated Commit] Format Codebase'
git push origin "$HEAD_REF"
fi
git push
fi
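For reference, a minimal Python sketch (not part of the workflow) of the "format only files changed since the previous push" idea the two shell loops above implement; `before_sha` stands in for `${{ github.event.before }}`, and `git` plus `autopep8` are assumed to be on PATH:

    import subprocess
    from pathlib import Path

    def format_changed_python_files(before_sha: str) -> None:
        # Mirror of `git diff --name-only $filter` in the workflow.
        changed = subprocess.run(
            ["git", "diff", "--name-only", before_sha],
            capture_output=True, text=True, check=True,
        ).stdout.splitlines()
        for name in changed:
            path = Path(name)
            # Skip deleted files and anything that is not a Python source file.
            if path.suffix == ".py" and path.is_file():
                subprocess.run(["autopep8", "--in-place", "-a", str(path)], check=True)
                subprocess.run(["git", "add", str(path)], check=True)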
1 change: 1 addition & 0 deletions loadgen/MANIFEST.in
@@ -1 +1,2 @@
include VERSION.txt
include mlperf.conf
2 changes: 1 addition & 1 deletion loadgen/VERSION.txt
@@ -1 +1 @@
4.1.24
4.1.29
23 changes: 13 additions & 10 deletions text_to_image/backend_pytorch.py
@@ -5,6 +5,7 @@
import backend
from diffusers import StableDiffusionXLPipeline
from diffusers import EulerDiscreteScheduler
import threading

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("backend-pytorch")
@@ -24,6 +25,7 @@ def __init__(
):
super(BackendPytorch, self).__init__()
self.model_path = model_path
self.lock = threading.Lock()
if model_id == "xl":
self.model_id = "stabilityai/stable-diffusion-xl-base-1.0"
else:
@@ -385,15 +387,16 @@ def predict(self, inputs):
pooled_prompt_embeds,
negative_pooled_prompt_embeds,
) = self.prepare_inputs(inputs, i)
generated = self.pipe(
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
guidance_scale=self.guidance,
num_inference_steps=self.steps,
output_type="pt",
latents=latents_input,
).images
with self.lock:
generated = self.pipe(
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
guidance_scale=self.guidance,
num_inference_steps=self.steps,
output_type="pt",
latents=latents_input,
).images
images.extend(generated)
return images
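A minimal sketch of the locking pattern this hunk introduces, using a hypothetical `Worker` class rather than the repo's `BackendPytorch`: the diffusers pipeline is shared across server threads and is not guaranteed to be thread safe, so each call is serialized on a per-instance `threading.Lock`, while input preparation stays outside the critical section.

    import threading

    class Worker:
        def __init__(self, pipe):
            self.pipe = pipe              # shared pipeline, assumed not thread safe
            self.lock = threading.Lock()  # one lock per backend instance

        def predict(self, batch):
            # Prepare inputs outside the lock so only the pipeline call is serialized.
            prepared = [self._prepare(sample) for sample in batch]
            with self.lock:
                return [self.pipe(x) for x in prepared]

        def _prepare(self, sample):
            return sample  # placeholder for tokenization / latent handling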
28 changes: 15 additions & 13 deletions text_to_image/main.py
@@ -250,6 +250,7 @@ def run_one_item(self, qitem: Item):
log.error("thread: failed on contentid=%s, %s", src, ex)
# since post_process will not run, fake empty responses
processed_results = [[]] * len(qitem.query_id)
raise
finally:
response_array_refs = []
response = []
@@ -402,19 +403,20 @@ def main():
#
count = ds.get_item_count()

# warmup
syntetic_str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit"
latents_pt = torch.rand(ds.latents.shape, dtype=dtype).to(args.device)
warmup_samples = [
{
"input_tokens": ds.preprocess(syntetic_str, model.pipe.tokenizer),
"input_tokens_2": ds.preprocess(syntetic_str, model.pipe.tokenizer_2),
"latents": latents_pt,
}
for _ in range(args.max_batchsize)
]
for i in range(5):
_ = backend.predict(warmup_samples)
if os.environ.get('FORCE_NO_WARMUP', '').lower() not in [ "yes", "true", "1" ]:
# warmup
syntetic_str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit"
latents_pt = torch.rand(ds.latents.shape, dtype=dtype).to(args.device)
warmup_samples = [
{
"input_tokens": ds.preprocess(syntetic_str, model.pipe.tokenizer),
"input_tokens_2": ds.preprocess(syntetic_str, model.pipe.tokenizer_2),
"latents": latents_pt,
}
for _ in range(args.max_batchsize)
]
for i in range(5):
_ = backend.predict(warmup_samples)

scenario = SCENARIO_MAP[args.scenario]
runner_map = {
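A minimal sketch of the opt-out gate used above, with the same `FORCE_NO_WARMUP` variable and truthy spellings as the hunk:

    import os

    def warmup_enabled() -> bool:
        # Warmup runs unless FORCE_NO_WARMUP is set to "yes", "true" or "1".
        return os.environ.get("FORCE_NO_WARMUP", "").lower() not in ("yes", "true", "1")

    if warmup_enabled():
        pass  # e.g. push a few synthetic batches through backend.predict(...)

With this gate, setting FORCE_NO_WARMUP=1 in the environment skips the five synthetic warmup batches.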
8 changes: 4 additions & 4 deletions text_to_image/tools/accuracy_coco.py
@@ -195,8 +195,8 @@ def compute_accuracy(
generated_img).item())
fid_score = compute_fid(result_list, statistics_path, device)

result_dict["FID_SCORE"] = fid_score
result_dict["CLIP_SCORE"] = np.mean(clip_scores)
result_dict["FID_SCORE"] = f"{fid_score}"
result_dict["CLIP_SCORE"] = f"{np.mean(clip_scores)}"
print(f"Accuracy Results: {result_dict}")

with open(output_file, "w") as fp:
@@ -311,8 +311,8 @@ def compute_accuracy_low_memory(

fid_score = calculate_frechet_distance(m1, s1, m2, s2)

result_dict["FID_SCORE"] = fid_score
result_dict["CLIP_SCORE"] = np.mean(clip_scores)
result_dict["FID_SCORE"] = f"{fid_score}"
result_dict["CLIP_SCORE"] = f"{np.mean(clip_scores)}"
print(f"Accuracy Results: {result_dict}")

with open(output_file, "w") as fp:
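As background (an illustration, not necessarily the motivation for this change): storing the scores as formatted strings keeps the accuracy dict serializable by `json.dump` regardless of the numpy dtype that produced them, since the default encoder rejects some numpy scalars. A minimal sketch:

    import json
    import numpy as np

    clip_scores = [np.float32(0.31), np.float32(0.29)]
    result = {"CLIP_SCORE": np.mean(clip_scores)}  # np.mean keeps the float32 dtype

    try:
        json.dumps(result)
    except TypeError:
        # "Object of type float32 is not JSON serializable" -- stringify as the hunk does.
        result["CLIP_SCORE"] = f"{np.mean(clip_scores)}"
        print(json.dumps(result))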
4 changes: 4 additions & 0 deletions text_to_image/tools/fid/fid_score.py
@@ -355,6 +355,10 @@ def compute_fid(
imgs, model, batch_size, dims, device, num_workers
)

# Ensure dimensions match before calculating FID
assert s1.shape == s2.shape, f"Covariance shapes mismatch: {s1.shape} vs {s2.shape}"
assert m1.shape == m2.shape, f"Mean shapes mismatch: {m1.shape} vs {m2.shape}"

fid_value = calculate_frechet_distance(m1, s1, m2, s2)

return fid_value
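For reference, a sketch of the Fréchet distance those shape checks feed into, following the standard pytorch-fid formulation over the same `(m, s)` mean/covariance pairs (a textbook version, not a copy of the repo's `calculate_frechet_distance`):

    import numpy as np
    from scipy import linalg

    def frechet_distance(m1, s1, m2, s2, eps=1e-6):
        # FID between N(m1, s1) and N(m2, s2):
        #   ||m1 - m2||^2 + Tr(s1 + s2 - 2 * sqrtm(s1 @ s2))
        diff = m1 - m2
        covmean, _ = linalg.sqrtm(s1.dot(s2), disp=False)
        if not np.isfinite(covmean).all():
            # Nudge the diagonal if the product is close to singular.
            offset = np.eye(s1.shape[0]) * eps
            covmean = linalg.sqrtm((s1 + offset).dot(s2 + offset))
        if np.iscomplexobj(covmean):
            covmean = covmean.real
        return diff.dot(diff) + np.trace(s1) + np.trace(s2) - 2 * np.trace(covmean)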