Skip to content

build

build #2070

Workflow file for this run

name: build

# Triggers: every pull request, pushes to main, a daily cron run, and manual
# dispatch with optional dataset / revision / cloud selection.
on:
  pull_request: {}
  push:
    branches:
      - main
  schedule:
    - cron: "0 0 * * *"
  workflow_dispatch:
    inputs:
      dataset:
        description: "Dataset Size"
        type: choice
        required: false
        default: "small"
        options:
          - tiny
          - small
          - large
          - mnist
          - openorca
          - used-cars
          - youtube-trending
          - meta-kaggle
          - chest-xray-pneumonia
      revs:
        description: "Comma-separated list of DVC revisions"
        type: string
        required: false
        default: ""
      clouds:
        description: "Run s3/gs/azure benchmarks"
        type: boolean
        required: false
        default: false

# Workflow-wide environment shared by every job.
env:
  DVC_TEST: "true"
  FORCE_COLOR: "1"
  UV_SYSTEM_PYTHON: "true"

  # Scheduled runs always use `mnist`; otherwise the dispatch input is used,
  # falling back to `small` when no input is available.
  DATASET: ${{ (github.event_name == 'schedule' && 'mnist') || github.event.inputs.dataset || 'small' }}
  REVS: ${{ github.event.inputs.revs || 'main,3.53.2,3.10.0,2.58.2' }}
  # Cloud benchmarks default to a smaller set of revisions.
  CLOUD_REVS: ${{ github.event.inputs.revs || 'main,3.53.2' }}

  # Repositories and refs checked out by the jobs below.
  DVC_REPOSITORY: iterative/dvc
  DVC_REF: simplify-benchmarks
  DVC_AZURE_REPOSITORY: iterative/dvc-azure
  DVC_AZURE_REF: main
  DVC_GS_REPOSITORY: iterative/dvc-gs
  DVC_GS_REF: main
  DVC_S3_REPOSITORY: iterative/dvc-s3
  DVC_S3_REF: main

# Default token scope; jobs that need more declare their own permissions.
permissions:
  contents: read

jobs:
lint:
  # Checks out this repository and installs requirements.txt with uv.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    # uv with a cache keyed on the requirements file.
    - uses: hynek/setup-cached-uv@v2
      with:
        cache-dependency-path: requirements.txt

    - name: install requirements
      run: |
        uv pip install -r requirements.txt
gen:
  # Produces the test lists consumed by the benchmark matrices. Each list is
  # whatever scripts/ci/list_tests.sh prints for the given benchmark path and
  # is published as a job output.
  runs-on: ubuntu-latest
  outputs:
    tests: ${{ steps.tests.outputs.tests }}
    azure-tests: ${{ steps.azure-tests.outputs.azure-tests }}
    gs-tests: ${{ steps.gs-tests.outputs.gs-tests }}
    s3-tests: ${{ steps.s3-tests.outputs.s3-tests }}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    # This repository goes to the workspace root (provides scripts/ci/).
    - uses: actions/checkout@v4

    # dvc and the three cloud plugins are checked out side by side with
    # full history (fetch-depth: 0).
    - uses: actions/checkout@v4
      with:
        path: dvc
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        path: dvc-azure
        repository: ${{ env.DVC_AZURE_REPOSITORY }}
        ref: ${{ env.DVC_AZURE_REF }}
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        path: dvc-gs
        repository: ${{ env.DVC_GS_REPOSITORY }}
        ref: ${{ env.DVC_GS_REF }}
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        path: dvc-s3
        repository: ${{ env.DVC_S3_REPOSITORY }}
        ref: ${{ env.DVC_S3_REF }}
        fetch-depth: 0

    - uses: hynek/setup-cached-uv@v2
      with:
        cache-dependency-path: |
          dvc/pyproject.toml
          dvc-azure/pyproject.toml
          dvc-gs/pyproject.toml
          dvc-s3/pyproject.toml

    - name: install reqs
      run: |
        uv pip install "./dvc[tests]" "./dvc-azure[tests]" "./dvc-gs[tests]" "./dvc-s3[tests]"

    # One listing step per package; the step id doubles as the output name.
    - id: tests
      working-directory: dvc/
      run: |
        echo "tests=$(../scripts/ci/list_tests.sh dvc/testing/benchmarks)" >> $GITHUB_OUTPUT
    - id: azure-tests
      working-directory: dvc-azure/
      run: |
        echo "azure-tests=$(../scripts/ci/list_tests.sh dvc_azure/tests/benchmarks.py)" >> $GITHUB_OUTPUT
    - id: gs-tests
      working-directory: dvc-gs/
      run: |
        echo "gs-tests=$(../scripts/ci/list_tests.sh dvc_gs/tests/benchmarks.py)" >> $GITHUB_OUTPUT
    - id: s3-tests
      working-directory: dvc-s3/
      run: |
        echo "s3-tests=$(../scripts/ci/list_tests.sh dvc_s3/tests/benchmarks.py)" >> $GITHUB_OUTPUT
build:
  # Runs one local benchmark per matrix entry; the entries come from the
  # `tests` output of the `gen` job.
  name: run ${{ matrix.test.name }}
  needs: [gen]
  runs-on: ${{ matrix.os }}
  timeout-minutes: 180
  strategy:
    fail-fast: false
    matrix:
      os: [ubuntu-latest]
      test: ${{ fromJson(needs.gen.outputs.tests) }}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"

    # This repository is checked out into dvc-bench/, dvc into dvc/.
    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0

    - uses: hynek/setup-cached-uv@v2
      with:
        cache-dependency-path: dvc/pyproject.toml
    - name: install requirements
      run: uv pip install "./dvc[tests]"

    # Dataset cache, keyed by dataset name only.
    - uses: actions/cache/restore@v4
      id: restore-cache
      with:
        path: .dvc/cache
        key: ${{ env.DATASET }}

    # REVS and DATASET are quoted so that a dispatch input containing
    # whitespace or glob characters reaches pytest as a single argument.
    - name: run benchmarks
      shell: bash
      working-directory: dvc/
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${REVS}"
        ${{ matrix.test.path }}
        --dataset "${DATASET}"
        --dvc-bench-repo ../dvc-bench
        --dvc-repo "$(pwd)"
        -W ignore

    # Only the test_add_copy matrix entry saves the cache, and only when the
    # restore step missed, so parallel entries do not race to write one key.
    - if: ${{ steps.restore-cache.outputs.cache-hit != 'true' && matrix.test.name == 'test_add_copy' }}
      uses: actions/cache/save@v4
      with:
        path: .dvc/cache
        key: ${{ steps.restore-cache.outputs.cache-primary-key }}

    # Artifact names share the `.benchmarks-` prefix that the compare job
    # downloads by pattern.
    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        name: .benchmarks-${{ matrix.test.name }}
        path: .benchmarks
build_s3:
  # S3 benchmarks: run on the schedule, or on manual dispatch with
  # `clouds` enabled. Real AWS credentials and the real bucket are only
  # configured for scheduled runs.
  name: run ${{ matrix.test.name }}
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  needs: [gen]
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failing cloud benchmark must not fail the whole workflow.
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      test: ${{ fromJson(needs.gen.outputs.s3-tests) }}
  permissions:
    # Lets the job request an OIDC token for the role assumption below.
    id-token: write
    # A job-level `permissions` block sets every unlisted scope to `none`,
    # so the workflow-level `contents: read` has to be repeated here for
    # actions/checkout.
    contents: read
  steps:
    - name: configure AWS credentials
      if: ${{ github.event_name == 'schedule' }}
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: arn:aws:iam::342840881361:role/dvc-bench-gha
        # 28800 s = 480 min, the same as the job timeout.
        role-duration-seconds: 28800
        aws-region: us-east-1

    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"

    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_S3_REPOSITORY }}
        ref: ${{ env.DVC_S3_REF }}
        path: dvc-s3
        fetch-depth: 0

    - uses: hynek/setup-cached-uv@v2
      with:
        cache-dependency-path: |
          dvc/pyproject.toml
          dvc-s3/pyproject.toml
    - name: install requirements
      run: uv pip install "./dvc[tests]" "./dvc-s3[tests]"

    # Restore-only: this job never saves the dataset cache.
    - uses: actions/cache/restore@v4
      with:
        path: .dvc/cache
        key: ${{ env.DATASET }}

    - name: configure real S3 DVC env
      if: ${{ github.event_name == 'schedule' }}
      run: |
        echo "DVC_TEST_AWS_REPO_BUCKET=dvc-bench-ci" >> "$GITHUB_ENV"

    # CLOUD_REVS and DATASET are quoted so that a dispatch input containing
    # whitespace or glob characters reaches pytest as a single argument.
    - name: run benchmarks
      shell: bash
      working-directory: dvc-s3/
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${CLOUD_REVS}"
        --dvc-install-deps s3
        --dataset "${DATASET}"
        --dvc-bench-repo ../dvc-bench
        ${{ matrix.test.path }}
        --dvc-repo ../dvc
        -W ignore

    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        name: .benchmarks-${{ matrix.test.name }}
        path: .benchmarks
build_azure:
  # Azure benchmarks: run on the schedule, or on manual dispatch with
  # `clouds` enabled. Azure login and the real storage account are only
  # used for scheduled runs.
  name: run ${{ matrix.test.name }}
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  needs: [gen]
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failing cloud benchmark must not fail the whole workflow.
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      test: ${{ fromJson(needs.gen.outputs.azure-tests) }}
  permissions:
    # Lets the job request an OIDC token for the Azure login step.
    id-token: write
    # A job-level `permissions` block sets every unlisted scope to `none`,
    # so the workflow-level `contents: read` has to be repeated here for
    # actions/checkout.
    contents: read
  # Scheduled runs use the `github-actions` environment; other runs use none.
  environment: ${{ github.event_name == 'schedule' && 'github-actions' || ''}}
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"

    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_AZURE_REPOSITORY }}
        ref: ${{ env.DVC_AZURE_REF }}
        path: dvc-azure
        fetch-depth: 0

    - uses: hynek/setup-cached-uv@v2
      with:
        cache-dependency-path: |
          dvc/pyproject.toml
          dvc-azure/pyproject.toml

    - name: 'Az CLI login'
      if: ${{ github.event_name == 'schedule' }}
      uses: azure/login@v2
      with:
        client-id: ${{ vars.AZURE_CLIENT_ID }}
        tenant-id: ${{ vars.AZURE_TENANT_ID }}
        subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}

    # `jq -r` prints the raw string. With `-c` the value kept its JSON
    # double quotes, so both the masked value and the exported variable
    # contained literal quote characters.
    - name: 'Set connection string'
      if: ${{ github.event_name == 'schedule' }}
      id: set-conn-str
      run: |
        connection_string=$(az storage account show-connection-string -g dvc-bench-ci -n dvcbenchci | jq -r '.connectionString')
        echo "::add-mask::$connection_string"
        echo "DVC_TEST_AZURE_CONNECTION_STRING=$connection_string" >> "$GITHUB_ENV"

    - name: install requirements
      run: uv pip install "./dvc[tests]" "./dvc-azure[tests]"

    # Restore-only: this job never saves the dataset cache.
    - uses: actions/cache/restore@v4
      with:
        path: .dvc/cache
        key: ${{ env.DATASET }}

    - name: setup env
      if: ${{ github.event_name == 'schedule' }}
      run: |
        echo "DVC_TEST_AZURE_PATH=az://dvc-bench-ci" >> "$GITHUB_ENV"

    # CLOUD_REVS and DATASET are quoted so that a dispatch input containing
    # whitespace or glob characters reaches pytest as a single argument.
    - name: run benchmarks
      shell: bash
      working-directory: dvc-azure/
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${CLOUD_REVS}"
        --dvc-install-deps azure
        --dataset "${DATASET}"
        --dvc-bench-repo ../dvc-bench
        ${{ matrix.test.path }}
        --dvc-repo ../dvc
        -W ignore

    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        name: .benchmarks-${{ matrix.test.name }}
        path: .benchmarks
build_gs:
  # Google Cloud Storage benchmarks: run on the schedule, or on manual
  # dispatch with `clouds` enabled. GCP authentication and the real bucket
  # are only configured for scheduled runs.
  name: run ${{ matrix.test.name }}
  if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
  needs: [gen]
  runs-on: ubuntu-latest
  timeout-minutes: 480
  # A failing cloud benchmark must not fail the whole workflow.
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      test: ${{ fromJson(needs.gen.outputs.gs-tests) }}
  permissions:
    # Lets the job request an OIDC token for workload identity federation.
    id-token: write
    # A job-level `permissions` block sets every unlisted scope to `none`,
    # so the workflow-level `contents: read` has to be repeated here for
    # actions/checkout.
    contents: read
  steps:
    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"

    - uses: actions/checkout@v4
      with:
        path: dvc-bench
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        path: dvc
        fetch-depth: 0
    - uses: actions/checkout@v4
      with:
        repository: ${{ env.DVC_GS_REPOSITORY }}
        ref: ${{ env.DVC_GS_REF }}
        path: dvc-gs
        fetch-depth: 0

    - uses: hynek/setup-cached-uv@v2
      with:
        cache-dependency-path: |
          dvc/pyproject.toml
          dvc-gs/pyproject.toml
    - name: install reqs
      run: uv pip install "./dvc[tests]" "./dvc-gs[tests]"

    # Restore-only: this job never saves the dataset cache.
    - uses: actions/cache/restore@v4
      with:
        path: .dvc/cache
        key: ${{ env.DATASET }}

    - id: 'auth'
      if: ${{ github.event_name == 'schedule' }}
      name: 'Authenticate to GCP'
      uses: 'google-github-actions/auth@v2.1.5'
      with:
        create_credentials_file: true
        workload_identity_provider: 'projects/385088528371/locations/global/workloadIdentityPools/iterative-sandbox/providers/github'
        service_account: 'dvc-bench@iterative-sandbox.iam.gserviceaccount.com'

    - name: configure real GS DVC env
      if: ${{ github.event_name == 'schedule' }}
      run: |
        echo "DVC_TEST_GS_BUCKET=dvc-bench" >> "$GITHUB_ENV"

    # CLOUD_REVS and DATASET are quoted so that a dispatch input containing
    # whitespace or glob characters reaches pytest as a single argument.
    - name: run benchmarks
      shell: bash
      working-directory: dvc-gs/
      run: >-
        pytest
        --benchmark-save ${{ matrix.test.name }}
        --benchmark-group-by func
        --dvc-revs "${CLOUD_REVS}"
        --dvc-install-deps gs
        --dataset "${DATASET}"
        --dvc-bench-repo ../dvc-bench
        ${{ matrix.test.path }}
        --dvc-repo ../dvc
        -W ignore

    - name: upload raw results
      uses: actions/upload-artifact@v4
      with:
        name: .benchmarks-${{ matrix.test.name }}
        path: .benchmarks
notify:
  # Posts a Slack alert when a benchmark job fails on main. Manual
  # (workflow_dispatch) runs never notify.
  if: github.event_name != 'workflow_dispatch' && github.ref == 'refs/heads/main' && failure()
  needs: [build, build_s3, build_azure, build_gs]
  runs-on: ubuntu-latest
  steps:
    - name: Slack Notification
      uses: rtCamp/action-slack-notify@v2.3.0
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        # This job only runs on failure(), so the colour is fixed.
        # `job.status` would report this notify job's own status, which is
        # `success` when the step runs, and colour the alert as a success.
        SLACK_COLOR: failure
        SLACK_MESSAGE: 'Benchmarks failed on main :confused_dog:'
        SLACK_TITLE: ":dvc:"
        SLACK_USERNAME: dvc-bench
compare:
  # Merges the raw results uploaded by the benchmark jobs, renders the HTML
  # report, and publishes it: to S3 and the Pages artifact for schedule and
  # dispatch runs, and as a PR comment for non-fork pull requests.
  name: join results and publish
  needs: [build, build_s3, build_azure, build_gs]
  # Runs even when some needed jobs were skipped, but not when any failed.
  if: ${{ always() && !contains(needs.*.result, 'failure') }}
  runs-on: ubuntu-latest
  environment: aws
  permissions:
    contents: write
    id-token: write
    pages: write
    pull-requests: write
  steps:
    - uses: iterative/setup-cml@v3

    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    # This repository at the workspace root, dvc in dvc/.
    - uses: actions/checkout@v4
    - uses: actions/checkout@v4
      with:
        path: dvc
        repository: ${{ env.DVC_REPOSITORY }}
        ref: ${{ env.DVC_REF }}
        fetch-depth: 0

    - uses: hynek/setup-cached-uv@v2
      with:
        cache-dependency-path: |
          requirements.txt
          dvc/pyproject.toml

    - name: install requirements
      run: |
        uv pip install -r requirements.txt "./dvc[testing]"

    # Every `.benchmarks-*` artifact is merged into one .benchmarks/ tree.
    - name: download ubuntu results
      uses: actions/download-artifact@v4
      with:
        path: .benchmarks
        pattern: .benchmarks-*
        merge-multiple: true

    - name: compare results
      shell: bash
      run: |
        ./scripts/ci/gen_html.sh

    # The next three steps only apply to schedule and dispatch runs.
    - name: configure AWS credentials
      if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-east-2
        role-to-assume: arn:aws:iam::260760892802:role/dvc-bench

    # The report is archived under the event name, keyed by run id and attempt.
    - name: upload to s3
      if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
      run: |
        aws s3 cp html/index.html s3://dvc-bench/pages/${{ github.event_name }}/${{ github.run_id }}_${{ github.run_attempt }}.html

    # Rebuilds the site: this run's report becomes latest.html, then the 30
    # newest archived reports per event type are copied from S3 and the
    # newest of each is copied to that folder's latest.html.
    - name: generate pages
      if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
      run: |
        mv html/index.html html/latest.html
        mkdir html/workflow_dispatch html/schedule
        LATEST_SCHEDULE="$(aws s3 ls s3://dvc-bench/pages/schedule/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
        LATEST_DISPATCH="$(aws s3 ls s3://dvc-bench/pages/workflow_dispatch/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
        echo "$LATEST_SCHEDULE" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/schedule
        echo "$LATEST_DISPATCH" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/workflow_dispatch
        cp html/schedule/$(echo "$LATEST_SCHEDULE" | head -1 | xargs basename) html/schedule/latest.html
        cp html/workflow_dispatch/$(echo "$LATEST_DISPATCH" | head -1 | xargs basename) html/workflow_dispatch/latest.html

    - name: generate listings
      uses: jayanta525/github-pages-directory-listing@v4.0.0
      with:
        FOLDER: html

    # Pull requests from forks are skipped.
    - name: post comment
      if: ${{ github.event_name == 'pull_request' && ! github.event.pull_request.head.repo.fork }}
      env:
        REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        cml comment update --watermark-title='dvc-bench report' report.md

    - name: Setup Pages
      id: pages
      uses: actions/configure-pages@v5

    # The html/ tree is packaged as the Pages artifact for the deploy job.
    - name: Upload artifact
      uses: actions/upload-pages-artifact@v3
      with:
        path: ./html
deploy:
  # Publishes the Pages artifact uploaded by `compare`. Only schedule and
  # dispatch runs deploy.
  needs: compare
  runs-on: ubuntu-latest
  # `always()` stops this job being auto-skipped when jobs upstream of
  # `compare` were skipped. Requiring `compare` to have succeeded (not just
  # "not failed") prevents a deploy attempt when `compare` was skipped or
  # cancelled and therefore uploaded no Pages artifact.
  if: ${{ always() && needs.compare.result == 'success' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') }}
  environment:
    name: github-pages
    url: ${{ steps.deployment.outputs.page_url }}
  permissions:
    pages: write
    id-token: write
  steps:
    - name: Deploy to GitHub Pages
      id: deployment
      uses: actions/deploy-pages@v4