# Workflow file for the run "New tasks and evaluation code for LLMs" (#527).

# Workflow display name; also part of the concurrency group below.
name: Main

# One active run per workflow/ref pair: a newer run cancels the in-flight one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Triggers: pull requests targeting main, pushes to main, and pushes of
# version tags matching v*.*.*.
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
    tags:
      - 'v*.*.*'

# Environment variables shared by every job in this workflow.
env:
  # Change this to invalidate existing cache.
  CACHE_PREFIX: v1
  # NOTE(review): the Python interpreter reads PYTHONPATH, not PYTHON_PATH --
  # confirm this variable name is intended.
  PYTHON_PATH: ./
  # Quoted so the version stays a string instead of being parsed as a float.
  DEFAULT_PYTHON: '3.9'
  BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
  BEAKER_WORKSPACE: ai2/catwalk-tests
  # To rebuild this image, run 'make docker-testing'.
  BEAKER_IMAGE: petew/catwalk-testing

jobs:
# Job `checks` (an entry of the workflow's `jobs:` mapping): runs the build,
# type-check and style tasks on each listed Python version, plus one docs
# build.
checks:
  name: ${{ matrix.task.name }} (python ${{ matrix.python }})
  runs-on: ubuntu-latest
  timeout-minutes: 30
  strategy:
    # Let the remaining matrix cells finish even if one of them fails.
    fail-fast: false
    matrix:
      # Versions are quoted so '3.10' is not parsed as the float 3.1.
      python: ['3.8', '3.9', '3.10']
      # Each task supplies a display name and the shell command to run.
      task:
        - name: Build
          run: python setup.py check && python setup.py bdist_wheel sdist
        - name: Type check
          run: mypy .
        - name: Style
          run: |
            isort --check catwalk
            black --check catwalk
      # Extra matrix cell: the docs build runs on Python 3.10 only.
      include:
        - task:
            name: Docs
            run: cd docs && make html SPHINXOPTS="-W --keep-going"
          python: '3.10'
  steps:
    - uses: actions/checkout@v3

    # Repository-local composite action, parameterized by the Python version
    # and the cache prefix.
    - name: Setup Python environment
      uses: ./.github/actions/setup-venv
      with:
        python-version: ${{ matrix.python }}
        cache-prefix: ${{ env.CACHE_PREFIX }}

    # Run this matrix cell's command with the virtualenv activated.
    - name: ${{ matrix.task.name }}
      run: |
        . .venv/bin/activate
        ${{ matrix.task.run }}

    # Only the Build cell on the default Python uploads the `package`
    # artifact, which the release job later downloads under the same name.
    - name: Upload package distribution files
      if: matrix.task.name == 'Build' && matrix.python == env.DEFAULT_PYTHON
      uses: actions/upload-artifact@v3
      with:
        name: package
        path: dist

    # `always()` makes this step run even when an earlier step failed.
    - name: Clean up
      if: always()
      run: |
        . .venv/bin/activate
        pip uninstall -y ai2-catwalk
# Job `tests` (an entry of the workflow's `jobs:` mapping): submits one Beaker
# experiment per test suite; each suite is selected with a pytest marker
# expression.
tests:
  name: Test - suite ${{ matrix.test_suite.name }}
  runs-on: ubuntu-latest
  timeout-minutes: 30
  strategy:
    # Let the other suites finish even if one of them fails.
    fail-fast: false
    matrix:
      # `mark` is passed to `pytest -m`; suite A is everything that is not
      # marked as suite B, C or D.
      test_suite:
        - name: A
          mark: "not suite_B and not suite_C and not suite_D"
        - name: B
          mark: "suite_B"
        - name: C
          mark: "suite_C"
        - name: D
          mark: "suite_D"
  steps:
    # Export COMMIT_SHA for later steps. For pull requests use the PR head
    # commit; for every other event use GITHUB_SHA.
    - name: Determine current commit SHA (pull request)
      if: github.event_name == 'pull_request'
      run: |
        echo "COMMIT_SHA=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_ENV"

    - name: Determine current commit SHA (push)
      if: github.event_name != 'pull_request'
      run: |
        echo "COMMIT_SHA=$GITHUB_SHA" >> "$GITHUB_ENV"

    # `spec` is a literal block scalar holding the experiment spec as YAML
    # text, so its internal indentation is significant. COMMIT_SHA is passed
    # into the task as an environment variable (presumably consumed by
    # /entrypoint.sh -- confirm against the image).
    - name: Tests
      uses: allenai/beaker-run-action@v1.1
      with:
        spec: |
          version: v2
          description: Catwalk tests - suite ${{ matrix.test_suite.name }}
          tasks:
            - name: tests
              image:
                beaker: ${{ env.BEAKER_IMAGE }}
              envVars:
                - name: COMMIT_SHA
                  value: ${{ env.COMMIT_SHA }}
              command:
                - "/entrypoint.sh"
                - "pytest"
                - "-v"
                - "--forked"
                - "-n4"
                - "--durations=5"
                - "--color=yes"
                - "tests/"
                - "-m"
                - "${{ matrix.test_suite.mark }}"
              constraints:
                cluster:
                  - ai2/general-cirrascale
                  - ai2/allennlp-cirrascale
                  - ai2/aristo-cirrascale
                  - ai2/mosaic-cirrascale
                  - ai2/s2-cirrascale
              context:
                priority: preemptible
              datasets:
                - mountPath: /root/.cache
                  source:
                    hostPath: /net/nfs/allennlp/catwalk-cache
              result:
                path: /unused
        token: ${{ secrets.BEAKER_TOKEN }}
        workspace: ${{ env.BEAKER_WORKSPACE }}
# Job `release` (an entry of the workflow's `jobs:` mapping): for tag refs,
# uploads the built distributions to PyPI and publishes a GitHub release.
release:
  name: Release
  runs-on: ubuntu-latest
  # NOTE(review): only `checks` gates the release; the `tests` job is not
  # listed here -- confirm that is intentional.
  needs: [checks]
  if: startsWith(github.ref, 'refs/tags/')
  steps:
    # NOTE(review): this job pins checkout@v1 while the checks job uses v3.
    # Presumably v1 is kept for its checkout behavior (history/tags needed by
    # the release-notes script) -- confirm before aligning the versions.
    - uses: actions/checkout@v1

    - name: Setup Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.10'

    - name: Install requirements
      run: |
        pip install --upgrade pip setuptools wheel
        pip install -r dev-requirements.txt

    # Export the bare version (ref without `refs/tags/v`) and the full tag
    # name (ref without `refs/tags/`) to later steps.
    - name: Prepare environment
      run: |
        echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_ENV"
        echo "TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"

    # Fetch the `package` artifact uploaded by the checks job into dist/.
    - name: Download package distribution files
      uses: actions/download-artifact@v3
      with:
        name: package
        path: dist

    # The notes file is written next to the workspace directory (workspace
    # path plus a `-RELEASE_NOTES.md` suffix); the publish step reads the
    # same path through `body_path`.
    - name: Generate release notes
      run: |
        python scripts/release_notes.py > "${{ github.workspace }}-RELEASE_NOTES.md"

    # Credentials are handed to twine through its TWINE_USERNAME and
    # TWINE_PASSWORD environment variables instead of being interpolated into
    # the command line, so special characters in a secret cannot alter the
    # shell command.
    - name: Publish package to PyPI
      env:
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        twine upload dist/*

    # Tags containing `rc` are published as pre-releases.
    - name: Publish GitHub release
      uses: softprops/action-gh-release@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        body_path: ${{ github.workspace }}-RELEASE_NOTES.md
        prerelease: ${{ contains(env.TAG, 'rc') }}
        files: |
          dist/*