Skip to content

chore(🤖): Bump NVIDIA/Megatron-LM to bed7dbd... (2025-05-21) #14563

chore(🤖): Bump NVIDIA/Megatron-LM to bed7dbd... (2025-05-21)

chore(🤖): Bump NVIDIA/Megatron-LM to bed7dbd... (2025-05-21) #14563

Workflow file for this run

# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: CICD NeMo
on:
  schedule:
    # Nightly run at midnight UTC. Quoted so a generic YAML parser never
    # misreads the '*'-laden cron string.
    - cron: "0 0 * * *"
  pull_request:
    branches:
      - main
      - r**
      - weekly-bump*
    # Only label events trigger PR runs; pre-flight checks for 'Run CICD'.
    types: [labeled]
  workflow_dispatch:
    inputs:
      test_to_run:
        required: false
        default: all
        type: string
        description: Comma-separated list of tests to run. Use "all" to run the full test suite.
concurrency:
  # group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.event.pull_request.number || github.ref }}-${{ github.event_name }}
  # One concurrent run per PR (or ref) per label/event; newer runs cancel
  # in-flight runs of the same group.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
  cancel-in-progress: true
jobs:
pre-flight:
runs-on: ubuntu-latest
outputs:
test_to_run: ${{ steps.test_to_run.outputs.main }}
is_ci_workload: ${{ steps.is_ci_workload.outputs.main }}
no_fail_fast: ${{ steps.no_fail_fast.outputs.main }}
components_to_run: ${{ steps.components_to_run.outputs.main }}
env:
TESTS_TO_RUN: ${{ inputs.test_to_run }}
EVENT_NAME: ${{ github.event_name }}
HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
steps:
- name: Checkout branch
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Select components to run
id: components_to_run
run: |
pip install -U pip
pip install git-python
if [[ "$EVENT_NAME" == "pull_request" ]]; then
python .github/scripts/components_to_run.py --source-sha ${{ github.event.pull_request.head.sha }} --target-sha ${{ github.event.pull_request.base.sha }}
else
echo '["nemo2", "automodel", "export-deploy", "speech"]' | tee -a test_modules.json
fi
components_to_run=$(cat test_modules.json)
echo "main=${components_to_run}" | tee -a "$GITHUB_OUTPUT"
- name: Select tests to run
id: test_to_run
run: |
# For manual dispatch, we replace `all` with the actual job names
if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
TESTS_TO_RUN=$TESTS_TO_RUN
# For correctly labeled PR, we replace `all` with the actual job names
elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" == "true" ]]; then
TESTS_TO_RUN=all
# For incorrectly labeled PR, run no tests
elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" != "true" ]]; then
TESTS_TO_RUN=""
# For push events, run all tests. This is so that we can generate coverage
# on branch `main`.
elif [[ "$EVENT_NAME" == "push" || "$EVENT_NAME" == "schedule" ]]; then
TESTS_TO_RUN=all
else
echo "Unsupported event_name $EVENT_NAME provided".
exit 1
fi
parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")')
echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"
- name: Check if this is a CI workload
shell: bash
id: is_ci_workload
run: |
branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
if [[ "$branch_name" =~ ^bump-ci-container || "$EVENT_NAME" == "schedule" ]]; then
is_ci_workload=true
echo "main=true" | tee -a "$GITHUB_OUTPUT"
else
is_ci_workload=false
fi
echo "main=$is_ci_workload" | tee -a "$GITHUB_OUTPUT"
- name: Check if no-fail-fast is set
shell: bash
id: no_fail_fast
env:
HAS_FAIL_FAST_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'no-fail-fast') }}
run: |
if [[ "$HAS_FAIL_FAST_LABEL" == "true" || "$EVENT_NAME" == "schedule" ]]; then
no_fail_fast=true
else
no_fail_fast=false
fi
echo "main=$no_fail_fast" | tee -a "$GITHUB_OUTPUT"
code-linting:
if: needs.pre-flight.outputs.test_to_run != '[]'
needs: [pre-flight]
uses: ./.github/workflows/code-linting.yml
cicd-wait-in-queue:
needs: [pre-flight, code-linting]
runs-on: ubuntu-latest
environment: test
if: |
needs.pre-flight.outputs.test_to_run != '[]'
&& needs.pre-flight.outputs.components_to_run != '[]'
&& needs.pre-flight.outputs.is_ci_workload == 'false'
steps:
- name: Running CI tests
run: |
echo "Running CI tests"
cicd-test-container-build:
uses: ./.github/workflows/_build_container.yml
needs: [pre-flight, code-linting, cicd-wait-in-queue]
if: |
needs.pre-flight.outputs.test_to_run != '[]'
&& needs.pre-flight.outputs.components_to_run != '[]'
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
with:
image-name: nemo_container
dockerfile: docker/Dockerfile.ci
cicd-import-tests:
if: |
needs.pre-flight.outputs.test_to_run != '[]'
&& needs.pre-flight.outputs.components_to_run != '[]'
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
needs: [cicd-test-container-build, pre-flight]
runs-on: self-hosted-azure-gpus-1
steps:
- name: Create UUID
id: uuid
run: |
echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT"
- name: Checkout NeMo
uses: actions/checkout@v2
with:
repository: NVIDIA/NeMo
path: ${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo
- name: Run some checks
run: |
docker run \
--rm \
--device=/dev/nvidia0 \
--gpus all \
--shm-size=8g \
--volume $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo:/workspace \
--env TRANSFORMERS_OFFLINE=0 \
--env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
# PyTorch Lightning version
python -c "import lightning.pytorch; print(lightning.pytorch.__version__)"
# PyTorch Lightning DDP Checks
CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
# Basic Import Checks
python tests/core_ptl/check_imports.py --domain asr
python tests/core_ptl/check_imports.py --domain nlp
python tests/core_ptl/check_imports.py --domain tts
'
L0_Setup_Test_Data_And_Models:
needs: [pre-flight, cicd-test-container-build, cicd-wait-in-queue]
runs-on: self-hosted-azure
if: |
needs.pre-flight.outputs.test_to_run != '[]'
&& needs.pre-flight.outputs.components_to_run != '[]'
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
steps:
- name: Checkout
uses: actions/checkout@v4
with:
path: ${{ github.run_id }}
- name: main
uses: NVIDIA/NeMo/.github/actions/test-template@main
with:
runner: ${{ runner.name }}
script: L0_Setup_Test_Data_And_Models
tests_to_run: '["L0_Setup_Test_Data_And_Models"]'
cicd-main-unit-tests:
needs: [pre-flight, cicd-test-container-build]
uses: ./.github/workflows/cicd-main-unit-tests.yml
if: |
needs.pre-flight.outputs.test_to_run != '[]'
&& needs.pre-flight.outputs.components_to_run != '[]'
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
with:
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
cicd-main-export-deploy:
needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
uses: ./.github/workflows/cicd-main-export-deploy.yml
if: |
(
needs.pre-flight.outputs.test_to_run != '[]'
&& (
contains(fromJson(needs.pre-flight.outputs.components_to_run), 'export-deploy')
)
)
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
with:
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
cicd-main-speech:
needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
uses: ./.github/workflows/cicd-main-speech.yml
if: |
(
needs.pre-flight.outputs.test_to_run != '[]'
&& (
contains(fromJson(needs.pre-flight.outputs.components_to_run), 'speech')
)
)
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
with:
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
cicd-main-automodel:
needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
uses: ./.github/workflows/cicd-main-automodel.yml
if: |
(
needs.pre-flight.outputs.test_to_run != '[]'
&& (
contains(fromJson(needs.pre-flight.outputs.components_to_run), 'automodel')
)
)
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
with:
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
cicd-main-nemo2:
needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
uses: ./.github/workflows/cicd-main-nemo2.yml
if: |
(
needs.pre-flight.outputs.test_to_run != '[]'
&& (
contains(fromJson(needs.pre-flight.outputs.components_to_run), 'nemo2')
|| needs.pre-flight.outputs.components_to_run == '["all"]'
)
)
&& (
success()
|| (
needs.cicd-wait-in-queue.result == 'skipped'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
)
)
&& !cancelled()
with:
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
Nemo_CICD_Test:
needs:
- pre-flight
- cicd-test-container-build
- cicd-import-tests
- L0_Setup_Test_Data_And_Models
- cicd-main-unit-tests
- cicd-main-nemo2
- cicd-main-export-deploy
- cicd-main-automodel
- cicd-main-speech
if: always()
runs-on: ubuntu-latest
permissions: write-all
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Get workflow result
id: result
env:
GH_TOKEN: ${{ github.token }}
RUN_ID: ${{ github.run_id }}
HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
IS_SCHEDULED: ${{ github.event_name == 'schedule' }}
run: |
# Get workflow run details and check job conclusions
LATEST_ATTEMPT=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion != null) | .conclusion] | last')
NUM_FAILED=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion == "failure") | .name] | length')
NUM_CANCELLED=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion == "cancelled") | .name] | length')
if [[ $NUM_FAILED -eq 0 && $NUM_CANCELLED -eq 0 && ("$HAS_LABEL" == "true" || "$IS_SCHEDULED" == "true") ]]; then
RESULT="success"
elif [[ $NUM_CANCELLED -gt 0 ]]; then
RESULT="cancelled"
else
RESULT="failure"
fi
# Output the final status
echo "code=$RESULT" | tee -a $GITHUB_OUTPUT
- name: Checkout for GH CLI
uses: actions/checkout@v4
- name: Remove label if not cancelled
if: |
steps.result.outputs.code != 'cancelled'
&& github.event.label.name == 'Run CICD'
&& github.event.pull_request.head.repo.full_name == github.repository
env:
GH_TOKEN: ${{ github.token }}
PR_NUMBER: ${{ github.event.number }}
run: gh pr edit "$PR_NUMBER" --remove-label "Run CICD"
- name: Pipeline successful, add PR comment
if: |
steps.result.outputs.code == 'success'
&& github.event_name == 'pull_request'
&& env.SLACK_WEBHOOK != ''
uses: peter-evans/create-or-update-comment@v4
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPOSITORY: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
with:
issue-number: ${{ github.event.number }}
body: |
[🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully.
So it might be time to merge this PR or get some approvals.
//cc @chtruong814 @ko3n1g @pablo-garay @thomasdhc
- name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
if: |
steps.result.outputs.code == 'failure'
&& github.event.label.name == 'Run CICD'
&& env.SLACK_WEBHOOK != ''
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPOSITORY: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
PR_NUMBER: ${{ github.event.number }}
SERVER_URL: ${{ github.server_url }}
run: |
set -x
pip install PyGithub
export BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
python .github/scripts/notify.py
- name: Exit
if: ${{ always() }}
env:
RESULT: ${{ steps.result.outputs.code }}
run: |
if [ $RESULT == "success" ]; then
exit 0
else
exit 1
fi
Coverage:
runs-on: ubuntu-latest
needs: [pre-flight, Nemo_CICD_Test]
if: |
needs.pre-flight.outputs.test_to_run != '[]'
&& needs.pre-flight.outputs.components_to_run != '[]'
&& (
success()
|| needs.Nemo_CICD_Test.result == 'success'
)
&& !cancelled()
strategy:
matrix:
flag: [unit-test, e2e]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Download coverage reports of current branch
uses: actions/download-artifact@v4
with:
pattern: coverage-${{ matrix.flag }}-*
- name: Get total coverage of current branch
shell: bash -x -e -u -o pipefail {0}
if: always()
run: |
pip install coverage
ls -al .
ls -al coverage-*/
coverage combine --keep $(ls coverage-*/.coverage)
coverage report -i
rm -rf coverage-*
ls -al
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
flags: ${{ matrix.flag }}
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: coverage-${{ matrix.flag }}-aggregated
path: |
.coverage
include-hidden-files: true