From 20848553e8fa6af3fbdb192c87e634f540a23e67 Mon Sep 17 00:00:00 2001
From: Syakyr Surani
Date: Tue, 27 Aug 2024 14:34:51 +0800
Subject: [PATCH 1/6] fix #41: added test_dummy to src/tests for base template to pass CI/CD

---
 {{cookiecutter.repo_name}}/src/tests/test_dummy.py | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 {{cookiecutter.repo_name}}/src/tests/test_dummy.py

diff --git a/{{cookiecutter.repo_name}}/src/tests/test_dummy.py b/{{cookiecutter.repo_name}}/src/tests/test_dummy.py
new file mode 100644
index 0000000..4e373b9
--- /dev/null
+++ b/{{cookiecutter.repo_name}}/src/tests/test_dummy.py
@@ -0,0 +1,4 @@
+def test_dummy():
+    """A dummy test so that the CI/CD test stage doesn't fail."""
+
+    assert True
\ No newline at end of file

From 804f84f41ad94db37c75062ff6f5da93b1892132 Mon Sep 17 00:00:00 2001
From: Syakyr Surani
Date: Fri, 30 Aug 2024 17:30:30 +0800
Subject: [PATCH 2/6] fix: added extra section on issues with macOS installs

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 1e3ccc4..df108fc 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,13 @@ $ pip install "cookiecutter>=2.2"
 $ cookiecutter https://github.com/aisingapore/kapitan-hull
 ```
 
+> On macOS, `pip` sometimes fails to install the `cookiecutter` CLI.
+> If that's the case, you can either use `conda install` (if you're
+> using Conda), or follow the instructions on the [`cookiecutter`
+> guide site][ccutter-inst] to install it with `pipx` instead.
+
+[ccutter-inst]: https://cookiecutter.readthedocs.io/en/stable/README.html#installation
+
 If you want to run a specific version of Kapitan Hull for
 compatibility reasons, you can specify the `-c` parameter for the
 specific tag/branch we have:

From 947d0464745c43dc0d3988e58c9f2236b7de06b8 Mon Sep 17 00:00:00 2001
From: Syakyr Surani
Date: Tue, 3 Sep 2024 02:56:34 +0000
Subject: [PATCH 3/6] chore: updated miniconda3 Docker image, aggregated it under default, added rules + optional needs when building pages

---
 {{cookiecutter.repo_name}}/.gitlab-ci.yml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/{{cookiecutter.repo_name}}/.gitlab-ci.yml b/{{cookiecutter.repo_name}}/.gitlab-ci.yml
index 159a9bd..4c19502 100644
--- a/{{cookiecutter.repo_name}}/.gitlab-ci.yml
+++ b/{{cookiecutter.repo_name}}/.gitlab-ci.yml
@@ -9,6 +9,8 @@ default:
   cache:
     key: $CI_COMMIT_REF_SLUG
     paths:
       - ./conda
+  image:
+    name: continuumio/miniconda3:24.7.1-0
 stages:
   - test
@@ -17,8 +19,6 @@ stages:
 
 test:conda-build:
   stage: test
-  image:
-    name: continuumio/miniconda3:23.10.0-1
   script:
     - conda env create -f {{cookiecutter.repo_name}}-conda-env.yaml -p ./conda/{{cookiecutter.repo_name}} -y
   rules:
@@ -38,8 +38,6 @@ test:conda-build:
 
 test:pylint-pytest:
   stage: test
-  image:
-    name: continuumio/miniconda3:23.10.0-1
   before_script:
     - source activate ./conda/{{cookiecutter.repo_name}}
     - pip install -r dev-requirements.txt
@@ -66,8 +64,6 @@ test:pylint-pytest:
 
 pages:
   stage: deploy-docs
-  image:
-    name: continuumio/miniconda3:23.10.0-1
   before_script:
     - source activate ./conda/{{cookiecutter.repo_name}}
     - pip install -r docs-requirements.txt
@@ -77,10 +73,14 @@
   artifacts:
     paths:
       - public
-  only:
-    - main
+  rules:
+    - if: $CI_COMMIT_BRANCH == "main"
+      changes:
+        - docs/**/*
+        - src/**/*
   needs:
-    - test:conda-build
+    - job: test:conda-build
+      optional: true
 
 build:data-prep-image:
   stage: build

From cf81bd93c4ee170f9175b370ac66843e96b5ea2a Mon Sep 17 00:00:00 2001
From: Syakyr Surani
Date: Tue, 3 Sep 2024 02:59:54 +0000
Subject: 
[PATCH 4/6] chore: bump versions of dependent packages --- {{cookiecutter.repo_name}}/README.md | 2 +- .../03-mlops-components-platform.md | 8 ++++---- .../guide-site/mkdocs-requirements.txt | 12 ++++++------ .../dev-requirements.txt | 4 ++-- .../{{cookiecutter.repo_name}}-cpu.Dockerfile | 2 +- .../{{cookiecutter.repo_name}}-gpu.Dockerfile | 4 ++-- .../docs-requirements.txt | 2 +- {{cookiecutter.repo_name}}/docs/conf.py | 2 +- .../03-mlops-components-platform.md | 8 ++++---- .../{{cookiecutter.repo_name}}-cpu.Dockerfile | 2 +- .../{{cookiecutter.repo_name}}-gpu.Dockerfile | 4 ++-- .../cv/pytorch-cpu-requirements.txt | 4 ++-- .../cv/pytorch-gpu-requirements.txt | 4 ++-- .../problem-templates/cv/requirements.txt | 18 +++++++++--------- ...cookiecutter.repo_name}}-conda-env-gpu.yaml | 10 +++++----- .../{{cookiecutter.repo_name}}-conda-env.yaml | 10 +++++----- {{cookiecutter.repo_name}}/requirements.txt | 18 +++++++++--------- .../{{cookiecutter.repo_name}}-conda-env.yaml | 4 ++-- 18 files changed, 59 insertions(+), 59 deletions(-) diff --git a/{{cookiecutter.repo_name}}/README.md b/{{cookiecutter.repo_name}}/README.md index c62d9e8..97db523 100644 --- a/{{cookiecutter.repo_name}}/README.md +++ b/{{cookiecutter.repo_name}}/README.md @@ -76,7 +76,7 @@ To spin up the site on your local machine, you can create a virtual environment to install the dependencies first: ```bash -$ conda create -n aisg-kh-guide python=3.11.7 +$ conda create -n aisg-kh-guide python=3.12.4 $ conda activate aisg-kh-guide $ pip install -r aisg-context/guide-site/mkdocs-requirements.txt ``` diff --git a/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md b/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md index eeb1ea9..7b926ba 100644 --- a/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md +++ b/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md @@ -719,9 +719,9 @@ MLflow Tracking server. === "Linux/macOS" ```bash - conda create -n mlflow-test python=3.11.7 + conda create -n mlflow-test python=3.12.4 conda activate mlflow-test - pip install mlflow==2.9.2 + pip install mlflow==2.15.1 # Install boto3 or google-cloud-storage packages if # custom object storage is used export MLFLOW_TRACKING_USERNAME= @@ -732,9 +732,9 @@ MLflow Tracking server. 
=== "Windows PowerShell" ```powershell - conda create -n mlflow-test python=3.11.7 + conda create -n mlflow-test python=3.12.4 conda activate mlflow-test - pip install mlflow==2.9.2 + pip install mlflow==2.15.1 # Install boto3 or google-cloud-storage packages if # custom object storage is used $MLFLOW_TRACKING_USERNAME= diff --git a/{{cookiecutter.repo_name}}/aisg-context/guide-site/mkdocs-requirements.txt b/{{cookiecutter.repo_name}}/aisg-context/guide-site/mkdocs-requirements.txt index d079c6a..241a9c0 100644 --- a/{{cookiecutter.repo_name}}/aisg-context/guide-site/mkdocs-requirements.txt +++ b/{{cookiecutter.repo_name}}/aisg-context/guide-site/mkdocs-requirements.txt @@ -1,8 +1,8 @@ -mkdocs==1.5.3 -mkdocs-material==9.5.3 -mkdocs-minify-plugin==0.7.2 +mkdocs==1.6.1 +mkdocs-material==9.5.34 +mkdocs-minify-plugin==0.8.0 mkdocs-redirects==1.2.1 mkdocs-ezlinks-plugin==0.1.14 -Pygments==2.17.2 -jinja2==3.1.2 -termynal==0.11.1 \ No newline at end of file +Pygments==2.18.0 +jinja2==3.1.4 +termynal==0.12.1 \ No newline at end of file diff --git a/{{cookiecutter.repo_name}}/dev-requirements.txt b/{{cookiecutter.repo_name}}/dev-requirements.txt index e46077c..6e3f0e1 100644 --- a/{{cookiecutter.repo_name}}/dev-requirements.txt +++ b/{{cookiecutter.repo_name}}/dev-requirements.txt @@ -1,2 +1,2 @@ -pylint==2.16.2 -pytest==7.4.0 \ No newline at end of file +pylint==3.2.7 +pytest==8.3.2 \ No newline at end of file diff --git a/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile b/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile index 9a22a43..21cd291 100644 --- a/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile +++ b/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.11-slim +FROM python:3.12-slim ARG DEBIAN_FRONTEND="noninteractive" diff --git a/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile b/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile index b0fcf7b..42ca0df 100644 --- a/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile +++ b/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile @@ -1,6 +1,6 @@ # Use this if deployed outside RunAI -#FROM nvcr.io/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 -FROM python:3.11-slim +#FROM nvcr.io/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 +FROM python:3.12-slim ARG DEBIAN_FRONTEND="noninteractive" diff --git a/{{cookiecutter.repo_name}}/docs-requirements.txt b/{{cookiecutter.repo_name}}/docs-requirements.txt index 571c5c6..c603372 100644 --- a/{{cookiecutter.repo_name}}/docs-requirements.txt +++ b/{{cookiecutter.repo_name}}/docs-requirements.txt @@ -1 +1 @@ -sphinx==7.2.5 \ No newline at end of file +sphinx==8.0.2 \ No newline at end of file diff --git a/{{cookiecutter.repo_name}}/docs/conf.py b/{{cookiecutter.repo_name}}/docs/conf.py index 9f616d0..045b427 100644 --- a/{{cookiecutter.repo_name}}/docs/conf.py +++ b/{{cookiecutter.repo_name}}/docs/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- project = "{{cookiecutter.project_name}}" -copyright = "2022, {{cookiecutter.author_name}}" +copyright = "2024, {{cookiecutter.author_name}}" author = "{{cookiecutter.author_name}}" # The full version, including alpha/beta/rc tags diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md 
b/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md index 7607f26..935d95e 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/03-mlops-components-platform.md @@ -898,9 +898,9 @@ MLflow Tracking server. === "Linux/macOS" ```bash - conda create -n mlflow-test python=3.11.7 + conda create -n mlflow-test python=3.12.4 conda activate mlflow-test - pip install mlflow==2.9.2 + pip install mlflow==2.15.1 # Install boto3 or google-cloud-storage packages if # custom object storage is used export MLFLOW_TRACKING_USERNAME= @@ -911,9 +911,9 @@ MLflow Tracking server. === "Windows PowerShell" ```powershell - conda create -n mlflow-test python=3.11.7 + conda create -n mlflow-test python=3.12.4 conda activate mlflow-test - pip install mlflow==2.9.2 + pip install mlflow==2.15.1 # Install boto3 or google-cloud-storage packages if # custom object storage is used $MLFLOW_TRACKING_USERNAME= diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile b/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile index f5ceced..d27235d 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.11-slim +FROM python:3.12-slim ARG DEBIAN_FRONTEND="noninteractive" diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile b/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile index 4bf27d2..a5f2136 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile @@ -1,6 +1,6 @@ # Use this if deployed outside RunAI -#FROM nvcr.io/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 -FROM python:3.11-slim +#FROM nvcr.io/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 +FROM python:3.12-slim ARG DEBIAN_FRONTEND="noninteractive" diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-cpu-requirements.txt b/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-cpu-requirements.txt index fc30d00..75593eb 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-cpu-requirements.txt +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-cpu-requirements.txt @@ -1,3 +1,3 @@ --index-url https://download.pytorch.org/whl/cpu -torch==2.1.2 -torchvision==0.16.2 \ No newline at end of file +torch==2.3.1 +torchvision==0.18.1 \ No newline at end of file diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-gpu-requirements.txt b/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-gpu-requirements.txt index 3e26e4b..0b9c733 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-gpu-requirements.txt +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/pytorch-gpu-requirements.txt @@ -1,3 +1,3 @@ --index-url https://download.pytorch.org/whl/cu121 -torch==2.1.2 -torchvision==0.16.2 \ No newline at end of file +torch==2.3.1 +torchvision==0.18.1 \ No newline at end of file diff --git 
a/{{cookiecutter.repo_name}}/problem-templates/cv/requirements.txt b/{{cookiecutter.repo_name}}/problem-templates/cv/requirements.txt index f774cce..630964c 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/requirements.txt +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/requirements.txt @@ -1,13 +1,13 @@ -mlflow-skinny==2.9.2 +mlflow-skinny==2.15.1 hydra-core==1.3.2 hydra-optuna-sweeper==1.2.0 python-json-logger==2.0.7 -fastapi==0.109.0 -uvicorn[standard]==0.25.0 -python-multipart==0.0.6 +fastapi==0.112.2 +uvicorn[standard]==0.30.6 +python-multipart==0.0.9 jsonlines==4.0.0 -pandas==2.1.4 -gunicorn==21.2.0 -pydantic==2.5.3 -pydantic-settings==2.1.0 -ipykernel==6.25.0 \ No newline at end of file +pandas==2.2.2 +gunicorn==23.0.0 +pydantic==2.8.2 +pydantic-settings==2.4.0 +ipykernel==6.29.5 \ No newline at end of file diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env-gpu.yaml b/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env-gpu.yaml index 2b2bef2..83a4824 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env-gpu.yaml +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env-gpu.yaml @@ -1,14 +1,14 @@ name: {{cookiecutter.repo_name}}-gpu channels: - - defaults - pytorch + - defaults - nvidia - conda-forge dependencies: - - pytorch=2.1.2 - - torchvision=0.16.2 + - pytorch=2.3.1 + - torchvision=0.18.1 - pytorch-cuda=12.1 - - python=3.11.7 - - pip=23.3.2 + - python=3.12.4 + - pip=24.2 - pip: - -r requirements.txt \ No newline at end of file diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env.yaml b/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env.yaml index 8a31fc0..089e049 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env.yaml +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/{{cookiecutter.repo_name}}-conda-env.yaml @@ -1,13 +1,13 @@ name: {{cookiecutter.repo_name}} channels: - - defaults - pytorch + - defaults - conda-forge dependencies: - - pytorch=2.1.2 - - torchvision=0.16.2 + - pytorch=2.3.1 + - torchvision=0.18.1 - cpuonly=2.0 - - python=3.11.7 - - pip=23.3.2 + - python=3.12.4 + - pip=24.2 - pip: - -r requirements.txt diff --git a/{{cookiecutter.repo_name}}/requirements.txt b/{{cookiecutter.repo_name}}/requirements.txt index f774cce..630964c 100644 --- a/{{cookiecutter.repo_name}}/requirements.txt +++ b/{{cookiecutter.repo_name}}/requirements.txt @@ -1,13 +1,13 @@ -mlflow-skinny==2.9.2 +mlflow-skinny==2.15.1 hydra-core==1.3.2 hydra-optuna-sweeper==1.2.0 python-json-logger==2.0.7 -fastapi==0.109.0 -uvicorn[standard]==0.25.0 -python-multipart==0.0.6 +fastapi==0.112.2 +uvicorn[standard]==0.30.6 +python-multipart==0.0.9 jsonlines==4.0.0 -pandas==2.1.4 -gunicorn==21.2.0 -pydantic==2.5.3 -pydantic-settings==2.1.0 -ipykernel==6.25.0 \ No newline at end of file +pandas==2.2.2 +gunicorn==23.0.0 +pydantic==2.8.2 +pydantic-settings==2.4.0 +ipykernel==6.29.5 \ No newline at end of file diff --git a/{{cookiecutter.repo_name}}/{{cookiecutter.repo_name}}-conda-env.yaml b/{{cookiecutter.repo_name}}/{{cookiecutter.repo_name}}-conda-env.yaml index e5b8b30..8989efc 100644 --- a/{{cookiecutter.repo_name}}/{{cookiecutter.repo_name}}-conda-env.yaml +++ b/{{cookiecutter.repo_name}}/{{cookiecutter.repo_name}}-conda-env.yaml @@ -3,7 +3,7 @@ channels: - defaults - conda-forge dependencies: - - 
python=3.11.7
-  - pip=23.3.2
+  - python=3.12.4
+  - pip=24.2
   - pip:
     - -r requirements.txt
\ No newline at end of file

From b51a3df9c3a9a05468ce8a4396cc3283bf4b3778 Mon Sep 17 00:00:00 2001
From: Syakyr Surani
Date: Tue, 3 Sep 2024 03:07:18 +0000
Subject: [PATCH 5/6] refactor: changed the image naming scheme + made runai commands more readable due to the version 2.17 update

---
 {{cookiecutter.repo_name}}/.gitlab-ci.yml    |  14 +--
 .../guide-for-user/07-job-orchestration.md   |  90 +++++++++-------
 .../guide-for-user/07-job-orchestration.md   | 102 +++++++++++-------
 3 files changed, 127 insertions(+), 79 deletions(-)

diff --git a/{{cookiecutter.repo_name}}/.gitlab-ci.yml b/{{cookiecutter.repo_name}}/.gitlab-ci.yml
index 4c19502..774efad 100644
--- a/{{cookiecutter.repo_name}}/.gitlab-ci.yml
+++ b/{{cookiecutter.repo_name}}/.gitlab-ci.yml
@@ -82,7 +82,7 @@ pages:
     - job: test:conda-build
       optional: true
 
-build:data-prep-image:
+build:cpu-image:
   stage: build
   image:
     name: gcr.io/kaniko-project/executor:debug
@@ -108,7 +108,7 @@
       /kaniko/executor
       --context "${CI_PROJECT_DIR}"
       --dockerfile "${CI_PROJECT_DIR}/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile"
-      --destination "{{cookiecutter.registry_project_path}}/data-prep:${CI_COMMIT_SHORT_SHA}"
+      --destination "{{cookiecutter.registry_project_path}}/cpu:${CI_COMMIT_SHORT_SHA}"
   rules:
     - if: $CI_MERGE_REQUEST_IID
       changes:
@@ -148,7 +148,7 @@ build:model-training-image:
       /kaniko/executor
       --context "${CI_PROJECT_DIR}"
       --dockerfile "${CI_PROJECT_DIR}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile"
-      --destination "{{cookiecutter.registry_project_path}}/model-training:${CI_COMMIT_SHORT_SHA}"
+      --destination "{{cookiecutter.registry_project_path}}/gpu:${CI_COMMIT_SHORT_SHA}"
   rules:
     - if: $CI_MERGE_REQUEST_IID
       changes:
@@ -182,12 +182,12 @@ build:retag-images:
   script:
 {%- if cookiecutter.platform == 'onprem' %}
     - cat $HARBOR_ROBOT_CREDS_JSON > /root/.docker/config.json
-    - crane tag {{cookiecutter.registry_project_path}}/data-prep:${CI_COMMIT_SHORT_SHA} ${CI_COMMIT_TAG}
-    - crane tag {{cookiecutter.registry_project_path}}/model-training:${CI_COMMIT_SHORT_SHA} ${CI_COMMIT_TAG}
+    - crane tag {{cookiecutter.registry_project_path}}/cpu:${CI_COMMIT_SHORT_SHA} ${CI_COMMIT_TAG}
+    - crane tag {{cookiecutter.registry_project_path}}/gpu:${CI_COMMIT_SHORT_SHA} ${CI_COMMIT_TAG}
 {%- elif cookiecutter.platform == 'gcp' %}
     - cat $GCP_SERVICE_ACCOUNT_KEY > /gcp-sa.json
-    - gcloud container images add-tag "{{cookiecutter.registry_project_path}}/data-prep:${CI_COMMIT_SHORT_SHA}" "{{cookiecutter.registry_project_path}}/data-prep:${CI_COMMIT_TAG}"
-    - gcloud container images add-tag "{{cookiecutter.registry_project_path}}/model-training:${CI_COMMIT_SHORT_SHA}" "{{cookiecutter.registry_project_path}}/model-training:${CI_COMMIT_TAG}"
+    - gcloud container images add-tag "{{cookiecutter.registry_project_path}}/cpu:${CI_COMMIT_SHORT_SHA}" "{{cookiecutter.registry_project_path}}/cpu:${CI_COMMIT_TAG}"
+    - gcloud container images add-tag "{{cookiecutter.registry_project_path}}/gpu:${CI_COMMIT_SHORT_SHA}" "{{cookiecutter.registry_project_path}}/gpu:${CI_COMMIT_TAG}"
 {%- endif %}
   rules:
     - if: $CI_COMMIT_TAG && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH

diff --git a/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md b/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md
index 65993ee..2b2388d 100644
--- 
a/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md +++ b/{{cookiecutter.repo_name}}/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md @@ -80,7 +80,7 @@ provided in this template: ```bash docker build \ - -t {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + -t {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ -f docker/{{cookiecutter.repo_name}}-cpu.Dockerfile \ --platform linux/amd64 . ``` @@ -89,7 +89,7 @@ provided in this template: ```powershell docker build ` - -t {{cookiecutter.registry_project_path}}/data-prep:0.1.0 ` + -t {{cookiecutter.registry_project_path}}/cpu:0.1.0 ` -f docker/{{cookiecutter.repo_name}}-cpu.Dockerfile ` --platform linux/amd64 . ``` @@ -101,11 +101,12 @@ provided in this template: # Run this in the base of your project repository, and change accordingly khull kaniko --context $(pwd) \ --dockerfile $(pwd)/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile \ - --destination {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + --destination {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ {%- if cookiecutter.platform == 'gcp' %} --gcp \ -{%- endif %} +{%- elif cookiecutter.platform == 'onprem' %} --cred-file /path/to/docker/config.json \ +{%- endif %} -v :/path/to/pvc/mount ``` @@ -118,7 +119,7 @@ After building the image, you can run the script through Docker: docker run --rm \ -v ./data:/home/aisg/{{cookiecutter.repo_name}}/data \ -w /home/aisg/{{cookiecutter.repo_name}} \ - {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ bash -c "python -u src/process_data.py" ``` @@ -128,7 +129,7 @@ After building the image, you can run the script through Docker: docker run --rm ` -v .\data:/home/aisg/{{cookiecutter.repo_name}}/data ` -w /home/aisg/{{cookiecutter.repo_name}} ` - {{cookiecutter.registry_project_path}}/data-prep:0.1.0 ` + {{cookiecutter.registry_project_path}}/cpu:0.1.0 ` bash -c "python -u src/process_data.py" ``` @@ -143,13 +144,13 @@ Docker registry: === "Linux/macOS" ```bash - docker push {{cookiecutter.registry_project_path}}/data-prep:0.1.0 + docker push {{cookiecutter.registry_project_path}}/cpu:0.1.0 ``` === "Windows PowerShell" ```powershell - docker push {{cookiecutter.registry_project_path}}/data-prep:0.1.0 + docker push {{cookiecutter.registry_project_path}}/cpu:0.1.0 ``` ### Run:ai @@ -163,11 +164,13 @@ a job using that image to Run:ai\: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -data-prep \ - -i {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ - --command -- '/bin/bash -c "python -u src/process_data.py raw_data_dir_path=//workspaces//data/raw processed_data_dir_path=//workspaces//data/processed"' + --command -- /bin/bash -c "python -u src/process_data.py \ + raw_data_dir_path=//workspaces//data/raw \ + processed_data_dir_path=//workspaces//data/processed" ``` === "Windows PowerShell" @@ -176,11 +179,11 @@ a job using that image to Run:ai\: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit ` --job-name-prefix -data-prep ` - -i {{cookiecutter.registry_project_path}}/data-prep:0.1.0 ` + -i {{cookiecutter.registry_project_path}}/cpu:0.1.0 ` --working-dir 
/home/aisg/{{cookiecutter.repo_name}} ` --existing-pvc claimname=,path=/ ` --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 ` - --command -- "/bin/bash -c 'python -u src/process_data.py raw_data_dir_path=//workspaces//data/raw processed_data_dir_path=//workspaces//data/processed'" + --command -- /bin/bash -c 'python -u src/process_data.py raw_data_dir_path=//workspaces//data/raw processed_data_dir_path=//workspaces//data/processed' ``` === "VSCode Server Terminal" @@ -189,11 +192,13 @@ a job using that image to Run:ai\: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -data-prep \ - -i {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ - --command -- '/bin/bash -c "python -u src/process_data.py raw_data_dir_path=//workspaces//data/raw processed_data_dir_path=//workspaces//data/processed"' + --command -- /bin/bash -c "python -u src/process_data.py \ + raw_data_dir_path=//workspaces//data/raw \ + processed_data_dir_path=//workspaces//data/processed" ``` After some time, the data processing job should conclude and we can @@ -309,7 +314,7 @@ We shall build the Docker image from the Docker file ```bash docker build \ - -t {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -t {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ -f docker/{{cookiecutter.repo_name}}-gpu.Dockerfile \ --platform linux/amd64 . ``` @@ -318,7 +323,7 @@ We shall build the Docker image from the Docker file ```powershell docker build ` - -t {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + -t {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` -f docker/{{cookiecutter.repo_name}}-gpu.Dockerfile ` --platform linux/amd64 . 
``` @@ -330,11 +335,12 @@ We shall build the Docker image from the Docker file # Run this in the base of your project repository, and change accordingly khull kaniko --context $(pwd) \ --dockerfile $(pwd)/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile \ - --destination {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + --destination {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ {%- if cookiecutter.platform == 'gcp' %} --gcp \ -{%- endif %} +{%- elif cookiecutter.platform == 'onprem' %} --cred-file /path/to/docker/config.json \ +{%- endif %} -v :/path/to/pvc/mount ``` @@ -352,7 +358,7 @@ After building the image, you can run the script through Docker: -v ./mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns \ -v ./models:/home/aisg/{{cookiecutter.repo_name}}/models \ -w /home/aisg/{{cookiecutter.repo_name}} \ - {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ bash -c "python -u src/train_model.py" ``` @@ -364,7 +370,7 @@ After building the image, you can run the script through Docker: -v .\mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns ` -v .\models:/home/aisg/{{cookiecutter.repo_name}}/models ` -w /home/aisg/{{cookiecutter.repo_name}} ` - {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` bash -c "python -u src/train_model.py" ``` @@ -403,13 +409,13 @@ Docker registry: === "Linux/macOS" ```bash - docker push {{cookiecutter.registry_project_path}}/model-training:0.1.0 + docker push {{cookiecutter.registry_project_path}}/gpu:0.1.0 ``` === "Windows PowerShell" ```powershell - docker push {{cookiecutter.registry_project_path}}/model-training:0.1.0 + docker push {{cookiecutter.registry_project_path}}/gpu:0.1.0 ``` ### Run:ai @@ -423,14 +429,17 @@ job using it: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -train \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ -e MLFLOW_TRACKING_USERNAME= \ -e MLFLOW_TRACKING_PASSWORD= \ -e OMP_NUM_THREADS=2 \ - --command -- '/bin/bash -c "python -u src/train_model.py data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models mlflow_tracking_uri="' + --command -- /bin/bash -c "python -u src/train_model.py \ + data_dir_path=//workspaces//data/processed \ + artifact_dir_path=//workspaces//models \ + mlflow_tracking_uri=" ``` === "Windows PowerShell" @@ -439,14 +448,14 @@ job using it: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit ` --job-name-prefix -train ` - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` --working-dir /home/aisg/{{cookiecutter.repo_name}} ` --existing-pvc claimname=,path=/ ` --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 ` -e MLFLOW_TRACKING_USERNAME= ` -e MLFLOW_TRACKING_PASSWORD= ` -e OMP_NUM_THREADS=2 ` - --command -- "/bin/bash -c 'python src/train_model.py data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models mlflow_tracking_uri='" + --command -- /bin/bash -c 'python src/train_model.py data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models 
mlflow_tracking_uri=' ``` === "VSCode Server Terminal" @@ -455,14 +464,17 @@ job using it: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC $ runai submit \ --job-name-prefix -train \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ -e MLFLOW_TRACKING_USERNAME= \ -e MLFLOW_TRACKING_PASSWORD= \ -e OMP_NUM_THREADS=2 \ - --command -- '/bin/bash -c "python -u src/train_model.py data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models mlflow_tracking_uri="' + --command -- /bin/bash -c "python -u src/train_model.py \ + data_dir_path=//workspaces//data/processed \ + artifact_dir_path=//workspaces//models \ + mlflow_tracking_uri=" ``` Once you have successfully run an experiment, you may inspect the run @@ -617,7 +629,7 @@ by default. -v ./mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns \ -v ./models:/home/aisg/{{cookiecutter.repo_name}}/models \ -w /home/aisg/{{cookiecutter.repo_name}} \ - {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ python -u src/train_model.py --multirun ``` @@ -629,7 +641,7 @@ by default. -v .\mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns ` -v .\models:/home/aisg/{{cookiecutter.repo_name}}/models ` -w /home/aisg/{{cookiecutter.repo_name}} ` - {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` python -u src/train_model.py --multirun ``` @@ -641,7 +653,7 @@ by default. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -train-hp \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ @@ -649,7 +661,10 @@ by default. -e MLFLOW_TRACKING_PASSWORD= \ -e MLFLOW_HPTUNING_TAG=$(date +%s) \ -e OMP_NUM_THREADS=2 \ - --command -- "/bin/bash -c 'python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models mlflow_tracking_uri='" + --command -- /bin/bash -c 'python -u src/train_model.py --multirun \ + data_dir_path=//workspaces//data/processed \ + artifact_dir_path=//workspaces//models \ + mlflow_tracking_uri=' ``` === "Windows PowerShell" @@ -658,7 +673,7 @@ by default. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit ` --job-name-prefix -train-hp ` - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` --working-dir /home/aisg/{{cookiecutter.repo_name}} ` --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ @@ -666,7 +681,7 @@ by default. 
-e MLFLOW_TRACKING_PASSWORD= ` -e MLFLOW_HPTUNING_TAG=$(Get-Date -UFormat %s -Millisecond 0) ` -e OMP_NUM_THREADS=2 ` - --command -- "/bin/bash -c 'python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models mlflow_tracking_uri='" + --command -- /bin/bash -c 'python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models mlflow_tracking_uri=' ``` === "VSCode Server Terminal" @@ -675,7 +690,7 @@ by default. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -train-hp \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ @@ -683,7 +698,10 @@ by default. -e MLFLOW_TRACKING_PASSWORD= \ -e MLFLOW_HPTUNING_TAG=$(date +%s) \ -e OMP_NUM_THREADS=2 \ - --command -- "/bin/bash -c 'python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed artifact_dir_path=//workspaces//models mlflow_tracking_uri='" + --command -- /bin/bash -c 'python -u src/train_model.py --multirun \ + data_dir_path=//workspaces//data/processed \ + artifact_dir_path=//workspaces//models \ + mlflow_tracking_uri=' ``` ![MLflow Tracking Server - Hyperparameter Tuning Runs](assets/screenshots/mlflow-tracking-hptuning-runs.png) diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md b/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md index b7da56f..5590c13 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/aisg-context/guide-site/docs/guide-for-user/07-job-orchestration.md @@ -81,7 +81,7 @@ To process the sample raw data, there are 3 main ways to do so: ```bash docker build \ - -t {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + -t {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ -f docker/{{cookiecutter.repo_name}}-cpu.Dockerfile \ --platform linux/amd64 . ``` @@ -90,7 +90,7 @@ To process the sample raw data, there are 3 main ways to do so: ```powershell docker build ` - -t {{cookiecutter.registry_project_path}}/data-prep:0.1.0 ` + -t {{cookiecutter.registry_project_path}}/cpu:0.1.0 ` -f docker/{{cookiecutter.repo_name}}-cpu.Dockerfile ` --platform linux/amd64 . 
``` @@ -104,7 +104,7 @@ To process the sample raw data, there are 3 main ways to do so: docker run --rm \ -v ./data:/home/aisg/{{cookiecutter.repo_name}}/data \ -w /home/aisg/{{cookiecutter.repo_name}} \ - {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ bash -c "python -u src/process_data.py" ``` @@ -114,7 +114,7 @@ To process the sample raw data, there are 3 main ways to do so: docker run --rm ` -v .\data:/home/aisg/{{cookiecutter.repo_name}}/data ` -w /home/aisg/{{cookiecutter.repo_name}} ` - {{cookiecutter.registry_project_path}}/data-prep:0.1.0 ` + {{cookiecutter.registry_project_path}}/cpu:0.1.0 ` bash -c "python -u src/process_data.py" ``` @@ -124,13 +124,13 @@ To process the sample raw data, there are 3 main ways to do so: === "Linux/macOS" ```bash - docker push {{cookiecutter.registry_project_path}}/data-prep:0.1.0 + docker push {{cookiecutter.registry_project_path}}/cpu:0.1.0 ``` === "Windows PowerShell" ```powershell - docker push {{cookiecutter.registry_project_path}}/data-prep:0.1.0 + docker push {{cookiecutter.registry_project_path}}/cpu:0.1.0 ``` === "Run:ai" @@ -147,11 +147,12 @@ To process the sample raw data, there are 3 main ways to do so: # Run this in the base of your project repository, and change accordingly khull kaniko --context $(pwd) \ --dockerfile $(pwd)/docker/{{cookiecutter.repo_name}}-cpu.Dockerfile \ - --destination {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + --destination {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ {%- if cookiecutter.platform == 'gcp' %} --gcp \ - {%- endif %} + {%- elif cookiecutter.platform == 'onprem' %} --cred-file /path/to/docker/config.json \ + {%- endif %} -v :/path/to/pvc/mount ``` @@ -164,11 +165,13 @@ To process the sample raw data, there are 3 main ways to do so: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -data-prep \ - -i {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ - --command -- '/bin/bash -c "python -u src/process_data.py raw_data_dir_path=//workspaces//data/mnist-pngs-data-aisg processed_data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed"' + --command -- /bin/bash -c "python -u src/process_data.py \ + raw_data_dir_path=//workspaces//data/mnist-pngs-data-aisg \ + processed_data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed" ``` === "Windows PowerShell" @@ -177,11 +180,11 @@ To process the sample raw data, there are 3 main ways to do so: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit ` --job-name-prefix -data-prep ` - -i {{cookiecutter.registry_project_path}}/data-prep:0.1.0 ` + -i {{cookiecutter.registry_project_path}}/cpu:0.1.0 ` --working-dir /home/aisg/{{cookiecutter.repo_name}} ` --existing-pvc claimname=,path=/ ` --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 ` - --command -- '/bin/bash -c "python -u src/process_data.py raw_data_dir_path=//workspaces//data/mnist-pngs-data-aisg processed_data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed"' + --command -- /bin/bash -c "python -u src/process_data.py raw_data_dir_path=//workspaces//data/mnist-pngs-data-aisg 
processed_data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed" ``` === "VSCode Server Terminal" @@ -190,11 +193,13 @@ To process the sample raw data, there are 3 main ways to do so: # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -data-prep \ - -i {{cookiecutter.registry_project_path}}/data-prep:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/cpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ - --command -- '/bin/bash -c "python -u src/process_data.py raw_data_dir_path=//workspaces//data/mnist-pngs-data-aisg processed_data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed"' + --command -- /bin/bash -c "python -u src/process_data.py \ + raw_data_dir_path=//workspaces//data/mnist-pngs-data-aisg \ + processed_data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed" ``` After some time, the data processing job should conclude and we can @@ -326,7 +331,7 @@ artifacts without explicitly knowing the {{objstg}} credentials. ```bash docker build \ - -t {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -t {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ -f docker/{{cookiecutter.repo_name}}-gpu.Dockerfile \ --platform linux/amd64 . ``` @@ -335,7 +340,7 @@ artifacts without explicitly knowing the {{objstg}} credentials. ```powershell docker build ` - -t {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + -t {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` -f docker/{{cookiecutter.repo_name}}-gpu.Dockerfile ` --platform linux/amd64 . ``` @@ -354,7 +359,7 @@ artifacts without explicitly knowing the {{objstg}} credentials. -v ./mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns \ -v ./models:/home/aisg/{{cookiecutter.repo_name}}/models \ -w /home/aisg/{{cookiecutter.repo_name}} \ - {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ bash -c "python -u src/train_model.py" ``` @@ -366,7 +371,7 @@ artifacts without explicitly knowing the {{objstg}} credentials. -v .\mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns ` -v .\models:/home/aisg/{{cookiecutter.repo_name}}/models ` -w /home/aisg/{{cookiecutter.repo_name}} ` - {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` bash -c "python -u src/train_model.py" ``` @@ -405,13 +410,13 @@ artifacts without explicitly knowing the {{objstg}} credentials. === "Linux/macOS" ```bash - docker push {{cookiecutter.registry_project_path}}/model-training:0.1.0 + docker push {{cookiecutter.registry_project_path}}/gpu:0.1.0 ``` === "Windows PowerShell" ```powershell - docker push {{cookiecutter.registry_project_path}}/model-training:0.1.0 + docker push {{cookiecutter.registry_project_path}}/gpu:0.1.0 ``` === "Run:ai" @@ -428,11 +433,12 @@ artifacts without explicitly knowing the {{objstg}} credentials. 
# Run this in the base of your project repository, and change accordingly khull kaniko --context $(pwd) \ --dockerfile $(pwd)/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile \ - --destination {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + --destination {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ {%- if cookiecutter.platform == 'gcp' %} --gcp \ - {%- endif %} + {%- elif cookiecutter.platform == 'onprem' %} --cred-file /path/to/docker/config.json \ + {%- endif %} -v :/path/to/pvc/mount ``` @@ -445,14 +451,20 @@ artifacts without explicitly knowing the {{objstg}} credentials. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -train \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ -e MLFLOW_TRACKING_USERNAME= \ -e MLFLOW_TRACKING_PASSWORD= \ -e OMP_NUM_THREADS=2 \ - --command -- '/bin/bash -c "python -u src/train_model.py data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3"' + --command -- /bin/bash -c "python -u src/train_model.py \ + data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed \ + setup_mlflow=true \ + mlflow_tracking_uri= \ + mlflow_exp_name= \ + model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models \ + epochs=3" ``` === "Windows PowerShell" @@ -461,14 +473,14 @@ artifacts without explicitly knowing the {{objstg}} credentials. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit ` --job-name-prefix -train ` - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` --working-dir /home/aisg/{{cookiecutter.repo_name}} ` --existing-pvc claimname=,path=/ ` --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 ` -e MLFLOW_TRACKING_USERNAME= ` -e MLFLOW_TRACKING_PASSWORD= ` -e OMP_NUM_THREADS=2 ` - --command -- '/bin/bash -c "python -u src/train_model.py data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3"' + --command -- /bin/bash -c "python -u src/train_model.py data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3" ``` === "VSCode Server Terminal" @@ -477,14 +489,20 @@ artifacts without explicitly knowing the {{objstg}} credentials. 
# Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC $ runai submit \ --job-name-prefix -train \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ -e MLFLOW_TRACKING_USERNAME= \ -e MLFLOW_TRACKING_PASSWORD= \ -e OMP_NUM_THREADS=2 \ - --command -- '/bin/bash -c "python -u src/train_model.py data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3"' + --command -- /bin/bash -c "python -u src/train_model.py \ + data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed \ + setup_mlflow=true \ + mlflow_tracking_uri= \ + mlflow_exp_name= \ + model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models \ + epochs=3" ``` Once you have successfully run an experiment, you may inspect the run @@ -639,7 +657,7 @@ by default. -v ./mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns \ -v ./models:/home/aisg/{{cookiecutter.repo_name}}/models \ -w /home/aisg/{{cookiecutter.repo_name}} \ - {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ python -u src/train_model.py --multirun ``` @@ -651,7 +669,7 @@ by default. -v .\mlruns:/home/aisg/{{cookiecutter.repo_name}}/mlruns ` -v .\models:/home/aisg/{{cookiecutter.repo_name}}/models ` -w /home/aisg/{{cookiecutter.repo_name}} ` - {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` python -u src/train_model.py --multirun ``` @@ -663,7 +681,7 @@ by default. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -train-hp \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ @@ -671,7 +689,13 @@ by default. -e MLFLOW_TRACKING_PASSWORD= \ -e MLFLOW_HPTUNING_TAG=$(date +%s) \ -e OMP_NUM_THREADS=2 \ - --command -- '/bin/bash -c "python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3"' + --command -- /bin/bash -c "python -u src/train_model.py --multirun \ + data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed \ + setup_mlflow=true \ + mlflow_tracking_uri= \ + mlflow_exp_name= \ + model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models \ + epochs=3" ``` === "Windows PowerShell" @@ -680,7 +704,7 @@ by default. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit ` --job-name-prefix -train ` - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 ` + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 ` --working-dir /home/aisg/{{cookiecutter.repo_name}} ` --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ @@ -688,7 +712,7 @@ by default. 
-e MLFLOW_TRACKING_PASSWORD= ` -e MLFLOW_HPTUNING_TAG=$(Get-Date -UFormat %s -Millisecond 0) ` -e OMP_NUM_THREADS=2 ` - --command -- '/bin/bash -c "python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3"' + --command -- /bin/bash -c "python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3" ``` === "VSCode Server Terminal" @@ -697,7 +721,7 @@ by default. # Switch working-dir to //workspaces//{{cookiecutter.repo_name}} to use the repo in the PVC runai submit \ --job-name-prefix -train-hp \ - -i {{cookiecutter.registry_project_path}}/model-training:0.1.0 \ + -i {{cookiecutter.registry_project_path}}/gpu:0.1.0 \ --working-dir /home/aisg/{{cookiecutter.repo_name}} \ --existing-pvc claimname=,path=/ \ --cpu 2 --cpu-limit 2 --memory 4G --memory-limit 4G --backoff-limit 1 \ @@ -705,7 +729,13 @@ by default. -e MLFLOW_TRACKING_PASSWORD= \ -e MLFLOW_HPTUNING_TAG=$(date +%s) \ -e OMP_NUM_THREADS=2 \ - --command -- '/bin/bash -c "python -u src/train_model.py --multirun data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed setup_mlflow=true mlflow_tracking_uri= mlflow_exp_name= model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models epochs=3"' + --command -- /bin/bash -c "python -u src/train_model.py --multirun \ + data_dir_path=//workspaces//data/processed/mnist-pngs-data-aisg-processed \ + setup_mlflow=true \ + mlflow_tracking_uri= \ + mlflow_exp_name= \ + model_checkpoint_dir_path=//workspaces//{{cookiecutter.repo_name}}/models \ + epochs=3" ``` ![MLflow Tracking Server - Hyperparameter Tuning Runs](assets/screenshots/mlflow-tracking-hptuning-runs.png) From cc5ba2be425d862c8f1a07613cb037848f2ac162 Mon Sep 17 00:00:00 2001 From: Syakyr Surani Date: Tue, 3 Sep 2024 03:07:59 +0000 Subject: [PATCH 6/6] fix: added micromamba option for gpu images built for outside RunAI use --- .../docker/{{cookiecutter.repo_name}}-gpu.Dockerfile | 11 +++++++++-- .../docker/{{cookiecutter.repo_name}}-gpu.Dockerfile | 12 ++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile b/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile index 42ca0df..7d9ac44 100644 --- a/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile +++ b/{{cookiecutter.repo_name}}/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile @@ -13,12 +13,13 @@ ARG REPO_DIR="." 
RUN useradd -l -m -s /bin/bash -u ${NON_ROOT_UID} ${NON_ROOT_USER} -RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub - RUN apt update && \ apt -y install curl git && \ apt clean +# Use this if deployed outside RunAI +#RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba + ENV PYTHONIOENCODING utf8 ENV LANG "C.UTF-8" ENV LC_ALL "C.UTF-8" @@ -34,3 +35,9 @@ COPY --chown=${NON_ROOT_USER}:${NON_ROOT_GID} ${REPO_DIR} {{cookiecutter.repo_na # Install pip requirements RUN pip install -r {{cookiecutter.repo_name}}/requirements.txt + +# Use this if deployed outside RunAI +#RUN micromamba shell init -s bash -p ~/micromamba +#RUN micromamba install python=3.12.4 -c defaults -n base -y +#RUN micromamba run -n base pip install -r {{cookiecutter.repo_name}}/requirements.txt +#RUN echo 'alias python="micromamba run -n base python"' >> "${HOME_DIR}/.bashrc" diff --git a/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile b/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile index a5f2136..1cac6b5 100644 --- a/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile +++ b/{{cookiecutter.repo_name}}/problem-templates/cv/docker/{{cookiecutter.repo_name}}-gpu.Dockerfile @@ -13,12 +13,13 @@ ARG REPO_DIR="." RUN useradd -l -m -s /bin/bash -u ${NON_ROOT_UID} ${NON_ROOT_USER} -RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub - RUN apt update && \ apt -y install curl git && \ apt clean +# Use this if deployed outside RunAI +#RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba + ENV PYTHONIOENCODING utf8 ENV LANG "C.UTF-8" ENV LC_ALL "C.UTF-8" @@ -35,3 +36,10 @@ COPY --chown=${NON_ROOT_USER}:${NON_ROOT_GID} ${REPO_DIR} {{cookiecutter.repo_na # Install pip requirements RUN pip install -r {{cookiecutter.repo_name}}/requirements.txt RUN pip install -r {{cookiecutter.repo_name}}/pytorch-gpu-requirements.txt + +# Use this if deployed outside RunAI +#RUN micromamba shell init -s bash -p ~/micromamba +#RUN micromamba install python=3.12.4 -c defaults -n base -y +#RUN micromamba run -n base pip install -r {{cookiecutter.repo_name}}/requirements.txt +#RUN micromamba run -n base pip install -r {{cookiecutter.repo_name}}/pytorch-gpu-requirements.txt +#RUN echo 'alias python="micromamba run -n base python"' >> "${HOME_DIR}/.bashrc"
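
If the commented-out micromamba route above is swapped in, a quick local sanity check might look like the following; the `0.1.0-micromamba` tag is purely illustrative, and since the `python` alias lives in `~/.bashrc`, it only resolves in interactive shells (hence `bash -ic`):

```bash
# Build the GPU image with the micromamba lines uncommented
# (the tag suffix here is illustrative, not part of the patches).
docker build \
    -t {{cookiecutter.registry_project_path}}/gpu:0.1.0-micromamba \
    -f docker/{{cookiecutter.repo_name}}-gpu.Dockerfile \
    --platform linux/amd64 .

# The alias is defined in ~/.bashrc, which only interactive shells
# read, so invoke bash with -i; alternatively, call
# `micromamba run -n base python` directly.
docker run --rm {{cookiecutter.registry_project_path}}/gpu:0.1.0-micromamba \
    bash -ic "python --version"
```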