Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

control-service: Clean up old data job configurations #2075

Merged
merged 3 commits into from
May 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,54 +133,6 @@ Also, we can't use a single if because lazy evaluation is not an option
{{- end -}}
{{- end -}}

{{/*
doks5 marked this conversation as resolved.
Show resolved Hide resolved
Return the proper Control Service data jobs base image where data job run in.
*/}}
{{- define "pipelines-control-service.deploymentDataJobBaseImage" -}}
{{- $globalRegistryOverrideName := .Values.deploymentDataJobBaseImage.globalRegistryOverride -}}
{{- $registryName := .Values.deploymentDataJobBaseImage.registry -}}
{{- $repositoryName := .Values.deploymentDataJobBaseImage.repository -}}
{{- $tag := .Values.deploymentDataJobBaseImage.tag | toString -}}
{{/*
Helm 2.11 supports the assignment of a value to a variable defined in a different scope,
but Helm 2.9 and 2.10 doesn't support it, so we need to implement this if-else logic.
Also, we can't use a single if because lazy evaluation is not an option
*/}}
{{- if $globalRegistryOverrideName }}
{{- printf "%s/%s:%s" $globalRegistryOverrideName $repositoryName $tag -}}
{{- else if .Values.global.imageRegistry }}
{{- printf "%s/%s:%s" .Values.global.imageRegistry $repositoryName $tag -}}
{{- else -}}
{{- printf "%s/%s:%s" $registryName $repositoryName $tag -}}
{{- end -}}
{{- end -}}

{{/*
Return the proper Control Service vdk image where the vdk (sdk) is installed and used during data job runs.
*/}}
{{- define "pipelines-control-service.deploymentVdkDistributionImageRepository" -}}
{{- $globalRegistryOverrideName := .Values.deploymentVdkDistributionImage.globalRegistryOverride -}}
{{- $registryName := .Values.deploymentVdkDistributionImage.registry -}}
{{- $repositoryName := .Values.deploymentVdkDistributionImage.repository -}}

{{/*
Helm 2.11 supports the assignment of a value to a variable defined in a different scope,
but Helm 2.9 and 2.10 doesn't support it, so we need to implement this if-else logic.
Also, we can't use a single if because lazy evaluation is not an option
*/}}
{{- if $globalRegistryOverrideName }}
{{- printf "%s/%s" $globalRegistryOverrideName $repositoryName -}}
{{- else if .Values.global.imageRegistry }}
{{- printf "%s/%s" .Values.global.imageRegistry $repositoryName -}}
{{- else -}}
{{- printf "%s/%s" $registryName $repositoryName -}}
{{- end -}}
{{- end -}}

{{- define "pipelines-control-service.deploymentVdkDistributionImage" -}}
doks5 marked this conversation as resolved.
Show resolved Hide resolved
{{- $tag := .Values.deploymentVdkDistributionImage.tag | toString -}}
{{- printf "%s:%s" (include "pipelines-control-service.deploymentVdkDistributionImageRepository" .) $tag -}}
{{- end -}}

{{/*
Create the name of the deployment Kubernetes namespace used by Data Jobs
Expand Down Expand Up @@ -274,7 +226,3 @@ Image Pull Secret in json format
{{- define "builderPullSecretJson" }}
{{ include "buildImagePullSecretJson" (list .Values.deploymentBuilderImage.registry .Values.deploymentBuilderImage.username .Values.deploymentBuilderImage.password) }}
{{- end }}

{{- define "basePullSecretJson" }}
{{ include "buildImagePullSecretJson" (list .Values.deploymentDataJobBaseImage.registry .Values.deploymentDataJobBaseImage.username .Values.deploymentDataJobBaseImage.password) }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,8 @@ spec:
{{- end }}
- name: DOCKER_REPOSITORY_URL
value: "{{ .Values.deploymentDockerRepository }}"
- name: DOCKER_VDK_BASE_IMAGE
value: {{ template "pipelines-control-service.deploymentVdkDistributionImage" . }}
- name: DATAJOBS_BUILDER_IMAGE
value: {{ template "pipelines-control-service.deploymentBuilderImage" . }}
- name: DATAJOBS_DEPLOYMENT_DATAJOBBASEIMAGE
value: {{ template "pipelines-control-service.deploymentDataJobBaseImage" . }}
- name: SERVER_MAX_HTTP_HEADER_SIZE
value: "{{ .Values.server.maxHttpHeaderSize }}"
- name: DB_JDBC_URL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,10 @@ deploymentDockerRegistryPassword: ""
deploymentDockerRegistryUsernameReadOnly: ""
deploymentDockerRegistryPasswordReadOnly: ""

## Image name of VDK which will be used to run the data jobs.
## It is recommended to use image with same tag (e.g release or latest)
## As it will be pulled before any execution thus making sure all jobs use the same and latest version of VDK.
## The image should contain installation of VDK (vdk) which will be used to run the data jobs (vdk run command)
## Only the installed python modules (vdk and its dependencies) will be used from the image.
## Everything else is effectively discarded since another image is used as base during execution.
## Override if you build VDK SDK yourself instead of using one provided by default.

## The registry and associated credentials needed to pull the vdk sdk image.
## These are stored in a kubernetes secret.
## IMPORTANT: The registry should be the same as the one specified in the deploymentSupportedPythonVersions
deploymentVdkDistributionImage:
registry: registry.hub.docker.com/versatiledatakit

Expand All @@ -270,19 +267,12 @@ deploymentVdkDistributionImage:
registryUsernameReadOnly: ""
registryPasswordReadOnly: ""

repository: quickstart-vdk
tag: "release"


# The base image which will be used to create the image where data job would be run.
# On top of it the job source and its dependencies are installed for each job.
# It can be customized further by installing other native dependencies necessary to run different kinds of libraries
# For example the below default image adds support for Oracle client on top of standard python image.
# The SDK (vdk) is installed separate image (to enable zero time upgrades) see deploymentVdkDistributionImage
## The registry URL and associated credentials needed to pull the data job base image.
## These are stored in a kubernetes secret.
## IMPORTANT: The registry should be the same as the one specified in the deploymentSupportedPythonVersions
deploymentDataJobBaseImage:
doks5 marked this conversation as resolved.
Show resolved Hide resolved
registry: registry.hub.docker.com/versatiledatakit
doks5 marked this conversation as resolved.
Show resolved Hide resolved
repository: data-job-base-python-3.7
tag: "latest"
username:
password:

Expand All @@ -307,13 +297,16 @@ deploymentDataJobBaseImage:
## 3.7:
## baseImage: "registry.hub.docker.com/versatiledatakit/data-job-base-python-3.7:latest"
## vdkImage: "registry.hub.docker.com/versatiledatakit/quickstart-vdk:release"
deploymentSupportedPythonVersions: {}
deploymentSupportedPythonVersions:
3.9:
baseImage: "registry.hub.docker.com/versatiledatakit/data-job-base-python-3.9:latest"
vdkImage: "registry.hub.docker.com/versatiledatakit/quickstart-vdk:release"


## Default python version to be used for data job deployments. The value should be a string and would be
## used when a user has not provided a specific python_version for their data job's deployment.
## Example: "3.9"
deploymentDefaultPythonVersion: ""
deploymentDefaultPythonVersion: "3.9"

## ini formatted options that provides default and per-job VDK options.
## Allows to provide VDK configuration via environment variables.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@
@TestPropertySource(
properties = {
"datajobs.control.k8s.k8sSupportsV1CronJob=true",
"datajobs.vdk.image=",
"datajobs.deployment.dataJobBaseImage="
})
@SpringBootTest(
webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ datajobs.aws.secretAccessKey=
# Path to an ini config file that contains vdk runtime options
# src/main/resources/vdk_options.ini can be used for testing
datajobs.vdk_options_ini=
datajobs.vdk.image=registry.hub.docker.com/versatiledatakit/quickstart-vdk:release

datajobs.deployment.k8s.kubeconfig=${DEPLOYMENT_K8S_KUBECONFIG:}
datajobs.deployment.k8s.namespace=${DEPLOYMENT_K8S_NAMESPACE:default}
Expand Down Expand Up @@ -59,7 +58,6 @@ datajobs.builder.registrySecret=${DATAJOBS_BUILDER_REGISTRY_SECRET:}

datajobs.builder.image=ghcr.io/versatile-data-kit-dev/versatiledatakit/job-builder:1.3.3
datajobs.proxy.repositoryUrl=${DOCKER_REGISTRY_URL}
datajobs.deployment.dataJobBaseImage=versatiledatakit/data-job-base-python-3.7:latest

datajobs.deployment.builder.extraArgs=--force

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ public class JobImageBuilder {
@Value("${datajobs.docker.registryPassword:}")
private String registryPassword;

@Value("${datajobs.deployment.dataJobBaseImage:python:3.9-slim}")
private String deploymentDataJobBaseImage;

@Value("${datajobs.deployment.builder.extraArgs:}")
private String builderJobExtraArgs;

Expand Down Expand Up @@ -138,10 +135,8 @@ public boolean buildImage(
}
}

// TODO: Remove when deploymentDataJobBaseImage deprecated.
if (jobDeployment.getPythonVersion() == null && deploymentDataJobBaseImage == null) {
log.warn(
"Missing pythonVersion and deploymentDataJobBaseImage. Data Job cannot be deployed.");
if (jobDeployment.getPythonVersion() == null) {
log.warn("Missing pythonVersion. Data Job cannot be deployed.");
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@
@Slf4j
public class JobImageDeployer {

// TODO: Out of the image we pick only the python vdk module. Remove when property deprecated
// It may make sense to just pass the module name as argument instead of an image ???
@Value("${datajobs.vdk.image}")
private String vdkImage;

@Value("${datajobs.docker.registrySecret:}")
private String dockerRegistrySecret = "";

Expand Down Expand Up @@ -258,7 +253,7 @@ private void updateCronJob(DataJob dataJob, JobDeployment jobDeployment, String
"-c",
"cp -r $(python -c \"from distutils.sysconfig import get_python_lib;"
+ " print(get_python_lib())\") /vdk/. && cp /usr/local/bin/vdk /vdk/.");
var jobVdkImage = getJobVdkImage(jobDeployment);
var jobVdkImage = supportedPythonVersions.getVdkImage(jobDeployment.getPythonVersion());
var jobInitContainer =
KubernetesService.container(
"vdk",
Expand Down Expand Up @@ -311,21 +306,6 @@ private void updateCronJob(DataJob dataJob, JobDeployment jobDeployment, String
}
}

private String getJobVdkImage(JobDeployment jobDeployment) {
// TODO: Refactor when vdkImage is deprecated.
if (!supportedPythonVersions.getSupportedPythonVersions().isEmpty()
&& supportedPythonVersions.isPythonVersionSupported(jobDeployment.getPythonVersion())) {
return supportedPythonVersions.getVdkImage(jobDeployment.getPythonVersion());
} else {
if (StringUtils.isNotBlank(jobDeployment.getVdkVersion())
&& StringUtils.isNotBlank(vdkImage)) {
return DockerImageName.updateImageWithTag(vdkImage, jobDeployment.getVdkVersion());
} else {
return vdkImage;
}
}
}

/**
* Returns the environment variables set to vdk that are based on job configuration. Those are
* automatically injected during cloud runs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ public class SupportedPythonVersions {
@Value("${datajobs.deployment.defaultPythonVersion}")
private String defaultPythonVersion;

// TODO: Remove when property is deprecated.
@Value("${datajobs.deployment.dataJobBaseImage:python:3.9-slim}")
private String deploymentDataJobBaseImage;

/**
* Check if the pythonVersion passed by the user is supported by the Control Service.
*
Expand Down Expand Up @@ -72,8 +68,6 @@ public Set<String> getSupportedPythonVersions() {
public String getJobBaseImage(String pythonVersion) {
if (supportedPythonVersions != null && isPythonVersionSupported(pythonVersion)) {
return supportedPythonVersions.get(pythonVersion).get(BASE_IMAGE);
} else if (deploymentDataJobBaseImage != null && !deploymentDataJobBaseImage.isBlank()) {
return deploymentDataJobBaseImage;
} else {
log.warn(
"An issue with the passed pythonVersion or supportedPythonVersions configuration has"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ datajobs.aws.region=${AWS_REGION}
datajobs.aws.accessKeyId=${AWS_ACCESS_KEY_ID}
datajobs.aws.secretAccessKey=${AWS_ACCESS_KEY_SECRET}
datajobs.docker.repositoryUrl=${DOCKER_REPOSITORY_URL}
datajobs.vdk.image=${DOCKER_VDK_BASE_IMAGE}
datajobs.docker.registryType=${DOCKER_REGISTRY_TYPE}
datajobs.docker.registryUsername=${DOCKER_REGISTRY_USERNAME}
datajobs.docker.registryPassword=${DOCKER_REGISTRY_PASSWORD}
Expand All @@ -57,7 +56,6 @@ datajobs.kadmin_password=${KADMIN_PASSWORD}

# This is the full image name of the data job builder
datajobs.builder.image=registry.hub.docker.com/versatiledatakit/job-builder:1.3.3
datajobs.deployment.dataJobBaseImage=registry.hub.docker.com/versatiledatakit/data-job-base-python-3.7:latest

# Path to an ini config file that contains vdk runtime options
datajobs.vdk_options_ini=${VDK_OPTIONS_INI}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ datajobs.deployment.dataJobBaseImage=python:3.9-slim

# The map of python version and respective data job base and vdk images that would be
# used for data job deployments
datajobs.deployment.supportedPythonVersions=${DATAJOBS_DEPLOYMENT_SUPPORTED_PYTHON_VERSIONS:{}}
datajobs.deployment.supportedPythonVersions=${DATAJOBS_DEPLOYMENT_SUPPORTED_PYTHON_VERSIONS:{3.9: {vdkImage: 'registry.hub.docker.com/versatiledatakit/quickstart-vdk:release', baseImage: 'python:3.9-slim'}}}
# The default python version, which is to be used when selecting the vdk and job base images from
# the supportedPythonVersions for data job deployments
datajobs.deployment.defaultPythonVersion=${DATAJOBS_DEPLOYMENT_DEFAULT_PYTHON_VERSION:#{null}}
datajobs.deployment.defaultPythonVersion=${DATAJOBS_DEPLOYMENT_DEFAULT_PYTHON_VERSION:3.9}

#Configuration variables used for data job execution cleanup
#This is a spring cron expression, used to schedule the clean up job / default is every 3 hours
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ public class JobImageBuilderTest {
public void setUp() {
ReflectionTestUtils.setField(jobImageBuilder, "dockerRepositoryUrl", "test-docker-repository");
ReflectionTestUtils.setField(jobImageBuilder, "registryType", "ecr");
ReflectionTestUtils.setField(jobImageBuilder, "deploymentDataJobBaseImage", "python:3.7-slim");
ReflectionTestUtils.setField(jobImageBuilder, "builderJobExtraArgs", "");

when(awsCredentialsService.createTemporaryCredentials())
Expand Down Expand Up @@ -97,6 +96,7 @@ public void buildImage_notExist_success() throws InterruptedException, ApiExcept
jobDeployment.setDataJobName(TEST_JOB_NAME);
jobDeployment.setGitCommitSha("test-commit");
jobDeployment.setEnabled(true);
jobDeployment.setPythonVersion("3.7");

var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, true);

Expand Down Expand Up @@ -140,6 +140,7 @@ public void buildImage_builderRunning_oldBuilderDeleted()
jobDeployment.setDataJobName(TEST_JOB_NAME);
jobDeployment.setGitCommitSha("test-commit");
jobDeployment.setEnabled(true);
jobDeployment.setPythonVersion("3.7");

var result = jobImageBuilder.buildImage(TEST_IMAGE_NAME, testDataJob, jobDeployment, true);

Expand Down Expand Up @@ -175,6 +176,7 @@ public void buildImage_imageExists_buildSkipped()
jobDeployment.setDataJobName(TEST_JOB_NAME);
jobDeployment.setGitCommitSha("test-commit");
jobDeployment.setEnabled(true);
jobDeployment.setPythonVersion("3.7");

var result = jobImageBuilder.buildImage(TEST_IMAGE_NAME, testDataJob, jobDeployment, true);

Expand Down Expand Up @@ -216,6 +218,7 @@ public void buildImage_jobFailed_failure()
jobDeployment.setDataJobName(TEST_JOB_NAME);
jobDeployment.setGitCommitSha("test-commit");
jobDeployment.setEnabled(true);
jobDeployment.setPythonVersion("3.7");

var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, true);

Expand Down Expand Up @@ -254,8 +257,6 @@ public void buildImage_jobFailed_failure()
public void
buildImage_deploymentDataJobBaseImageNullAndSupportedPythonVersions_shouldCreateCronjobUsingSupportedPythonVersions()
throws InterruptedException, ApiException, IOException {
// Set base image property to null
ReflectionTestUtils.setField(jobImageBuilder, "deploymentDataJobBaseImage", null);

when(dockerRegistryService.builderImage()).thenReturn(TEST_BUILDER_IMAGE_NAME);
when(kubernetesService.listJobs()).thenReturn(Collections.emptySet());
Expand Down Expand Up @@ -301,62 +302,8 @@ public void buildImage_jobFailed_failure()
}

@Test
public void
buildImage_deploymentDataJobBaseImageNotNull_shouldCreateCronjobUsingSupportedPythonVersions()
throws InterruptedException, ApiException, IOException {
ReflectionTestUtils.setField(
supportedPythonVersions, "deploymentDataJobBaseImage", "python:3.7-slim");
ReflectionTestUtils.setField(
supportedPythonVersions, "supportedPythonVersions", generateSupportedPythonVersionsConf());
when(dockerRegistryService.builderImage()).thenReturn(TEST_BUILDER_IMAGE_NAME);
when(kubernetesService.listJobs()).thenReturn(Collections.emptySet());
var builderJobResult =
new KubernetesService.JobStatusCondition(true, "type", "test-reason", "test-message", 0);
when(kubernetesService.watchJob(any(), anyInt(), any())).thenReturn(builderJobResult);
when(supportedPythonVersions.isPythonVersionSupported("3.11")).thenReturn(true);
when(supportedPythonVersions.getJobBaseImage(any())).thenCallRealMethod();

JobDeployment jobDeployment = new JobDeployment();
jobDeployment.setDataJobName(TEST_JOB_NAME);
jobDeployment.setGitCommitSha("test-commit");
jobDeployment.setEnabled(true);
jobDeployment.setPythonVersion("3.11");

ArgumentCaptor<Map<String, String>> captor = ArgumentCaptor.forClass(Map.class);

var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, true);

verify(kubernetesService)
.createJob(
eq(TEST_BUILDER_JOB_NAME),
eq(TEST_BUILDER_IMAGE_NAME),
eq(false),
eq(false),
captor.capture(),
any(),
any(),
any(),
any(),
any(),
any(),
anyLong(),
anyLong(),
anyLong(),
any(),
any());

Map<String, String> capturedEnvs = captor.getValue();
Assertions.assertEquals("python:3.11-slim", capturedEnvs.get("BASE_IMAGE"));

verify(kubernetesService).deleteJob(TEST_BUILDER_JOB_NAME);
Assertions.assertTrue(result);
}

@Test
public void buildImage_deploymentDataJobBaseImageNullAndPythonVersionNull_shouldNotCreateCronjob()
public void buildImage_PythonVersionNull_shouldNotCreateCronjob()
throws InterruptedException, ApiException, IOException {
// Set base image property to null
ReflectionTestUtils.setField(jobImageBuilder, "deploymentDataJobBaseImage", null);

JobDeployment jobDeployment = new JobDeployment();
jobDeployment.setDataJobName(TEST_JOB_NAME);
Expand Down
Loading