Hi all, I'm having trouble running this custom Prefect Docker image.
# Build on the Prefect 2 image for Python 3.11.
FROM prefecthq/prefect:2-python3.11
# Copy the dependency list into the image's current working directory.
COPY requirements.txt .
# Install the project's dependencies without keeping pip's download cache.
# NOTE(review): the reported "cannot pickle 'classmethod' object" raised while
# importing prefect_soda_core looks like a pydantic v1/v2 mismatch between
# Prefect 2 and the installed prefect-soda-core -- compare the resolved
# versions (`pip list`) with the environment where the flow works, and
# consider pinning compatible versions in requirements.txt. TODO confirm.
RUN pip install -r requirements.txt --trusted-host pypi.python.org --no-cache-dir
# Install the Soda packages for Snowflake and DuckDB from the
# pypi.cloud.soda.io index (-i replaces pip's default index for these calls).
# NOTE(review): these installs run after requirements.txt and may change the
# versions of shared dependencies resolved above -- verify.
RUN pip install -i https://pypi.cloud.soda.io soda-snowflake
RUN pip install -i https://pypi.cloud.soda.io soda-duckdb
# Copy the flow code into the image.
COPY flows /opt/prefect/flows
# NOTE(review): the relative script path below only resolves if the working
# directory is /opt/prefect -- confirm the base image sets that WORKDIR.
# The flow also reads ./soda/... and ./markdown/... relative to the working
# directory, and neither directory is copied by this Dockerfile -- confirm
# they are mounted at run time or add the corresponding COPY lines.
CMD ["python", "flows/migration_flow.py"]
That is my Dockerfile, and this is my flow:
from prefect_soda_core.tasks import soda_scan_execute
from prefect import flow, task
from prefect.artifacts import create_markdown_artifact
from prefect_soda_core.soda_configuration import SodaConfiguration
from prefect_soda_core.sodacl_check import SodaCLCheck
from prefect.blocks.system import Secret
from jinja2 import Environment, FileSystemLoader
import duckdb
from pathlib import Path
import json
# Prefect Secret blocks holding the credentials passed to the Soda scan.
# They are loaded when this module is imported.
motherduck_secret_block = Secret.load("sodamotherducktoken")
soda_id_secret_block = Secret.load("soda-api-key-id")
soda_secret_block = Secret.load("soda-api-key-secret")
snowflake_pass_block = Secret.load("snowflake-pass")

# Module-wide DuckDB connection to the "my_db" MotherDuck database,
# authenticated with the token read from the Secret block loaded above.
global_con = duckdb.connect(
    f"md:my_db?motherduck_token={motherduck_secret_block.get()}"
)
def run_migration(file_name: str | Path):
    """Run the Soda scan for a checks file and publish a markdown artifact.

    Builds the Soda configuration and check blocks, executes the scan
    against the ``students_md`` data source and hands the scan output to
    ``create_markdown_artifact_task``.

    Args:
        file_name (str | Path): Path to the SodaCL checks YAML file. The
            flow in this module passes an absolute POSIX path string.
    """
    # The configuration file is resolved against the current working
    # directory, so the process must be started from the project root.
    configuration_yaml_path = (
        Path("./soda/config/configuration.yml").absolute().as_posix()
    )
    soda_config_block = SodaConfiguration(
        configuration_yaml_path=configuration_yaml_path
    )
    soda_check_block = SodaCLCheck(sodacl_yaml_path=file_name)

    # The secrets are supplied as scan variables.
    scan_results = soda_scan_execute(
        data_source_name="students_md",
        configuration=soda_config_block,
        checks=soda_check_block,
        variables={
            "TOKEN": motherduck_secret_block.get(),
            "API_KEY": soda_id_secret_block.get(),
            "API_SECRET": soda_secret_block.get(),
            "SNOWPWD": snowflake_pass_block.get(),
        },
        verbose=True,
        return_scan_result_file_content=False,
    )

    # NOTE(review): the artifact task parses its argument with json.loads,
    # while the scan is asked not to return the result file content --
    # confirm that soda_scan_execute still returns JSON text in this mode.
    create_markdown_artifact_task(scan_results)
def json_to_markdown(data: str):
    """Render scan results given as JSON text into a markdown report.

    Args:
        data (str): JSON document; its optional ``checks`` list is scanned
            for entries whose ``outcome`` is ``"fail"``.

    Returns:
        str: The rendered ``soda_report.md`` template.
    """
    results = json.loads(data)

    # Keep only the failing checks, reduced to the fields the template uses.
    failed_checks = [
        {
            "name": check["name"],
            "outcome": check["outcome"],
            "check_value": check["diagnostics"]["value"],
            "source": check["dataSource"],
            "source_table": check["table"],
            "column": check["column"],
        }
        for check in results.get("checks", [])
        if check["outcome"] == "fail"
    ]

    # Templates are looked up in ./markdown relative to the working directory.
    template_dir = Path("./markdown").absolute().as_posix()
    loader = FileSystemLoader(template_dir)
    template = Environment(loader=loader).get_template("soda_report.md")

    return template.render(
        data=results,
        failed_checks=failed_checks,
        amount_failed=len(failed_checks),
    )
@task(name="create_markdown_artifact_migration")
def create_markdown_artifact_task(scan_output: str):
    """Publish the scan results as a Prefect markdown artifact.

    Args:
        scan_output (str): Scan results as JSON text; converted to markdown
            by ``json_to_markdown`` before being stored.
    """
    create_markdown_artifact(
        key="data-quality-report-data-contract",
        markdown=json_to_markdown(scan_output),
        description="Soda Data Contract Results",
    )
@flow
def migration_flow() -> bool:
    """Prefect flow that runs the migration checks.

    Returns:
        bool: ``True`` once ``run_migration`` has returned.
    """
    # The checks file is resolved against the current working directory.
    checks_file = Path("./soda/checks/migration_checks.yml")
    run_migration(file_name=checks_file.absolute().as_posix())
    return True
# Run the flow directly when this file is executed as a script.
if __name__ == "__main__":
    migration_flow()
But I keep getting this error:
TypeError: cannot pickle 'classmethod' object
It complains about my first import, but this works fine when I deploy it on Prefect with
prefect deploy
docker run migration_soda
I use this docker run command, by the way.
Thanks in advance!