import os
from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

# Point Hadoop/YARN, Java, Spark and Python at their config/installation paths
os.environ['HADOOP_CONF_DIR'] = '/etc/hadoop/conf'
os.environ['YARN_CONF_DIR'] = '/etc/hadoop/conf'
os.environ['JAVA_HOME'] = '/usr'
os.environ['SPARK_HOME'] = '/usr/lib/spark'
os.environ['PYTHONPATH'] = '/usr/local/lib/python3.8'

# Default arguments for the DAG
# (note: a dynamic start_date such as datetime.today() is generally discouraged in Airflow)
default_args = {
    'start_date': datetime.today(),
    'owner': 'airflow'
}

# Declare the DAG
dag = DAG(
    "example_bash_dag",
    schedule_interval='@daily',
    default_args=default_args
)

# Task whose Bash command submits the partitioning job to YARN via spark-submit
t1 = BashOperator(
    task_id='print_date',
    bash_command="/usr/lib/spark/bin/spark-submit --master yarn --deploy-mode cluster /lessons/partition.py '2022-05-31' '/user/master/data/events' '/user/kotlyarovb/data/events'",
    retries=3,
    dag=dag
)

t1
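
The three positional arguments passed after /lessons/partition.py in the bash_command (the run date, the source HDFS path, and the target HDFS path) are consumed inside that script, which is not included in this paste. Below is a minimal sketch of what partition.py might look like; the 'event_type' column and the date-based directory layout are assumptions, not the author's code.

import sys

from pyspark.sql import SparkSession


def main():
    # Positional arguments supplied by the BashOperator's spark-submit call
    date = sys.argv[1]          # e.g. '2022-05-31'
    source_path = sys.argv[2]   # e.g. '/user/master/data/events'
    target_path = sys.argv[3]   # e.g. '/user/kotlyarovb/data/events'

    spark = SparkSession.builder.appName(f"EventsPartitioning-{date}").getOrCreate()

    # Read one day of events and write it back partitioned by event type.
    # Both the 'event_type' column and the date=... layout are placeholders.
    events = spark.read.parquet(f"{source_path}/date={date}")
    (events.write
        .mode("overwrite")
        .partitionBy("event_type")
        .parquet(f"{target_path}/date={date}"))


if __name__ == "__main__":
    main()

To try the DAG end to end without waiting for the scheduler, it can be run once from the CLI with: airflow dags test example_bash_dag 2022-05-31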