Advertisement
PeachLemonade

dag_bash

Mar 14th, 2024 (edited)
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.05 KB | None | 0 0
  1. import airflow
  2. import os
  3. from airflow import DAG
  4. from airflow.operators.bash import BashOperator
  5. from datetime import date, datetime
  6.  
  7. # прописываем пути
  8. os.environ['HADOOP_CONF_DIR'] = '/etc/hadoop/conf'
  9. os.environ['YARN_CONF_DIR'] = '/etc/hadoop/conf'
  10. os.environ['JAVA_HOME']='/usr'
  11. os.environ['SPARK_HOME'] ='/usr/lib/spark'
  12. os.environ['PYTHONPATH'] ='/usr/local/lib/python3.8'
  13.  
  14. # задаём базовые аргументы
  15. default_args = {
  16.     'start_date': datetime.today(),
  17.     'owner': 'airflow'
  18. }
  19.  
  20. # вызываем DAG
  21. dag = DAG("example_bash_dag",
  22.           schedule_interval='@daily',
  23.           default_args=default_args
  24.          )
  25.  
  26. # объявляем задачу с Bash-командой, которая распечатывает дату
  27. t1 = BashOperator(
  28.     task_id='print_date',
  29.     bash_command="/usr/lib/spark/bin/spark-submit --master yarn --deploy-mode cluster /lessons/partition.py '2022-05-31' '/user/master/data/events' '/user/kotlyarovb/data/events'",
  30.         retries=3,
  31.         dag=dag
  32. )
  33.  
  34. t1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement