Advertisement
fmartinelli

Current PhEDEx datasets in a CMS site

May 28th, 2015
661
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.50 KB | None | 0 0
#!/bin/env python

# by Derek Feichtinger

import datetime
import os
import subprocess
import sys
import xml.dom.minidom
from optparse import OptionParser
from xml.dom.minidom import Node

  11. def formatDate(timestamp):
  12.     #return datetime.datetime.fromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
  13.     return datetime.datetime.fromtimestamp(float(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
  14.    
  15.  
  16. ##################
  17. ### Options
  18. usage = "This tool queries the PhEDEx data service and creates a table containing "
  19. usage +="information on subscriptions to a site. Group selection is also possible.\n\n"
  20. usage +="Usage example: python "+sys.argv[0]+" --site T2_CH_CSCS --group local"
  21.  
  22. parser = OptionParser(usage = usage)
  23. parser.add_option("--site",
  24.                   action="store", dest="Site", default="",
  25.                   help="Site selection, e.g. T2_CH_CSCS")
  26. parser.add_option("--group",
  27.                   action="store", dest="Group", default="",
  28.                   help="PhEDEx group selection")
  29. (options, args) = parser.parse_args()
  30.  
  31.  
  32. #################
  33. ### Options check
  34. SITE= options.Site
  35. GROUP= options.Group
  36. CREATED_SINCE="0"
  37.  
  38. if SITE=="":
  39.     print "[ERROR] Please select a site using the --site option"
  40.     sys.exit(1)
  41.  
  42.  
  43. ################
  44. ### Getting the XML file from PhEDEx data service
  45. xmlFilename = "data.xml"
  46. command = "wget --no-check-certificate -O "+xmlFilename+" 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod/Subscriptions?node="+SITE+"&create_since="+CREATED_SINCE+"&group="+GROUP+"' &> /dev/null"
  47. #print command
  48. print "Getting the data from the data service..."
  49. os.system(command)
  50.  
  51. ###############
  52. ### Opening the XML file
  53. xmlFile = open(xmlFilename)
  54. doc = xml.dom.minidom.parse(xmlFile)
  55.  
  56. #key is the subscription ID, each subscription is a dict, too
  57. subscriptionList = {}
  58.  
  59. ################
  60. ### Looping on datasets, using the Subscriptions data
  61. for node in doc.getElementsByTagName("dataset"):
  62.     size = 0
  63.     group = ""
  64.     reqID = 0
  65.     dataset =  node.attributes['name'].value
  66.     totalSize = node.attributes['bytes'].value
  67.     reqTime = ""
  68.     for subscr in node.getElementsByTagName("subscription"):
  69.         reqID = int(subscr.attributes['request'].value)
  70.         group =  subscr.attributes['group'].value
  71.         reqTime = formatDate(subscr.attributes['time_create'].value)
  72.         if subscr.attributes['node_bytes'].value != "":
  73.             size += float(subscr.attributes['node_bytes'].value)
  74.  
  75.     #Filling a dict
  76.     sub = {'group':group,
  77.            'size':(size)/(1024*1024*1024), 'totalSize':totalSize,
  78.            'dataset':dataset,
  79.            'created':reqTime,
  80.            'comments':'',
  81.            'comments2':'',
  82.            'name':'',
  83.            'email':''}
  84.     subscriptionList[reqID] = sub
  85.  
  86. requests = subscriptionList.keys()
  87. requests.sort()
  88.  
  89.  
  90. ####################
  91. ### Getting extra info for subscriptions (requestor, etc)
  92. ### One call for each dataset, sigh...
  93. for i in requests:
  94.     command = "wget --no-check-certificate -O req.xml 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod/TransferRequests?node="+SITE+"&request="+str(i)+"' &> /dev/null"
  95.     os.system(command)
  96.     xmlReqFile = open("req.xml")
  97.     docReq = xml.dom.minidom.parse(xmlReqFile)
  98.     name = ""
  99.     email = ""
  100.     comment = ""
  101.     for subscr in docReq.getElementsByTagName("request"):
  102.         for rb in subscr.getElementsByTagName("requested_by"):
  103.             name = rb.attributes['name'].value
  104.             email = rb.attributes['email'].value
  105.             if len(rb.firstChild.childNodes)!=0:
  106.                 comment =  rb.firstChild.firstChild.data.replace("\n"," ")
  107.     subscriptionList[i]['name'] = name
  108.     subscriptionList[i]['email']= email
  109.     subscriptionList[i]['comments']= comment
  110.     myI = 0
  111.     for cmt in docReq.getElementsByTagName("comments"):
  112.         if len(cmt.childNodes)!=0:
  113.             if myI==0: subscriptionList[i]['comments'] = cmt.firstChild.data.replace("\n"," ")
  114.             else: subscriptionList[i]['comments2'] = cmt.firstChild.data.replace("\n"," ")
  115.         myI+=1
  116.     os.system("rm req.xml")
  117.    
  118.  
  119. ################
  120. ### Finally, printing the result
  121. print     "\n|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|%10s|" %("*keep?*","*ID*","*Dataset*","*Size(GB)*","*Group*","*Requested on*","*Requested by*","*Comments*","*Comments2*")
  122. for i in requests:
  123.     r = subscriptionList[i]
  124.     print "|%10s|%10s|%10s|%2.1f|%10s|%10s|%10s|%10s|%10s|" %("",str(i),r["dataset"], r["size"],r["group"],r["created"],r["name"],r["comments"],r['comments2'])
  125.  
  126. os.system("rm "+xmlFilename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement