123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287 |
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
-
- import subprocess
- import sys
- import os
- from collections import defaultdict
- import re
- import time
-
-
- class zonalBenchmark:
- '''
- The script will provide a straight forward way to compare
- different zonal statistic implementations.
- '''
-
- def __init__(self):
- '''
- Current state of implementation:
-
- 1. oft-stat
- 2. pktools
- 3. GRASS
- 4. SAGA
- ( ^ those are implemented)
-
- 5. QGIS
- 6. rasterstats
- 7. starspan
- 8. R
- 9. R - multicore
- 10. PostGIS
- 11. GeoTools
- ( ^ those not yet)
-
- '''
-
- def checkInstallStatus():
- '''
- Check if the selected provider(s) are available on the
- (linux) machine.
- '''
-
- def getArguments(self, args):
- '''
- Get the links to the data which has been provided by the CLI
- and hand it to the execution. Checks also if the data exists.
- '''
-
- # show help for no arguments
- if len(args[0]) < 2:
- zonalBenchmark().showUsage()
- sys.exit()
- try:
- if args[0][1] == '-h' or args[0][1] == '--help':
- zonalBenchmark().showUsage()
- sys.exit()
- except IndexError:
- zonalBenchmark().showUsage()
- sys.exit()
-
- # get arguments for tools, split them, convert to int and sort
- try:
- toolsArgs = args[0][1].split('-')
- tools = sorted(map(int, toolsArgs), key=int)
- except (TypeError, ValueError) as e:
- zonalBenchmark(args).showUsage()
- sys.exit()
-
- # dict of implemented tools
- availableTools = {1: 'Open Foris Tools',
- 2: 'pktools',
- 3: 'GRASS',
- 4: 'SAGA'}
- selectedTools = {}
-
- # create dict for selected tools
- for item in tools:
- if item in availableTools:
- selectedTools.update({item: availableTools[item]})
-
- # get input file
- workingDir = os.path.dirname(os.path.realpath(__file__))
- try:
- inputArg = args[0][2]
- inputFile = os.path.join(workingDir, inputArg)
- if not os.path.isfile(inputFile):
- print 'ERROR: Input file does not exist\You can run \'{0} --help\' for usage of the program.\n'.format(str(args[0][0]).lstrip('./'))
- sys.exit()
- except (IndexError, ValueError) as e:
- zonalBenchmark().showUsage()
-
- # get mask file
- try:
- maskArg = args[0][3]
- maskFile = os.path.join(workingDir, maskArg)
- if not os.path.isfile(maskFile):
- print 'ERROR: Mask file does not exist\nYou can run \'{0} --help\' for usage of the program.\n'.format(str(args[0][0]).lstrip('./'))
- sys.exit()
- except (IndexError, ValueError) as e:
- zonalBenchmark().showUsage()
-
- # get number of repetitions
- try:
- repArg = args[0][4]
- except (IndexError, ValueError) as e:
- zonalBenchmark().showUsage()
-
- return selectedTools, inputFile, maskFile, repArg
-
- def setCommands(self, selectedTools, inputFile, maskFile):
- '''
- Constructs the actual command(s) for the selected providers.
- '''
- # create a raster from the shape
- maskFileName = os.path.basename(maskFile)
- maskTifName = os.path.dirname(maskFile) + '/' + maskFileName[:-4] + ".tif"
- maskShpName = os.path.dirname(maskFile) + '/' + maskFileName[:-4] + ".shp"
- f = open('bash_rasterize.sh', 'w')
- f.write('''resX=$(pkinfo -i "{0}" -ns | cut -d ' ' -f 2);echo $resX; resY=$(pkinfo -i "{0}" -nl | cut -d ' ' -f 2);echo $resY;xmin=$(pkinfo -i "{0}" -ulx | cut -d '=' -f 2);echo $xmin;xmax=$(pkinfo -i "{0}" -lrx | cut -d '=' -f 2);echo $xmax;ymax=$(pkinfo -i "{0}" -uly | cut -d '=' -f 2);echo $ymax;ymin=$(pkinfo -i "{0}" -lry | cut -d '=' -f 2);echo $ymin;gdal_rasterize -ts $resX $resY -te $xmin $ymin $xmax $ymax -a_nodata 0 -ot Byte -burn 1 -l "{2}" "{1}" "{3}"'''.format(inputFile, maskShpName, maskFileName[:-4], maskTifName))
- subprocess.call('chmod +x ./bash_rasterize.sh', shell=True, stderr=subprocess.STDOUT)
-
- selectedCommands = {}
- if 1 in selectedTools:
- maskFileTif = maskFile.replace('shp', 'tif')
- oftCMD = 'oft-stat -i "{0}" -o /tmp/zonal_oft_out.txt -um "{1}"'.format(inputFile, maskFileTif)
- selectedCommands.update({1: oftCMD})
-
- if 2 in selectedTools:
- #~ maskFileShp = maskFile.replace('tif','shp')
- pkCMD = 'pkextract -f \'ESRI Shapefile\' -s "{0}" -i "{1}" -o /tmp/zonal_pkt_out.shp -polygon --rule mean'.format(maskFile, inputFile)
- selectedCommands.update({2: pkCMD})
-
- if 3 in selectedTools:
- s = open('bash_grass_set_cmd.sh', 'w')
- s.write('''resX=$(pkinfo -i "{0}" -ns | cut -d ' ' -f 2);resY=$(pkinfo -i "{0}" -nl | cut -d ' ' -f 2);xmin=$(pkinfo -i "{0}" -ulx | cut -d '=' -f 2);xmin_abs=${{xmin#-}};xmax=$(pkinfo -i "{0}" -lrx | cut -d '=' -f 2);ymax=$(pkinfo -i "{0}" -uly | cut -d '=' -f 2);ymin=$(pkinfo -i "{0}" -lry | cut -d '=' -f 2);ymin_abs=${{ymin#-}}
- INDIR="{1}"
- g.region n=$ymax e=$xmax s=$ymin w=$xmin rows=$resY cols=$resX
- r.in.gdal input={0} output=tmp_raster --overwrite
- r.in.gdal input={2} output=tmp_zones --overwrite
- r.univar -t map=tmp_raster zones=tmp_zones separator=comma output=/tmp/zonal_grass_out.csv --overwrite'''.format(inputFile, os.path.dirname(maskFile), maskTifName))
-
- t = open('bash_grass_exec_cmd.sh', 'w')
- t.write('''INDIR="{0}"
- epsg=$(pkinfo -i "{1}" -a_srs | tail -c -18 | cut -d '"' -f 4)
- grass72 -c epsg:$epsg "$INDIR"/grassdata/templocation -e
- export GRASS_BATCH_JOB="$INDIR/bash_grass_set_cmd.sh"
- export GRASS_VERBOSE=0
- grass72 "$INDIR"/grassdata/templocation/PERMANENT
- rm -rf "$INDIR"/grassdata/templocation'''.format(os.path.dirname(os.path.realpath(__file__)), inputFile))
-
- grassCMD = '/bin/bash -c "{0}/bash_grass_exec_cmd.sh > /dev/null 2>&1"'.format(os.path.dirname(os.path.realpath(__file__)))
- subprocess.call('chmod +x ./bash_grass_exec_cmd.sh', shell=True, stderr=subprocess.STDOUT)
- subprocess.call('chmod +x ./bash_grass_set_cmd.sh', shell=True, stderr=subprocess.STDOUT)
- selectedCommands.update({3: grassCMD})
-
- if 4 in selectedTools:
- maskFileTif = maskFile.replace('shp', 'tif')
- sagaCMD = 'saga_cmd statistics_grid 5 -ZONES "{0}" -STATLIST "{1}" -OUTTAB /tmp/zonal_saga_out.txt'.format(maskFileTif, inputFile)
- selectedCommands.update({4: sagaCMD})
-
- return selectedCommands
-
- def executeCommands(self, commands, repetitions):
- '''
- Executes the command for the selected provider.
- '''
- timing = {}
- secondsAvg = []
- cpuAvg = []
-
- # run the rasterization if neccessary
- rastRuns = 0
- for key in commands.items():
- if ((key[0] == 1 or key[0] == 3 or key[0] == 4) and rastRuns == 0):
- try:
- rasterize = subprocess.check_output('/bin/bash -c ./bash_rasterize.sh', shell=True, stderr=subprocess.STDOUT)
- rastRuns += 1
- except OSError:
- print "Bash file for rasterization not found."
- sys.exit()
-
- for key, value in commands.items():
- reps = 0
- for i in range(0, int(repetitions)):
- try:
- output = subprocess.check_output(
- '/usr/bin/time -f "seconds: %e \ncpu-load: %P" {0} | tail -2'.format(value),
- shell=True,
- stderr=subprocess.STDOUT
- )
- # play nice with programs which do not handle fast calls to well
- time.sleep(0.1)
- except subprocess.CalledProcessError as grepexc:
- print "error code", grepexc.returncode, grepexc.output
-
- # grep seconds and cpu-load from the string
- seconds = float((re.split(":|\n", output[0:28].replace(" ", "")))[1])
- cpuload = int(((re.split(":|\n", output[0:28].replace(" ", "")))[3]).replace("%", ""))
-
- # set output to 0 to prevent any 'misappropriation' of older outputs
- output = "sec: 0 \n cpu:0"
-
- # increment reps and fill lists for timing & cpu load
- secondsAvg.append(seconds)
- cpuAvg.append(cpuload)
- reps += 1
-
- # update the dict with the average timing and cpuload, as well as a repetition counter
- timing.update({key: [round(sum(secondsAvg) / len(secondsAvg), 3), sum(cpuAvg) / len(cpuAvg), reps]})
-
- # clear the lists for the next values
- secondsAvg[:] = []
- cpuAvg[:] = []
-
- # print timing
- # sys.exit()
-
- return timing
-
- def showOutput(self, output, selectedTools):
- '''
- Construct a pretty table to show a comparison of the programs
- on the command line.
- '''
- # merge the names of the selected tools with the numerical representation
- # save as a new dict
- dd = defaultdict(list)
- for d in (selectedTools, output):
- for key, value in d.iteritems():
- dd[key].extend([value])
-
- out = ''
-
- out += "All selected tools ran successfully.\n\n"
- # print with underscore
- out += "\033[4m{:<8} {:<17} {:<15} {:<15} {:<15}\033[0m\n".format('ID', 'Tool', 'seconds (avg)', 'CPU_load (avg)', 'No. of runs')
-
- for k, v in dd.iteritems():
- out += "{:<8} {:<17} {:<15} {:<15} {:<15}\n".format(k, dd[k][0], dd[k][1][0], dd[k][1][1], dd[k][1][2])
-
- print out
- # return out
-
- def showUsage(self):
- # print zonalBenchmark.__init__.__doc__
- print u'''Usage:
- python zonalStatBenchmark [tools] [input raster] [input mask / shape] [number of runs]
-
- [tools] expects a chain of numbers, seperated by a hyphen;
- e.g. 1-2-3 for selecting oft-stat, pktools and GRASS
-
- [input raster] the raster file for input
-
- [input mask / shape] the mask shape; expects a vector file which will be
- automatically rasterized if the selected toos require it
-
- [number of runs] the number of runs / repetitions per tool, e.g. "5" means
- every tool will run 5 times and the average of the runs will
- be calculated
-
- All arguments are mandatory.
-
- The following tools are available:
-
- 1. oft-stat
- 2. pktools
- 3. GRASS
- 4. SAGA
- '''
-
- # userArgs[0] = selected providers
- # userArgs[1] = input TIF
- # userArgs[2] = masking shape
- # userArgs[3] = number of repetitions
-
- args = []
- args.append(sys.argv)
-
- userArgs = zonalBenchmark().getArguments(args)
-
- commands = zonalBenchmark().setCommands(userArgs[0], userArgs[1], userArgs[2])
-
- timeOutput = zonalBenchmark().executeCommands(commands, userArgs[3])
-
- zonalBenchmark().showOutput(timeOutput, userArgs[0])
|