Browse Source

Initial commit.

Thomas Kandler 3 years ago
parent
commit
bfd32658c5
2 changed files with 268 additions and 0 deletions
  1. 1
    0
      bash_oft.sh
  2. 267
    0
      zonalStatBenchmark.py

+ 1
- 0
bash_oft.sh View File

@@ -0,0 +1 @@
1
# Rasterize the "mask" layer of mask.shp into mask.tif, using the pixel
# dimensions and extent that pkinfo reports for the input raster.
data_dir="/home/thomas/SpiderOak Hive/work_projects/zonal_benchmark/test_data"
raster="$data_dir/wc2.0_10m_tavg_07.tif"

# Raster dimensions, passed to gdal_rasterize -ts below
# (second space-separated field of the pkinfo output).
resX=$(pkinfo -i "$raster" -ns | cut -d ' ' -f 2)
echo $resX
resY=$(pkinfo -i "$raster" -nl | cut -d ' ' -f 2)
echo $resY

# Corner coordinates, passed to gdal_rasterize -te below
# (pkinfo prints these as key=value; keep the value).
xmin=$(pkinfo -i "$raster" -ulx | cut -d '=' -f 2)
echo $xmin
xmax=$(pkinfo -i "$raster" -lrx | cut -d '=' -f 2)
echo $xmax
ymax=$(pkinfo -i "$raster" -uly | cut -d '=' -f 2)
echo $ymax
ymin=$(pkinfo -i "$raster" -lry | cut -d '=' -f 2)
echo $ymin

# Burn value 1 into a Byte raster with 0 as nodata.
gdal_rasterize -ts $resX $resY -te $xmin $ymin $xmax $ymax -a_nodata 0 -ot Byte -burn 1 -l "mask" "$data_dir/mask.shp" "$data_dir/mask.tif"

+ 267
- 0
zonalStatBenchmark.py View File

@@ -0,0 +1,267 @@
1
+#!/usr/bin/python
2
+# -*- coding: utf-8 -*-
3
+
4
+import subprocess
5
+import sys
6
+import os
7
+from collections import defaultdict
8
+import re
9
+
10
+
11
class zonalBenchmark(object):
    '''
    Command line harness to compare different zonal statistic
    implementations.

    Every selected tool is run a number of times under /usr/bin/time;
    the averaged elapsed time and CPU load are printed as a table.
    The class keeps no instance state. The code sticks to constructs
    that behave the same under Python 2.7 and Python 3.
    '''

    # Tools that can be selected by number on the command line. Only
    # 1, 2 and 6 get a command in setCommands(); the others are accepted
    # here but end up without a timing in showOutput().
    _AVAILABLE_TOOLS = {
        1: 'Open Foris Tools',
        2: 'pktools',
        3: 'perrygeo',
        4: 'GRASS',
        5: 'R',
        6: 'SAGA',
    }

    # Name of the generated helper script (written to the current working
    # directory) and the marker stored in the command dict in its place.
    _BASH_SCRIPT = 'bash_oft.sh'
    _BASH_MARKER = 'bash_script'

    # Report format handed to /usr/bin/time: %e and %P, one per line.
    _TIME_FORMAT = 'seconds: %e \ncpu-load: %P'

    # Shell variable, pkinfo flag and `cut` delimiter for each value the
    # generated script reads from the input raster.
    _PKINFO_QUERIES = (
        ('resX', '-ns', ' '),
        ('resY', '-nl', ' '),
        ('xmin', '-ulx', '='),
        ('xmax', '-lrx', '='),
        ('ymax', '-uly', '='),
        ('ymin', '-lry', '='),
    )

    def __init__(self):
        '''
        Current state of implementation:

        1. oft-stat
        2. pktools
        6. SAGA
        ( ^ setCommands() builds a command for those)

        3. Python/GDAL (perrygeo - gist)
        4. GRASS
        5. R
        ( ^ selectable, but no command is built for them yet)

        7. QGIS
        8. rasterstats
        9. starspan
        10. R - multicore
        11. PostGIS
        12. GeoTools
        ( ^ those not yet)
        '''

    @staticmethod
    def checkInstallStatus():
        '''
        Check if the selected provider(s) are available on the
        (linux) machine.

        Not implemented yet; currently does nothing and returns None.
        Declared static so it can be called on the class or an instance.
        '''

    @staticmethod
    def _shellQuote(text):
        '''
        Return `text` quoted so a POSIX shell reads it as one word.
        '''
        # shlex.quote exists from Python 3.3 on; Python 2 offers the same
        # function as pipes.quote.
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote
        return quote(text)

    def getArguments(self, args):
        '''
        Get the links to the data which has been provided by the CLI
        and hand it to the execution. Checks also if the data exists.

        args is a list whose first element is the argv list:
        [program, tools, input raster, mask, repetitions].

        Returns the tuple (selectedTools, inputFile, maskFile, repArg):
        a dict {tool number: tool name}, the two file paths and the
        repetitions argument as the string given on the command line.
        Relative file arguments are resolved against the directory of
        this script, not the current working directory.

        Shows the usage and raises SystemExit when no arguments or a
        help flag are given (exit status 0) and when an argument is
        missing or invalid (exit status 1).
        '''
        argv = args[0]

        # show help for no arguments or an explicit request
        if len(argv) < 2 or argv[1] in ('-h', '--help'):
            self.showUsage()
            sys.exit()

        # get arguments for tools, split them, convert to int and sort
        try:
            tools = sorted(int(part) for part in argv[1].split('-'))
        except (TypeError, ValueError):
            self.showUsage()
            sys.exit(1)

        # keep only the tools we know about
        selectedTools = {}
        for item in tools:
            if item in self._AVAILABLE_TOOLS:
                selectedTools[item] = self._AVAILABLE_TOOLS[item]

        # nothing to benchmark, or raster / mask / repetitions missing
        if not selectedTools or len(argv) < 5:
            self.showUsage()
            sys.exit(1)

        inputArg, maskArg, repArg = argv[2], argv[3], argv[4]
        workingDir = os.path.dirname(os.path.realpath(__file__))
        progName = str(argv[0]).lstrip('./')

        # get input file
        inputFile = os.path.join(workingDir, inputArg)
        if not os.path.isfile(inputFile):
            print('ERROR: Input file does not exist\nYou can run \'{0} --help\' for usage of the program.\n'.format(progName))
            sys.exit(1)

        # get mask file
        maskFile = os.path.join(workingDir, maskArg)
        if not os.path.isfile(maskFile):
            print('ERROR: Mask file does not exist\nYou can run \'{0} --help\' for usage of the program.\n'.format(progName))
            sys.exit(1)

        # get number of repetitions; validated here so a bad value fails
        # before any tool is started, but handed on unchanged
        try:
            validReps = int(repArg) >= 1
        except (TypeError, ValueError):
            validReps = False
        if not validReps:
            print('ERROR: Number of repetitions must be a positive whole number\nYou can run \'{0} --help\' for usage of the program.\n'.format(progName))
            sys.exit(1)

        return selectedTools, inputFile, maskFile, repArg

    def setCommands(self, selectedTools, inputFile, maskFile):
        '''
        Constructs the actual command(s) for the selected providers.

        Returns a dict {tool number: shell command}. For tool 1 the
        commands are written to the file bash_oft.sh in the current
        working directory and the dict holds the marker 'bash_script'
        instead. Tools without a command builder are left out.
        All paths are shell-quoted, so they may contain spaces.
        '''
        quote = self._shellQuote
        selectedCommands = {}

        if 1 in selectedTools:
            # the shape and its rasterization share directory and base name
            maskDir, maskFileName = os.path.split(maskFile)
            maskBase = os.path.splitext(maskFileName)[0]
            maskTifName = os.path.join(maskDir, maskBase + '.tif')
            maskShpName = os.path.join(maskDir, maskBase + '.shp')

            # one pkinfo query per line: store the value, then echo it
            lines = [
                "{0}=$(pkinfo -i {1} {2} | cut -d '{3}' -f 2);echo ${0}".format(
                    name, quote(inputFile), flag, delimiter)
                for name, flag, delimiter in self._PKINFO_QUERIES
            ]
            lines.append(
                'gdal_rasterize -ts $resX $resY -te $xmin $ymin $xmax $ymax '
                '-a_nodata 0 -ot Byte -burn 1 -l {0} {1} {2}'.format(
                    quote(maskBase), quote(maskShpName), quote(maskTifName)))

            with open(self._BASH_SCRIPT, 'w') as f:
                f.write('\n'.join(lines) + '\n')

            selectedCommands[1] = self._BASH_MARKER

        if 2 in selectedTools:
            selectedCommands[2] = (
                'pkextract -f \'ESRI Shapefile\' -s {0} -i {1} '
                '-o /tmp/pk_out.shp -polygon --rule mean'.format(
                    quote(maskFile), quote(inputFile)))

        if 6 in selectedTools:
            selectedCommands[6] = (
                'saga_cmd statistics_grid 5 -ZONES {0} -STATLIST {1} '
                '-OUTTAB /tmp/saga_out.txt'.format(
                    quote(maskFile), quote(inputFile)))

        return selectedCommands

    def _runTimed(self, command):
        '''
        Run one command under /usr/bin/time and return everything it
        printed (stdout and stderr combined), or None when the command
        failed or could not be started.
        '''
        if command == self._BASH_MARKER:
            # hand the script to bash as a file, so it needs no exec bit
            target = ['/bin/bash', os.path.join('.', self._BASH_SCRIPT)]
        else:
            target = ['/bin/bash', '-c', command]

        try:
            return subprocess.check_output(
                ['/usr/bin/time', '-f', self._TIME_FORMAT] + target,
                stderr=subprocess.STDOUT,
                universal_newlines=True)
        except subprocess.CalledProcessError as err:
            print('error code {0} {1}'.format(err.returncode, err.output))
        except OSError as err:
            # /usr/bin/time or /bin/bash is not available
            print('error: could not start the command: {0}'.format(err))
        return None

    @staticmethod
    def _parseTiming(output):
        '''
        Extract (seconds, cpuload) from the report /usr/bin/time added
        to `output`; returns None when the report cannot be found.
        '''
        seconds = re.findall(
            r'^seconds:\s*([0-9]+(?:\.[0-9]+)?)\s*$', output, re.MULTILINE)
        cpuload = re.findall(
            r'^cpu-load:\s*([0-9]+|\?)%\s*$', output, re.MULTILINE)
        if not seconds or not cpuload:
            return None

        # The last match is used: the report is expected after the
        # command's own output. NOTE(review): GNU time appears to print
        # '?%' when the elapsed time is zero; that is counted as 0 here.
        load = cpuload[-1]
        return float(seconds[-1]), 0 if load == '?' else int(load)

    def executeCommands(self, commands, repetitions):
        '''
        Executes the command for the selected provider.

        commands is the dict returned by setCommands(); repetitions is
        the number of runs per tool (anything int() accepts).

        Returns a dict {tool number: [average seconds rounded to three
        decimals, average cpu load in percent (floored), number of
        successful runs]}. Runs that fail or yield no timing report are
        reported and skipped; a tool without any successful run is left
        out of the result.
        '''
        timing = {}

        for key in sorted(commands):
            secondsAvg = []
            cpuAvg = []

            for _ in range(int(repetitions)):
                output = self._runTimed(commands[key])
                if output is None:
                    continue

                parsed = self._parseTiming(output)
                if parsed is None:
                    print('error: no timing report for tool {0}'.format(key))
                    continue

                secondsAvg.append(parsed[0])
                cpuAvg.append(parsed[1])

            if secondsAvg:
                timing[key] = [
                    round(sum(secondsAvg) / len(secondsAvg), 3),
                    # floor division keeps the integer result Python 2
                    # gave for the plain '/' used previously
                    sum(cpuAvg) // len(cpuAvg),
                    len(secondsAvg),
                ]

        return timing

    def showOutput(self, output, selectedTools):
        '''
        Construct a pretty table to show a comparison of the programs
        on the command line.

        output is the dict returned by executeCommands(), selectedTools
        the dict {tool number: tool name} from getArguments(). Prints
        one row per selected tool that has a timing, ordered by tool
        number, and names the selected tools that have none.
        Returns None.
        '''
        missing = sorted(key for key in selectedTools if key not in output)

        out = ''
        if missing:
            out += "No timing could be collected for: {0}\n\n".format(
                ', '.join(str(selectedTools[key]) for key in missing))
        else:
            out += "All selected tools ran successfully.\n\n"

        # print with underscore
        out += "\033[4m{:<8} {:<17} {:<10} {:<15}\033[0m\n".format('ID', 'Tool', 'Time', 'CPU_load')

        for key in sorted(selectedTools):
            if key in output:
                out += "{:<8} {:<17} {:<10} {:<10}\n".format(
                    key, selectedTools[key], output[key][0], output[key][1])

        print(out)

    def showUsage(self):
        '''
        Print the command line help, including the list of tools.
        '''
        print(u'''Usage:
        python zonalStatBenchmark [tools] [input raster] [input mask / shape] [repetitions]
        
        [tools]                expects a chain of numbers, seperated by a hyphen; 
                               e.g. 1-2-6 for selecting oft-stat, pktools and SAGA
        [input raster]         the raster file for input
        [input mask / shape]   the raster or shape mask; be aware: some 
                               tools allow only raster input, some only 
                               vector input - so save a vector file and 
                               its rasterization with the same name in 
                               the same dir
        [repetitions]          how often each tool is run; the timings 
                               are averaged over all runs
        
        The following tools are available:
        
        1. oft-stat
        2. pktools
        3̶.̶ ̶P̶y̶t̶h̶o̶n̶/̶G̶D̶A̶L̶ ̶(̶p̶e̶r̶r̶y̶g̶e̶o̶ ̶-̶ ̶g̶i̶s̶t̶)̶
        4̶.̶ ̶G̶R̶A̶S̶S̶
        5̶.̶ ̶R̶
        6. SAGA
        ''')
248
+        
249
# Script driver: parse the command line, build the tool commands, time
# them and print the comparison table.
#
# getArguments() hands back a 4-tuple:
#   userArgs[0] = selected providers
#   userArgs[1] = input TIF
#   userArgs[2] = masking shape
#   userArgs[3] = number of repetitions

# getArguments() expects the argv list wrapped in an outer list.
args = [sys.argv]

# The class keeps no state, so one instance serves every step.
benchmark = zonalBenchmark()

userArgs = benchmark.getArguments(args)
selectedTools, inputFile, maskFile, repetitions = userArgs

commands = benchmark.setCommands(selectedTools, inputFile, maskFile)

timeOutput = benchmark.executeCommands(commands, repetitions)

benchmark.showOutput(timeOutput, selectedTools)
264
+
265
+
266
+
267
+