Browse Source

New tools

Thomas Kandler 4 years ago
parent
commit
b98a987698
4 changed files with 146 additions and 112 deletions
  1. 7
    0
      bash_grass_exec_cmd.sh
  2. 6
    0
      bash_grass_set_cmd.sh
  3. 1
    0
      bash_rasterize.sh
  4. 132
    112
      zonalStatBenchmark.py

+ 7
- 0
bash_grass_exec_cmd.sh View File

@@ -0,0 +1,7 @@
1
+INDIR="/home/thomas/tmp/zonalbenchmark"
2
+epsg=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -a_srs | tail -c -18 | cut -d '"' -f 4)
3
+grass72 -c epsg:$epsg "$INDIR"/grassdata/templocation -e
4
+export GRASS_BATCH_JOB="$INDIR/bash_grass_set_cmd.sh"
5
+export GRASS_VERBOSE=0
6
+grass72 "$INDIR"/grassdata/templocation/PERMANENT
7
+rm -rf "$INDIR"/grassdata/templocation

+ 6
- 0
bash_grass_set_cmd.sh View File

@@ -0,0 +1,6 @@
1
+resX=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -ns | cut -d ' ' -f 2);resY=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -nl | cut -d ' ' -f 2);xmin=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -ulx | cut -d '=' -f 2);xmin_abs=${xmin#-};xmax=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -lrx | cut -d '=' -f 2);ymax=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -uly | cut -d '=' -f 2);ymin=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -lry | cut -d '=' -f 2);ymin_abs=${ymin#-}
2
+INDIR="/home/thomas/tmp/zonalbenchmark/test_data"
3
+g.region n=$ymax e=$xmax s=$ymin w=$xmin rows=$resY cols=$resX
4
+r.in.gdal input=/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif output=tmp_raster --overwrite 
5
+r.in.gdal input=/home/thomas/tmp/zonalbenchmark/test_data/mask.tif output=tmp_zones --overwrite 
6
+r.univar -t map=tmp_raster zones=tmp_zones separator=comma output=/tmp/zonal_grass_out.csv --overwrite

+ 1
- 0
bash_rasterize.sh View File

@@ -0,0 +1 @@
1
+resX=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -ns | cut -d ' ' -f 2);echo $resX; resY=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -nl | cut -d ' ' -f 2);echo $resY;xmin=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -ulx | cut -d '=' -f 2);echo $xmin;xmax=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -lrx | cut -d '=' -f 2);echo $xmax;ymax=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -uly | cut -d '=' -f 2);echo $ymax;ymin=$(pkinfo -i "/home/thomas/tmp/zonalbenchmark/test_data/wc2.0_10m_tavg_07.tif" -lry | cut -d '=' -f 2);echo $ymin;gdal_rasterize -ts $resX $resY -te $xmin $ymin $xmax $ymax -a_nodata 0 -ot Byte -burn 1 -l "mask" "/home/thomas/tmp/zonalbenchmark/test_data/mask.shp" "/home/thomas/tmp/zonalbenchmark/test_data/mask.tif"

+ 132
- 112
zonalStatBenchmark.py View File

@@ -6,49 +6,48 @@ import sys
6 6
 import os
7 7
 from collections import defaultdict
8 8
 import re
9
+import time
9 10
 
10 11
 
11 12
 class zonalBenchmark:
12 13
     '''
13 14
     The script will provide a straight forward way to compare
14 15
     different zonal statistic implementations.
15
-    '''  
16
-    
16
+    '''
17
+
17 18
     def __init__(self):
18 19
         '''
19 20
         Current state of implementation:
20
-        
21
+
21 22
         1. oft-stat
22 23
         2. pktools
23
-        3. Python/GDAL (perrygeo - gist)
24
-        4. GRASS
25
-        5. R
26
-        6. SAGA
24
+        3. GRASS
25
+        4. SAGA
27 26
         ( ^ those are implemented)
28 27
 
29
-        7. QGIS
30
-        8. rasterstats
31
-        9. starspan
32
-        10. R - multicore
33
-        11. PostGIS
34
-        12. GeoTools
28
+        5. QGIS
29
+        6. rasterstats
30
+        7. starspan
31
+        8. R
32
+        9. R - multicore
33
+        10. PostGIS
34
+        11. GeoTools
35 35
         ( ^ those not yet)
36
-        
37
-        '''
38 36
 
37
+        '''
39 38
 
40 39
     def checkInstallStatus():
41 40
         '''
42 41
         Check if the selected provider(s) are available on the 
43 42
         (linux) machine.
44 43
         '''
45
-        
44
+
46 45
     def getArguments(self, args):
47 46
         '''
48 47
         Get the links to the data which has been provided by the CLI 
49 48
         and hand it to the execution. Checks also if the data exists.
50
-        ''' 
51
-        
49
+        '''
50
+
52 51
         # show help for no arguments
53 52
         if len(args[0]) < 2:
54 53
             zonalBenchmark().showUsage()
@@ -60,29 +59,27 @@ class zonalBenchmark:
60 59
         except IndexError:
61 60
             zonalBenchmark().showUsage()
62 61
             sys.exit()
63
-             
62
+
64 63
         # get arguments for tools, split them, convert to int and sort
65 64
         try:
66 65
             toolsArgs = args[0][1].split('-')
67 66
             tools = sorted(map(int, toolsArgs), key=int)
68 67
         except (TypeError, ValueError) as e:
69 68
             zonalBenchmark(args).showUsage()
70
-            sys.exit() 
71
-        
69
+            sys.exit()
70
+
72 71
         # dict of implemented tools
73
-        availableTools = {1:'Open Foris Tools',
74
-                            2: 'pktools',
75
-                            3: 'perrygeo',
76
-                            4: 'GRASS',
77
-                            5: 'R',
78
-                            6: 'SAGA'}
72
+        availableTools = {1: 'Open Foris Tools',
73
+                          2: 'pktools',
74
+                          3: 'GRASS',
75
+                          4: 'SAGA'}
79 76
         selectedTools = {}
80
-        
77
+
81 78
         # create dict for selected tools
82 79
         for item in tools:
83 80
             if item in availableTools:
84 81
                 selectedTools.update({item: availableTools[item]})
85
-        
82
+
86 83
         # get input file
87 84
         workingDir = os.path.dirname(os.path.realpath(__file__))
88 85
         try:
@@ -93,8 +90,8 @@ class zonalBenchmark:
93 90
                 sys.exit()
94 91
         except (IndexError, ValueError) as e:
95 92
             zonalBenchmark().showUsage()
96
-        
97
-        # get mask file        
93
+
94
+        # get mask file
98 95
         try:
99 96
             maskArg = args[0][3]
100 97
             maskFile = os.path.join(workingDir, maskArg)
@@ -103,149 +100,176 @@ class zonalBenchmark:
103 100
                 sys.exit()
104 101
         except (IndexError, ValueError) as e:
105 102
             zonalBenchmark().showUsage()
106
-            
103
+
107 104
         # get number of repetitions
108 105
         try:
109 106
             repArg = args[0][4]
110 107
         except (IndexError, ValueError) as e:
111 108
             zonalBenchmark().showUsage()
112
-            
113 109
 
114 110
         return selectedTools, inputFile, maskFile, repArg
115
-        
116
-        
111
+
117 112
     def setCommands(self, selectedTools, inputFile, maskFile):
118 113
         '''
119 114
         Constructs the actual command(s) for the selected providers.
120
-        '''   
115
+        '''
116
+        # create a raster from the shape
117
+        maskFileName = os.path.basename(maskFile)
118
+        maskTifName = os.path.dirname(maskFile) + '/' + maskFileName[:-4] + ".tif"
119
+        maskShpName = os.path.dirname(maskFile) + '/' + maskFileName[:-4] + ".shp"
120
+        f = open('bash_rasterize.sh', 'w')
121
+        f.write('''resX=$(pkinfo -i "{0}" -ns | cut -d ' ' -f 2);echo $resX; resY=$(pkinfo -i "{0}" -nl | cut -d ' ' -f 2);echo $resY;xmin=$(pkinfo -i "{0}" -ulx | cut -d '=' -f 2);echo $xmin;xmax=$(pkinfo -i "{0}" -lrx | cut -d '=' -f 2);echo $xmax;ymax=$(pkinfo -i "{0}" -uly | cut -d '=' -f 2);echo $ymax;ymin=$(pkinfo -i "{0}" -lry | cut -d '=' -f 2);echo $ymin;gdal_rasterize -ts $resX $resY -te $xmin $ymin $xmax $ymax -a_nodata 0 -ot Byte -burn 1 -l "{2}" "{1}" "{3}"'''.format(inputFile, maskShpName, maskFileName[:-4], maskTifName))
122
+        subprocess.call('chmod +x ./bash_rasterize.sh', shell=True, stderr=subprocess.STDOUT)
123
+
121 124
         selectedCommands = {}
122 125
         if 1 in selectedTools:
123
-            
124
-            maskFileName = os.path.basename(maskFile)
125
-            maskTifName = os.path.dirname(maskFile) + '/' + maskFileName[:-4] + ".tif"
126
-            maskShpName = os.path.dirname(maskFile) + '/' + maskFileName[:-4] + ".shp"
127
-            
128
-            f = open('bash_oft.sh', 'w')
129
-            f.write('''resX=$(pkinfo -i "{0}" -ns | cut -d ' ' -f 2);echo $resX; resY=$(pkinfo -i "{0}" -nl | cut -d ' ' -f 2);echo $resY;xmin=$(pkinfo -i "{0}" -ulx | cut -d '=' -f 2);echo $xmin;xmax=$(pkinfo -i "{0}" -lrx | cut -d '=' -f 2);echo $xmax;ymax=$(pkinfo -i "{0}" -uly | cut -d '=' -f 2);echo $ymax;ymin=$(pkinfo -i "{0}" -lry | cut -d '=' -f 2);echo $ymin;gdal_rasterize -ts $resX $resY -te $xmin $ymin $xmax $ymax -a_nodata 0 -ot Byte -burn 1 -l "{2}" "{1}" "{3}"'''.format(inputFile, maskShpName, maskFileName[:-4], maskTifName)) 
130
-
131
-            selectedCommands.update({1:'bash_script'})
132
-            
126
+            maskFileTif = maskFile.replace('shp', 'tif')
127
+            oftCMD = 'oft-stat -i "{0}" -o /tmp/zonal_oft_out.txt -um "{1}"'.format(inputFile, maskFileTif)
128
+            selectedCommands.update({1: oftCMD})
129
+
133 130
         if 2 in selectedTools:
134 131
             #~ maskFileShp = maskFile.replace('tif','shp')
135
-            pkCMD = 'pkextract -f \'ESRI Shapefile\' -s {0} -i {1} -o /tmp/pk_out.shp -polygon --rule mean'.format(maskFile, inputFile)
136
-            selectedCommands.update({2:pkCMD})    
137
-        
138
-        if 6 in selectedTools:
139
-            sagaCMD = 'saga_cmd statistics_grid 5 -ZONES {0} -STATLIST {1} -OUTTAB /tmp/saga_out.txt'.format(maskFile, inputFile)
140
-            selectedCommands.update({6:sagaCMD})
141
-                     
142
-        
132
+            pkCMD = 'pkextract -f \'ESRI Shapefile\' -s "{0}" -i "{1}" -o /tmp/zonal_pkt_out.shp -polygon --rule mean'.format(maskFile, inputFile)
133
+            selectedCommands.update({2: pkCMD})
134
+
135
+        if 3 in selectedTools:
136
+            s = open('bash_grass_set_cmd.sh', 'w')
137
+            s.write('''resX=$(pkinfo -i "{0}" -ns | cut -d ' ' -f 2);resY=$(pkinfo -i "{0}" -nl | cut -d ' ' -f 2);xmin=$(pkinfo -i "{0}" -ulx | cut -d '=' -f 2);xmin_abs=${{xmin#-}};xmax=$(pkinfo -i "{0}" -lrx | cut -d '=' -f 2);ymax=$(pkinfo -i "{0}" -uly | cut -d '=' -f 2);ymin=$(pkinfo -i "{0}" -lry | cut -d '=' -f 2);ymin_abs=${{ymin#-}}
138
+INDIR="{1}"
139
+g.region n=$ymax e=$xmax s=$ymin w=$xmin rows=$resY cols=$resX
140
+r.in.gdal input={0} output=tmp_raster --overwrite 
141
+r.in.gdal input={2} output=tmp_zones --overwrite 
142
+r.univar -t map=tmp_raster zones=tmp_zones separator=comma output=/tmp/zonal_grass_out.csv --overwrite'''.format(inputFile, os.path.dirname(maskFile), maskTifName))
143
+
144
+            t = open('bash_grass_exec_cmd.sh', 'w')
145
+            t.write('''INDIR="{0}"
146
+epsg=$(pkinfo -i "{1}" -a_srs | tail -c -18 | cut -d '"' -f 4)
147
+grass72 -c epsg:$epsg "$INDIR"/grassdata/templocation -e
148
+export GRASS_BATCH_JOB="$INDIR/bash_grass_set_cmd.sh"
149
+export GRASS_VERBOSE=0
150
+grass72 "$INDIR"/grassdata/templocation/PERMANENT
151
+rm -rf "$INDIR"/grassdata/templocation'''.format(os.path.dirname(os.path.realpath(__file__)), inputFile))
152
+
153
+            grassCMD = '/bin/bash -c "{0}/bash_grass_exec_cmd.sh > /dev/null 2>&1"'.format(os.path.dirname(os.path.realpath(__file__)))
154
+            subprocess.call('chmod +x ./bash_grass_exec_cmd.sh', shell=True, stderr=subprocess.STDOUT)
155
+            subprocess.call('chmod +x ./bash_grass_set_cmd.sh', shell=True, stderr=subprocess.STDOUT)
156
+            selectedCommands.update({3: grassCMD})
157
+
158
+        if 4 in selectedTools:
159
+            maskFileTif = maskFile.replace('shp', 'tif')
160
+            sagaCMD = 'saga_cmd statistics_grid 5 -ZONES "{0}" -STATLIST "{1}" -OUTTAB /tmp/zonal_saga_out.txt'.format(maskFileTif, inputFile)
161
+            selectedCommands.update({4: sagaCMD})
162
+
143 163
         return selectedCommands
144
-        
145
-   
146
-        
147
-    def executeCommands(self, commands, repetitions): 
164
+
165
+    def executeCommands(self, commands, repetitions):
148 166
         '''
149 167
         Executes the command for the selected provider.
150 168
         '''
151 169
         timing = {}
152 170
         secondsAvg = []
153 171
         cpuAvg = []
154
-        
155
-        #~ '''/usr/bin/time -f "seconds: %e \ncpu-load: %P" /bin/bash -c 'resX=$(pkinfo -i test_data/wc2.0_10m_tavg_07.tif -ns | cut -d '\'' '\'' -f 2);echo $resX; resY=$(pkinfo -i test_data/wc2.0_10m_tavg_07.tif -nl | cut -d '\'' '\'' -f 2);echo $resY;xmin=$(pkinfo -i test_data/wc2.0_10m_tavg_07.tif -ulx | cut -d '=' -f 2);echo $xmin;xmax=$(pkinfo -i test_data/wc2.0_10m_tavg_07.tif -lrx | cut -d '=' -f 2);echo $xmax;ymax=$(pkinfo -i test_data/wc2.0_10m_tavg_07.tif -uly | cut -d '=' -f 2);echo $ymax;ymin=$(pkinfo -i test_data/wc2.0_10m_tavg_07.tif -lry | cut -d '=' -f 2);echo $ymin;gdal_rasterize -ts $resX $resY -te $xmin $ymin $xmax $ymax -a_nodata 0 -ot Byte -burn 1 -l mask test_data/mask.shp test_data/out.tif'
156
-        #~ '''
172
+
173
+        # run the rasterization if neccessary
174
+        rastRuns = 0
175
+        for key in commands.items():
176
+            if ((key[0] == 1 or key[0] == 3 or key[0] == 4) and rastRuns == 0):
177
+                try:
178
+                    rasterize = subprocess.check_output('/bin/bash -c ./bash_rasterize.sh', shell=True, stderr=subprocess.STDOUT)
179
+                    rastRuns += 1
180
+                except OSError:
181
+                    print "Bash file for rasterization not found."
182
+                    sys.exit()
183
+
157 184
         for key, value in commands.items():
158 185
             reps = 0
159
-            for i in range(0,int(repetitions)): 
186
+            for i in range(0, int(repetitions)):
160 187
                 try:
161
-                    # check if the command should be run via .sh / bash script
162
-                    if commands[key] == 'bash_script':
163
-                        
164
-                        output = subprocess.check_output(
165
-                        '''/usr/bin/time -f "seconds: %e \ncpu-load: %P" /bin/bash -c ./bash_oft.sh | tail -2'''.format(value),
188
+                    output = subprocess.check_output(
189
+                        '/usr/bin/time -f "seconds: %e \ncpu-load: %P" {0} | tail -2'.format(value),
166 190
                         shell=True,
167 191
                         stderr=subprocess.STDOUT
168
-                        )
169
-                    else:
170
-                        print "nope?"                           
192
+                    )
193
+                    # play nice with programs which do not handle fast calls to well
194
+                    time.sleep(0.1)
171 195
                 except subprocess.CalledProcessError as grepexc:
172 196
                     print "error code", grepexc.returncode, grepexc.output
173
-                
197
+
174 198
                 # grep seconds and cpu-load from the string
175 199
                 seconds = float((re.split(":|\n", output[0:28].replace(" ", "")))[1])
176 200
                 cpuload = int(((re.split(":|\n", output[0:28].replace(" ", "")))[3]).replace("%", ""))
177
-                
201
+
178 202
                 # set output to 0 to prevent any 'misappropriation' of older outputs
179 203
                 output = "sec: 0 \n cpu:0"
180
-                
204
+
181 205
                 # increment reps and fill lists for timing & cpu load
182 206
                 secondsAvg.append(seconds)
183 207
                 cpuAvg.append(cpuload)
184 208
                 reps += 1
185
-                
186
-                
187
-            # update the dict with the average timing and cpuload, as well as a repetition counter    
188
-            timing.update({key:[round(sum(secondsAvg) / len(secondsAvg), 3), sum(cpuAvg) / len(cpuAvg), reps]})
189
-            
209
+
210
+            # update the dict with the average timing and cpuload, as well as a repetition counter
211
+            timing.update({key: [round(sum(secondsAvg) / len(secondsAvg), 3), sum(cpuAvg) / len(cpuAvg), reps]})
212
+
190 213
             # clear the lists for the next values
191 214
             secondsAvg[:] = []
192 215
             cpuAvg[:] = []
193
-                        
194
-        print timing 
195
-        sys.exit()
216
+
217
+        # print timing
218
+        # sys.exit()
196 219
 
197 220
         return timing
198
-        
199
-        
221
+
200 222
     def showOutput(self, output, selectedTools):
201 223
         '''
202 224
         Construct a pretty table to show a comparison of the programs
203 225
         on the command line.
204 226
         '''
205 227
         # merge the names of the selected tools with the numerical representation
206
-        # save as a new dict        
228
+        # save as a new dict
207 229
         dd = defaultdict(list)
208 230
         for d in (selectedTools, output):
209 231
             for key, value in d.iteritems():
210
-                dd[key].extend([value])  
232
+                dd[key].extend([value])
211 233
 
212
-        print dd
213 234
         out = ''
214
-        
235
+
215 236
         out += "All selected tools ran successfully.\n\n"
216 237
         # print with underscore
217
-        out += "\033[4m{:<8} {:<17} {:<10} {:<15}\033[0m\n".format('ID','Tool','Time', 'CPU_load')
218
-        
238
+        out += "\033[4m{:<8} {:<17} {:<15} {:<15} {:<15}\033[0m\n".format('ID', 'Tool', 'seconds (avg)', 'CPU_load (avg)', 'No. of runs')
239
+
219 240
         for k, v in dd.iteritems():
220
-            out += "{:<8} {:<17} {:<10} {:<10}\n".format(k, dd[k][0], dd[k][1][0], dd[k][1][1])          
221
-        
241
+            out += "{:<8} {:<17} {:<15} {:<15} {:<15}\n".format(k, dd[k][0], dd[k][1][0], dd[k][1][1], dd[k][1][2])
242
+
222 243
         print out
223
-        #return out
224
-    
244
+        # return out
245
+
225 246
     def showUsage(self):
226
-        #print zonalBenchmark.__init__.__doc__
247
+        # print zonalBenchmark.__init__.__doc__
227 248
         print u'''Usage:
228
-        python zonalStatBenchmark [tools] [input raster] [input mask / shape]
249
+        python zonalStatBenchmark [tools] [input raster] [input mask / shape] [number of runs]
229 250
         
230
-        [tools]                expects a chain of numbers, seperated by a hyphen; 
231
-                               e.g. 1-2-6 for selecting oft-stat, pktools and SAGA
232
-        [input raster]         the raster file for input
233
-        [input mask / shape]   the raster or shape mask; be aware: some 
234
-                               tools allow only raster input, some only 
235
-                               vector input - so save a vector file and 
236
-                               its rasterization with the same name in 
237
-                               the same dir
251
+        [tools]                 expects a chain of numbers, seperated by a hyphen; 
252
+                                e.g. 1-2-3 for selecting oft-stat, pktools and GRASS
253
+                               
254
+        [input raster]          the raster file for input
255
+        
256
+        [input mask / shape]    the mask shape; expects a vector file which will be
257
+                                automatically rasterized if the selected toos require it
258
+                               
259
+        [number of runs]        the number of runs / repetitions per tool, e.g. "5" means
260
+                                every tool will run 5 times and the average of the runs will
261
+                                be calculated
262
+        
263
+        All arguments are mandatory.
238 264
         
239 265
         The following tools are available:
240 266
         
241 267
         1. oft-stat
242 268
         2. pktools
243
-        3̶.̶ ̶P̶y̶t̶h̶o̶n̶/̶G̶D̶A̶L̶ ̶(̶p̶e̶r̶r̶y̶g̶e̶o̶ ̶-̶ ̶g̶i̶s̶t̶)̶
244
-        4̶.̶ ̶G̶R̶A̶S̶S̶
245
-        5̶.̶ ̶R̶
246
-        6. SAGA
269
+        3. GRASS
270
+        4. SAGA
247 271
         '''
248
-        
272
+
249 273
 # userArgs[0] = selected providers
250 274
 # userArgs[1] = input TIF
251 275
 # userArgs[2] = masking shape
@@ -261,7 +285,3 @@ commands = zonalBenchmark().setCommands(userArgs[0], userArgs[1], userArgs[2])
261 285
 timeOutput = zonalBenchmark().executeCommands(commands, userArgs[3])
262 286
 
263 287
 zonalBenchmark().showOutput(timeOutput, userArgs[0])
264
-
265
-
266
-
267
-