Commit f89ab96

Better support for the Reanalysis work
jtilson committed Feb 13, 2021
1 parent db10c3f commit f89ab96
Showing 6 changed files with 90 additions and 78 deletions.
92 changes: 46 additions & 46 deletions TideSIMULATION/test_diffs.py
@@ -265,8 +265,8 @@ def makeLowpassHist(start, end, lowpassAllstations, filterOrder='', metadata=['l

# Get HOUR data. 3 stations are assembled in the obs.yml

#yamlname=os.path.join(os.path.dirname(__file__), '../config', 'obs.yml')
yamlname='~/ADCIRCSupportTools/TideSIMULATION/obs.yml'
yamlname=os.path.join(os.path.dirname(__file__), '../config', 'obs.yml')
#yamlname='~/ADCIRCSupportTools/TideSIMULATION/obs.yml'

rpl = GetObsStations(product='hourly_height', rootdir=rootdir, yamlname=yamlname, metadata=iometadata)
stations = rpl.stationListFromYaml()
@@ -320,52 +320,52 @@ def makeLowpassHist(start, end, lowpassAllstations, filterOrder='', metadata=['l
# Do we need to carry colors ?
# Create a station df with columns of cutoffs

filterOrder=10
lowpassAllstations=dict()
for station in intersectedStations:
print('Process station {}'.format(station))
stationName = df_stationData.loc[int(station)]['stationname']
lowpassdata = dict() # Carry all stations in the order processed but first add the OBS and detided
lowpassdata['OBS']=df_hourlyOBS[station] # Data to interpret
lowpassdata['DETIDE']=df_diff[station] # Actual detided data set
df_lowpass=pd.DataFrame()
for cutoffflank,cutoff in zip(cutoffs,hourly_cutoffs):
print('Process cutoff {} for station {}'.format(cutoff,station))
df_temp = df_hourlyOBS[station].dropna()
df_lowpass[str(cutoff)]=butter_lowpass_filter(df_temp,filterOrder=10, numHours=cutoffflank)
df_lowpass.index = df_temp.index
lowpassdata['LP']=df_lowpass
lowpassAllstations[station]=lowpassdata
lowpassAllstations['station']=station
lowpassAllstations['stationName']=stationName
# For each station plot. OBS,explicit detided, cutoffs
makeLowpassPlot(plot_timein, plot_timeout, lowpassAllstations, filterOrder=filterOrder)
makeLowpassHist(plot_timein, plot_timeout, lowpassAllstations, filterOrder=filterOrder)
#filterOrder=10
#lowpassAllstations=dict()
#for station in intersectedStations:
# print('Process station {}'.format(station))
# stationName = df_stationData.loc[int(station)]['stationname']
# lowpassdata = dict() # Carry all stations in the order processed but first add the OBS and detided
# lowpassdata['OBS']=df_hourlyOBS[station] # Data to interpret
# lowpassdata['DETIDE']=df_diff[station] # Actual detided data set
# df_lowpass=pd.DataFrame()
# for cutoffflank,cutoff in zip(cutoffs,hourly_cutoffs):
# print('Process cutoff {} for station {}'.format(cutoff,station))
# df_temp = df_hourlyOBS[station] #.dropna()
# df_lowpass[str(cutoff)]=butter_lowpass_filter(df_temp,filterOrder=10, numHours=cutoffflank)
# df_lowpass.index = df_temp.index
# lowpassdata['LP']=df_lowpass
# lowpassAllstations[station]=lowpassdata
# lowpassAllstations['station']=station
# lowpassAllstations['stationName']=stationName
# # For each station plot. OBS,explicit detided, cutoffs
# makeLowpassPlot(plot_timein, plot_timeout, lowpassAllstations, filterOrder=filterOrder)
# makeLowpassHist(plot_timein, plot_timeout, lowpassAllstations, filterOrder=filterOrder)

print('Start filterOrder sweep')
filterOrders=[1,2,3,4,5,6,50,7,8,9,10]

lowpassFOAllstations=dict()
cutoff=24+4 # 24 hours plus a 4 hour flank
hourly_cutoff=24
for station in intersectedStations:
print('Process station {}'.format(station))
stationName = df_stationData.loc[int(station)]['stationname']
lowpassFOdata = dict() # Carry all stations in the order processed but first add the OBS and detided
lowpassFOdata['OBS']=df_hourlyOBS[station] # Data to interpret
lowpassFOdata['DETIDE']=df_diff[station] # Actual detided data set
df_lowpass=pd.DataFrame()
for filterOrder in filterOrders:
print('Process cutoff {} for station {} filterOrder {}'.format(cutoff,station,filterOrder))
df_temp = df_hourlyOBS[station].dropna()
df_lowpass[str(filterOrder)]=butter_lowpass_filter(df_temp,filterOrder=filterOrder, numHours=cutoff)
df_lowpass.index = df_temp.index
lowpassFOdata['LP']=df_lowpass
lowpassFOAllstations[station]=lowpassFOdata
lowpassFOAllstations['station']=station
lowpassFOAllstations['stationName']=stationName
# For each station plot. OBS,explicit detided, cutoffs
makeLowpassFOPlot(plot_timein, plot_timeout, hourly_cutoff, lowpassFOAllstations)
#lowpassFOAllstations=dict()
#cutoff=24+4 # 24 hours plus a 4 hour flank
#hourly_cutoff=24
#for station in intersectedStations:
# print('Process station {}'.format(station))
# stationName = df_stationData.loc[int(station)]['stationname']
# lowpassFOdata = dict() # Carry all stations in the order processed but first add the OBS and detided
# lowpassFOdata['OBS']=df_hourlyOBS[station] # Data to interpret
# lowpassFOdata['DETIDE']=df_diff[station] # Actual detided data set
# df_lowpass=pd.DataFrame()
# for filterOrder in filterOrders:
# print('Process cutoff {} for station {} filterOrder {}'.format(cutoff,station,filterOrder))
# df_temp = df_hourlyOBS[station].dropna()
# df_lowpass[str(filterOrder)]=butter_lowpass_filter(df_temp,filterOrder=filterOrder, numHours=cutoff)
# df_lowpass.index = df_temp.index
# lowpassFOdata['LP']=df_lowpass
# lowpassFOAllstations[station]=lowpassFOdata
# lowpassFOAllstations['station']=station
# lowpassFOAllstations['stationName']=stationName
# # For each station plot. OBS,explicit detided, cutoffs
# makeLowpassFOPlot(plot_timein, plot_timeout, hourly_cutoff, lowpassFOAllstations)

#
# Try the FFT lowpass
@@ -375,8 +375,8 @@ def makeLowpassHist(start, end, lowpassAllstations, filterOrder='', metadata=['l
hourly_cutoffs=[12,24,48,168]
cutoffs = [x + upshift for x in hourly_cutoffs]

hourly_cutoffs=[12]
cutoffs=[12]
#hourly_cutoffs=[12]
#cutoffs=[12]

fftAllstations=dict()
for station in intersectedStations:
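The lowpass sweeps above call butter_lowpass_filter with a filter order and a cutoff in hours (e.g. cutoff=24+4, a 24-hour cutoff plus a 4-hour flank), but the function body is not part of this diff. A minimal sketch of what it plausibly does, assuming hourly samples and a zero-phase scipy Butterworth filter. The name and signature come from the calls above; everything else is an assumption:

    # Sketch only -- not the repository's implementation.
    import numpy as np
    from scipy.signal import butter, filtfilt

    def butter_lowpass_filter(data, filterOrder=10, numHours=24):
        fs = 1.0                        # hourly data: 1 sample per hour
        nyquist = 0.5 * fs              # 0.5 cycles per hour
        cutoff = 1.0 / numHours         # desired cutoff in cycles per hour
        b, a = butter(filterOrder, cutoff / nyquist, btype='low')
        return filtfilt(b, a, np.asarray(data))  # zero-phase, so no time shift

    # Usage as in the sweep above:
    # df_lowpass[str(cutoff)] = butter_lowpass_filter(df_temp, filterOrder=10, numHours=cutoffflank)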
18 changes: 13 additions & 5 deletions reanalysis/iterateKriging.py
@@ -9,7 +9,7 @@
from utilities.utilities import utilities

def main(args):
print('Process the 52 separate reanalysis error files')
print('Process the separate reanalysis error files')
utilities.log.info('Start the iterative interpolation pipeline')
ERRDIR=args.errordir
CLAMPFILE=args.clampfile
@@ -26,23 +26,29 @@ def main(args):
errfileJson=ERRDIR+'/runProps.json'
with open(errfileJson, 'r') as fp:
try:
weeklyFiles = json.load(fp)
listedFiles = json.load(fp)
except OSError:
utilities.log.error("Could not open/read file {}".format(errfileJson))
sys.exit()
print('Begin the iteration')
for key, value in weeklyFiles.items():

import random
#for key, value in dict(random.sample(listedFiles.items(),20)).items(): # listedFiles.items():
for key, value in listedFiles.items():
print(key)
print(value)
ERRFILE=value
#METADATA='_'+key # Allows adjusting names of output files to include per-week
print('Start week {}'.format(key))
print('Start {}'.format(key))
utilities.log.info('ERRFILE {}'.format(ERRFILE))
utilities.log.info('CLAMPFILE {}'.format(CLAMPFILE))
utilities.log.info('ADCJSON {}'.format(ADCJSON))
utilities.log.info('YAMLNAME {}'.format(YAMLNAME))
utilities.log.info('ROOTDIR {}'.format(ROOTDIR))
os.system('python krigListOfErrorSets.py --cv_kriging --outroot '+ROOTDIR+' --yamlname '+YAMLNAME+' --errorfile '+ERRFILE+' --clampfile '+CLAMPFILE+' --gridjsonfile '+ADCJSON)
if args.daily:
os.system('python krigListOfErrorSets.py --daily --outroot '+ROOTDIR+' --yamlname '+YAMLNAME+' --errorfile '+ERRFILE+' --clampfile '+CLAMPFILE+' --gridjsonfile '+ADCJSON)
else:
os.system('python krigListOfErrorSets.py --outroot '+ROOTDIR+' --yamlname '+YAMLNAME+' --errorfile '+ERRFILE+' --clampfile '+CLAMPFILE+' --gridjsonfile '+ADCJSON)
print('Completed ensemble')

if __name__ == '__main__':
Expand All @@ -55,6 +61,8 @@ def main(args):
parser.add_argument('--cv_kriging', action='store_true', dest='cv_kriging',
help='Boolean: Invoke a CV procedure prior to fitting kriging model')
parser.add_argument('--yamlname', action='store', dest='yamlname', default=None)
parser.add_argument('--daily', action='store_true', dest='daily',
help='Boolean: specify DAILY to the krig method')
parser.add_argument('--outroot', action='store', dest='outroot', default=None,
help='Available high level output directory')
args = parser.parse_args()
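In iterateKriging.py above, runProps.json is read as a dict and each entry is dispatched as one krigListOfErrorSets.py run, with --daily forwarded when requested. For comparison, a sketch of the same dispatch using subprocess.run with an argument list instead of the concatenated os.system strings; the flag names are the ones in this diff, and the JSON shape shown is an assumption:

    # Sketch only. Assumed runProps.json shape:
    #   {"18-332": ".../errorfield/stationSummaryAves_18-332_2018112800.csv", ...}
    import json
    import subprocess

    with open(errfileJson, 'r') as fp:
        listedFiles = json.load(fp)

    for key, errfile in listedFiles.items():
        cmd = ['python', 'krigListOfErrorSets.py',
               '--outroot', ROOTDIR, '--yamlname', YAMLNAME,
               '--errorfile', errfile, '--clampfile', CLAMPFILE,
               '--gridjsonfile', ADCJSON]
        if args.daily:
            cmd.insert(2, '--daily')     # choose the DAILY filename nomenclature
        subprocess.run(cmd, check=True)  # stop the sweep if any child run fails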
25 changes: 22 additions & 3 deletions reanalysis/krigListOfErrorSets.py
@@ -37,14 +37,25 @@ def genSinglePlot(i, fig, df_data,vmin,vmax,inputMetadata):
ax.get_yaxis().set_visible(False)
plt.axis('tight')

def parseDateFilename(infilename):
def parseWeeklyDateFilename(infilename):
"""
filename must be of the form stationSummaryAves_01_201801010000_201801070000.csv
"""
utilities.log.info('Using WEEKLY form of filenames')
words=(infilename.split('.')[-2]).split('_')
metadata = '_'+words[-3]+'_'+words[-1]+'_'+words[-2]
return metadata

def parseDateFilename(infilename):
"""
filename must be of the form stationSummaryAves_18-332_2018112800.csv
"""
utilities.log.info('Using DAILY form of filenames')
words=(infilename.split('.')[-2]).split('_')
metadata = '_'+words[-2]+'_'+words[-1]
return metadata


def main(args):
utilities.log.info(args)

@@ -82,7 +93,10 @@ def main(args):
iometadata=args.iometadata
inerrorfile = args.errorfile

iometadata = parseDateFilename(inerrorfile) # This will be used to update all output files
if args.daily:
iometadata = parseDateFilename(inerrorfile) # This will be used to update all output files
else:
iometadata = parseWeeklyDateFilename(inerrorfile) # This will be used to update all output files

# Fetch clamping nodes to act as boundary for kriging
# clampfile = os.path.join(os.path.dirname(__file__), "../config", config['DEFAULT']['ClampList'])
@@ -133,9 +147,12 @@ def main(args):
param_dict, vparams, best_score, full_scores = krig_object.optimize_kriging(krig_object) # , param_dict_list, vparams_dict_list)
utilities.log.info('Kriging best score is {}'.format(best_score))
print('List of all scores {}'.format(full_scores))
fullScoreDict = {'best_score':best_score,'scores': full_scores, 'params':param_dict,'vparams':vparams}
#fullScoreDict = {'best_score':best_score,'scores': full_scores, 'params':param_dict,'vparams':vparams}
fullScoreDict = {'best_score':best_score,'params':param_dict,'vparams':vparams}
##jsonfilename = '_'.join(['','fullScores.json'])
jsonfilename = 'fullCVScores.json'
utilities.log.info('Partial CV score {}'.format(fullScoreDict))
print('Partial CV score {}'.format(fullScoreDict))
with open(jsonfilename, 'w') as fp:
json.dump(fullScoreDict, fp)

@@ -251,5 +268,7 @@ def main(args):
help='Boolean: Display histograms station only, vis grid errors, and adcirc nodes')
parser.add_argument('--outroot', action='store', dest='outroot', default=None,
help='Available high level output directory')
parser.add_argument('--daily', action='store_true', dest='daily',
help='Boolean: Choose the DAILY filename nomenclature')
args = parser.parse_args()
sys.exit(main(args))
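The two filename parsers added above differ only in which underscore-separated fields of the error-file name they keep. Worked examples, derived directly from the code using the filename forms quoted in the docstrings (note that the weekly variant emits the end stamp before the start stamp, as the code is written):

    parseWeeklyDateFilename('stationSummaryAves_01_201801010000_201801070000.csv')
    # -> '_01_201801070000_201801010000'   (week id, end stamp, start stamp)

    parseDateFilename('stationSummaryAves_18-332_2018112800.csv')
    # -> '_18-332_2018112800'              (year-day id, date stamp)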
15 changes: 0 additions & 15 deletions reanalysis/runReanalysisDaily.sh
@@ -24,28 +24,13 @@ export OUTROOT=$RUNTIMEDIR/$BASEDIREXTRA/DAILY
python dailyLowpassSampledError.py --inDir $INDIR --outroot $OUTROOT
mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-weekly


# Interpolate a single specific file
#export ERRFILE=$OUTROOT/errorfield/stationSummaryAves_18-332_2018112800.csv
#export ADCJSON=$INDIR/adc_coord.json
#export CLAMPFILE=$CODEBASE/config/clamp_list_hsofs.dat
#export YAMLNAME=$CODEBASE/config/int.REANALYSIS.yml
#export OUTROOT=$RUNTIMEDIR/$BASEDIREXTRA/DAILY

# Interpolate a single specific file
export ADCJSON=$INDIR/adc_coord.json
export CLAMPFILE=$CODEBASE/config/clamp_list_hsofs.dat
export YAMLNAME=$CODEBASE/config/int.REANALYSIS.yml
export OUTROOT=$RUNTIMEDIR/$BASEDIREXTRA/DAILY
export ERRDIR=$OUTROOT/errorfield

export ERRFILE=$OUTROOT/errorfield/stationSummaryAves_18-332_2018112800.csv
export ADCJSON=$INDIR/adc_coord.json
export CLAMPFILE=$CODEBASE/config/clamp_list_hsofs.dat
export YAMLNAME=$CODEBASE/config/int.REANALYSIS.yml
export OUTROOT=$RUNTIMEDIR/$BASEDIREXTRA/DAILY

#python krigListOfErrorSets.py --outroot $OUTROOT --yamlname $YAMLNAME --errorfile $ERRFILE --clampfile $CLAMPFILE --gridjsonfile $ADCJSON
python krigListOfErrorSets.py --daily --outroot $OUTROOT --yamlname $YAMLNAME --errorfile $ERRFILE --clampfile $CLAMPFILE --gridjsonfile $ADCJSON

mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-interpolate
6 changes: 3 additions & 3 deletions reanalysis/runReanalysisWeekly_allUpdate.sh
@@ -20,8 +20,8 @@ export BASEDIREXTRA=TESTFULL/STATE/YEARLY-$YEAR/KRIG_LONGRANGE
# Store files in $RUNTIMEDIR/WEEKLY/errorfield
export INDIR=$RUNTIMEDIR/$BASEDIREXTRA
export OUTROOT=$RUNTIMEDIR/$BASEDIREXTRA/WEEKLY
#python weeklyLowpassSampledError.py --inyear 2018 --inDir $INDIR --outroot $OUTROOT
#mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-weekly
python weeklyLowpassSampledError.py --inyear 2018 --inDir $INDIR --outroot $OUTROOT
mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-weekly


# Interpolate a single specific file
@@ -32,4 +32,4 @@ export YAMLNAME=$CODEBASE/config/int.REANALYSIS.yml
export OUTROOT=$RUNTIMEDIR/$BASEDIREXTRA/WEEKLY
export ERRDIR=$OUTROOT/errorfield
python iterateKriging.py --outroot $OUTROOT --yamlname $YAMLNAME --errordir $ERRDIR --clampfile $CLAMPFILE --gridjsonfile $ADCJSON
mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-interpolate
#mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-interpolate
12 changes: 6 additions & 6 deletions reanalysis/runReanalysisWeekly_singleUpdate.sh
@@ -9,14 +9,14 @@
export YEAR=2018


export CODEBASE=/home/jtilson/ADCIRCSupportTools
export CODEBASE=/projects/sequence_analysis/vol1/prediction_work/CausalInference/CausalNetworking_forKirk/TEST/ADCIRCSupportTools
export PYTHONPATH=$CODEBASE:$PYTHONPATH
export RUNTIMEDIR=.
export BASEDIREXTRA=TESTFULLSTATE/YEARLY-$YEAR
export BASEDIREXTRA=TESTFULL/STATE/YEARLY-$YEAR/KRIG_LONGRANGE

# Build the yearly error file store in $RUNTIMEDIR/BASEDIREXTRA
python yearlyReanalysis.py --iosubdir $BASEDIREXTRA --urljson reanalysis.json
mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-yearly
#python yearlyReanalysis.py --iosubdir $BASEDIREXTRA --urljson reanalysis.json
#mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-yearly

# Store files in $RUNTIMEDIR/WEEKLY/errorfield
export INDIR=$RUNTIMEDIR/$BASEDIREXTRA
@@ -31,5 +31,5 @@ export ADCJSON=$INDIR/adc_coord.json
export CLAMPFILE=$CODEBASE/config/clamp_list_hsofs.dat
export YAMLNAME=$CODEBASE/config/int.REANALYSIS.yml
export OUTROOT=$RUNTIMEDIR/$BASEDIREXTRA/WEEKLY
python krigListOfErrorSets.py --outroot $OUTROOT --yamlname $YAMLNAME --errorfile $ERRFILE --clampfile $CLAMPFILE --gridjsonfile $ADCJSON
mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-interpolate
python krigListOfErrorSets.py --cv_kriging --outroot $OUTROOT --yamlname $YAMLNAME --errorfile $ERRFILE --clampfile $CLAMPFILE --gridjsonfile $ADCJSON
mv $RUNTIMEDIR/log $RUNTIMEDIR/$BASEDIREXTRA/log-CV-interpolate
