climate-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From huiky...@apache.org
Subject [1/3] climate git commit: CLIMATE-732 - Update dataset_processor.temporal_rebin
Date Sat, 30 Jan 2016 00:45:00 GMT
Repository: climate
Updated Branches:
  refs/heads/master 078316308 -> 947478539


CLIMATE-732 - Update dataset_processor.temporal_rebin

- The efficiency of dataset_processor.temporal_rebin has been significantly improved.
- The temporal resolution must now be given as a string ('full', 'annual', 'monthly' or 'daily') instead of a datetime.timedelta.


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/3fec4826
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/3fec4826
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/3fec4826

Branch: refs/heads/master
Commit: 3fec482641694a4917652e4bd74b6d88a872cd96
Parents: 64d3668
Author: huikyole <huikyole@argo.jpl.nasa.gov>
Authored: Fri Jan 29 15:43:32 2016 -0800
Committer: huikyole <huikyole@argo.jpl.nasa.gov>
Committed: Fri Jan 29 15:43:32 2016 -0800

----------------------------------------------------------------------
 ocw/dataset_processor.py | 83 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/3fec4826/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 9095883..40f6cc8 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -104,33 +104,22 @@ def temporal_subset(month_start, month_end, target_dataset, average_each_year=Fa
     
     return new_dataset
 
-def temporal_rebin(target_dataset, temporal_resolution):     
+def temporal_rebin(target_dataset, time_unit):     
     """ Rebin a Dataset to a new temporal resolution
     
     :param target_dataset: Dataset object that needs temporal rebinned
     :type target_dataset: :class:`dataset.Dataset`
 
-    :param temporal_resolution: The new temporal bin size
-    :type temporal_resolution: :class:`datetime.timedelta`
+    :param temporal_resolution: The new temporal resolution
+    :type temporal_resolution: :mod:`string`               
     
     :returns: A new temporally rebinned Dataset
     :rtype: :class:`dataset.Dataset`
     """
     # Decode the temporal resolution into a string format that 
     # _rcmes_calc_average_on_new_time_unit_K() can understand
-    day_count = temporal_resolution.days
-    time_unit = None
-    if day_count == 1:
-        time_unit = 'daily'
-    elif day_count > 1 and day_count <= 31:
-        time_unit = 'monthly'
-    elif day_count > 31 and day_count <= 366:
-        time_unit = 'annual'
-    else:
-        time_unit = 'full'
 
-    masked_values = target_dataset.values.view(ma.MaskedArray)
-    binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit_K(masked_values, target_dataset.times,
time_unit)
+    binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit(target_dataset.values,
target_dataset.times, time_unit)
     binned_dates = np.array(binned_dates)
     new_dataset = ds.Dataset(target_dataset.lats, 
                              target_dataset.lons, 
@@ -886,6 +875,70 @@ def _rcmes_create_mask_using_threshold(masked_array, threshold=0.5):
 
     return mymask
 
+def _rcmes_calc_average_on_new_time_unit(data, dates, unit):
+    """ Rebin 3d array and list of dates using the provided unit parameter
+
+    :param data: Input data that needs to be averaged
+    :type data: 3D masked numpy array of shape (times, lats, lons)
+    :param dates: List of dates that correspond to the given data values
+    :type dates: Python datetime objects
+    :param unit: Time unit to average the data into
+    :type unit: String matching one of these values : full | annual | monthly | daily
+
+    :returns: meanstorem, newTimesList
+    :rtype: 3D numpy masked array the same shape as the input array, list of python datetime
objects
+    """
+
+    # Check if the user-selected temporal grid is valid. If not, EXIT
+    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')
+    if not acceptable:
+        print 'Error: unknown unit type selected for time averaging: EXIT'
+        return -1,-1,-1,-1
+
+    nt, ny, nx = data.shape
+    if unit == 'full':
+        new_data = ma.mean(data, axis=0)
+        new_date = [dates[size(dates)/2]]
+    if unit == 'annual':
+        years = [d.year for d in target_dataset.times]
+        years_sorted = np.unique(years)
+        new_data = ma.zeros([years_sorted.size, ny, nx])
+        it = 0
+        new_date = []
+        for year in years_sorted:
+            index = np.where(years == year)[0]
+            new_data[it,:] = ma.mean(data[index,:], axis=0)
+            new_date.append(datetime.datetime(year=year, month=7, day=2))
+            it = it+1
+    if unit == 'monthly':
+        years = [d.year for d in target_dataset.times]
+        years_sorted = np.unique(years)
+        months = [d.month for d in target_dataset.times]
+        months_sorted = np.unique(months)
+        
+        new_data = ma.zeros([years_sorted.size*months_sorted.size, ny, nx])
+        it = 0
+        new_date = []
+        for year in years_sorted:
+            for month in months_sorted:
+                index = np.where((years == year) & (months == month))[0]
+                new_data[it,:] = ma.mean(data[index,:], axis=0)
+                new_date.append(datetime.datetime(year=year, month=month, day=15))
+                it = it+1   
+    if unit == 'daily':
+        dates = [d.year*10000.+d.month*100.+d.day for d in target_dataset.times] 
+        dates_sorted = np.unique(dates)
+
+        new_data = ma.zeros([dates_sorted.size, ny, nx])
+        it = 0
+        new_date = []
+        for date in dates_sorted:
+            index = np.where(dates == date)[0]
+            new_data[it,:] = ma.mean(data[index,:], axis=0)
+            new_date.append(datetime.datetime(year=date/10000, month=(date % 10000)/100,
day=date % 100))
+            it = it+1
+        
+    return new_data, np.array(new_date)
 
 def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit):
     """ Rebin 3d array and list of dates using the provided unit parameter


Mime
View raw message