climate-commits mailing list archives

From jo...@apache.org
Subject [43/51] [abbrv] [partial] Adding Jinwon's custom RCMET
Date Fri, 09 May 2014 02:03:52 GMT
http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/storage/rcmed.py
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/storage/rcmed.py b/src/main/python/rcmes/storage/rcmed.py
new file mode 100755
index 0000000..39a235c
--- /dev/null
+++ b/src/main/python/rcmes/storage/rcmed.py
@@ -0,0 +1,349 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+'''
+Classes:
+    RCMED - A class for retrieving data from the Regional Climate Model Evaluation Database (JPL).
+'''
+
+import urllib, urllib2
+import re
+import json
+import numpy as np
+import numpy.ma as ma
+from datetime import datetime
+import calendar
+from ocw.dataset import Dataset  # parameter_dataset() below returns an ocw Dataset
+
+
+URL = 'http://rcmes.jpl.nasa.gov/query-api/query.php?'
+
+
+def get_parameters_metadata():
+    '''Get the metadata of all parameters from RCMED.
+
+    :returns: One dictionary of metadata for each parameter
+    :rtype: List of dictionaries
+    '''
+
+    param_info_list = []
+    url = URL + "&param_info=yes"
+    string = urllib2.urlopen(url)
+    data_string = string.read()
+    json_format_data = json.loads(data_string)
+    fields_name = json_format_data['fields_name']
+    data = json_format_data['data']
+    for row in data:
+        param_info_list.append(dict(zip(fields_name, row)))
+
+    return param_info_list
+
+
+def _make_mask_array(values, parameter_id, parameters_metadata):
+    '''Create a masked array that masks out missing values.
+
+    :param values: Numpy array of values which may contain missing values
+    :type values: Numpy array
+    :param parameter_id: Parameter's id
+    :type parameter_id: Integer
+    :param parameters_metadata: Metadata for all parameters
+    :type parameters_metadata: List of dictionaries
+
+    :returns: Masked array of values
+    :rtype: Masked array
+    '''
+
+    for each in parameters_metadata:
+        if each['parameter_id'].encode() == str(parameter_id):
+            missing_values = each['missingdataflag'].encode()
+            break
+    # Convert the missing-value flag to the array's dtype so the mask comparison is applied
+    if 'float' in str(values.dtype):
+        missing_values = float(missing_values)
+    if 'int' in str(values.dtype):
+        missing_values = int(missing_values)
+
+    values = ma.masked_array(values, mask=(values == missing_values))
+
+    return values
+
+
+def _reshape_values(values, unique_values):
+    '''Reshape values into 4D array.
+
+    :param values: Raw values data
+    :type values: numpy array
+    :param unique_values: Tuple of unique latitudes, longitudes and times data.
+    :type unique_values: Tuple 
+    
+    :returns: Reshaped values data
+    :rtype: Numpy array
+    '''
+
+    lats_len = len(unique_values[0])
+    lons_len = len(unique_values[1])
+    times_len = len(unique_values[2])
+
+    values = values.reshape(times_len, lats_len, lons_len)
+
+    return values
+
+
+def _calculate_time(unique_times, time_step):
+    '''Convert each time string to a datetime object.
+
+    :param unique_times: Unique time data
+    :type unique_times: String
+    :param time_step: Time step
+    :type time_step: String
+
+    :returns: Unique datetime objects of time data
+    :rtype: Numpy array
+    '''
+
+    time_format = "%Y-%m-%d %H:%M:%S"
+    unique_times = np.array([datetime.strptime(time, time_format) for time in unique_times])
+    # There is no need to sort time.
+    # This may still be required in RCMES:
+    # unique_times.sort()
+    # This function should be moved to data_process.
+
+    return unique_times
+
+
+def _make_unique(lats, lons, times):
+    '''Find the unique values of input data.
+
+    :param lats: lats
+    :type lats: Numpy array
+    :param lons: lons
+    :type lons: Numpy array
+    :param times: times
+    :type times: Numpy array
+
+    :returns: Unique numpy arrays of latitudes, longitudes and times
+    :rtype: Tuple
+    '''
+
+    unique_lats = np.unique(lats)
+    unique_lons = np.unique(lons)
+    unique_times = np.unique(times)
+
+    return (unique_lats, unique_lons, unique_times)
+
+
+def _get_data(url):
+    '''Retrieve data from the database.
+
+    :param url: url to query from database
+    :type url: String
+
+    :returns: Latitudes, longitudes, times and values data
+    :rtype: (Numpy array, Numpy array, Numpy array, Numpy array)
+    '''
+
+    string = urllib2.urlopen(url)
+    data_string = string.read()    
+    index_of_data = re.search('data: \r\n', data_string)
+    data = data_string[index_of_data.end():len(data_string)]
+    data = data.split('\r\n') 
+
+    lats = []
+    lons = []
+    #levels = []
+    values = []
+    times = []
+
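+    # Each non-empty row of the response is assumed (from the indexing below)
+    # to be CSV-formatted as: latitude, longitude, level, time, value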
+    for i in range(len(data) - 1):  # Because the last row is empty, "len(data)-1" is used.
+        row = data[i].split(',')
+        lats.append(np.float32(row[0]))
+        lons.append(np.float32(row[1]))
+        # Level is not currently supported in Dataset class.
+        #levels.append(np.float32(row[2]))
+        times.append(row[3])
+        values.append(np.float32(row[4]))
+    
+    lats = np.array(lats)
+    lons = np.array(lons)
+    times = np.array(times)
+    values = np.array(values)
+
+    return lats, lons, times, values
+
+
+def _beginning_of_date(time, time_step):
+    '''Calculate the beginning of given time, based on time step.
+
+    :param time: Given time
+    :type time: Datetime
+    :param time_step: Time step (monthly or daily)
+    :type time_step: String
+
+    :returns: Beginning of given time
+    :rtype: Datetime
+    '''
+
+    if time_step.lower() == 'monthly':
+        if time.day != 1:
+            start_time_string = time.strftime('%Y%m%d')
+            start_time_string = start_time_string[:6] + '01'
+            time = datetime.strptime(start_time_string, '%Y%m%d')
+            ##TODO: Replace the 3 lines above with this line:
+            ##time = datetime(time.year, time.month, 1)
+    elif time_step.lower() == 'daily':
+        if time.hour != 0 or time.minute != 0 or time.second != 0:
+            start_time_string = time.strftime('%Y%m%d%H%M%S')
+            start_time_string = start_time_string[:8] + '000000'
+            time = datetime.strptime(start_time_string, '%Y%m%d%H%M%S')
+            ##TODO: Replace the 3 lines above with this line:
+            ##time = datetime(time.year, time.month, time.day, 0, 0, 0)
+
+    return time
+
+
+def _end_of_date(time, time_step):
+    '''Calculate the end of given time, based on time step.
+
+    :param time: Given time
+    :type time: Datetime
+    :param time_step: Time step (monthly or daily)
+    :type time_step: String
+
+    :returns: End of given time
+    :rtype: Datetime
+    '''
+
+    # Only snap to the end of the month for monthly data; the original code
+    # applied the monthly adjustment regardless of time_step.
+    if time_step.lower() == 'monthly':
+        last_day_of_month = calendar.monthrange(time.year, time.month)[1]
+        if time.day != last_day_of_month:
+            end_time_string = time.strftime('%Y%m%d')
+            end_time_string = end_time_string[:6] + str(last_day_of_month)
+            time = datetime.strptime(end_time_string, '%Y%m%d')
+            ##TODO: Replace the 3 lines above with this line:
+            ##time = datetime(time.year, time.month, last_day_of_month)
+    elif time_step.lower() == 'daily':
+        end_time_string = time.strftime('%Y%m%d%H%M%S')
+        end_time_string = end_time_string[:8] + '235959'
+        time = datetime.strptime(end_time_string, '%Y%m%d%H%M%S')
+        ##TODO: Replace the 3 lines above with this line:
+        ##time = datetime(time.year, time.month, time.day, 23, 59, 59)
+
+    return time
+
+
+def _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step):
+    '''Generate the url used to query the database.
+
+    :param dataset_id: Dataset id.
+    :type dataset_id: Integer
+    :param parameter_id: Parameter id
+    :type parameter_id: Integer
+    :param min_lat: Minimum latitude
+    :type min_lat: Float
+    :param max_lat: Maximum latitude
+    :type max_lat: Float
+    :param min_lon: Minimum longitude
+    :type min_lon: Float
+    :param max_lon: Maximum longitude
+    :type max_lon: Float
+    :param start_time: Start time
+    :type start_time: Datetime
+    :param end_time: End time 
+    :type end_time: Datetime
+    :param time_step: Time step
+    :type time_step: String
+
+    :returns: URL used to query the database
+    :rtype: String
+    '''
+
+    start_time = _beginning_of_date(start_time, time_step)
+    end_time = _end_of_date(end_time, time_step)
+    start_time = start_time.strftime("%Y%m%dT%H%MZ")
+    end_time = end_time.strftime("%Y%m%dT%H%MZ")
+
+    query = [('datasetId', dataset_id), ('parameterId', parameter_id), ('latMin', min_lat), ('latMax', max_lat),
+             ('lonMin', min_lon), ('lonMax', max_lon), ('timeStart', start_time), ('timeEnd', end_time)]
+
+    query_url = urllib.urlencode(query)
+    url_request = URL + query_url
+    
+    return url_request
+
+
+def _get_parameter_info(parameters_metadata, parameter_id):
+    '''Look up general information for the given parameter id.
+
+    :param parameters_metadata: Metadata for all parameters
+    :type parameters_metadata: List of dictionaries
+    :param parameter_id: Parameter id
+    :type parameter_id: Integer
+
+    :returns: Database name, time step, realm, instrument, start_date, end_date and unit for the given parameter
+    :rtype: (string, string, string, string, string, string, string)
+    '''
+
+    for dic in parameters_metadata:
+        if int(dic['parameter_id']) == parameter_id:
+            database = dic['database']
+            time_step = dic['timestep']
+            realm = dic['realm']
+            instrument = dic['instrument']
+            start_date = dic['start_date']
+            end_date = dic['end_date']
+            unit = dic['units']
+
+    return (database, time_step, realm, instrument, start_date, end_date, unit)
+
+
+def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time):
+    '''Get data for one parameter from the database and build a Dataset from it.
+
+    :param dataset_id: Dataset id.
+    :type dataset_id: Integer
+    :param parameter_id: Parameter id
+    :type parameter_id: Integer
+    :param min_lat: Minimum latitude
+    :type min_lat: Float
+    :param max_lat: Maximum latitude
+    :type max_lat: Float
+    :param min_lon: Minimum longitude
+    :type min_lon: Float
+    :param max_lon: Maximum longitude
+    :type max_lon: Float
+    :param start_time: Start time
+    :type start_time: Datetime
+    :param end_time: End time 
+    :type end_time: Datetime
+
+    :returns: Dataset object
+    :rtype: Object
+    '''
+    
+    parameters_metadata = get_parameters_metadata()
+    parameter_name, time_step, _, _, _, _, _ = _get_parameter_info(parameters_metadata, parameter_id)
+    url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step)
+    lats, lons, times, values = _get_data(url)
+
+    unique_lats_lons_times = _make_unique(lats, lons, times)
+    unique_times = _calculate_time(unique_lats_lons_times[2], time_step)
+    values = _reshape_values(values, unique_lats_lons_times)
+    values = _make_mask_array(values, parameter_id, parameters_metadata)
+    
+    return Dataset(unique_lats_lons_times[0], unique_lats_lons_times[1], unique_times, values, parameter_name)
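
For reviewers, a minimal usage sketch of the rcmed module above (not part of the commit). It assumes the rcmes.storage import path introduced by this patch and a reachable RCMED endpoint; the dataset and parameter ids shown are hypothetical placeholders.

    from datetime import datetime
    from rcmes.storage import rcmed

    # Print id, source database and time step for every parameter RCMED serves.
    for param in rcmed.get_parameters_metadata():
        print param['parameter_id'], param['database'], param['timestep']

    # Fetch one parameter over a lat/lon box and time range. The result is a
    # Dataset whose values are a masked (time, lat, lon) array with the
    # parameter's missingdataflag masked out.
    ds = rcmed.parameter_dataset(3, 36,  # hypothetical dataset_id / parameter_id
                                 min_lat=30.0, max_lat=45.0,
                                 min_lon=-120.0, max_lon=-100.0,
                                 start_time=datetime(2000, 1, 1),
                                 end_time=datetime(2000, 12, 31))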

http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/storage/rcmed.pyc
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/storage/rcmed.pyc b/src/main/python/rcmes/storage/rcmed.pyc
new file mode 100644
index 0000000..5b4e858
Binary files /dev/null and b/src/main/python/rcmes/storage/rcmed.pyc differ

http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/all-wcprops
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/toolkit/.svn/all-wcprops b/src/main/python/rcmes/toolkit/.svn/all-wcprops
new file mode 100755
index 0000000..70196a3
--- /dev/null
+++ b/src/main/python/rcmes/toolkit/.svn/all-wcprops
@@ -0,0 +1,41 @@
+K 25
+svn:wc:ra_dav:version-url
+V 87
+/repos/asf/!svn/ver/1485875/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit
+END
+visualize.py
+K 25
+svn:wc:ra_dav:version-url
+V 100
+/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/visualize.py
+END
+process.py
+K 25
+svn:wc:ra_dav:version-url
+V 98
+/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py
+END
+__init__.py
+K 25
+svn:wc:ra_dav:version-url
+V 99
+/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/__init__.py
+END
+do_data_prep.py
+K 25
+svn:wc:ra_dav:version-url
+V 103
+/repos/asf/!svn/ver/1485875/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py
+END
+plots.py
+K 25
+svn:wc:ra_dav:version-url
+V 96
+/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/plots.py
+END
+metrics.py
+K 25
+svn:wc:ra_dav:version-url
+V 98
+/repos/asf/!svn/ver/1476460/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py
+END

http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/entries
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/toolkit/.svn/entries b/src/main/python/rcmes/toolkit/.svn/entries
new file mode 100755
index 0000000..0aec30c
--- /dev/null
+++ b/src/main/python/rcmes/toolkit/.svn/entries
@@ -0,0 +1,232 @@
+10
+
+dir
+1485921
+https://svn.apache.org/repos/asf/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit
+https://svn.apache.org/repos/asf
+
+
+
+2013-05-23T22:20:38.670815Z
+1485875
+goodale
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+13f79535-47bb-0310-9956-ffa450edef68
+
+visualize.py
+file
+
+
+
+
+2013-05-24T10:13:49.000000Z
+6f61f99e6bb37b883ce3bb0470cd2545
+2012-08-16T14:36:57.243978Z
+1473887
+cgoodale
+has-props
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+77
+
+process.py
+file
+
+
+
+
+2013-05-24T10:13:49.000000Z
+6172bcb1200fef493d541d93407c12ed
+2013-02-20T16:20:33.453039Z
+1474977
+cgoodale
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+46769
+
+__init__.py
+file
+
+
+
+
+2013-05-24T10:13:49.000000Z
+8761fbbcde9579ef3c42e09923e56f93
+2012-08-16T14:36:57.243978Z
+1473887
+cgoodale
+has-props
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+148
+
+do_data_prep.py
+file
+
+
+
+
+2013-05-24T10:13:49.000000Z
+881100e684bbad5dffcaa35da3d4a216
+2013-05-23T22:20:38.670815Z
+1485875
+goodale
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+16200
+
+plots.py
+file
+
+
+
+
+2013-05-24T10:13:49.000000Z
+16d8de7760d609df6373d15fee4cffe6
+2013-03-05T16:58:18.846251Z
+1475040
+mjjoyce
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+16323
+
+metrics.py
+file
+
+
+
+
+2013-05-24T10:13:49.000000Z
+8dbdd12fc067e73c0ae2c5f48c7245a7
+2013-02-10T21:47:51.341146Z
+1474940
+cgoodale
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+58120
+

http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base b/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base
new file mode 100755
index 0000000..614166f
--- /dev/null
+++ b/src/main/python/rcmes/toolkit/.svn/prop-base/__init__.py.svn-base
@@ -0,0 +1,5 @@
+K 12
+svn:keywords
+V 11
+Id Revision
+END

http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base b/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base
new file mode 100755
index 0000000..614166f
--- /dev/null
+++ b/src/main/python/rcmes/toolkit/.svn/prop-base/visualize.py.svn-base
@@ -0,0 +1,5 @@
+K 12
+svn:keywords
+V 11
+Id Revision
+END

http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base b/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base
new file mode 100755
index 0000000..11548e6
--- /dev/null
+++ b/src/main/python/rcmes/toolkit/.svn/text-base/__init__.py.svn-base
@@ -0,0 +1,2 @@
+"""The toolkit Package is a collection of modules that provide a set of tools
+that can be used to process, analyze and plot the analysis results."""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/climate/blob/a6aa1cd2/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base
----------------------------------------------------------------------
diff --git a/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base b/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base
new file mode 100755
index 0000000..1b5c545
--- /dev/null
+++ b/src/main/python/rcmes/toolkit/.svn/text-base/do_data_prep.py.svn-base
@@ -0,0 +1,370 @@
+"""Prepare Datasets both model and observations for analysis using metrics"""
+
+
+import numpy as np
+import numpy.ma as ma
+import sys, os
+
+import Nio
+
+from storage import db, files
+import process
+from utils import misc
+
+# TODO:  swap gridBox for Domain
+def prep_data(settings, obsDatasetList, gridBox, modelList):
+    """
+    
+    returns numOBSs, numMDLs, nT, ngrdY, ngrdX, Times, lons, lats, obsData, modelData, obsList, mdlName
+
+        numOBSs - number of Observational Datasets.  Really need to look at using len(obsDatasetList) instead
+        numMDLs - number of Models used.  Again should use len(modelList) instead
+        nT - Time value count after temporal regridding. Should use the length of the time axis for a given dataset
+        ngrdY - size of the Y-Axis grid after spatial regridding
+        ngrdX - size of the X-Axis grid after spatial regridding
+        Times - list of python datetime objects that represent the times to be used in further calculations
+        lons - 2D numpy array of longitudes for the common analysis grid
+        lats - 2D numpy array of latitudes for the common analysis grid
+        obsData - masked array of regridded observation data
+        modelData - masked array of regridded model data
+        obsList - list of observation dataset names (plus 'ENS-OBS' when an ensemble is added)
+        mdlName - list of model names derived from the input file names
+    """
+    
+    # TODO:  Stop the object Deserialization and work on refactoring the core code here
+    cachedir = settings.cacheDir
+    workdir = settings.workDir
+
+    # Use list comprehensions to deconstruct obsDatasetList
+    #  ['TRMM_pr_mon', 'CRU3.1_pr']    Basically a list of Dataset NAME + '_' + parameter name - THE 'CRU*' one triggers unit conversion issues later
+    # the plan here is to use the obsDatasetList which contains a longName key we can use.
+    obsList = [str(x['longname']) for x in obsDatasetList]
+    # Also using the obsDatasetList with a key of ['dataset_id']
+    obsDatasetId = [str(x['dataset_id']) for x in obsDatasetList]
+    # obsDatasetList ['parameter_id'] list
+    obsParameterId = [str(x['parameter_id']) for x in obsDatasetList]
+    obsTimestep = [str(x['timestep']) for x in obsDatasetList]
+    mdlList = [model.filename for model in modelList]
+
+    # Since all of the model objects in the modelList have the same Varnames and Precip Flag,
+    # I am going to merely pull this from modelList[0] for now
+    modelVarName = modelList[0].varName
+    precipFlag = modelList[0].precipFlag
+    modelTimeVarName = modelList[0].timeVariable
+    modelLatVarName = modelList[0].latVariable
+    modelLonVarName = modelList[0].lonVariable
+    regridOption = settings.spatialGrid
+    timeRegridOption = settings.temporalGrid
+    
+    """
+     Routine to read-in and re-grid both obs and mdl datasets.
+     Processes both single and multiple files of obs and mdl or combinations in a general
way.
+           i)    retrieve observations from the database
+           ii)   load in model data
+           iii)  temporal regridding
+           iv)   spatial regridding
+           v)    area-averaging
+           Input:
+                   cachedir 	- string describing directory path
+                   workdir 	- string describing directory path
+                   obsList        - string describing the observation data files
+                   obsDatasetId 	- int, db dataset id
+                   obsParameterId	- int, db parameter id 
+                   latMin, latMax, lonMin, lonMax, dLat, dLon, naLats, naLons: define the
evaluation/analysis domain/grid system
+    	         latMin		- float
+                   latMax		- float
+                   lonMin		- float
+                   lonMax		- float
+                   dLat  		- float
+                   dLon  		- float
+                   naLats		- integer
+                   naLons		- integer
+                   mdlList	- string describing model file name + path
+                   modelVarName	- string describing name of variable to evaluate (as written
in model file)
+    	         precipFlag	- bool  (is this precipitation data? True/False)
+                   modelTimeVarName - string describing name of time variable in model file
	
+                   modelLatVarName  - string describing name of latitude variable in model
file 
+                   modelLonVarName  - string describing name of longitude variable in model
file 
+                   regridOption 	 - string: 'obs'|'model'|'user'
+                   timeRegridOption -string: 'full'|'annual'|'monthly'|'daily'
+                   maskOption - Boolean
+                   
+                   # TODO:  This isn't true in the current codebase.
+                   Instead the SubRegion's are being used.  You can see that these values
are not
+                   being used in the code, at least they are not being passed in from the
function
+                   
+                   maskLatMin - float (only used if maskOption=1)
+                   maskLatMax - float (only used if maskOption=1)
+    	         maskLonMin - float (only used if maskOption=1)
+                   maskLonMax - float (only used if maskOption=1)
+           Output: image files of plots + possibly data
+           Jinwon Kim, 7/11/2012
+    """
+
+
+    # check the number of obs & model data files
+    numOBSs = len(obsList)
+    numMDLs = len(mdlList)
+    
+    # assign parameters that must be preserved throughout the process
+    
+    print 'start & end time = ', settings.startDate, settings.endDate
+    yymm0 = settings.startDate.strftime("%Y%m")
+    yymm1 = settings.endDate.strftime("%Y%m")
+    print 'start & end eval period = ', yymm0, yymm1
+
+
+
+    #TODO: Wrap in try except blocks instead
+    if numMDLs < 1: 
+        print 'No input model data file. EXIT'
+        sys.exit()
+    if numOBSs < 1: 
+        print 'No input observation data file. EXIT'
+        sys.exit()
+
+    ## Part 1: retrieve observation data from the database and regrid them
+    ##       NB. automatically uses local cache if already retrieved.
+
+    # preparation for spatial re-gridding: define the size of the horizontal array of the target interpolation grid system (ngrdX and ngrdY)
+    print 'regridOption in prep_data= ', regridOption
+    if regridOption == 'model':
+        ifile = mdlList[0]
+        typeF = 'nc'
+        lats, lons, mTimes = files.read_lolaT_from_file(ifile, modelLatVarName, modelLonVarName, modelTimeVarName, typeF)
+        modelObject = modelList[0]
+        latMin = modelObject.latMin
+        latMax = modelObject.latMax
+        lonMin = modelObject.lonMin
+        lonMax = modelObject.lonMax
+    elif regridOption == 'user':
+        # Use the GridBox Object
+        latMin = gridBox.latMin
+        latMax = gridBox.latMax
+        lonMin = gridBox.lonMin
+        lonMax = gridBox.lonMax
+        naLats = gridBox.latCount
+        naLons = gridBox.lonCount
+        dLat = gridBox.latStep
+        dLon = gridBox.lonStep
+        lat = np.arange(naLats) * dLat + latMin
+        lon = np.arange(naLons) * dLon + lonMin
+        lons, lats = np.meshgrid(lon, lat)
+        lon = 0.
+        lat = 0.
+    else:
+        print "INVALID REGRID OPTION USED"
+        sys.exit()
+        
+    ngrdY = lats.shape[0]
+    ngrdX = lats.shape[1]
+
+    regObsData = []
+    
+    for n in np.arange(numOBSs):
+        # spatial regridding
+        oLats, oLons, _, oTimes, oData = db.extractData(obsDatasetId[n],
+                                                        obsParameterId[n],
+                                                        latMin, latMax,
+                                                        lonMin, lonMax,
+                                                        settings.startDate, settings.endDate,
+                                                        cachedir, obsTimestep[n])
+        
+        # TODO: modify this if block with new metadata usage.
+        if precipFlag == True and obsList[n][0:3] == 'CRU':
+            oData = 86400.0 * oData
+
+        nstOBSs = oData.shape[0]         # note that the length of obs data can vary for different obs intervals (e.g., daily vs. monthly)
+        print 'Regrid OBS dataset onto the ', regridOption, ' grid system: ngrdY, ngrdX, nstOBSs= ', ngrdY, ngrdX, nstOBSs
+        print 'For dataset: %s' % obsList[n]
+        
+        tmpOBS = ma.zeros((nstOBSs, ngrdY, ngrdX))
+        
+        print 'tmpOBS shape = ', tmpOBS.shape
+        
+        for t in np.arange(nstOBSs):
+            tmpOBS[t, :, :] = process.do_regrid(oData[t, :, :], oLats, oLons, lats, lons)
+            
+        # TODO:  Not sure this is needed with Python Garbage Collector
+        # The used memory should be freed when the objects are no longer referenced.  If this
+        # continues to be an issue we may need to look at using generators where possible.
+        oLats = 0.0
+        oLons = 0.0       # release the memory occupied by the temporary variables oLats and oLons.
+        
+        # temporally regrid the spatially regridded obs data
+        oData, newObsTimes = process.calc_average_on_new_time_unit_K(tmpOBS, oTimes, unit=timeRegridOption)
+
+        tmpOBS = 0.0
+        
+        # check the consistency of temporally regridded obs data
+        if n == 0:
+            oldObsTimes = newObsTimes
+        else:
+            if oldObsTimes != newObsTimes:
+                print 'temporally regridded obs data time levels do not match at ', n - 1, n
+                print '%s Time through Loop' % (n + 1)
+                print 'oldObsTimes Count: %s' % len(oldObsTimes)
+                print 'newObsTimes Count: %s' % len(newObsTimes)
+                # TODO:  We need to handle these cases using Try Except blocks or insert a sys.exit if appropriate
+                sys.exit()
+            else:
+                oldObsTimes = newObsTimes
+        # if everything's fine, append the spatially and temporally regridded data to the obs data list (regObsData)
+        regObsData.append(oData)
+
+
+    """ all obs datasets have been read-in and regridded. convert the regridded obs data
from 'list' to 'array'
+    also finalize 'obsTimes', the time coordinate values of the regridded obs data.
+    NOTE: using 'list_to_array' assigns values to the missing points; this has become a problem
in handling the CRU data.
+          this problem disappears by using 'ma.array'."""
+
+    obsData = ma.array(regObsData)
+    obsTimes = newObsTimes
+    regObsData = 0
+    oldObsTimes = 0
+    nT = len(obsTimes)
+
+    # TODO:  Refactor this into a function within the toolkit module
+    # compute the simple multi-obs ensemble if multiple obs are used
+    if numOBSs > 1:
+        print 'numOBSs = ', numOBSs
+        oData = obsData
+        print 'oData shape = ', oData.shape
+        obsData = ma.zeros((numOBSs + 1, nT, ngrdY, ngrdX))
+        print 'obsData shape = ', obsData.shape
+        avg = ma.zeros((nT, ngrdY, ngrdX))
+        
+        for i in np.arange(numOBSs):
+            obsData[i, :, :, :] = oData[i, :, :, :]
+            avg[:, :, :] = avg[:, :, :] + oData[i, :, :, :]
+
+        avg = avg / float(numOBSs)
+        obsData[numOBSs, :, :, :] = avg[:, :, :]     # store the obs-ensemble data
+        numOBSs = numOBSs + 1                     # update the number of obs data to include the obs ensemble
+        obsList.append('ENS-OBS')
+    print 'OBS regridded: ', obsData.shape
+
+
+    ## Part 2: load in and regrid model data from file(s)
+    ## NOTE: the two parameters, numMDLs and numMOmx, are defined to represent the number of models (w/ all 240 mos) &
+    ##       the total number of months, respectively, in later multi-model calculations.
+
+    typeF = 'nc'
+    regridMdlData = []
+    # extract the model names and store them in the list 'mdlList'
+    mdlName = []
+    mdlListReversed = []
+    if len(mdlList) > 1:
+        for element in mdlList:
+            mdlListReversed.append(element[::-1])
+        prefix = os.path.commonprefix(mdlList)
+        postfix = os.path.commonprefix(mdlListReversed)[::-1]
+        for element in mdlList:
+            mdlName.append(element.replace(prefix, '').replace(postfix, ''))
+    else:
+        mdlName.append('model')
+
+    
+    for n in np.arange(numMDLs):
+        # read model grid info, then model data
+        ifile = mdlList[n]
+        print 'ifile= ', ifile
+        modelLats, modelLons, mTimes = files.read_lolaT_from_file(ifile, modelLatVarName, modelLonVarName, modelTimeVarName, typeF)
+        mTime, mdlDat, mvUnit = files.read_data_from_one_file(ifile, modelVarName, modelTimeVarName, modelLats, typeF)
+        mdlT = []
+        mStep = len(mTimes)
+
+        for i in np.arange(mStep):
+            mdlT.append(mTimes[i].strftime("%Y%m"))
+
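+        # boolean mask selecting the model time steps that fall within the evaluation period (yymm0..yymm1)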
+        wh = (np.array(mdlT) >= yymm0) & (np.array(mdlT) <= yymm1)
+        modelTimes = list(np.array(mTimes)[wh])
+        mData = mdlDat[wh, :, :]
+   
+        # determine the dimension size from the model time and latitude data.
+        nT = len(modelTimes)
+        
+        # UNUSED VARIABLES - WILL DELETE AFTER TESTING
+        # nmdlY=modelLats.shape[0]
+        # nmdlX=modelLats.shape[1]
+        #print 'nT, ngrdY, ngrdX = ',nT,ngrdY, ngrdX,min(modelTimes),max(modelTimes)
+        print '  The shape of model data to be processed= ', mData.shape, ' for the period ', min(modelTimes), max(modelTimes)
+        # spatial regridding of the model data
+        tmpMDL = ma.zeros((nT, ngrdY, ngrdX))
+
+        if regridOption != 'model':
+            for t in np.arange(nT):
+                tmpMDL[t, :, :] = process.do_regrid(mData[t, :, :], modelLats, modelLons, lats, lons)
+        else:
+            tmpMDL = mData
+
+        # temporally regrid the model data
+        mData, newMdlTimes = process.regrid_in_time(tmpMDL, modelTimes, unit=timeRegridOption)
+        tmpMDL = 0.0
+        
+        # check data consistency for all models 
+        if n == 0:
+            oldMdlTimes = newMdlTimes
+        else:
+            if oldMdlTimes != newMdlTimes:
+                print 'temporally regridded mdl data time levels do not match at ', n - 1, n
+                print len(oldMdlTimes), len(newMdlTimes)
+                sys.exit()
+            else:
+                oldMdlTimes = newMdlTimes
+
+        # if everything's fine, append the spatially and temporally regridded data to the model data list (regridMdlData)
+        regridMdlData.append(mData)
+
+    modelData = ma.array(regridMdlData)
+    modelTimes = newMdlTimes
+    regridMdlData = 0
+    oldMdlTimes = 0
+    newMdlTimes = 0
+    if (precipFlag == True) & (mvUnit == 'KG M-2 S-1'):
+        print 'convert model variable unit from mm/s to mm/day'
+        modelData = 86400.*modelData
+    
+    # check consistency between the time levels of the model and obs data
+    #   this is the final update of time levels: 'Times' and 'nT'
+    if obsTimes != modelTimes:
+        print 'time levels of the obs and model data are not consistent. EXIT'
+        print 'obsTimes'
+        print obsTimes
+        print 'modelTimes'
+        print modelTimes
+        sys.exit()
+    #  'Times = modelTimes = obsTimes' has been established and modelTimes and obsTimes will not be used hereafter. (de-allocated)
+    Times = modelTimes
+    nT = len(modelTimes)
+    modelTimes = 0
+    obsTimes = 0
+
+    print 'Reading and regridding model data are completed'
+    print 'numMDLs, modelData.shape= ', numMDLs, modelData.shape
+    
+    # TODO: Do we need to make this a user supplied flag, or do we just create an ensemble ALWAYS
+    # TODO: Add in Kyo's code here as well
+    # TODO:  Commented out until I can talk with Jinwon about this
+    # compute the simple multi-model ensemble if multiple models are evaluated
+    if numMDLs > 1:
+        mdlData = modelData
+        modelData = ma.zeros((numMDLs + 1, nT, ngrdY, ngrdX))
+        avg = ma.zeros((nT, ngrdY, ngrdX))
+        for i in np.arange(numMDLs):
+            modelData[i, :, :, :] = mdlData[i, :, :, :]
+            avg[:, :, :] = avg[:, :, :] + mdlData[i, :, :, :]
+        avg = avg / float(numMDLs)
+        modelData[numMDLs, :, :, :] = avg[:, :, :]     # store the model-ensemble data
+        # THESE ARE NOT USED.  WILL DELETE AFTER TESTING
+        # i0mdl=0
+        # i1mdl=numMDLs
+        numMDLs = numMDLs + 1
+        mdlList.append('ENS-MODEL')
+        print 'Eval mdl-mean timeseries for the obs periods: modelData.shape= ', modelData.shape
+    # GOODALE:  This ensemble code should be refactored into the process.py module since its purpose is data processing
+
+    # Processing complete
+    print 'data_prep is completed: both obs and mdl data are re-gridded to a common analysis grid'
+    return numOBSs, numMDLs, nT, ngrdY, ngrdX, Times, lons, lats, obsData, modelData, obsList, mdlName
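
The obs and model ensemble blocks above duplicate the same average-and-append logic that the GOODALE comment suggests moving into process.py. A hedged sketch of such a helper, assuming the 4D masked-array layout (dataset, time, y, x) is kept; the name append_ensemble is illustrative only and not part of this commit:

    import numpy.ma as ma

    def append_ensemble(data):
        """Append an unweighted mean ensemble member along the dataset axis.

        data: masked array shaped (numDatasets, nT, ngrdY, ngrdX)
        returns: masked array shaped (numDatasets + 1, nT, ngrdY, ngrdX)
        """
        # Note: ma.mean averages over unmasked values only, while the summation
        # loops above mask a grid point whenever any member masks it; pick the
        # behaviour the evaluation actually wants.
        ensemble = ma.mean(data, axis=0)
        return ma.concatenate((data, ma.expand_dims(ensemble, 0)), axis=0)

With it, each ensemble block reduces to roughly obsData = append_ensemble(obsData); numOBSs += 1; obsList.append('ENS-OBS'), and likewise for the model side.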

