diff --git a/pyiso/caiso.py b/pyiso/caiso.py index 4890e67..83f4dc5 100644 --- a/pyiso/caiso.py +++ b/pyiso/caiso.py @@ -3,6 +3,8 @@ import copy import re from bs4 import BeautifulSoup +import StringIO +import pandas class CAISOClient(BaseClient): @@ -13,15 +15,15 @@ class CAISOClient(BaseClient): see http://www.caiso.com/Documents/InterfaceSpecifications-OASISv4_1_3.pdf """ NAME = 'CAISO' - + base_url_oasis = 'http://oasis.caiso.com/oasisapi/SingleZip' base_url_gen = 'http://content.caiso.com/green/renewrpt/' base_url_outlook = 'http://content.caiso.com/outlook/SP/' base_payload = {'version': 1} oasis_request_time_format = '%Y%m%dT%H:%M-0000' - + TZ_NAME = 'America/Los_Angeles' - + fuels = { 'GEOTHERMAL': 'geo', 'BIOMASS': 'biomass', @@ -33,7 +35,7 @@ class CAISOClient(BaseClient): 'SOLAR THERMAL': 'solarth', 'NUCLEAR': 'nuclear', 'THERMAL': 'thermal', - 'HYDRO': 'hydro', + 'HYDRO': 'hydro', } oasis_markets = { @@ -41,6 +43,11 @@ class CAISOClient(BaseClient): BaseClient.MARKET_CHOICES.fivemin: 'RTM', BaseClient.MARKET_CHOICES.dam: 'DAM', } + LMP_MARKETS = { + 'RTM': 'PRC_INTVL_LMP', + 'DAM': 'PRC_LMP', + 'HASP': 'PRC_HASP_LMP', + } def get_generation(self, latest=False, yesterday=False, start_at=False, end_at=False, **kwargs): @@ -155,6 +162,52 @@ def get_trade(self, latest=False, # return all data return parsed_data + def get_lmp(self, node_id, **kwargs): + df = self.get_lmp_as_dataframe(node_id, **kwargs) + lmp_dict = {} + for i, row in df.iterrows(): + lmp_dict[i.to_pydatetime()] = row['LMP_PRC'] + return lmp_dict + + def get_lmp_as_dataframe(self, node_id, latest=True, start_at=False, end_at=False, + market_run_id='RTM', **kwargs): + """Returns a pandas DataFrame, not a list of dicts""" + # set args + self.handle_options(data='lmp', latest=latest, + start_at=start_at, end_at=end_at, + market_run_id=market_run_id, + **kwargs) + + if latest: + queryname = 'PRC_CURR_LMP' + else: + queryname = self.LMP_MARKETS[market_run_id] + payload = self.construct_oasis_payload(queryname, + resultformat=6, # csv + node=node_id) + + # Fetch data + data = self.fetch_oasis(payload=payload) + + # Turn into pandas Dataframe + str_data = StringIO.StringIO(data) + df = pandas.DataFrame.from_csv(str_data, sep=",") + + # strip congestion and loss prices + df = df.query('LMP_TYPE == "LMP"') + df.rename(columns={'MW': 'LMP_PRC'}, inplace=True) + + # Get all data indexed on 'INTERVALSTARTTIME_GMT' as panda datetime + if df.index.name != 'INTERVALSTARTTIME_GMT': + df.set_index('INTERVALSTARTTIME_GMT', inplace=True) + df.index.name = 'INTERVALSTARTTIME_GMT' + df.index = pandas.to_datetime(df.index) + + # utcify + df.index = self.utcify_index(df.index, tz_name='UTC') + + return df + def construct_oasis_payload(self, queryname, **kwargs): # get start and end times if self.options['latest']: @@ -166,8 +219,11 @@ def construct_oasis_payload(self, queryname, **kwargs): enddatetime = self.options['end_at'] # get market id - market_run_id = self.oasis_markets[self.options['market']] - + try: + market_run_id = self.options['market_run_id'] + except KeyError: + market_run_id = self.oasis_markets[self.options['market']] + # construct payload payload = {'queryname': queryname, 'market_run_id': market_run_id, @@ -178,7 +234,7 @@ def construct_oasis_payload(self, queryname, **kwargs): payload.update(kwargs) # return - return payload + return payload def set_dt_index(self, df, date, hours, end_of_hour=True): if end_of_hour: @@ -208,7 +264,7 @@ def _generation_historical(self): # set up request url_file = this_date.strftime('%Y%m%d_DailyRenewablesWatch.txt') url = self.base_url_gen + url_file - + # carry out request response = self.request(url) if not response: @@ -247,25 +303,25 @@ def _generation_historical(self): # return return parsed_data - + def fetch_oasis(self, payload={}): """Returns a list of report data elements, or an empty list if an error was encountered.""" # set up storage raw_data = [] - + # try get response = self.request(self.base_url_oasis, params=payload) # have request if not response: return [] - + # read data from zip content = self.unzip(response.content) if not content: return [] - + # load xml into soup soup = BeautifulSoup(content) - + # check xml content error = soup.find('m:error') if error: @@ -274,24 +330,27 @@ def fetch_oasis(self, payload={}): msg = 'XML error for CAISO OASIS with payload %s: %s %s' % (payload, code, desc) self.logger.error(msg) return [] - + else: - raw_data = soup.find_all('report_data') - return raw_data - + if payload.get('resultformat', False) == 6: + return content + else: + raw_data = soup.find_all('report_data') + return raw_data + def parse_oasis_renewable(self, raw_data): """Parse raw data output of fetch_oasis for renewables.""" # set up storage preparsed_data = {} parsed_data = [] - + # extract values from xml for raw_soup_dp in raw_data: # set up storage for timestamp ts = self.utcify(raw_soup_dp.find('interval_start_gmt').string) if ts not in preparsed_data: preparsed_data[ts] = {'wind': 0, 'solar': 0} - + # store generation value try: fuel_name = raw_soup_dp.find('renewable_type').string.lower() @@ -300,7 +359,7 @@ def parse_oasis_renewable(self, raw_data): except TypeError: self.logger.error('Error in schema for CAISO OASIS result %s' % raw_soup_dp.prettify()) continue - + # collect values into dps freq = self.options.get('freq', self.FREQUENCY_CHOICES.hourly) market = self.options.get('market', self.MARKET_CHOICES.hourly) @@ -311,14 +370,14 @@ def parse_oasis_renewable(self, raw_data): 'freq': freq, 'market': market, 'gen_MW': 0, 'ba_name': self.NAME} - + # collect data for fuel_name in ['wind', 'solar']: parsed_dp = copy.deepcopy(base_parsed_dp) parsed_dp['fuel_name'] = fuel_name parsed_dp['gen_MW'] += preparsed_dp[fuel_name] parsed_data.append(parsed_dp) - + # return return parsed_data @@ -337,7 +396,7 @@ def parse_oasis_slrs(self, raw_data): freq = self.options.get('freq', self.FREQUENCY_CHOICES.fivemin) market = self.options.get('market', self.MARKET_CHOICES.fivemin) - + # set up storage extracted_data = {} parsed_data = [] @@ -359,7 +418,7 @@ def parse_oasis_slrs(self, raw_data): try: extracted_data[ts] += val except KeyError: - extracted_data[ts] = val + extracted_data[ts] = val # assemble data for ts in sorted(extracted_data.keys()): @@ -378,7 +437,7 @@ def parse_oasis_demand_forecast(self, raw_data): """Parse raw data output of fetch_oasis for system-wide 5-min RTM demand forecast.""" # set up storage parsed_data = [] - + # set up freq and market freq = self.options.get('freq', self.FREQUENCY_CHOICES.fivemin) market = self.options.get('market', self.MARKET_CHOICES.fivemin) @@ -391,7 +450,7 @@ def parse_oasis_demand_forecast(self, raw_data): for raw_soup_dp in raw_data: if raw_soup_dp.find('data_item').string == data_item_key and \ raw_soup_dp.find('resource_name').string == 'CA ISO-TAC': - + # parse timestamp ts = self.utcify(raw_soup_dp.find('interval_start_gmt').string) @@ -400,14 +459,14 @@ def parse_oasis_demand_forecast(self, raw_data): 'freq': freq, 'market': market, 'ba_name': self.NAME} - + # store generation value parsed_dp['load_MW'] = float(raw_soup_dp.find('value').string) parsed_data.append(parsed_dp) - + # return return parsed_data - + def todays_outlook_time(self): # get timestamp response = self.request(self.base_url_outlook+'systemconditions.html') @@ -425,7 +484,7 @@ def fetch_todays_outlook_renewables(self): # get renewables data response = self.request(self.base_url_outlook+'renewables.html') return BeautifulSoup(response.content) - + def parse_todays_outlook_renewables(self, soup, ts): # set up storage parsed_data = [] @@ -448,7 +507,7 @@ def parse_todays_outlook_renewables(self, soup, ts): parsed_dp['gen_MW'] = float(match.group('val')) parsed_dp['fuel_name'] = fuel_name parsed_data.append(parsed_dp) - + # actual 'renewable' value should be only renewables that aren't accounted for in other categories accounted_for_ren = 0 for dp in parsed_data: @@ -457,9 +516,9 @@ def parse_todays_outlook_renewables(self, soup, ts): for dp in parsed_data: if dp['fuel_name'] == 'renewable': dp['gen_MW'] -= accounted_for_ren - - return parsed_data - + + return parsed_data + def _generation_latest(self, **kwargs): # set up parsed_data = [] @@ -467,7 +526,7 @@ def _generation_latest(self, **kwargs): # override market and freq to 10 minute self.options['market'] = self.MARKET_CHOICES.tenmin self.options['freq'] = self.FREQUENCY_CHOICES.tenmin - + # get and parse "Today's Outlook" data soup = self.fetch_todays_outlook_renewables() ts = self.todays_outlook_time() @@ -476,7 +535,7 @@ def _generation_latest(self, **kwargs): return parsed_data total_ren_MW = sum([dp['gen_MW'] for dp in parsed_data]) ts = parsed_data[0]['timestamp'] - + # get OASIS total gen data payload = self.construct_oasis_payload(queryname='ENE_SLRS', schedule='ALL') oasis_data = self.fetch_oasis(payload=payload) @@ -529,6 +588,6 @@ def _generation_forecast(self, **kwargs): dp['gen_MW'] -= total_ren_MW[dp['timestamp']] # add to storage parsed_data.append(dp) - + # return return parsed_data diff --git a/requirements.txt b/requirements.txt index 2e0c4a7..6ac2598 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ python-dateutil==2.2 pytz requests==2.2.1 xlrd==0.9.2 -celery>=3.1 \ No newline at end of file +celery>=3.1 +numexpr==2.4 diff --git a/tests/test_caiso.py b/tests/test_caiso.py index 5fe85e6..bf47ba3 100644 --- a/tests/test_caiso.py +++ b/tests/test_caiso.py @@ -6,6 +6,7 @@ import pytz from datetime import date, datetime, timedelta from bs4 import BeautifulSoup +import pandas class TestCAISOBase(TestCase): @@ -413,6 +414,20 @@ def test_fetch_oasis_demand_rtm(self): 26723\n\ ') + def test_fetch_oasis_csv(self): + c = self.create_client('CAISO') + ts = c.utcify('2014-05-08 12:00') + payload = {'queryname': 'SLD_FCST', + 'market_run_id': 'RTM', + 'startdatetime': (ts-timedelta(minutes=20)).strftime(c.oasis_request_time_format), + 'enddatetime': (ts+timedelta(minutes=20)).strftime(c.oasis_request_time_format), + 'resultformat': 6, + } + payload.update(c.base_payload) + data = c.fetch_oasis(payload=payload) + self.assertEqual(len(data), 7828) + self.assertIn('INTERVALSTARTTIME_GMT', data) + def test_parse_oasis_demand_rtm(self): # set up list of data c = self.create_client('CAISO') @@ -425,7 +440,7 @@ def test_parse_oasis_demand_rtm(self): # test self.assertEqual(len(parsed_data), 1) - expected = {'ba_name': 'CAISO', + expected = {'ba_name': 'CAISO', 'timestamp': datetime(2014, 5, 8, 18, 55, tzinfo=pytz.utc), 'freq': '5m', 'market': 'RT5M', 'load_MW': 26755.0} @@ -528,7 +543,7 @@ def test_parse_oasis_slrs_gen_rtm(self): # test self.assertEqual(len(parsed_data), 2) - expected = {'ba_name': 'CAISO', + expected = {'ba_name': 'CAISO', 'timestamp': datetime(2013, 9, 19, 17, 0, tzinfo=pytz.utc), 'freq': '5m', 'market': 'RT5M', 'fuel_name': 'other', 'gen_MW': 23900.79} @@ -546,7 +561,7 @@ def test_parse_oasis_slrs_trade_dam(self): # test self.assertEqual(len(parsed_data), 3) - expected = {'ba_name': 'CAISO', + expected = {'ba_name': 'CAISO', 'timestamp': datetime(2013, 9, 19, 7, 0, tzinfo=pytz.utc), 'freq': '1hr', 'market': 'DAHR', 'net_exp_MW': -5014.0} @@ -564,8 +579,62 @@ def test_parse_oasis_renewables_dam(self): # test self.assertEqual(len(parsed_data), 6) - expected = {'ba_name': 'CAISO', + expected = {'ba_name': 'CAISO', 'timestamp': datetime(2013, 9, 20, 6, 0, tzinfo=pytz.utc), 'freq': '1hr', 'market': 'DAHR', 'fuel_name': 'wind', 'gen_MW': 580.83} self.assertEqual(expected, parsed_data[0]) + + def test_get_lmp_dataframe_latest(self): + c = self.create_client('CAISO') + ts = pytz.utc.localize(datetime.utcnow()) + lmp = c.get_lmp_as_dataframe('SLAP_PGP2-APND') + self.assertEqual(len(lmp), 1) + + self.assertGreaterEqual(lmp.iloc[0]['LMP_PRC'], -300) + self.assertLessEqual(lmp.iloc[0]['LMP_PRC'], 1500) + + # lmp is a dataframe, lmp.iloc[0] is a Series, Series.name is the index of that entry + self.assertGreater(lmp.iloc[0].name, ts - timedelta(minutes=5)) + self.assertLess(lmp.iloc[0].name, ts + timedelta(minutes=5)) + + def test_get_lmp_dataframe_hist(self): + c = self.create_client('CAISO') + ts = pytz.utc.localize(datetime(2015, 3, 1, 12)) + start = ts - timedelta(hours=2) + lmps = c.get_lmp_as_dataframe('SLAP_PGP2-APND', latest=False, start_at=start, end_at=ts) + self.assertEqual(len(lmps), 24) + + self.assertGreaterEqual(lmps['LMP_PRC'].max(), 0) + self.assertLess(lmps['LMP_PRC'].max(), 1500) + self.assertGreaterEqual(lmps['LMP_PRC'].min(), -300) + + self.assertGreaterEqual(lmps.index.to_pydatetime().min(), start) + self.assertLessEqual(lmps.index.to_pydatetime().max(), ts) + + def test_get_lmp_latest(self): + c = self.create_client('CAISO') + ts = pytz.utc.localize(datetime.utcnow()) + lmp = c.get_lmp('SLAP_PGP2-APND') + self.assertEqual(len(lmp), 1) + + self.assertGreaterEqual(min(lmp.keys()), ts - timedelta(minutes=5)) + self.assertLessEqual(max(lmp.keys()), ts + timedelta(minutes=5)) + + self.assertGreaterEqual(min(lmp.values()), -300) + self.assertLess(max(lmp.values()), 1500) + + def test_get_lmp_hist(self): + c = self.create_client('CAISO') + ts = pytz.utc.localize(datetime(2015, 3, 1, 11, 0, 0)) + start = ts - timedelta(hours=2) + lmp = c.get_lmp('SLAP_PGP2-APND', latest=False, start_at=start, end_at=ts) + self.assertEqual(len(lmp), 24) + + self.assertGreaterEqual(min(lmp.keys()), start) + self.assertLessEqual(max(lmp.keys()), ts) + + self.assertGreaterEqual(min(lmp.values()), -300) + self.assertLess(max(lmp.values()), 1500) + +