Skip to content

Commit

Permalink
Add WEPP Location ID to database (PLATTS ID)
Browse files Browse the repository at this point in the history
Addressing part of Issue #6, add a new field (`wepp_id`) to the output
database. This field holds a reference to a unique plant identifier in
the widely-used PLATTS-WEPP dataset. This should allow for
intercomparisons with an independently derived dataset, and will be
helpful to users who want to build a composite dataset for their
analyses.

The `wepp_id` field is a delimited field: when multiple LOCATIONIDs match
a single plant, the LOCATIONID values are separated by the pipe
character (|).

With this update, there are 16460 plants with known `wepp_id`s, which
account for 4312.6 GW in total (about 80% of all capacity in the output
database).

There are no changes to plants in the database except for the addition
of this `wepp_id`.

Database Version: 1.1.10
  • Loading branch information
loganbyers committed Dec 10, 2018
1 parent 54fec91 commit 383d6b3
Show file tree
Hide file tree
Showing 29 changed files with 2,003,638 additions and 1,873,151 deletions.
8 changes: 6 additions & 2 deletions build_databases/build_global_power_plant_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
Builds the Global Power Plant Database from various data sources.
- Log build to DATABASE_BUILD_LOG_FILE
- Use country and fuel information as specified in powerplant_database.py
- Use matches/concordances as specified in powerplant_database.py
TO-DOS:
- Alias list for power plants
- Primary fuel type designation
- Link to Platts/WEPP ID
"""

import csv
Expand Down Expand Up @@ -196,6 +195,11 @@
estimated_plants = pw.estimate_generation(core_database)
print('...estimated for {0} plants.'.format(estimated_plants))

# STEP 4.1: Add WEPP ID matches
pw.add_wepp_id(core_database)
if DATA_DUMP:
pw.add_wepp_id(datadump)

# STEP 5: Write the Global Power Plant Database
for dbname, data in database_additions.iteritems():
print("Added {0} plants ({1} MW) from {2}.".format(data['count'], data['capacity'], dbname))
Expand Down
2 changes: 1 addition & 1 deletion output_database/DATABASE_VERSION
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
1.1.9
1.1.10

59,364 changes: 29,682 additions & 29,682 deletions output_database/global_power_plant_database.csv

Large diffs are not rendered by default.

426 changes: 213 additions & 213 deletions output_database/global_power_plant_database_country_summary.csv

Large diffs are not rendered by default.

51 changes: 48 additions & 3 deletions powerplant_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
COUNTRY_NAMES_THESAURUS_FILE = os.path.join(RESOURCES_DIR, "country_names_thesaurus.csv")
COUNTRY_INFORMATION_FILE = os.path.join(RESOURCES_DIR, "country_information.csv")
MASTER_PLANT_CONCORDANCE_FILE = os.path.join(RESOURCES_DIR, "master_plant_concordance.csv")
WEPP_CONCORDANCE_FILE = os.path.join(RESOURCES_DIR, "master_wepp_concordance.csv")
SOURCE_THESAURUS_FILE = os.path.join(RESOURCES_DIR, "sources_thesaurus.csv")
GENERATION_FILE = os.path.join(RESOURCES_DIR, "generation_by_country_by_fuel_2014.csv")

Expand Down Expand Up @@ -59,15 +60,17 @@ def __init__(self, plant_idnr, plant_name, plant_country,
plant_other_fuel=NO_DATA_SET,
plant_generation=NO_DATA_OTHER,
plant_commissioning_year=NO_DATA_NUMERIC,
plant_estimated_generation_gwh=NO_DATA_NUMERIC
plant_estimated_generation_gwh=NO_DATA_NUMERIC,
plant_wepp_id=NO_DATA_UNICODE
):

# check and set data for attributes that should be unicode
unicode_attributes = {
'idnr': plant_idnr, 'name': plant_name, 'country': plant_country,
'owner': plant_owner, 'nat_lang': plant_nat_lang,
'url': plant_source_url, 'coord_source': plant_coord_source,
'primary_fuel': plant_primary_fuel
'primary_fuel': plant_primary_fuel,
'wepp_id': plant_wepp_id
}

for attribute, input_parameter in unicode_attributes.iteritems():
Expand Down Expand Up @@ -819,6 +822,41 @@ def make_plant_concordance(master_plant_condordance_file=MASTER_PLANT_CONCORDANC
}
return plant_concordance

def add_wepp_id(powerplant_dictionary, wepp_matches_file=WEPP_CONCORDANCE_FILE):
    """
    Set WEPP Location ID for each plant, if a match is available.

    Reads the matches file row by row and skips rows whose
    `wepp_location_id` column is empty. For every remaining row, the plant
    stored under the row's `gppd_idnr` gets its `wepp_id` attribute set,
    unless that attribute already holds a non-empty value. Problems
    (unknown plant, plant without a `wepp_id` attribute, plant already
    matched) are printed and the row is skipped; a summary count is
    printed at the end.

    Modifies powerplant_dictionary in place.

    Parameters
    ----------
    powerplant_dictionary : dict
        Dictionary of all PowerPlant objects, keyed by plant ID.
    wepp_matches_file : path
        Path to CSV file with WEPP Location ID matches. Must provide the
        columns `gppd_idnr` and `wepp_location_id`.

    Returns
    -------
    None.
    """
    wepp_match_count = 0
    # NOTE(review): the 'rbU' mode is kept unchanged from the original; it
    # looks Python 2-specific (binary file handed to the csv module) -
    # revisit if this module is ever ported to Python 3.
    with open(wepp_matches_file, 'rbU') as f:
        for row in csv.DictReader(f):
            # rows without a WEPP Location ID carry no match information
            if not row['wepp_location_id']:
                continue

            gppd_id = str(row['gppd_idnr'])
            wepp_id = str(row['wepp_location_id'])

            if gppd_id not in powerplant_dictionary:
                print(u"Error: Attempt to match WEPP ID {0} to non-existant plant {1}".format(wepp_id, gppd_id))
                continue

            plant = powerplant_dictionary[gppd_id]

            # Only the attribute read is guarded, and only against
            # AttributeError, so unrelated failures (or Ctrl-C) are not
            # misreported as a missing attribute.
            try:
                existing_wepp_id = plant.wepp_id
            except AttributeError:
                print(u"Error: plant {0} does not have wepp_id attribute".format(gppd_id))
                continue

            # test that we haven't already set this wepp id
            if existing_wepp_id:
                print(u"Error: Duplicate WEPP match for plant {0}".format(gppd_id))
                continue

            plant.wepp_id = wepp_id
            wepp_match_count += 1

    print(u"Added {0} matches to WEPP plants.".format(wepp_match_count))

### STRING CLEANING ###

def format_string(value, encoding=UNICODE_ENCODING):
Expand Down Expand Up @@ -1097,6 +1135,7 @@ def _dict_row(powerplant):
ret['latitude'] = NO_DATA_NUMERIC
ret['longitude'] = NO_DATA_NUMERIC
ret['geolocation_source'] = powerplant.coord_source.encode(UNICODE_ENCODING)
ret['wepp_id'] = powerplant.wepp_id.encode(UNICODE_ENCODING)
ret['commissioning_year'] = powerplant.commissioning_year
# handle fuel
ret['primary_fuel'] = powerplant.primary_fuel
Expand Down Expand Up @@ -1140,6 +1179,7 @@ def _dict_row(powerplant):
"source",
"url",
"geolocation_source",
"wepp_id",
"year_of_capacity_data",
"generation_gwh_2013",
"generation_gwh_2014",
Expand Down Expand Up @@ -1234,6 +1274,9 @@ def read_csv_file_to_dict(filename):
# check if geolocation source is empty string
if not row['geolocation_source']:
row['geolocation_source'] = None
# check if wepp_id is empty string
if not row['wepp_id']:
row['wepp_id'] = None
# add row to output dict
pdb[row['gppd_idnr']] = row
return pdb
Expand Down Expand Up @@ -1291,6 +1334,7 @@ def write_sqlite_file(plants_dict, filename, return_connection=False):
source TEXT,
url TEXT,
geolocation_source TEXT,
wepp_id TEXT,
year_of_capacity_data INTEGER,
generation_gwh_2013 REAL,
generation_gwh_2014 REAL,
Expand All @@ -1304,7 +1348,7 @@ def write_sqlite_file(plants_dict, filename, return_connection=False):
c.execute('begin')
for k, p in plants_dict.iteritems():
stmt = u'''INSERT INTO powerplants VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
vals = (
p['country'],
p['country_long'],
Expand All @@ -1322,6 +1366,7 @@ def write_sqlite_file(plants_dict, filename, return_connection=False):
p['source'],
p['url'],
p['geolocation_source'],
p['wepp_id'],
p['year_of_capacity_data'],
p['generation_gwh_2013'],
p['generation_gwh_2014'],
Expand Down
Loading

0 comments on commit 383d6b3

Please sign in to comment.