# -*- coding: utf-8 -*-
"""
Created on Tue Jan 9 16:04:29 2018
@author: cenv0574
"""
import os
from functions import unzip_worldpop,spatial_overlays,get_country,create_poly_files
import geopandas as gpd
import shutil
from rasterstats import zonal_stats
import numpy as np
import fiona
from shapely.geometry import mapping
import time
from multiprocess import Pool , cpu_count
import urllib.request
import pandas as pd
[docs]def single_country(country,continent_osm,base_path,grump,overwrite=False,tertiary=False,track=False):
"""
Estimation of the Rural Accesibility Index (RAI) for the specified country.
The RAI is calculated by using the methodology specified in the report below:
World Bank. 2016. Measuring rural access: using new technologies.
Washington, D.C. : World Bank Group. http://bit.ly/2p5asME
Args:
*country*: to country for which we calculate the RAI.
*continent_osm*: the continent that the country 'belongs' to. This is required in the osm extraction.
*base_path*: base path to location of all files.
*idx_urban*: rTree index of the urban areas. By using an rtree index, we can quickly find the intersecting areas.
*tertiary*: default option is 'False', but if we want to estimate the **RAI** for tertiary roads as well, we can set this option to 'True'.
*track*: default option is 'False', but if we want to estimate the **RAI** for both tracks and tertiary roads as well, we can set this option to 'True'.
Returns:
A dictionary with the **total rural population**, the **total rural population within 2km of the selected roads** and the **RAI** for the specified country.
"""
try:
print('%s started!' % country)
# =============================================================================
# """ First set all paths for output dirs"""
# =============================================================================
calc_dir = os.path.join(base_path,'calc')
# =============================================================================
# """ Set the paths for the files we are going to use """
# =============================================================================
#set global file paths for worldpop
if ('central-america' in continent_osm) or ('south-america' in continent_osm) :
world_pop = os.path.join(base_path,'Worldpop','LAC_PPP_2015_adj_v2.tif')
elif ('africa' in continent_osm):
world_pop = os.path.join(base_path,'Worldpop','AFR_PPP_2015_adj_v2.tif')
elif ('asia' in continent_osm):
world_pop = os.path.join(base_path,'Worldpop','Asia_PPP_2015_adj_v2.tif')
elif ('europe' in continent_osm):
world_pop = os.path.join(base_path,'Worldpop','EUROPOP_WGS84.tif')
# due to a few islands not included in the global worldpop data, we need to
# extract their data individually
islands = ['FJI','KIR','MHL','FSM','PLW','PNG','WSM','SLB','TON','VUT']
if country in islands:
temp_path = os.path.join(base_path,'Worldpop','temp_%s' % country)
if not os.path.exists(temp_path):
os.makedirs(temp_path)
unzip_worldpop(country,base_path,temp_path)
world_pop = os.path.join(temp_path,'popmap15adj.tif')
if country == 'KIR':
world_pop = os.path.join(temp_path,'popmap15adj_lzw.tif')
elif country == 'TON':
world_pop = os.path.join(temp_path,'popmap15.tif')
world_pop_out = os.path.join(temp_path,'popmap15adj_wgs84.tif')
os.system('gdalwarp -t_srs EPSG:4326 -tr 0.018 0.018 -overwrite '+world_pop+' '+world_pop_out)
world_pop = world_pop_out
if country == 'MEX':
world_pop = os.path.join(base_path,'Worldpop','MEX_ppp_v2c_2015_UNadj.tif')
elif country == 'UKR':
world_pop = os.path.join(base_path,'Worldpop','UKR_ppp_v2b_2015_UNadj.tif')
# set path for global country shapes
shp_world = os.path.join(base_path,'input_data','country_shapes.shp')
# set country shapefile paths
shp_country = os.path.join(calc_dir,'%s.shp' % country)
buffer_file = os.path.join(calc_dir,'%s_buffer.shp' % country)
# =============================================================================
# """ Get country boundary from world boundaries file and save to shp"""
# =============================================================================
world_boundaries = gpd.read_file(shp_world)
country_boundary = world_boundaries.loc[world_boundaries['ISO3166_1_'] == country]
country_boundary.crs = {'init' :'epsg:4326'}
# country_boundary = world_boundaries.loc[world_boundaries['ISO_Codes'] == country]
country_boundary.to_file(shp_country)
urban_geoms = gpd.read_file(grump)
urban_geoms.crs = {'init' :'epsg:4326'}
grump_country_in = os.path.join(base_path,'calc','grump_%s.shp' % country)
grump_country_out = os.path.join(base_path,'calc','exact_grump_%s.shp' % country)
if len(urban_geoms.loc[urban_geoms['ISO3']==country]) != 0:
urban_geoms = urban_geoms.loc[urban_geoms['ISO3']==country]
urban_geoms.to_file(grump_country_in)
os.system('ogr2ogr -skipfailures -nlt geometry -clipsrc '+shp_country+' '+grump_country_out+' '+grump_country_in)
elif country == 'TUV':
None
else:
urban_geoms = spatial_overlays(country_boundary,urban_geoms)
urban_geoms.to_file(grump_country_in)
os.system('ogr2ogr -skipfailures -nlt geometry -clipsrc '+shp_country+' '+grump_country_out+' '+grump_country_in)
# =============================================================================
# """ Subtract the people living in urban areas from the total population
# =============================================================================
""" Estimate the total population living in rural areas """
country_tot = sum(item['sum'] for item in zonal_stats(country_boundary,world_pop,
stats="sum") if item['sum'] is not None)
if country == 'TUV':
stats_ctry = country_tot
else:
stats_ctry = country_tot- sum(item['sum'] for item in zonal_stats(gpd.read_file(grump_country_out),world_pop,
stats="sum") if item['sum'] is not None)
# =============================================================================
# """ Next step is to load roads of the country and create a buffer """
# =============================================================================
# load data, similar as when we estimate the length of the roads
load_country = get_country(country,continent_osm,base_path,overwrite=False,RAI=True)
# if we include tertiary as well, we do a different filter
if tertiary == True:
load_country = load_country.loc[load_country['roads'].isin(['primary','secondary','tertiary'])]
elif track == True:
load_country = load_country.loc[load_country['roads'].isin(['primary','secondary','tertiary','track'])]
else:
load_country = load_country.loc[load_country['roads'].isin(['primary','secondary'])]
# to find the buffer around a road, we convert it quickly to a utm
# coordinate system to be able to do an exact 2km around the road (it
# is a bit tricky to find this based on a WGS84 coordinate sytem)
if len(load_country) == 0:
if country in islands:
try:
shutil.rmtree(temp_path, ignore_errors=True)
except:
True
return {country: [stats_ctry,0,0]}
country_centroid = country_boundary.centroid.values[0]
lat,lon = country_centroid.bounds[1],country_centroid.bounds[0]
# formula below based on :https://gis.stackexchange.com/a/190209/80697
epsg=int(32700-np.round((45+lat)/90,0)*100+np.round((183+lon)/6,0))
load_country = load_country.to_crs(epsg=epsg)
# and do the actual buffer of 2km
load_country['geometry'] = load_country.buffer(2000)
# and change the geodataframe back to WGS84 again
load_country = load_country.to_crs(epsg=4326)
# union this to one multipolygon and save to a shapefile
try:
poly = load_country['geometry'].unary_union
except:
poly = load_country['geometry'].buffer(0).unary_union
# define a polygon feature geometry with one attribute
schema = {
'geometry': 'Polygon',
'properties': {'id': 'int'},
}
# write a new Shapefile
with fiona.open(buffer_file, 'w', 'ESRI Shapefile', schema) as c:
## If there are multiple geometries, put the "for" loop here
c.write({
'geometry': mapping(poly),
'properties': {'id': 0},
})
# =============================================================================
# """ Here we exlude urban areas from the road buffer file, similar as how
# we did it in the country boundary file """
# =============================================================================
buffer_inb = os.path.join(base_path,'calc','%s_buffer.shp' % country)
buffer_in = os.path.join(base_path,'calc','%s_buffer_exact.shp' % country)
buffer_out = os.path.join(base_path,'calc','%s_buffer_urban.shp' % country)
os.system('ogr2ogr -skipfailures -nlt geometry -clipsrc '+buffer_inb+' '+buffer_in+' '+shp_country)
total_buffer = gpd.read_file(buffer_in)
if country == 'TUV':
None
else:
urban_geoms_exact = gpd.read_file(grump_country_out)
urban_buffer = spatial_overlays(total_buffer,urban_geoms_exact,how='intersection')
urban_buffer.to_file(buffer_out)
""" Estimate the total rural population living within 2km of selected roads"""
tot_buf_country = sum(item['sum'] for item in zonal_stats(total_buffer,world_pop,
stats="sum") if item['sum'] is not None)
if country == 'TUV':
stats_roads = tot_buf_country
else:
stats_roads = tot_buf_country- sum(item['sum'] for item in zonal_stats(urban_buffer,world_pop,
stats="sum") if item['sum'] is not None)
if country in islands:
try:
shutil.rmtree(temp_path, ignore_errors=True)
except:
True
""" Return the total rural population, total population with 2km of
selected roads and the RAI"""
if stats_roads == 0:
return {country: [stats_ctry,0,0]}
stats =(stats_roads/stats_ctry)*100
if stats > 100:
stats = 100
return {country: [stats_ctry,stats_roads,stats]}
except Exception as e:
print(str(e)+' for %s' % country)
return {country: [0,0,0]}
[docs]def all_countries(base_path,multiprocess=True,overwrite=True,tertiary=False):
"""
Main function to estimate the **RAI** for all the countries we are interested in.
Args:
*base_path* : Base path to the location of all files and directories in this project.
*multiprocess* : Set to True by default. Set to False in the case of limited processing power.
*overwrite* : Set to True by default. This relates to all input data (i.e. .poly files, .osm.pbf files and shapefiles).
*tertiary* : Set to False by default. When set to True, the calculation of the **RAI** will run a second time, now including tertiary roads.
Returns:
An Excel with file the **total rural population**, the **total rural population within 2km of the selected roads** and the **RAI** for each country. if *tertiary* is set to True, the Excel file will return an additional sheet with the results of that second calculation.
"""
print ('The calculation of road lenghts has started!')
start = time.time()
# =============================================================================
# """ Set path to dirs"""
# =============================================================================
dir_out = os.path.join(base_path,'output_data')
poly_dir = os.path.join(base_path,'poly_files')
osm_path_in = os.path.join(base_path,'osm_continent')
# =============================================================================
# """ create directories if they are not created yet """
# =============================================================================
if not os.path.exists(dir_out):
os.makedirs(dir_out)
if not os.path.exists(poly_dir):
os.makedirs(poly_dir)
if not os.path.exists(osm_path_in):
os.makedirs(osm_path_in)
# =============================================================================
# """Set path to files we use """
# =============================================================================
wb_country_in = os.path.join(base_path,'input_data','wbccodes2014.csv')
global_shape = os.path.join(base_path,'input_data','2015_GAUL_Dataset_Mod.gdb')
# =============================================================================
# """Load country shapes and list and only save the required countries"""
# =============================================================================
wb_country = pd.read_csv(wb_country_in,header=0,index_col=0)
#filter high income countries from country file
country_list = wb_country[['country','continent']].loc[wb_country['wbregion']!='YHI']
# add column to country list so we can easily look up the required continental
# osm file for that continent
map_continent = {'MA': 'central-america','SA': 'south-america','EU': 'europe','AS': 'asia',
'AU': 'australia-oceania','AF':'africa','AM':'north-america'}
country_list['osm-cont'] = country_list['continent'].map(lambda x: (map_continent[x]))
# =============================================================================
# """ create .poly files to clip countries from osm.pbf files """
# =============================================================================
if not os.listdir(poly_dir):
create_poly_files(base_path,global_shape,save_shapefile=overwrite)
# =============================================================================
# """ check if we have actually downloaded the openstreetmap input files. If not,
# lets download them. Note: this will take a while! """
# =============================================================================
continent_list = ['central-america','south-america','europe','asia','australia-oceania','africa','north-america']
for continent in continent_list:
url = 'http://download.geofabrik.de/%s-latest.osm.pbf' % continent
if '%s-latest.osm.pbf' % (continent) not in os.listdir(osm_path_in):
urllib.request.urlretrieve(url, osm_path_in)
# =============================================================================
# """ create extracted osm files for each country per continent """
# =============================================================================
out = []
countries = []
continent_osms = []
base_paths = []
overwrites = []
tertaries_true = []
tertaries_false = []
for country in country_list.iterrows():
country = country[1]
continent_osm = os.path.join(osm_path_in,'%s-latest.osm.pbf' % (country['osm-cont']))
countries.append(country['country'])
continent_osms.append(continent_osm)
base_paths.append(base_path)
overwrites.append(overwrite)
tertaries_true.append(True)
tertaries_false.append(False)
# multiprocessing will start if set to True. Set to False with limited processing capacities
if multiprocess==True:
pool = Pool(cpu_count()-1)
out = pool.starmap(single_country, zip(countries,continent_osms,base_paths,overwrites,tertaries_false))
# when multiprocessing set to False, we will just loop over the countries.
else:
out = []
i = 0
for country in country_list.iterrows():
country = country[1]
continent_osm = os.path.join(osm_path_in,'%s-latest.osm.pbf' % (country['osm-cont']))
out.append(single_country(country['country'],continent_osm,base_path,overwrites[i],tertaries_false[i]))
i += 1
map_dict = {0: 'Pop Rural Total', 1: 'Pop Rural < 2km', 2: 'RAI'}
df = pd.concat([pd.DataFrame(l) for l in out],axis=1)
df.index = df.index.map(mapper=(lambda x: map_dict[x]))
map_country = dict(zip(wb_country['country'],wb_country['country_name']))
df = df.T
df['Country'] = df.index.to_series().map(map_country)
df.set_index('Country',inplace=True,drop=True)
writer = pd.ExcelWriter(os.path.join(dir_out,'RAI.xlsx'))
df.to_excel(writer,'RAI_PS')
if tertiary ==True:
if multiprocess==True:
pool = Pool(cpu_count()-1)
out = pool.starmap(single_country, zip(countries,continent_osms,base_paths,overwrites,tertaries_false))
# when multiprocessing set to False, we will just loop over the countries.
else:
out = []
i = 0
for country in country_list.iterrows():
country = country[1]
continent_osm = os.path.join(osm_path_in,'%s-latest.osm.pbf' % (country['osm-cont']))
out.append(single_country(country['country'],continent_osm,base_path,overwrites[i],tertaries_false[i]))
i += 1
df = pd.concat([pd.DataFrame(l) for l in out],axis=1)
df.index = df.index.map(mapper=(lambda x: map_dict[x]))
map_country = dict(zip(wb_country['country'],wb_country['country_name']))
df = df.T
df['Country'] = df.index.to_series().map(map_country)
df.set_index('Country',inplace=True,drop=True)
df.to_excel(writer,'RAI_PST')
writer.save()
end = time.time()
print('It took ' + str(np.float16((end - start))) + " seconds to finish!")