"""
@author: Elco Koks
@date: Jan, 2018
"""
import os
import urllib.request
from multiprocess import Pool , cpu_count
import time
import pandas as pd
from functions import create_poly_files,get_country,create_figure
import numpy as np
def single_country(country, continent_osm, base_path, overwrite=True, savefig=False, report=False):
    """
    Function to estimate the total road length per road type in a country.

    Args:
        *country* : Identifier of the country to process. It is passed on to
        ``get_country``/``create_figure`` and used as the name of the output column.

        *continent_osm* : Path to the continental .osm.pbf file the country 'belongs' to.
        This is required for the osm extraction.

        *base_path* : Base path to location of all files.

        *overwrite* : Accepted for interface compatibility; not read by this function.
        The country data is always requested with ``overwrite=False`` first and
        with ``overwrite=True`` only if that first attempt fails.

        *savefig* : When truthy, ``create_figure`` is called for the loaded road data.

        *report* : Accepted for interface compatibility; not read by this function.

    Returns:
        A pandas DataFrame with a single column named after *country*, indexed by
        the values of the ``roads`` column and holding the summed values per road
        type (presumably road length in kilometers -- confirm against ``get_country``).
        Returns ``None`` if anything goes wrong; the error is printed instead of
        raised so that one failing country does not abort a batch run.
    """
    try:
        print('%s started!' % country)

        # =============================================================================
        # Load country road data
        # =============================================================================
        # Try to reuse existing data first; on any ordinary failure, regenerate it.
        # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit are not
        # swallowed and silently turned into a second extraction attempt.
        try:
            load_country = get_country(country, continent_osm, base_path, overwrite=False)
        except Exception:
            load_country = get_country(country, continent_osm, base_path, overwrite=True)

        # =============================================================================
        # Groupby aggregated road classification
        # =============================================================================
        dist_per_roadtype = load_country.groupby('roads').sum()
        # Exactly one aggregated column is expected here; it is labelled with the country.
        dist_per_roadtype.columns = [country]

        # =============================================================================
        # Create and save a plot of the road network
        # =============================================================================
        if savefig:
            create_figure(country, load_country, base_path)

        # =============================================================================
        # Return the aggregated totals per road type
        # =============================================================================
        return dist_per_roadtype

    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None.
        print(str(e) + ' for %s' % country)
        return None
def all_countries(base_path, multiprocess=True, overwrite=True, savefig=False, report=False):
    """
    Main function to estimate the length of all the roads and countries we are interested in.

    Args:
        *base_path* : Base path to the location of all files and directories in this project.

        *multiprocess* : Set to True by default. Set to False in the case of limited processing power.

        *overwrite* : Set to True by default. Passed as ``save_shapefile`` to
        ``create_poly_files`` and forwarded to ``single_country``.

        *savefig* : Set to False by default. When set to True, the ``Figures`` directory
        is created and a figure with the roads of each country is requested.

        *report* : Set to False by default. Forwarded unchanged to ``single_country``.

    Returns:
        None. The result is written to ``output_data/dist_roads.xlsx`` (sheet ``output``)
        below *base_path*, with one row per country and one column per road type.

    Raises:
        ValueError: if no country produced a result (nothing to concatenate).
    """
    print ('The calculation of road lenghts has started!')
    start = time.time()

    # =============================================================================
    # Set path to dirs
    # =============================================================================
    dir_out = os.path.join(base_path, 'output_data')
    poly_dir = os.path.join(base_path, 'poly_files')
    osm_path_in = os.path.join(base_path, 'osm_continent')
    fig_dir = os.path.join(base_path, 'Figures')

    # =============================================================================
    # Create directories if they are not created yet
    # =============================================================================
    for directory in (dir_out, poly_dir, osm_path_in):
        os.makedirs(directory, exist_ok=True)
    if savefig:
        os.makedirs(fig_dir, exist_ok=True)

    # =============================================================================
    # Set path to files we use
    # =============================================================================
    wb_country_in = os.path.join(base_path, 'input_data', 'wbccodes2014.csv')
    global_shape = os.path.join(base_path, 'input_data', '2015_GAUL_Dataset_Mod.gdb')

    # =============================================================================
    # Load country list and only keep the required countries
    # =============================================================================
    wb_country = pd.read_csv(wb_country_in, header=0, index_col=0)

    # Drop the rows whose 'wbregion' is 'YHI' (high income countries).
    # .copy() so the column added below lands on an independent frame rather
    # than on a slice of wb_country.
    country_list = wb_country.loc[wb_country['wbregion'] != 'YHI', ['country', 'continent']].copy()

    # Add a column to the country list so we can easily look up the required
    # continental osm file for each country.
    map_continent = {'MA': 'central-america', 'SA': 'south-america', 'EU': 'europe', 'AS': 'asia',
                     'AU': 'australia-oceania', 'AF': 'africa', 'AM': 'north-america'}
    # Explicit lookup (not Series.map(dict)) so an unknown continent code raises
    # KeyError instead of silently becoming NaN.
    country_list['osm-cont'] = country_list['continent'].map(lambda x: map_continent[x])

    # =============================================================================
    # Create .poly files to clip countries from osm.pbf files
    # =============================================================================
    if not os.listdir(poly_dir):
        create_poly_files(base_path, global_shape, save_shapefile=overwrite)

    # =============================================================================
    # Check if we have actually downloaded the openstreetmap input files. If not,
    # let's download them. Note: this will take a while!
    # =============================================================================
    for continent in map_continent.values():
        osm_file = '%s-latest.osm.pbf' % continent
        target = os.path.join(osm_path_in, osm_file)
        if not os.path.exists(target):
            url = 'http://download.geofabrik.de/%s' % osm_file
            # urlretrieve needs a file path, not the directory. Download to a
            # temporary name first so an interrupted transfer is never mistaken
            # for a complete file on the next run.
            partial = target + '.part'
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, target)

    # =============================================================================
    # Collect the arguments for each country: one tuple per single_country call
    # =============================================================================
    jobs = [
        (
            row['country'],
            os.path.join(osm_path_in, '%s-latest.osm.pbf' % row['osm-cont']),
            base_path,
            overwrite,
            savefig,
            report,
        )
        for _, row in country_list.iterrows()
    ]

    # Multiprocessing will start if set to True. Set to False with limited processing capacities.
    if multiprocess:
        # Leave one core free, but never ask for fewer than one worker
        # (cpu_count() - 1 is 0 on a single-core machine, which Pool rejects).
        with Pool(max(1, cpu_count() - 1)) as pool:
            out = pool.starmap(single_country, jobs)
    else:
        # Without multiprocessing we just loop over the countries.
        out = [single_country(*job) for job in jobs]

    # =============================================================================
    # Combine the per-country results and write them to Excel
    # =============================================================================
    # single_country returns None for countries that failed; leave those out.
    results = [result for result in out if result is not None]
    df = pd.concat(results, axis=1).T

    # Replace the country identifier in the index by the 'country_name' from the input file.
    map_country = dict(zip(wb_country['country'], wb_country['country_name']))
    df['Country'] = df.index.to_series().map(map_country)
    df = df.set_index('Country', drop=True)

    # The context manager saves and closes the workbook (ExcelWriter.save() no
    # longer exists in recent pandas versions).
    with pd.ExcelWriter(os.path.join(dir_out, 'dist_roads.xlsx')) as writer:
        df.to_excel(writer, sheet_name='output')

    end = time.time()
    # Plain float rounding: a float16 would overflow to inf for runs above 65504 seconds.
    print('It took ' + str(round(end - start, 1)) + " seconds to finish!")