from IPython.core.display import HTML
from IPython.display import Image
# Header image for the notebook, loaded from the local data directory.
Image(filename = '../data/zambia-travel.ngsversion.1503413472260.adapt.1900.1.jpg')
Photo credit: National Geographic Travel Guide
# Inject a small script plus a "View Code" button into the rendered page.
# code_toggle() hides or shows every `div.input` element and flips the
# code_show flag; it is also run once on document-ready, so the inputs start
# hidden. The script relies on `$` (presumably jQuery) being available on the page.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="View Code"></form>''')
import pandas as pd
import numpy as np
from plotly import tools
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
# Enable plotly's offline notebook mode before any iplot() call below.
# NOTE(review): connected=True is documented to load plotly.js from a CDN
# rather than embedding it in the notebook -- confirm for the installed version.
init_notebook_mode(connected=True)
# Load the main greenhouse-gas emissions dataset from the 'GHG Emissions' sheet.
df = pd.read_excel('../data/CW_CAIT_GHG_Emissions_31102017.xlsx',
sheet_name='GHG Emissions')
# Use the values found in the first row as the column names.
df.columns = list(df.iloc[0].values)
# Drop the entries labelled 'Country' on index level 0 -- presumably the header
# row that was just promoted to column names (TODO confirm against the workbook).
df = df.drop('Country',level=0)
# Flatten the index into ordinary columns (named level_0 / level_1 and renamed
# to country / year further down); the indexed frame `df` is kept as well.
df_= df.reset_index()
The data contains yearly values (1990-2014) by country and emission-contributing sector, in megatons of CO2e. The data has some NaN values where no data was collected for a year, but is otherwise clean.
# Preview the first three rows of the flattened emissions table.
df_.head(3)

# Country -> continent lookup borrowed from the gapminder file; duplicates are
# dropped after narrowing to the two columns of interest.
df3 = (
    pd.read_csv('../data/gapminderDataFiveYear.csv')
    .loc[:, ['country', 'continent']]
    .drop_duplicates()
)

# Give the two index-derived columns meaningful names.
df_select = df_.rename(columns=dict(level_0='country', level_1='year'))

# Key columns followed by the per-sector emission columns to keep.
col_lst_select = ['country', 'year'] + [
    'Energy (MtCO2e)',
    'Industrial Processes (MtCO2e)',
    'Agriculture (MtCO2e)',
    'Waste (MtCO2e)',
    'Land-Use Change and Forestry (MtCO2)',
    'Other Fuel Combustion (MtCO2e)',
    'Fugitive Emissions (MtCO2e)',
]
# Clean up the column headers for presentation. Totals are already excluded
# because only the columns listed in col_lst_select are carried over.
#
# Merge the continent lookup into the selected columns (default merge on the
# shared column), move 'continent' to the front via set_index/reset_index,
# and capitalise the key column names.
df_all = (
    df_select.loc[:, col_lst_select]
    .merge(df3)
    .set_index('continent')
    .reset_index()
    .rename(columns={'continent': 'Continent', 'country': 'Country', 'year': 'Year'})
)

# Strip the unit suffixes as *literal* text. The previous regex patterns relied
# on str.replace defaulting to regex=True; newer pandas defaults to
# regex=False, which left the suffixes in place and broke later lookups such as
# df_all['Land-Use Change and Forestry'].
df_all.columns = list(
    df_all.columns
    .str.replace(' (MtCO2e)', '', regex=False)
    .str.replace(' (MtCO2)', '', regex=False)
)

# Save the merged dataset for further analysis.
df_all.to_csv('ghg.csv', index=False)
Now let's drill down on GHG emissions for Zambia and compare them with those of its neighbors.
# Zambia and the two neighbouring countries it is compared against below.
lst = ['Malawi', 'Zambia', 'Mozambique']
def select_countries(df, lst):
    """Return the rows of *df* whose 'Country' value appears in *lst*.

    The input frame is not modified; row order and index labels of the
    matching rows are preserved.
    """
    wanted = df['Country'].isin(lst)
    return df[wanted]
# Restrict the merged table to the countries in `lst`.
df_select = select_countries(df_all, lst)

# Reshape to one row per year with (sector, country) columns; everything after
# the first three columns (Continent, Country, Year) is a sector column.
df_select_piv = df_select.pivot(
    index='Year',
    columns='Country',
    values=df_select.columns[3:],
)
def plot_country(country):
    """Build a stacked bar figure of yearly emissions by sector for *country*.

    Reads the module-level ``df_select_piv`` pivot (index: year, columns:
    (sector, country)) and creates one bar trace per sector, in the order the
    sectors appear on the first column level.

    Returns the plotly ``Figure``; nothing is displayed here.
    """
    sectors = df_select_piv.columns.get_level_values(0).unique()
    traces = [
        go.Bar(
            x=df_select_piv.index,
            y=df_select_piv[sector][country],
            name=sector,
        )
        for sector in sectors
    ]
    stacked_layout = go.Layout(
        barmode='stack',
        title=country + ' GHG Emissions by Sector',
    )
    return go.Figure(data=traces, layout=stacked_layout)
# Stacked sector breakdown for Zambia.
country = 'Zambia'
fig1 = plot_country(country)
iplot(fig1, filename='pandas-bar-chart-layout ' + country)
Zambia emitted 120 million metric tons (MtCO2e) of greenhouse gases in 2011. The land-use change and forestry sector contributed 61 percent of overall emissions, followed by the energy sector (19%), the agriculture sector (17%), the waste sector (2%) and the industrial processes sector (1%). The large share attributable to land-use change and forestry is notable, and calls for considering a further split of this category for monitoring and policy purposes.[1] Is this the case across all countries for this category? How does Zambia compare to its neighbors in this regard?
# Stacked sector breakdown for Mozambique.
country = 'Mozambique'
fig1 = plot_country(country)
iplot(fig1, filename='pandas-bar-chart-layout ' + country)
Over half of emissions in Mozambique come from 'Land-Use Change and Forestry'. This category is potentially crowding out other factors. However, this does not appear relevant for Malawi.
# Stacked sector breakdown for Malawi.
country = 'Malawi'
fig1 = plot_country(country)
iplot(fig1, filename='pandas-bar-chart-layout ' + country)
In addition to the largest contributing sectors, how does Zambia fare against its neighbors if we look at emissions over time?
# Total GHG emissions for the three countries, one column per country.
df1 = df.reset_index()

# Keep the three countries, then only the first three columns: country, year
# and the first data column (presumably the emissions total -- confirm against
# the workbook's column order).
df1 = df1.loc[df1['level_0'].isin(['Malawi', 'Zambia', 'Mozambique'])]
df1 = df1.iloc[:, :3].copy()
df1.columns = ['Country', 'year', 'Emission Totals']

# Wide layout: one row per year, one column per country, 'year' as a column.
df1 = df1.pivot(
    index='year',
    columns='Country',
    values='Emission Totals',
).reset_index()
Let's get population values from World Bank Data.
def merge_pop_values(lst):
    """Add a ``<country>_pop`` column to the module-level ``df1`` per country.

    For every country name in *lst*, the matching row of ``../data/pop.csv``
    is looked up by its 'Country Name' value and its first 25 yearly values
    are stored in ``df1[<country> + '_pop']``.

    Fixes over the previous version:
      * the row is selected by country *name*; before, the loop position
        (0, 1, 2, ...) was used as the row label, so the populations of
        whatever countries occupy the first rows of the file were attached;
      * the CSV is read once instead of once per country;
      * unused locals were removed.

    Parameters
    ----------
    lst : iterable of str
        Country names as they appear in the 'Country Name' column.

    Returns
    -------
    The module-level ``df1`` (mutated in place).

    Raises
    ------
    ValueError
        If a requested country has no row in the population file.
    """
    # Assumes the first four columns are metadata and the year columns that
    # follow start at 1990, lining up with df1's rows -- TODO confirm against
    # pop.csv.
    n_meta_cols = 4
    n_years = 25

    df_pop = pd.read_csv('../data/' + 'pop.csv')

    for name in lst:
        rows = df_pop[df_pop['Country Name'] == name]
        if rows.empty:
            raise ValueError('no population data found for country: ' + str(name))
        values_ = rows.iloc[0].values[n_meta_cols:]
        df1[name + '_pop'] = values_[:n_years]

    return df1
# Attach population columns, then plot each country's total emissions by year.
df_m = merge_pop_values(lst)

# Columns 1-3 hold the per-country totals (column 0 is 'year').
data = [
    go.Scatter(x=df_m.year, y=df_m[col], name=col)
    for col in df_m.columns[1:4]
]
layout = go.Layout(
    title='Total GHG Emissions Excluding Land-Use Change and Forestry MtCO2e',
    yaxis=dict(title='Total MtCO2e'),
    showlegend=True,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='non-show-legend.html')
Greenhouse gas emissions increased 3 percent from 1990 to 2011, while the gross domestic product grew from US \$5.2 billion to US \$13.4 billion in the same time period. This suggests that the economy became less carbon intensive during this time frame.[2] Yet, globally, in 2014, Zambia was the second largest emitter of greenhouse gases due to land-use change and forestry.
Zambia is in the top 5 GHG emitting countries by Land-use Change and Forestry category.
# Five largest land-use change and forestry emitters in 2014.
df_all[df_all.Year == 2014].sort_values(
    by='Land-Use Change and Forestry', ascending=False
).head(5)

# Distribution of 2014 land-use emissions across countries, in bins of 50.
land_use_2014 = df_all[df_all.Year == 2014]['Land-Use Change and Forestry']
data = [go.Histogram(x=land_use_2014, xbins=dict(size=50))]

# Arrow annotations: (label, x position, y position, vertical arrow offset).
layout = go.Layout(
    title='GHG Emissions by Land-Use Change and Forestry (2014)',
    xaxis=dict(title='MtCO2e'),
    yaxis=dict(title='Number of Countries'),
    showlegend=False,
    annotations=[
        dict(
            x=x_pos,
            y=y_pos,
            xref='x',
            yref='y',
            text=label,
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=arrow_dy,
        )
        for label, x_pos, y_pos, arrow_dy in (
            ('Zambia', 328, 2, -260),
            ('Indonesia', 1668, 1.8, -200),
        )
    ],
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='basic histogram')
Categorized emissions are important, but what about total emissions on a per capita basis, for example compared with Zambia's neighbors?
# Per-capita emissions: each country's total divided by its population column.
for i in lst:
    population = pd.to_numeric(df_m[i + '_pop'])
    df_m[i + '_MtCO2e_pc'] = df_m[i].div(population)

# With 'year' as the index, the per-capita columns start at position 6
# (after the three totals and the three population columns).
df_s = df_m.set_index('year').iloc[:, 6:]

data = [
    go.Scatter(x=df_s.index, y=df_s[col], name=col)
    for col in df_s.columns
]
layout = go.Layout(
    title='Total Per Capita GHG Emissions Excluding Land-Use Change and Forestry MtCO2e',
    yaxis=dict(title='Total MtCO2e'),
    showlegend=True,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='show-legend')
Zambia is not far off Mozambique on a per capita basis, and both countries appear to have flat growth in emissions. However, if we include land-use change and forestry, Zambia appears to be an outlier compared to its neighbors and globally.