## Importing Libraries
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from IPython.display import display
import ipywidgets as widgets
import pandas as pd
import numpy as np
import matplotlib as mpl # optional (here)
import matplotlib.pyplot as plt
import seaborn # Optional, will only affect the color of bars and the grid
from ipywidgets import widgets, interactive
from ipywidgets import interact, Checkbox, FloatRangeSlider, VBox, HBox
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.io as pio
import plotly.io as pio
# Select the renderer used for every figure shown in this notebook.
pio.renderers.default = 'notebook_connected'
# Start from the stock "plotly" template and give it a fixed default canvas,
# so figures built from the default template share the same size.
pio.templates.default = "plotly"
default_layout = pio.templates[pio.templates.default].layout
default_layout.update(autosize=True, height=550, width=1000)
Layout({
'annotationdefaults': {'arrowcolor': '#2a3f5f', 'arrowhead': 0, 'arrowwidth': 1},
'autosize': True,
'autotypenumbers': 'strict',
'coloraxis': {'colorbar': {'outlinewidth': 0, 'ticks': ''}},
'colorscale': {'diverging': [[0, '#8e0152'], [0.1, '#c51b7d'], [0.2,
'#de77ae'], [0.3, '#f1b6da'], [0.4, '#fde0ef'],
[0.5, '#f7f7f7'], [0.6, '#e6f5d0'], [0.7,
'#b8e186'], [0.8, '#7fbc41'], [0.9, '#4d9221'],
[1, '#276419']],
'sequential': [[0.0, '#0d0887'], [0.1111111111111111,
'#46039f'], [0.2222222222222222, '#7201a8'],
[0.3333333333333333, '#9c179e'],
[0.4444444444444444, '#bd3786'],
[0.5555555555555556, '#d8576b'],
[0.6666666666666666, '#ed7953'],
[0.7777777777777778, '#fb9f3a'],
[0.8888888888888888, '#fdca26'], [1.0,
'#f0f921']],
'sequentialminus': [[0.0, '#0d0887'], [0.1111111111111111,
'#46039f'], [0.2222222222222222, '#7201a8'],
[0.3333333333333333, '#9c179e'],
[0.4444444444444444, '#bd3786'],
[0.5555555555555556, '#d8576b'],
[0.6666666666666666, '#ed7953'],
[0.7777777777777778, '#fb9f3a'],
[0.8888888888888888, '#fdca26'], [1.0,
'#f0f921']]},
'colorway': [#636efa, #EF553B, #00cc96, #ab63fa, #FFA15A, #19d3f3, #FF6692,
#B6E880, #FF97FF, #FECB52],
'font': {'color': '#2a3f5f'},
'geo': {'bgcolor': 'white',
'lakecolor': 'white',
'landcolor': '#E5ECF6',
'showlakes': True,
'showland': True,
'subunitcolor': 'white'},
'height': 550,
'hoverlabel': {'align': 'left'},
'hovermode': 'closest',
'mapbox': {'style': 'light'},
'paper_bgcolor': 'white',
'plot_bgcolor': '#E5ECF6',
'polar': {'angularaxis': {'gridcolor': 'white', 'linecolor': 'white', 'ticks': ''},
'bgcolor': '#E5ECF6',
'radialaxis': {'gridcolor': 'white', 'linecolor': 'white', 'ticks': ''}},
'scene': {'xaxis': {'backgroundcolor': '#E5ECF6',
'gridcolor': 'white',
'gridwidth': 2,
'linecolor': 'white',
'showbackground': True,
'ticks': '',
'zerolinecolor': 'white'},
'yaxis': {'backgroundcolor': '#E5ECF6',
'gridcolor': 'white',
'gridwidth': 2,
'linecolor': 'white',
'showbackground': True,
'ticks': '',
'zerolinecolor': 'white'},
'zaxis': {'backgroundcolor': '#E5ECF6',
'gridcolor': 'white',
'gridwidth': 2,
'linecolor': 'white',
'showbackground': True,
'ticks': '',
'zerolinecolor': 'white'}},
'shapedefaults': {'line': {'color': '#2a3f5f'}},
'ternary': {'aaxis': {'gridcolor': 'white', 'linecolor': 'white', 'ticks': ''},
'baxis': {'gridcolor': 'white', 'linecolor': 'white', 'ticks': ''},
'bgcolor': '#E5ECF6',
'caxis': {'gridcolor': 'white', 'linecolor': 'white', 'ticks': ''}},
'title': {'x': 0.05},
'width': 1000,
'xaxis': {'automargin': True,
'gridcolor': 'white',
'linecolor': 'white',
'ticks': '',
'title': {'standoff': 15},
'zerolinecolor': 'white',
'zerolinewidth': 2},
'yaxis': {'automargin': True,
'gridcolor': 'white',
'linecolor': 'white',
'ticks': '',
'title': {'standoff': 15},
'zerolinecolor': 'white',
'zerolinewidth': 2}
})
from IPython.display import HTML
# Build a "Toggle Code" button whose onclick handler calls the JavaScript
# function codeToggle(); that function is not defined in this part of the file.
# NOTE(review): the HTML object is only rendered when it is the displayed result
# of its cell -- confirm the button actually appears.
HTML('''<button type="button" class="btn btn-outline-danger" onclick="codeToggle();">Toggle Code</button>''')
import warnings
# Suppress every warning raised from this point on (this also hides
# deprecation notices from the plotting and data libraries).
warnings.filterwarnings('ignore')
We are a group of 4: Asma, Dhairya, Nokzendi, and Sukruta. We have worked on the world inequality dataset and the global poverty dataset, and tried to use a lot of metrics (some of which are defined below) to analyse the world inequality and its various nuances.
The Gini index is a statistical measure of inequality in income distribution within a population. It is calculated by plotting the cumulative share of income against the cumulative share of the population. High Gini indices suggest a larger gap between the rich and the poor, while low Gini indices suggest a more equal distribution of wealth. The Gini index is commonly used by policymakers and analysts to measure inequality and to identify areas where policy interventions may be needed to address income disparities.
Atkinson Coefficient: A measure of income inequality that takes into account how society values income redistribution. The coefficient ranges from 0 (perfect equality) to 1 (perfect inequality), with higher values indicating greater inequality.
Percentile Ratio (90/10): The ratio of the 90th percentile of income to the 10th percentile of income. This is a measure of the difference in income between the wealthiest and poorest members of society.
Relative Poverty Rates Total Population 60%: The percentage of the population living in households with an equivalized disposable income below 60% of the national median equivalized disposable income. This is a measure of the proportion of the population living in poverty.
Children Poverty Rates Two Parent Families 50%: The percentage of children living in households with two parents and an equivalized disposable income below 50% of the national median equivalized disposable income. This is a measure of child poverty rates in households with two parents.
Children Poverty Rates Single Mother Families 50%: The percentage of children living in households headed by a single mother and an equivalized disposable income below 50% of the national median equivalized disposable income. This is a measure of child poverty rates in single-parent households.
% Children Living in Single Mother Families: The percentage of children living in households headed by a single mother. This is a measure of family structure and can impact child poverty rates.
Median Equivalized Income: The income at which half of the population has a higher income and half has a lower income, adjusted for household size and composition.
Mean Equivalized Income: The total income of the population divided by the total number of people, adjusted for household size and composition. This is a measure of the average income in a population.
## Data Preprocessing
# Raw inputs: the poverty / inequality indicators and the ISO country-code table.
poverty_data = pd.read_csv('poverty.csv')
loc = pd.read_csv('ISO.csv')

# Discard the ISO columns that are not needed and call the name column
# 'country' so that it can serve as the merge key.
loc = loc.drop(columns=['Alpha-2 code', 'ISO 3166-2', 'Numeric code'])
loc = loc.rename(columns={'English short name lower case': 'country'})

# Readable names for the indicator columns ('region' becomes the merge key).
readable_names = {
    'region': 'country',
    'atk5': 'Atkinson_Coefficient_(epsilon=0.5)',
    'd9010': 'Percentile_Ratio_(90/10)',
    'poorAll6': 'Relative_Poverty_Rates_Total_Population_60%',
    'poortp': 'Children_Poverty_Rates_Two_Parent_Families_50%',
    'poorsm': 'Children_Poverty_Rates_Single_Mother_Families_50%',
    'pkidsm': '%_Children_Living_in_Single_Mother_Families',
    'eymed': 'Median_Equivalized_Income',
    'average': 'Mean_Equivalized_Income',
}
poverty_data = poverty_data.rename(columns=readable_names)

# Only these columns are carried forward.
kept_columns = [
    'country',
    'year',
    'gini',
    'Atkinson_Coefficient_(epsilon=0.5)',
    'Percentile_Ratio_(90/10)',
    'Relative_Poverty_Rates_Total_Population_60%',
    'Children_Poverty_Rates_Two_Parent_Families_50%',
    'Children_Poverty_Rates_Single_Mother_Families_50%',
    '%_Children_Living_in_Single_Mother_Families',
    'Median_Equivalized_Income',
    'Mean_Equivalized_Income',
]
poverty_data = poverty_data.loc[:, kept_columns]

# Map the country spellings used in the poverty data onto the spellings used
# in the ISO table, otherwise the merge below would not match these rows.
corrections = {
    'Czech Rebuplic': 'Czech Republic',
    'Cote D Ivoire': "Côte d'Ivoire",
    'Palestine': 'Palestinian Territory, Occupied',
    'South Korea': 'Korea, Republic of (South Korea)',
    'United States': 'United States Of America',
}
poverty_data['country'] = poverty_data['country'].replace(corrections)

# Attach the ISO columns to every poverty row (default inner join on 'country')
# and persist the result for the plotting cells.
data = poverty_data.merge(loc, on='country')
data.to_csv('data.csv', index = False)
import plotly.graph_objs as go
import pandas as pd

# Read in the merged data and keep only rows where every column is present.
data = pd.read_csv('data.csv')
f_data = data.dropna()

# Group the data by year
grouped_data = f_data.groupby('year')

# For each year, select the row with the highest Gini coefficient. Indexing
# with the per-group idxmax labels returns the same rows as applying a lambda
# to every group, without going through groupby.apply.
highest_gini_by_year = f_data.loc[grouped_data['gini'].idxmax()]

# Keep only the years that are divisible by 4, to thin out the labels.
filtered_data = highest_gini_by_year[highest_gini_by_year['year'] % 4 == 0]

# One labelled marker per retained year.
scatter = go.Scatter(x=filtered_data['year'], y=filtered_data['gini'], mode='markers+text',
                     text=filtered_data['country'], textposition='bottom center')

# Set the title and axis labels
layout = go.Layout(title='Highest Gini Coefficients by Year (in gaps of 4 years)',
                   xaxis_title='Year', yaxis_title='Gini Coefficient')

# The figure is created with the trace attached. The earlier
# fig.add_trace(scatter) call ran before `fig` existed (NameError on a fresh
# kernel, or a mutation of a stale figure otherwise), so it has been removed.
fig = go.Figure(data = scatter, layout = layout)
iplot(fig)
The United States had consistently high Gini index values from the 1970s through the 1990s. The country had the highest Gini index in 1979, 1980, and every year from 1982 through 1989. This is consistent with historical facts, as the 1980s saw a rise in income inequality in the United States due to factors such as tax cuts, deregulation, and globalization.
Chile and Paraguay also appear to have high Gini index values, with Chile having the highest value in 1990, 1992, 1994, 1996, 1998, and 2002, and Paraguay having the highest value in 1997, 1999, 2000, 2003, 2011, 2014, and 2016. These countries have both experienced political instability and economic crises that have contributed to high levels of income inequality.
South Africa also had consistently high Gini index values from the late 2000s through the 2010s. The country had the highest Gini index in 2008, 2010, 2012, 2015, and 2017. South Africa has a history of racial inequality, with the legacy of apartheid still affecting the country's economic and social structures.
These insights are supported by historical facts and events in each of these countries. For example, the United States experienced a decline in labor unions, the rise of the finance industry, and globalization, which contributed to rising income inequality in the 1980s. Chile experienced a military dictatorship in the 1970s and 1980s, which led to policies that favored the wealthy and led to an increase in income inequality. Similarly, Paraguay experienced political instability and economic crises that have contributed to high levels of income inequality. South Africa's history of apartheid has created economic disparities that continue to affect the country today.
## Plotting gini index over time
# Indicator columns shown on hover; each of them is zero-filled for missing years.
hover_columns = ["Atkinson_Coefficient_(epsilon=0.5)", "Percentile_Ratio_(90/10)",
                 "Relative_Poverty_Rates_Total_Population_60%", "Children_Poverty_Rates_Two_Parent_Families_50%",
                 "Children_Poverty_Rates_Single_Mother_Families_50%", "%_Children_Living_in_Single_Mother_Families",
                 "Median_Equivalized_Income", "Mean_Equivalized_Income"]

# Complete observations after 1970 only.
map_data = data[data.year>1970][["country", "year", "gini", "Alpha-3 code", *hover_columns]].dropna()

# Minimum and maximum value of year
min_year = map_data.year.min()
max_year = map_data.year.max()

# First Alpha-3 code seen for each country, and the (country, year) pairs
# that already have data.
alpha3_by_country = map_data.drop_duplicates("country").set_index("country")["Alpha-3 code"]
observed = set(zip(map_data["country"], map_data["year"]))

# Add a zero-valued row for every (country, year) pair without data, so each
# country is present in every animation frame (with bubble size 0).
# The rows are collected first and concatenated once: DataFrame.append was
# removed in pandas 2.0, and appending row by row copied the frame every time.
placeholder_rows = [
    {"country": country, "year": year, "gini": 0, **dict.fromkeys(hover_columns, 0),
     "Alpha-3 code": alpha3_by_country[country]}
    for year in range(min_year, max_year + 1)
    for country in alpha3_by_country.index
    if (country, year) not in observed
]
if placeholder_rows:
    map_data = pd.concat([map_data, pd.DataFrame(placeholder_rows)], ignore_index=True)

# Sort the dataframe by country and year
map_data = map_data.sort_values(["country", "year"]).reset_index(drop=True)

fig = px.scatter_geo(map_data, locations = 'Alpha-3 code', color = "country", hover_name = "country", size = "gini",
                     projection = "natural earth", animation_frame="year", template = "plotly_dark",
                     title = "Gini index of countries over time", hover_data=hover_columns)
fig.show()
In our dataset we didn't have the Gini values of all the countries for each and every year, which is why the map looks so sparse. We have built this world map so that, as the year advances, the bubble corresponding to each country changes its size according to that country's Gini index.
If you hover on the bubbles you can also analyze the other metrics which help us analyze the income inequalities.
## Timeline of Gini Index of Countries
# List of countries to be plotted. The names must match the merged 'country'
# column exactly; the preprocessing step maps 'United States' to
# 'United States Of America' (capital "Of"), so that spelling is used here --
# with a lowercase "of" the filter below matches no rows and the trace is empty.
countries = ['United States Of America', 'United Kingdom', 'Austria', 'Russia', 'China', 'Germany', 'Australia', 'India', 'South Africa']

# Start from an empty dark-themed figure and add one line per country.
fig = px.line(title="Gini Index of Countries over time", template="plotly_dark")
for country in countries:
    # Observations from 1990 onwards, in chronological order so that the line
    # is drawn left to right regardless of the row order in the data.
    fig_data = data[(data.country == country) & (data.year >= 1990)][['year', 'gini']].sort_values('year')
    fig.add_trace(go.Scatter(x=fig_data['year'], y=fig_data['gini'], name=country, mode='lines'))
fig.update_layout(xaxis_title="Year", yaxis_title="Gini Index")
fig.show()
South Africa has the highest Gini Index value among the selected countries, which can be attributed to its history of apartheid and racial inequality. Even after the end of apartheid, the legacy of racial inequality has persisted, with the majority black population facing significant barriers to economic opportunities and wealth accumulation.
Countries like Austria, Germany, and Australia have implemented policies to reduce income inequality, such as progressive taxation, minimum wage laws, and social welfare programs. These policies have contributed to a lower level of income inequality in these countries.
Russia's economic transition in the 1990s was marked by privatization of state-owned enterprises, which resulted in a concentration of wealth in the hands of a few individuals. The subsequent economic growth during the 2000s led to a decrease in the Gini Index value, but recent years have seen a reversal of this trend.
China and India have experienced a significant increase in their Gini Index values since the 1990s. This can be attributed to the rapid economic growth and industrialization in these countries, which have led to a concentration of wealth in the hands of a few individuals and income disparities between urban and rural areas.
## Plots for different poverty rates
countries = ['Japan', 'Russia', 'India']
# Poverty-rate indicators; each one becomes a line named after its column.
rate_columns = ['Relative_Poverty_Rates_Total_Population_60%',
                'Children_Poverty_Rates_Two_Parent_Families_50%',
                'Children_Poverty_Rates_Single_Mother_Families_50%',
                '%_Children_Living_in_Single_Mother_Families']
for country in countries:
    # Rows for this country from 1990 onwards.
    recent_rows = (data.country == country) & (data.year >= 1990)
    fig2_data = data.loc[recent_rows, ['year', *rate_columns]]
    # One dark-themed figure per country, titled with the country name.
    fig2 = px.line(title = country, template="plotly_dark")
    for column in rate_columns:
        fig2.add_trace(go.Scatter(x=fig2_data['year'], y=fig2_data[column], name=column, mode='lines'))
    fig2.update_layout(xaxis_title="Year", yaxis_title="Percentage")
    fig2.show()
Japan has one of the lowest poverty rates in the world, but its child poverty rate is relatively high compared to other developed countries, with single-mother households being particularly vulnerable to poverty.
After the collapse of the Soviet Union, Russia experienced a sharp increase in poverty and inequality, which have been slowly decreasing since the early 2000s, but child poverty remains a persistent problem.
Despite India's rapid economic growth, poverty and inequality remain major challenges, with child poverty being a particular concern. The country has implemented various poverty alleviation programs, such as the National Rural Employment Guarantee Act, but their effectiveness has been a subject of debate.
## Plots for different income
countries = ['Japan', 'Russia', 'India']
# Income indicators; each one becomes a line named after its column.
income_columns = ['Median_Equivalized_Income', 'Mean_Equivalized_Income']
for country in countries:
    # Rows for this country from 1990 onwards.
    recent_rows = (data.country == country) & (data.year >= 1990)
    fig2_data = data.loc[recent_rows, ['year', *income_columns]]
    # One dark-themed figure per country, titled with the country name.
    fig2 = px.line(title = country, template="plotly_dark")
    for column in income_columns:
        fig2.add_trace(go.Scatter(x=fig2_data['year'], y=fig2_data[column], name=column, mode='lines'))
    fig2.update_layout(xaxis_title="Year", yaxis_title="Income in country's currency")
    fig2.show()
According to the World Bank, Japan has a high-income economy and is the third-largest economy in the world. Japan's economy has been characterized by a skilled workforce, advanced technology, and efficient infrastructure, contributing to its high income levels.
Russia's economy has been struggling since the collapse of the Soviet Union, with high levels of corruption and a reliance on natural resources. The country has been facing economic sanctions and political isolation, contributing to its low income levels.
India's economy has been growing rapidly in recent years, with a focus on technology, services, and manufacturing sectors. The country has been implementing economic reforms and liberalization policies, contributing to its increasing income levels. However, income inequality remains a significant challenge in the country.
The assumption that all sampled distributions are Gaussian (or normal) is a common approximation used in statistics, but it does not hold for all types of data. While many natural phenomena and random variables tend to follow a normal distribution, some types of data, such as income or wealth distribution, do not.
Income or wealth distribution is not Gaussian because it is heavily influenced by social and economic factors, such as income inequality and the distribution of resources. These factors create a dependency between individuals' income or wealth, which causes deviations from the normal distribution. In particular, income or wealth distribution often exhibits a skewed distribution, with a long tail on the higher end of the distribution, indicating a small number of individuals with very high income or wealth.
Through the following plots and their analysis, we are going to prove the above hypothesis.
import os
import matplotlib.pyplot as plt
import ipywidgets
import pandas as pd

seaborn.set()

# Define the countries and their corresponding data files.
# os.path.join keeps the paths portable and avoids the invalid escape
# sequences (such as "\c") that a backslash-separated literal contains.
countries = ["India", "Korea", "Singapore", "Switzerland"]
file_names = [os.path.join("created_data", f"{c}.csv") for c in countries]

# Load the data for each country into a list of dataframes
list_df = [pd.read_csv(f) for f in file_names]

# Pair every country name with its frame once, under a private name: the
# global `countries` is reassigned further down the file, and the widget
# callback must keep labelling the lines with the names defined here.
_percentile_frames = list(zip(countries, list_df))


def plot(value, Var):
    """Draw one line per country for a given percentile group and variable.

    Args:
        value: Entry of the 'percentile' column to select (e.g. "p0p50").
        Var: Entry of the 'variable' column to select; also used as the
            y-axis label and the title.

    Countries with no matching rows are skipped. When a country has several
    'shorttype' or 'pop' values for the selection, only the first of each is
    plotted.
    """
    plt.figure(figsize=(10, 6))
    # Explicit True: a bare plt.grid() toggles the grid, which switches it
    # off when the active style already enables it.
    plt.grid(True)

    plotted_any = False
    for country, frame in _percentile_frames:
        # Select the data for the specified percentile and variable
        selection = frame[(frame["variable"] == Var) & (frame["percentile"] == value)]
        if selection.empty:
            continue
        first_type = selection["shorttype"].unique()[0]
        of_type = selection[selection["shorttype"] == first_type]
        first_pop = of_type["pop"].unique()[0]
        series = of_type[of_type["pop"] == first_pop]
        # Label by the paired name so a skipped country cannot shift the labels.
        plt.plot(series['year'], series['value'], label=country)
        plotted_any = True

    # Build the legend once, and only when there is something to describe.
    if plotted_any:
        plt.legend()

    # Add axis labels and a title
    plt.xlabel('Year')
    plt.ylabel(Var)
    plt.title(str(Var))
    # Display the plot
    plt.show()


ipywidgets.interact(plot, value=["p0p50", "p50p90", "p90p100", "p99.9p100", "p99.99p100", "p0p100", "p37p38", "p30p31"],Var = ["Net personal wealth", "Fiscal income "])
interactive(children=(Dropdown(description='value', options=('p0p50', 'p50p90', 'p90p100', 'p99.9p100', 'p99.9…
<function __main__.plot(value, Var)>
The elephant chart is a visual representation of changes in global income distribution between the late 20th century and the early 21st century. The chart takes its name from the shape of the graph, which resembles an elephant with a raised trunk.
The x-axis of the chart shows the income percentiles of the global population, ranging from the poorest 1% to the richest 1%. The y-axis shows the change in income level for each percentile between the defined countries.
The elephant chart shows that the biggest income gains over this period were experienced by the middle class in emerging economies, particularly in China and India, and the global top 1%. However, the chart also reveals a significant "trunk" in the chart, where incomes for the poorest 10% of the global population actually declined over this period.
In order to observe this, we plotted the income percentiles for the top 10% and the bottom 50%, and got the below graphs.
If you plot for the bottom 50 percent (p0p50) then you get the below plot:
If you plot for the top 10 percent (p90p100) then you get the below plot:
We notice the same trend that is proposed in the elephant chart. The chart is shaped like an elephant, with a raised trunk at the right-hand side of the chart, indicating the significant income growth experienced by the global top 1% during this period.
The "body" of the elephant represents the income growth experienced by the middle class (particularly in a country like India). The "trunk" of the elephant represents the decline in incomes for the poorest 10% of the global population during this period.
In the next plot, we discuss the economic disparities that exist in various parts of the world, more in some than others. In order to understand these, we have used a dataset that includes:
1) MPI (Multidimensional Poverty Index) - It is a measure of poverty that takes into account multiple dimensions of poverty, including health, education, and living standards.
2) HCR (HeadCount Ratio) - The headcount ratio is a measure of poverty that represents the percentage of the population that lives below a certain poverty line.
3) Intensity of deprivation - It is a measure that is used to assess the severity of poverty among those who are classified as poor. It provides a more nuanced understanding of poverty than the headcount ratio, which only indicates the proportion of the population that is living below the poverty line. The intensity of deprivation is calculated by adding up the weights of the different dimensions of poverty in which a household is deprived, and then dividing this by the total number of dimensions in which the household is deprived. The resulting number represents the average severity of poverty experienced by the household.
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ipywidgets

# Read in the shapefile of the world map
# NOTE(review): gpd.datasets is deprecated in recent geopandas releases; if this
# line fails, point read_file at a local copy of the Natural Earth countries file.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Read in the CSV file with data to plot. It is a plain table, so it is loaded
# with pandas: the merge below then keeps the world frame's single geometry
# column instead of depending on how geopandas reads a geometry-less CSV.
# A dedicated name is used so the poverty frame held in `data` is not replaced.
mpi_data = pd.read_csv('MPI_national.csv')
mpi_data = mpi_data.rename(columns={'ISO' :'iso_a3' })


def plot2(value):
    """Draw a world map coloured by one column of the MPI table.

    Args:
        value: Name of the MPI column to colour the countries by.

    Countries without a usable value for that column are left uncoloured.
    """
    # Join the shapefile and data on a common column; the left join keeps
    # every country shape, with NaN where the MPI table has no row.
    merged = world.merge(mpi_data, on='iso_a3', how='left')
    # Blank or non-numeric cells become NaN instead of raising.
    merged[value] = pd.to_numeric(merged[value], errors='coerce')

    # Plot the map, coloring by the desired column
    fig, ax = plt.subplots(figsize=(12, 6))
    merged.plot(column=value, cmap='YlOrRd', legend=True, ax=ax)
    # Name the indicator being shown rather than using a placeholder title.
    ax.set_title(f'{value} by country')
    # Show the plot
    plt.show()


va = ['MPI Urban','Headcount Ratio Urban','Intensity of Deprivation Urban','MPI Rural','Headcount Ratio Rural','Intensity of Deprivation Rural']
ipywidgets.interact(plot2, value=va )
interactive(children=(Dropdown(description='value', options=('MPI Urban', 'Headcount Ratio Urban', 'Intensity …
<function __main__.plot2(value)>
The plot shows that Africa has some of the highest Multidimensional Poverty Index (MPI) scores in the world. 28 out of the 38 countries with the highest MPI scores are located in sub-Saharan Africa. There are several reasons why poverty is so prevalent in Africa. One key factor is the continent's history of colonization, which led to the extraction of resources and the exploitation of local populations. This has left many African countries with weak economies, limited access to education and healthcare, and high levels of inequality.
In addition, many African countries are characterized by political instability, conflict, and corruption, which have further hindered economic growth and development. These challenges are compounded by environmental factors such as climate change, which disproportionately affects vulnerable communities in Africa.
Similar conclusions are obtained for the HCR and Intensity of deprivation for these countries.
This next plot includes a wide variety of parameters on the basis of which we can compare the inequalities that exist between countries. To keep the large dataset manageable, we look at a first-world country, a second-world country and a third-world country, and compare them to understand the discrepancies in the resources available to them.
To compare between countries, we plot graphs for three countries: India, Thailand, and Singapore below.
import os
import ipywidgets

# os.path.join keeps the path portable and avoids the invalid escape
# sequences that a backslash-separated literal contains.
file_name = os.path.join("created_data", "India.csv")
df = pd.read_csv(file_name)
li = list(df["variable"].unique())
# Private reference for the callback below: the global `df` is reassigned by
# the cells for the other countries, and this widget must keep using India.
_india_df = df


def plot(X_values, year):
    """Show an interactive time-series explorer for one India variable.

    Args:
        X_values: Entry of the 'variable' column to plot.
        year: Year to highlight with a marker on every series.

    Displays a second dropdown for the variable's 'shorttype' values; picking
    one draws every (percentile, age, pop) series of that type.
    """
    df2 = _india_df[_india_df['variable'] == X_values]
    li2 = list(df2["shorttype"].unique())

    def plot_y(y):
        """Plot all series of `X_values` whose 'shorttype' equals `y`."""
        df3 = df2[df2['shorttype'] == y]
        # Switch the grid on once; repeated bare plt.grid() calls toggle it.
        plt.grid(True)
        drew_any = False
        # One line per percentile / age / pop combination present for this type.
        for (percentile, age, pop), series in df3.groupby(['percentile', 'age', 'pop'], sort=False):
            plt.plot(series['year'], series['value'], label=f"{percentile} | age {age} | pop {pop}")
            drew_any = True
            # Mark the selected year only when the series has a value for it.
            at_year = series.loc[series['year'] == year, 'value']
            if not at_year.empty:
                plt.scatter(year, at_year.iloc[0])
        if drew_any:
            plt.legend()
        # Add axis labels and a title
        plt.xlabel('Year')
        plt.ylabel(X_values)
        plt.title('India')
        # Display the plot
        plt.show()

    ipywidgets.interact(plot_y, y=li2)


ipywidgets.interact(plot, X_values = li, year = (1990,2020,1))
interactive(children=(Dropdown(description='X_values', options=('Personal carbon footprint (investments only)…
<function __main__.plot(X_values, year)>
import os
import ipywidgets

# os.path.join keeps the path portable and avoids the invalid escape
# sequences that a backslash-separated literal contains.
file_name = os.path.join("created_data", "Thailand.csv")
df = pd.read_csv(file_name)
li = list(df["variable"].unique())
# Private reference for the callback below: the global `df` is reassigned by
# the cells for the other countries, and this widget must keep using Thailand.
_thailand_df = df


def plot(X_values, year):
    """Show an interactive time-series explorer for one Thailand variable.

    Args:
        X_values: Entry of the 'variable' column to plot.
        year: Year to highlight with a marker on every series.

    Displays a second dropdown for the variable's 'shorttype' values; picking
    one draws every (percentile, age, pop) series of that type.
    """
    df2 = _thailand_df[_thailand_df['variable'] == X_values]
    li2 = list(df2["shorttype"].unique())

    def plot_y(y):
        """Plot all series of `X_values` whose 'shorttype' equals `y`."""
        df3 = df2[df2['shorttype'] == y]
        # Switch the grid on once; repeated bare plt.grid() calls toggle it.
        plt.grid(True)
        drew_any = False
        # One line per percentile / age / pop combination present for this type.
        for (percentile, age, pop), series in df3.groupby(['percentile', 'age', 'pop'], sort=False):
            plt.plot(series['year'], series['value'], label=f"{percentile} | age {age} | pop {pop}")
            drew_any = True
            # Mark the selected year only when the series has a value for it.
            at_year = series.loc[series['year'] == year, 'value']
            if not at_year.empty:
                plt.scatter(year, at_year.iloc[0])
        if drew_any:
            plt.legend()
        # Add axis labels and a title
        plt.xlabel('Year')
        plt.ylabel(X_values)
        plt.title('Thailand')
        # Display the plot
        plt.show()

    ipywidgets.interact(plot_y, y=li2)


ipywidgets.interact(plot, X_values = li, year = (1990,2020,1))
interactive(children=(Dropdown(description='X_values', options=('Personal carbon footprint (investments only)…
<function __main__.plot(X_values, year)>
import os
import ipywidgets

# os.path.join keeps the path portable and avoids the invalid escape
# sequences that a backslash-separated literal contains.
file_name = os.path.join("created_data", "Singapore.csv")
df = pd.read_csv(file_name)
li = list(df["variable"].unique())
# Private reference for the callback below: the global `df` is reassigned by
# the cells for the other countries, and this widget must keep using Singapore.
_singapore_df = df


def plot(X_values, year):
    """Show an interactive time-series explorer for one Singapore variable.

    Args:
        X_values: Entry of the 'variable' column to plot.
        year: Year to highlight with a marker on every series.

    Displays a second dropdown for the variable's 'shorttype' values; picking
    one draws every (percentile, age, pop) series of that type.
    """
    df2 = _singapore_df[_singapore_df['variable'] == X_values]
    li2 = list(df2["shorttype"].unique())

    def plot_y(y):
        """Plot all series of `X_values` whose 'shorttype' equals `y`."""
        df3 = df2[df2['shorttype'] == y]
        # Switch the grid on once; repeated bare plt.grid() calls toggle it.
        plt.grid(True)
        drew_any = False
        # One line per percentile / age / pop combination present for this type.
        for (percentile, age, pop), series in df3.groupby(['percentile', 'age', 'pop'], sort=False):
            plt.plot(series['year'], series['value'], label=f"{percentile} | age {age} | pop {pop}")
            drew_any = True
            # Mark the selected year only when the series has a value for it.
            at_year = series.loc[series['year'] == year, 'value']
            if not at_year.empty:
                plt.scatter(year, at_year.iloc[0])
        if drew_any:
            plt.legend()
        # Add axis labels and a title
        plt.xlabel('Year')
        plt.ylabel(X_values)
        plt.title('Singapore')
        # Display the plot
        plt.show()

    ipywidgets.interact(plot_y, y=li2)


ipywidgets.interact(plot, X_values = li, year = (1990,2020,1))
interactive(children=(Dropdown(description='X_values', options=('Household direct emissions ', 'Personal carb…
<function __main__.plot(X_values, year)>
Selected countries: India (Third-world country), Thailand (Second-world country), Singapore (First-world country)
We chose and compared these countries through following parameters:
1) Education The level of education is significantly higher in Thailand than India (more than double), and even more so in Singapore. This is because first world countries typically have well-funded education systems, highly trained teachers, and access to advanced technology and resources. These countries also tend to place a high value on education and prioritize it as a key component of their economic and social development. In contrast, many third world countries face significant challenges such as limited funding, inadequate infrastructure, a shortage of qualified teachers, and cultural and social barriers that can limit access to education for certain groups, particularly girls and women.
2) Market Exchange Rate, LCD per USD We observe that the currency value of the third world country (India) is much lower than that of the first world country. This is because the value of a country's currency is largely determined by factors such as its economic strength, political stability, and level of international trade. First world countries tend to have stronger economies with more developed infrastructure, higher levels of education and technology, and greater access to resources and capital. These factors all contribute to a higher demand for their currency and a stronger exchange rate. In contrast, third world countries often have weaker economies with less developed infrastructure, lower levels of education and technology, and limited access to resources and capital. These factors can contribute to a lower demand for their currency and a weaker exchange rate.
3) National CO2 Footprint The CO2 footprint in Singapore has largely decreased over the last decade, while India and even Thailand have increased CO2 levels. The per capita carbon dioxide (CO2) footprint is lower in first world countries than in third world countries. This is largely due to differences in levels of economic development and access to modern energy sources. First world countries typically have higher levels of economic development and greater access to modern energy sources such as electricity, natural gas, and renewable energy. This allows them to implement more efficient technologies and infrastructure, which can reduce their carbon footprint. In contrast, many third world countries still rely heavily on traditional sources of energy such as biomass and coal, which can be highly inefficient and generate significant amounts of CO2. Additionally, many third world countries have limited access to modern infrastructure and transportation systems, which can contribute to higher carbon emissions.
We discussed numerous kinds of inequalities until now, of which, inequalities like income inequality have gotten worse over the years in many countries.
Another inequality that exists almost everywhere and is in dire need of action and improvement is gender inequality. This comes in various forms, including unequal access to education, employment, healthcare, and political representation, as well as discriminatory laws and cultural attitudes.
Over the years, there have been significant improvements in some areas of gender equality, but progress has been uneven and challenges remain.
Below, we have plotted the gender inequality index for a few countries.
import matplotlib.pyplot as plt
import ipywidgets
import pandas as pd
# Countries shown as bars in the gender-index chart.
# NOTE(review): the leading spaces and the spellings " korea" / " United states"
# are kept verbatim; presumably they mirror the "Country" values in the CSV
# files that plot(value, year) filters on — confirm.
countries = [
    " India",
    " Japan",
    " korea",
    " United Kingdom",
    " United states",
    " Canada",
    " Iraq",
]
def plot(value, year):
    """Draw a bar chart of one index for every entry of ``countries``.

    ``value`` is the base name of the CSV file to read (``value + ".csv"``)
    and is also used in the chart title. ``year`` selects a column *by
    position*: the column at index ``year - 1993``.

    The module-level ``countries`` list is read at call time. A country with
    no matching ``Country`` row, or a year whose column index is not smaller
    than the number of columns, is plotted as 0.
    """
    table = pd.read_csv(value + ".csv")
    column_index = year - 1993
    column_count = len(table.columns)

    def lookup(country):
        # First cell of the selected column among this country's rows, or 0.
        rows = table[table["Country"] == country]
        if column_index >= column_count:
            return 0
        cells = rows.iloc[:, column_index]
        return cells.iloc[0] if len(cells) != 0 else 0

    year_vals = [lookup(country) for country in countries]

    plt.figure(figsize=(10, 6))
    plt.bar(countries, year_vals, color='skyblue')
    plt.title(value + " in " + str(year))
    plt.xlabel("Countries")
    plt.ylabel("Values")
    # Tilt the country names so they do not overlap.
    plt.xticks(rotation=45)
    plt.show()
# Interactive controls for plot(value, year): `value` is chosen from the two
# index names (each is also the base name of the CSV that plot reads) and
# `year` from the range 1998-2011 in steps of 1.
ipywidgets.interact(plot, value=["gender_inequality_index", "gender_development_index"],year = (1998,2011,1))
interactive(children=(Dropdown(description='value', options=('gender_inequality_index', 'gender_development_in…
<function __main__.plot(value, year)>
The first thing we can clearly see is that, for decades, India and Iraq have had extremely high rates of gender discrimination. Over the years, however, this inequality has decreased in India, whereas the same is not observed in Iraq.
This observation is supported by the current situation in Iraq. It is a conservative country with strict cultural and religious traditions that often limit women's participation in public life. It has also experienced political instability and conflict for many years, which has further marginalized women and limited their ability to advocate for their rights. Owing to these factors, it is understandable that gender inequality there has remained stagnant rather than decreasing.
Now, when we see countries like Japan, UK, and Canada, their gender inequality coefficient was already considerably low before. The reason for this could be that all of the three are first world countries. First world countries often have strong legal frameworks that protect women's rights and promote gender equality. These legal frameworks may include anti-discrimination laws, laws against domestic violence and sexual harassment, and laws that promote women's political and economic empowerment.
These countries generally have higher levels of education for both men and women, which can help to break down gender stereotypes and promote greater gender equality. Such nations also have stronger economies and higher levels of economic prosperity, which can provide greater opportunities for women to participate in the workforce and achieve economic independence.
Overall, while first world countries are not immune to gender inequality, they generally have stronger legal frameworks, better access to education and healthcare, and more progressive cultural attitudes that help to promote greater gender equality.
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ipywidgets
# Apply seaborn's default styling to the matplotlib figures drawn after this point.
seaborn.set()
def plot2(year):
    """Draw a world map coloured by women's share of parliamentary seats.

    Reads the Natural Earth low-resolution country polygons and
    ``Viz5_August_Female_Political_Representation.csv``, keeps the CSV rows
    whose ``Year`` equals ``year``, left-joins them onto the polygons by
    ISO alpha-3 code and colours each country by the seat percentage.
    Countries with no row for that year keep a NaN value after the join.

    Parameters
    ----------
    year : int
        Year to display (compared against the numeric ``Year`` column).
    """
    value = 'Proportion of seats held by women in national parliaments (%)'

    # Country polygons.
    # NOTE(review): ``geopandas.datasets`` is deprecated and was removed in
    # geopandas 1.0; on newer versions this path lookup needs a local copy of
    # the Natural Earth file instead.
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

    # The CSV is plain tabular data, so load it with pandas. Reading it
    # through geopandas may or may not add an empty geometry column depending
    # on the I/O engine, which is what made the merged geometry column name
    # ('geometry_x' vs 'geometry') unpredictable.
    data = pd.read_csv('Viz5_August_Female_Political_Representation.csv')
    data = data.rename(columns={'Country Code': 'iso_a3'})

    # Non-numeric placeholders become NaN instead of raising.
    data['Year'] = pd.to_numeric(data['Year'], errors='coerce')
    data[value] = pd.to_numeric(data[value], errors='coerce')
    selected = data[data['Year'] == year]

    # Left join keeps every polygon; the result is still a GeoDataFrame whose
    # active geometry is the world polygons.
    merged = world.merge(selected, on='iso_a3', how='left')

    # legend=True already draws the colour bar for the plotted column.
    fig, ax = plt.subplots(figsize=(12, 6))
    merged.plot(column=value, cmap='YlOrRd', legend=True, ax=ax)
    ax.set_title('Map colored by column')
    plt.show()
# Year slider for plot2, covering 1997-2019 in steps of 1.
ipywidgets.interact(plot2, year= (1997, 2019, 1))
interactive(children=(IntSlider(value=2008, description='year', max=2019, min=1997), Output()), _dom_classes=(…
<function __main__.plot2(year)>
In order to understand the above visualisation, let us consider the following countries/regions:
-> When we move the slider to the right (observing the progress from 1997 to 2019), we see that South Africa, along with many other countries, has made significant strides in increasing women's political representation in recent years, with women now holding a relatively high percentage of seats in the national parliament and other political bodies. This increase in women's political representation in South Africa can be attributed to a combination of progressive policies and legislation, strong civil society advocacy, quota systems, and a culture that values women's leadership.
-> On the other hand, if we look at Asia, which includes India, we see that the proportion is much lower than in the rest of the world. In fact, some Asian countries have seen a decline in the number of women in politics. This could be because in many parts of Asia, there are deeply ingrained cultural and traditional attitudes that view women's role as primarily in the home and family, rather than in politics or public life. This can make it difficult for women to break into male-dominated political systems and gain the support they need to succeed.
Now, let us look into the portion of the female population that is actually employed. Below, we plot the gender wage gap for four countries. The gender wage gap refers to the difference in earnings between men and women in the workforce. It is typically measured as the ratio of women's average earnings to men's average earnings, expressed as a percentage.
import ipywidgets
# Load the gender wage gap table once; plot(co) filters this `df` per country.
file_name = "gender-wage-gap-oecd.csv"
df = pd.read_csv(file_name)
# Countries offered in the dropdown for plot(co).
# NOTE(review): this rebinds the module-level `countries` name that the earlier
# plot(value, year) also reads at call time — confirm that is intended.
countries = ["Australia", "Japan", "United Kingdom", "Canada"]
def plot(co):
    """Plot the gender wage gap over time for the country ``co``.

    Filters the module-level ``df`` to the rows whose ``Entity`` equals
    ``co`` and draws the ``Gender wage gap (OECD 2017)`` column against
    ``Year`` as a line, labelled and titled with the country name.
    """
    country_rows = df[df['Entity'] == co]

    plt.figure(figsize=(20, 10))
    plt.plot(
        country_rows['Year'],
        country_rows['Gender wage gap (OECD 2017)'],
        label=co,
    )
    # legend must be *called*; a bare `plt.legend` reference draws nothing,
    # so the label set above would never appear.
    plt.legend()

    plt.xlabel('Year')
    plt.ylabel('Gender wage gap')
    plt.title(co)

    plt.show()
# Dropdown over `countries`; each selection calls plot(co) with the chosen name.
ipywidgets.interact(plot, co = countries)
interactive(children=(Dropdown(description='co', options=('Australia', 'Japan', 'United Kingdom', 'Canada'), v…
<function __main__.plot(co)>
We see that the gender wage gap has decreased over the years. There are several reasons for this:
~ Women's access to education and training has increased over the years, leading to more opportunities for them to enter higher-paying professions and industries.
~ Many countries have implemented legislative changes aimed at reducing the gender wage gap, such as equal pay laws and anti-discrimination measures.
~ Many organizations have implemented policies and practices aimed at reducing the gender wage gap, such as pay transparency, pay equity audits, and diversity and inclusion initiatives.
~ There has been a broader cultural shift towards recognizing and valuing women's contributions in the workforce, as well as a greater emphasis on work-life balance and family-friendly policies.
While there has been progress in reducing the gender wage gap, disparities still exist and more work needs to be done to ensure that women are paid fairly for their work. This is something we work to analyze in the next section.
As we saw above, the gender wage gap in many countries has decreased considerably over the last few decades. However, a problem that still persists is the lack of women in positions of power. To see the extent of this inequality, we plot the percentage of firms with a female top manager in different countries.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
seaborn.set()

# Load the table, leaving out the last two rows of the file.
path = 'top_female.csv'
df = pd.read_csv(path)[:-2]

# Rank countries from the highest to the lowest share, then leave out the
# last two rows of that ranking as well.
sorted_df = df.sort_values(
    by=['Firms with female top manager (% of firms)'],
    ascending=False,
)
df = sorted_df.iloc[:-2]

li = df['Entity'].tolist()
li2 = df['Firms with female top manager (% of firms)'].tolist()
# Constant reference level drawn across the whole chart.
# NOTE(review): 17.68 is presumably the cross-country average quoted in the
# accompanying text — confirm.
li3 = [17.68 for _ in li2]

# Bars per country with the flat reference line on top.
plt.figure(figsize=(20, 10))
plt.bar(li, li2, color='skyblue', width=0.8)
plt.plot(li, li3, color='blue')
plt.title('Firms with female top manager (% of firms)')
plt.xlabel('Countries')
plt.ylabel('Firms with female top manager (% of firms)')
plt.xticks(rotation=45, fontsize='6')
plt.show()
Averaging this percentage over all the countries shows that, on average, only around 18% of firms in a country are run by women. This number is even lower in certain regions, such as the Middle East and North Africa, where women-owned businesses make up only 7% of all firms.
This lack of representation is a significant issue, as it means that women are not able to fully participate in the global economy and benefit from the opportunities that entrepreneurship can provide. It also means that the world is missing out on the innovative ideas and contributions that women can bring to the business world.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
seaborn.set()

# Load the table, leaving out the last two rows of the file.
path = 'top_female.csv'
df = pd.read_csv(path)
df = df.iloc[:-2]

li = list(df['Entity'])
li2 = list(df['Firms with female top manager (% of firms)'])
# Flat reference line at 50 (% of firms), drawn across all countries.
li3 = [50] * len(li2)

# Scatter plot: one point per country.
plt.figure(figsize=(20, 10))
# `cmap` only takes effect when `c` supplies the values to map; without it
# matplotlib ignores the colormap and warns. Colour each point by its share.
plt.scatter(li, li2, c=li2, cmap='viridis')
plt.title('Firms with female top manager (% of firms)')
plt.plot(li, li3, color='blue')
plt.xlabel('Countries')
plt.ylabel('Firms with female top manager (% of firms)')
# Hide the country names on the x-axis.
plt.gca().set_xticklabels([])
plt.show()
When we plot the same dataset using a scatter plot, we get a better understanding of just how unfair the ratio is.
The horizontal line in the above plot marks the 50% level, at which a country has an equal number of firms run by women and by men. However, we can see only two points above this line, while the rest lie below it.
The reason for this could be due to discrimination in accessing capital, networking opportunities, and other resources needed to start and grow a business. Women may also be less likely to see themselves as entrepreneurs if they don't see other women in leadership positions or if they lack role models in their industry.
Apart from the greater diversity of perspectives and ideas that women in business leadership positions bring, is there another reason why having women in positions of power is so crucial? Yes.
A deeper analysis of the dataset used above shows us that when women hold top leadership positions in a company, it can lead to a greater proportion of permanent female workers. This is because women leaders are more likely to prioritize gender diversity and to implement policies and practices that support and advance women in the workplace.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
# Countries offered in the dropdown that drives plot(con).
lir = ["United Kingdom", "Canada", "Argentina"]
def plot(con):
    """Plot the ``ratio_top20bottom20`` series over time for the country ``con``.

    Reads ``WIID_06MAY2020.csv``, keeps the rows whose ``country`` equals
    ``con``, retains the first row encountered for each ``year``, orders the
    rows chronologically, linearly fills gaps in ``ratio_top20bottom20`` and
    draws the result against ``year``.
    """
    path = 'WIID_06MAY2020.csv'
    df = pd.read_csv(path)
    df = df[df['country'] == con]
    df = df.drop_duplicates(subset=['year'], keep='first')

    # Order by year so that both the gap filling and the drawn line follow
    # time rather than the row order of the file.
    df = df.sort_values('year')

    # Interpolate only the plotted column. Interpolating the whole frame also
    # touches text columns such as `country`, which pandas 2.1+ deprecates.
    ratio = df['ratio_top20bottom20'].interpolate()

    plt.figure(figsize=(20, 10))
    plt.plot(df['year'], ratio)
    plt.title('ratio_top20bottom20')
    plt.xlabel('Years')
    plt.ylabel('Ratio')
    plt.show()
# Dropdown over the countries in `lir`; each selection calls plot(con).
ipywidgets.interact(plot, con = lir)
interactive(children=(Dropdown(description='con', options=('United Kingdom', 'Canada', 'Argentina'), value='Un…
<function __main__.plot(con)>
This plot depicts income inequality using the ratio of the wealth held by the top 20% to the wealth held by the bottom 20%. For developing countries (e.g. Argentina), the ratio increases at first but starts decreasing after a point.
For developed countries (e.g. Canada), it fluctuates around a fairly stable value while gradually increasing.
As you can see from the various income inequality plots that we have displayed, the wealth distribution of a real country is not Gaussian (normal). It also has a tail, which shows the income inequality of the world.