Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added basic inequality measures: Lorenz curve and Gini #414

Merged
merged 3 commits into from
Oct 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions quantecon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# from .game_theory import <objects-here> #Place Holder if we wish to promote any general objects to the qe namespace.
from .graph_tools import DiGraph, random_tournament_graph
from .gridtools import cartesian, mlinspace, simplex_grid, simplex_index
from .inequality import lorenz_curve, gini_coefficient
from .kalman import Kalman
from .lae import LAE
from .arma import ARMA
Expand Down
86 changes: 86 additions & 0 deletions quantecon/inequality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Implements inequality and segregation measures such as Gini, Lorenz Curve

"""

import numpy as np
from numba import njit, prange


@njit
def lorenz_curve(y):
    """
    Compute the points of the Lorenz curve for a sample of incomes/wealth.

    The sample is sorted in ascending order and, for every k = 0, ..., n,
    the share of the population made up by the k poorest individuals is
    paired with the share of total income they hold.

    Parameters
    ----------
    y : array_like(float or int, ndim=1)
        Income/wealth of each individual. The array does not need to be
        sorted; a sorted copy is made internally.

    Returns
    -------
    cum_people : array_like(float, ndim=1)
        Array of length n + 1 with the cumulative share of people,
        i.e. k / n for k = 0, ..., n.
    cum_income : array_like(float, ndim=1)
        Array of length n + 1 with the cumulative share of income held
        by the k poorest individuals. The first entry is 0.

    References
    ----------
    https://en.wikipedia.org/wiki/Lorenz_curve

    Examples
    --------
    import matplotlib.pyplot as plt
    a_val, n = 3, 10_000
    y = np.random.pareto(a_val, size=n)
    f_vals, l_vals = lorenz_curve(y)
    # Plot
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    ax.plot(f_vals, l_vals, label="Pareto with a={0}".format(a_val))
    fig.suptitle("Pareto distribution with a={0}".format(a_val))

    """
    n = len(y)
    ordered = np.sort(y)

    # running_total[k] is the income held by the k poorest individuals,
    # with running_total[0] == 0 as the origin of the curve.
    running_total = np.zeros(n + 1)
    running_total[1:] = np.cumsum(ordered)
    total_income = running_total[n]

    cum_people = np.zeros(n + 1)
    cum_income = np.zeros(n + 1)
    for k in range(n):
        cum_people[k + 1] = (k + 1) / n
        cum_income[k + 1] = running_total[k + 1] / total_income
    return cum_people, cum_income


@njit
def gini_coefficient(y):
    r"""
    Compute the Gini inequality index of a sample of incomes/wealth.

    The index is half the relative mean absolute difference,

    .. math::

        G = \frac{\sum_{i=1}^n \sum_{j=1}^n |y_i - y_j|}
                 {2 n \sum_{i=1}^n y_i}

    which, once the sample is sorted in ascending order
    :math:`y_{(1)} \le \dots \le y_{(n)}`, equals

    .. math::

        G = \frac{\sum_{i=1}^n (2 i - n - 1) \, y_{(i)}}
                 {n \sum_{i=1}^n y_i}

    The second form is the one evaluated here: it needs one sort and one
    linear pass instead of visiting all n * n pairs.

    Parameters
    ----------
    y : array_like(float)
        Income/wealth of each individual. The array does not need to be
        sorted; a sorted copy is made internally.

    Returns
    -------
    gini_index : float
        The Gini index describing the inequality of the sample.

    References
    ----------
    https://en.wikipedia.org/wiki/Gini_coefficient
    """
    n = len(y)
    y_sorted = np.sort(y)

    # Rank-weighted sum: in the sorted sample the i-th smallest value
    # (0-based i) appears with a plus sign in i pairwise differences and
    # a minus sign in n - 1 - i of them. Float weights keep integer
    # inputs from overflowing in the product.
    weighted_sum = 0.0
    for i in range(n):
        weighted_sum += (2.0 * i + 1.0 - n) * y_sorted[i]

    return weighted_sum / (n * np.sum(y))


66 changes: 66 additions & 0 deletions quantecon/tests/test_inequality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

"""
Tests for inequality.py

"""

import numpy as np
from numpy.testing import assert_allclose
from quantecon import lorenz_curve, gini_coefficient


def test_lorenz_curve():
    """
    Tests the `lorenz_curve` function on two limiting cases.

    When everyone has almost the same wealth, the Lorenz curve should be
    close to the 45-degree line, i.e. the cumulative income share should
    match the cumulative population share.

    When one person holds almost all the wealth, the curve should stay
    flat near zero and only jump to one at the very last point.
    """
    n_people = 3000

    # Almost equal distribution: unit wealth plus tiny Gaussian noise
    noise = np.random.normal(scale=0.0001, size=n_people)
    near_equal = np.ones(n_people) + noise
    people_share, income_share = lorenz_curve(near_equal)
    assert_allclose(people_share, income_share, rtol=1e-03)

    # Very uneven distribution: a single individual dwarfs everyone else
    concentrated = np.full(n_people, 0.001)
    concentrated[4] = 100000
    _, income_share = lorenz_curve(concentrated)
    target = np.zeros(n_people + 1)
    target[-1] = 1.
    assert_allclose(target, income_share, atol=1e-4)


def test_gini_coeff():
    """
    Tests how the function `gini_coefficient` calculates the Gini
    coefficient with the Pareto and the Weibull distribution.

    Analytically, we know that Pareto with parameter `a` has
    G = 1 / (2*a - 1)

    Likewise, for the Weibull distribution with parameter `a` we know that
    G = 1 - 2**(-1/a)

    """
    n = 10000

    # A local, seeded generator makes the drawn shape parameters and the
    # samples identical on every run, so a failure is always reproducible
    # and the global NumPy random state is left untouched.
    rng = np.random.RandomState(414)

    # Tests Pareto: G = 1 / (2*a - 1)
    a = rng.randint(2, 15)
    expected = 1 / (2 * a - 1)

    # Shift and scale the Lomax draws into a classical Pareto sample
    y = (rng.pareto(a, size=n) + 1) * 2
    coeff = gini_coefficient(y)
    assert_allclose(expected, coeff, rtol=1e-01)

    # Tests Weibull: G = 1 - 2**(-1/a)
    a = rng.randint(2, 15)
    expected = 1 - 2 ** (-1 / a)

    y = rng.weibull(a, size=n)
    coeff = gini_coefficient(y)
    assert_allclose(expected, coeff, rtol=1e-01)