Gis Notes Ii

Published:

gis_applied_econ

Python GIS notes (II)

Count points in polygon, haversine distance (from Nunn 2008)

In [1]:
# pyscience imports
import os
import sys
import glob
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("seaborn-darkgrid")
sns.set(style="ticks", context="talk")
# plt.style.use("dark_background")
import statsmodels.api as sm
import statsmodels.formula.api as smf
# %matplotlib inline
# run for jupyter notebook
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
#%%
/home/alal/anaconda3/envs/gds/lib/python3.5/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
In [2]:
# spatial libraries
import matplotlib.pyplot as plt
import seaborn as sns
from seaborn import palplot
import geopandas as gpd
import rasterio
from rasterstats import zonal_stats
import pysal as ps
from pysal.contrib.viz import mapping as maps
import mplleaflet
In [7]:
root = "/home/alal/Desktop/code/tutorials/gis/gis-applied-econ-Python/"
In [8]:
%cd $root
/home/alal/Desktop/code/tutorials/gis/gis-applied-econ-Python

Based on Mora's lectures here; stored in notebook for future reference

Section 1

In [9]:
%cd Session_1
/home/alal/Desktop/code/tutorials/gis/gis-applied-econ-Python/Session_1
In [10]:
africa_outline  = gpd.read_file('Polygons/Africa_dvp_level0.shp')
africa_subn     = gpd.read_file('Polygons/Africa_dvp_level1.shp')
africa_conflict = gpd.read_file('Points/ACLED_Africa_subsample.shp')
africa_outline.head()
Out[10]:
AREAPERIMETERG2000_0_G2000_0_IDADM0_CODEADM0_NAMELAST_UPDATCONTINENTREGIONSTR_YEAR0EXP_YEAR0geometry
00.000150.047622050920508106Guinea20060103AfricaWestern Africa00POLYGON ((-14.89970970153809 10.94465827941895...
10.000010.016292461324612133Kenya20081111AfricaEastern Africa00POLYGON ((41.51533126831055 -1.752033352851868...
20.000110.068412461724616133Kenya20081111AfricaEastern Africa00POLYGON ((41.49936676025391 -1.77346134185791,...
30.000060.033722052020519106Guinea20060103AfricaWestern Africa00POLYGON ((-14.81044387817383 10.93490600585938...
40.001020.131322052120520106Guinea20060103AfricaWestern Africa00POLYGON ((-14.85491752624512 10.89849662780762...
In [11]:
africa_outline.shape
Out[11]:
(2217, 12)
In [12]:
f , ax = plt.subplots(1, figsize=(13,11))
africa_outline.plot(column='REGION',cmap='viridis',ax=ax,edgecolor='k')
africa_conflict.plot(markersize=5,color='r',ax=ax)
plt.suptitle('ACLED Conflict Data',color='w')
ax.set_axis_off()
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febeef84c18>
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febeef84c18>
Out[12]:
Text(0.5,0.98,'ACLED Conflict Data')

Count number of points in each polygon

Calculate Conflict intensity in each country

In [13]:
# copy points to new df so it can be shortened over iterations
pts = africa_conflict.copy()
pts_in_polys = []

# iterate through polygons
for i, poly in africa_outline.iterrows():
    pts_in_poly = []
    # iterate through points
    for j, pt in pts.iterrows():
        # check if point is in polygon
        if poly.geometry.contains(pt.geometry):
            pts_in_poly.append(pt.geometry)
            pts = pts.drop([j])
    pts_in_polys.append(len(pts_in_poly))
In [14]:
africa_outline['conflict_count'] = gpd.GeoSeries(pts_in_polys)
In [15]:
africa_outline.conflict_count.describe()
Out[15]:
count    2217.000000
mean        0.447452
std         5.265176
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max       151.000000
Name: conflict_count, dtype: float64
In [16]:
sns.distplot(africa_outline['conflict_count'])
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febef01fe80>
In [17]:
f , ax = plt.subplots(1, figsize=(13,11))
africa_outline.plot(column='conflict_count',cmap='OrRd',
                    ax=ax,edgecolor='k')
plt.suptitle('ACLED Conflict Intensity',color='w')
ax.set_axis_off()
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febeeefe080>
Out[17]:
Text(0.5,0.98,'ACLED Conflict Intensity')
In [18]:
africa_outline.shape
africa_outline.query("ADM0_NAME == 'Somalia'").head()
Out[18]:
(2217, 13)
Out[18]:
AREAPERIMETERG2000_0_G2000_0_IDADM0_CODEADM0_NAMELAST_UPDATCONTINENTREGIONSTR_YEAR0EXP_YEAR0geometryconflict_count
194651.8171252.123511963119630226Somalia20061128AfricaEastern Africa00POLYGON ((43.25469207763672 11.46959590911865,...151
21000.000130.053282003320032226Somalia20061128AfricaEastern Africa00POLYGON ((43.45139312744141 11.49118614196777,...0
21050.000220.123282004720046226Somalia20061128AfricaEastern Africa00POLYGON ((43.33499908447266 11.44763374328613,...0
21060.000150.064282415324152226Somalia20061128AfricaEastern Africa00POLYGON ((42.54333877563477 -0.394348978996276...0
21180.000390.078402010420103226Somalia20061128AfricaEastern Africa00POLYGON ((43.45517730712891 11.42086791992188,...0

Section 2

In [19]:
%cd $root/Session_2
/home/alal/Desktop/code/tutorials/gis/gis-applied-econ-Python/Session_2
In [20]:
africa = gpd.read_file('Polygons/Africa_dvp_level0.shp')
In [21]:
f , ax = plt.subplots(1, figsize=(13,11))
africa_outline.plot(column='REGION',cmap='OrRd',ax=ax,edgecolor='k')
plt.suptitle('Africa Regional Map',color='w')
ax.set_axis_off()
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febeebaea20>
Out[21]:
Text(0.5,0.98,'Africa Regional Map')
In [22]:
from shapely.geometry import shape, mapping
import os,sys
import fiona
import rtree 
from fiona.crs import from_epsg
In [23]:
afr = gpd.read_file('Polygons/Africa_dvp_level0.shp')
In [24]:
afr.plot()
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febeecda2e8>

this is suspiciously easy

In [25]:
cents = afr.centroid
In [26]:
f , ax = plt.subplots(1, figsize=(13,11))
afr.plot(column='REGION',cmap='OrRd',ax=ax,edgecolor='k')
cents.plot(markersize=5,ax=ax)
ax.set_axis_off()
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febee8df0f0>
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febee8df0f0>
In [27]:
afr['cents'] = afr.centroid
afr.drop(['geometry'],axis=1, inplace=True)
afr.rename(columns={'cents':'geometry'},inplace=True)
afr.head()
Out[27]:
AREAPERIMETERG2000_0_G2000_0_IDADM0_CODEADM0_NAMELAST_UPDATCONTINENTREGIONSTR_YEAR0EXP_YEAR0geometry
00.000150.047622050920508106Guinea20060103AfricaWestern Africa00POINT (-14.90421035109124 10.95188983006651)
10.000010.016292461324612133Kenya20081111AfricaEastern Africa00POINT (41.51520196665683 -1.748503351977873)
20.000110.068412461724616133Kenya20081111AfricaEastern Africa00POINT (41.50708744527619 -1.762500797052462)
30.000060.033722052020519106Guinea20060103AfricaWestern Africa00POINT (-14.80903127776753 10.93929452298301)
40.001020.131322052120520106Guinea20060103AfricaWestern Africa00POINT (-14.8698447839993 10.92089222792764)
In [28]:
afr.to_file('africa_centroids.shp', 
            driver='ESRI Shapefile')

dissolve

In [30]:
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world.head()
Out[30]:
pop_estcontinentnameiso_a3gdp_md_estgeometry
028400000.0AsiaAfghanistanAFG22270.0POLYGON ((61.21081709172574 35.65007233330923,...
112799293.0AfricaAngolaAGO110300.0(POLYGON ((16.32652835456705 -5.87747039146621...
23639453.0EuropeAlbaniaALB21810.0POLYGON ((20.59024743010491 41.85540416113361,...
34798491.0AsiaUnited Arab EmiratesARE184300.0POLYGON ((51.57951867046327 24.24549713795111,...
440913584.0South AmericaArgentinaARG573900.0(POLYGON ((-65.50000000000003 -55.199999999999...
In [31]:
continents = world.dissolve(by='continent',aggfunc='sum')
continents
Out[31]:
geometrypop_estgdp_md_est
continent
Africa(POLYGON ((49.54351891459575 -12.4698328589405...9.932819e+082718269.20
Antarctica(POLYGON ((-159.2081835601977 -79.497059421708...3.802000e+03760.40
Asia(POLYGON ((120.7156087586305 -10.2395813940878...4.085853e+0924826690.77
Europe(POLYGON ((-52.55642473001839 2.50470530843705...7.281312e+0818546159.00
North America(POLYGON ((-61.68000000000001 10.76, -61.105 1...5.393510e+0818537449.00
Oceania(POLYGON ((173.0203747907408 -40.9190524228564...3.351961e+07938913.50
Seven seas (open ocean)POLYGON ((68.935 -48.62500000000001, 69.58 -48...1.400000e+0216.00
South America(POLYGON ((-68.63401022758316 -52.636370458874...3.943555e+084041845.10
In [32]:
f, ax = plt.subplots(1,figsize=(20,12))
continents.plot(cmap='YlOrRd',column='pop_est',scheme='quantiles',ax=ax)
ax.set_axis_off()
Out[32]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febee8a43c8>

Section 3

In [33]:
from shapely.geometry import Point
In [34]:
patch = Point(0.0, 0.0).buffer(10.0)
patch.area
Out[34]:
313.6548490545939
In [35]:
%cd $root/Session_3
/home/alal/Desktop/code/tutorials/gis/gis-applied-econ-Python/Session_3

Replication - Nunn(2008)

In [36]:
coastline = gpd.read_file('10m-coastline/10m_coastline.shp')
coastline.head()
Out[36]:
ScaleRankFeatureClaNotegeometry
00CoastlineNoneLINESTRING (44.25477135508373 -20.376031182818...
16CoastlineNoneLINESTRING (81.87598717539623 7.09192536015035...
20CoastlineNoneLINESTRING (111.0105086597713 19.6837832703066...
30CoastlineNoneLINESTRING (121.9057723316463 24.9500796570253...
40CoastlineNoneLINESTRING (131.6546736988337 32.4569155945253...
In [37]:
### Open Packages
from shapely.geometry import shape, mapping,Point
import fiona
import os,sys
import rtree
import csv
import math
In [38]:
## Haversine Distance

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.asin(math.sqrt(a)) 
    km = 6367 * c
    return km
In [39]:
countr_shp    = "output/africa_centroids.shp"
coast_shp     = "10m-coastline/10m_coastline_atlantic.shp"
slaves_data   = "trade_centers_nun2008.csv"
In [40]:
t = gpd.read_file(countr_shp)
t.head()
Out[40]:
AREAPERIMETERG2000_0_G2000_0_IDADM0_CODEADM0_NAMELAST_UPDATCONTINENTREGIONSTR_YEAR0EXP_YEAR0geometry
00.000150.047622050920508106Guinea20060103AfricaWestern Africa00POINT (-14.90421035109124 10.95188983006651)
10.000010.016292461324612133Kenya20081111AfricaEastern Africa00POINT (41.51520196665683 -1.748503351977873)
20.000110.068412461724616133Kenya20081111AfricaEastern Africa00POINT (41.50708744527619 -1.762500797052462)
30.000060.033722052020519106Guinea20060103AfricaWestern Africa00POINT (-14.80903127776753 10.93929452298301)
40.001020.131322052120520106Guinea20060103AfricaWestern Africa00POINT (-14.8698447839993 10.92089222792764)
In [41]:
slaves = pd.read_csv(slaves_data)
slaves.columns = ['name','lat','lon','long_name']
data_slaves = slaves.T.to_dict()
In [42]:
slaves.head()
Out[42]:
namelatlonlong_name
0Virginia36.85-75.98Virginia Beach
1Havana23.13-82.36Habana
2Haiti18.54-72.33Port au Prince
3Kingston18.00-76.80Kingston
4Dominica15.30-61.40Roseau
In [44]:
from geopandas import GeoDataFrame

geometry = [Point(xy) for xy in zip(slaves.lon, slaves.lat)]
crs = {'init': 'epsg:4326'}
gdf = GeoDataFrame(slaves, crs=crs, geometry=geometry)
In [47]:
f, ax = plt.subplots(1,figsize=(20,12))
coastline.plot(ax=ax)
gdf.plot(color='r',ax=ax)
f.suptitle('Slave-trading ports in the new world')
ax.set_axis_off()
Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febedfb3940>
Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febedfb3940>
Out[47]:
Text(0.5,0.98,'Slave-trading ports in the new world')
In [48]:
## Now we create an index for the slaves trades
idx_trade = rtree.index.Index()
for id in data_slaves:
	idx_trade.insert(id, (float(data_slaves[id]['lat']),
                          float(data_slaves[id]['lon']), 
                          float(data_slaves[id]['lat']), 
                          float(data_slaves[id]['lon'])))
In [49]:
country_distance={}
with fiona.open(countr_shp, 'r') as country_layer:
	with fiona.open(coast_shp, 'r') as coast_layer:
		### Create index for coast-line nodes
		print("Creating the Spatial index - coastline")
		index = rtree.index.Index()
		for feat1 in coast_layer:
			fid = int(feat1['id'])
			geom1 = shape(feat1['geometry'])
			index.insert(fid, geom1.bounds)
		### Now iterate over african countries
		print("Iterating over coastline")
		for feat in country_layer:
			## Get Geometries from centroid
			geom1 = shape(feat['geometry'])
			country=feat['properties']['ADM0_NAME']
			lon,lat=feat['geometry']['coordinates']
			### Get the distance to the closest point in the coast
			for fid_coast in list(index.nearest(geom1.bounds,1)):
				feat1=coast_layer[fid_coast]
				dist={}
				for lon_coast,lat_coast in feat1['geometry']['coordinates']:
					dist[(lat_coast,lon_coast)]=haversine(lon,lat,
                                                          lon_coast,lat_coast)
			## Now we get closest distance
			dist_coast=min(dist.items(), key=lambda x: x[1]) 
			## It will give us [(lon),value]
			## Now we are ready to estimate the distance to the coastline to the trade market
			lon_coast_1,lat_coast_1=dist_coast[0]
			for fid_market in list(idx_trade.nearest(shape(
                                                Point(lon_coast_1,
                                                      lat_coast_1)).bounds,1)):
				dist_coast_to_market=haversine(float(data_slaves[fid_market]['lon']),
                                               float(data_slaves[fid_market]['lat']),
                                               lon_coast_1,lat_coast_1)
			### Now, we are ready to estimate the final distance
			dist_final=dist_coast[1]+dist_coast_to_market

			### Get the value for the shapefile
			try:
				country_distance[country].append(dist_final)
			except:
				country_distance[country]=[dist_final]
Creating the Spatial index - coastline
Iterating over coastline
In [50]:
## Finally we need to get he closest feature to each country
final_data = dict(zip(country_distance.keys(), 
                      [[min(item)] for item in country_distance.values()]))
In [51]:
pd.DataFrame.from_dict(country_distance,orient='index').head()
Out[51]:
0123456789...1281128212831284128512861287128812891290
Guinea5331.2494905328.7678085330.9561545327.2077355329.5968495315.9586885246.5000415229.7453915226.3578765222.171771...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Botswana4474.107786NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Kenya7645.5909227643.9489367637.1224487632.1440327628.3141807624.9417807625.5224637623.7429967623.9301367625.132802...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Hala'ib triangle9982.402268NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Sudan10165.30575210205.19298310385.84058710389.87134910388.19572610391.77750210402.26658810442.06321410443.00804210434.403287...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN

5 rows × 1291 columns

In [52]:
final_data['Egypt']
Out[52]:
[8904.837084661662]
In [53]:
distances = pd.DataFrame.from_dict(final_data,orient='index')
distances = distances.reset_index()

distances.columns = ['Country','distance_to_market']
distances.head()
Out[53]:
Countrydistance_to_market
0Guinea5156.395421
1Botswana4474.107786
2Kenya7324.884376
3Hala'ib triangle9982.402268
4Sudan9928.535020
In [54]:
distances.describe()
Out[54]:
distance_to_market
count66.000000
mean6728.856624
std2010.223934
min1345.540685
25%5298.587995
50%6582.348580
75%8085.700569
max11619.513497
In [55]:
afr = gpd.read_file('Polygons/Africa_dvp_level0.shp')
merged = pd.merge(afr,distances,left_on='ADM0_NAME',right_on='Country')
In [56]:
f, ax = plt.subplots(1,figsize=(13,11))
merged.plot(cmap='YlOrRd',column='distance_to_market',
            ax=ax)
plt.title('Distance to Slave-trading Markets',color='w')
ax.set_axis_off()
Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x7febedfae588>
Out[56]:
Text(0.5,1,'Distance to Slave-trading Markets')