Intermediate Earth Data Science Notes

worked examples from this book

In [1]:
# system
import os, sys, glob, re, itertools, collections
from pathlib import Path

# pyscience imports
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# viz
import matplotlib.pyplot as plt
import seaborn as sns
from plotnine import *
# plt.style.use("seaborn-darkgrid")
sns.set(style="ticks", context="talk")
font = {'family' : 'IBM Plex Sans',
               'weight' : 'normal',
               'size'   : 10}
plt.rc('font', **font)


# geodata packages
import geopandas as gpd
import geoplot as gplt
import mapclassify as mc
import geoplot.crs as gcrs
import earthpy as et
import earthpy.plot as ep

# raster packages
import rasterio as rio
from rasterstats import zonal_stats

# show all output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
#%%
In [2]:
os.chdir(os.path.join(et.io.HOME, 'earth-analytics'))
In [5]:
# Set working directory
os.chdir(os.path.join(et.io.HOME, 'earth-analytics'))
In [4]:
# Download csv of temp (F) and precip (inches) in July 2018 for Boulder, CO
file_url = "https://ndownloader.figshare.com/files/12948515"
et.data.get_data(url=file_url)
Out[4]:
'/home/alal/earth-analytics/data/earthpy-downloads/july-2018-temperature-precip.csv'
In [6]:
# Define relative path to file
file_path = os.path.join("data", "earthpy-downloads",
                         "july-2018-temperature-precip.csv")

Parse datetime on read

In [8]:
# Import file into pandas dataframe
boulder_july_2018 = pd.read_csv(file_path, parse_dates=['date'], index_col = ['date'])
boulder_july_2018.head()
Out[8]:
max_temp precip
date
2018-07-01 87 0.00
2018-07-02 92 0.00
2018-07-03 90 -999.00
2018-07-04 87 0.00
2018-07-05 84 0.24
In [9]:
boulder_july_2018.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 31 entries, 2018-07-01 to 2018-07-31
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   max_temp  31 non-null     int64  
 1   precip    31 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 744.0 bytes
In [12]:
# Create figure and plot space
fig, ax = plt.subplots(figsize=(10, 10))

# Add x-axis and y-axis
ax.scatter(boulder_july_2018.index.values,
        boulder_july_2018['precip'],
        color='purple')

# Set title and labels for axes
ax.set(xlabel="Date",
       ylabel="Precipitation (inches)",
       title="Daily Total Precipitation\nBoulder, Colorado in July 2018")

plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()
Out[12]:
<matplotlib.collections.PathCollection at 0x7f1fce4b1978>
Out[12]:
[Text(0, 0.5, 'Precipitation (inches)'),
 Text(0.5, 0, 'Date'),
 Text(0.5, 1.0, 'Daily Total Precipitation\nBoulder, Colorado in July 2018')]
Out[12]:
[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

Read Missing values correctly

In [18]:
# Import file into pandas dataframe
boulder_july_2018 = pd.read_csv(file_path, parse_dates=['date'], index_col = ['date'])
boulder_july_2018.loc[boulder_july_2018.precip == -999, ['precip']] = None
boulder_july_2018.head()
Out[18]:
max_temp precip
date
2018-07-01 87 0.00
2018-07-02 92 0.00
2018-07-03 90 NaN
2018-07-04 87 0.00
2018-07-05 84 0.24
In [19]:
# Create figure and plot space
fig, ax = plt.subplots(figsize=(10, 10))

# Add x-axis and y-axis
ax.scatter(boulder_july_2018.index.values,
        boulder_july_2018['precip'],
        color='purple')

# Set title and labels for axes
ax.set(xlabel="Date",
       ylabel="Precipitation (inches)",
       title="Daily Total Precipitation\nBoulder, Colorado in July 2018")

plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()
Out[19]:
<matplotlib.collections.PathCollection at 0x7f1fcdec4eb8>
Out[19]:
[Text(0, 0.5, 'Precipitation (inches)'),
 Text(0.5, 0, 'Date'),
 Text(0.5, 1.0, 'Daily Total Precipitation\nBoulder, Colorado in July 2018')]
Out[19]:
[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

TS Slices

In [20]:
# Download the data
data = et.data.get_data('colorado-flood')
# Set working directory
os.chdir(os.path.join(et.io.HOME, 'earth-analytics'))

# Define relative path to file with daily precip total
file_path = os.path.join("data", "colorado-flood",
                         "precipitation",
                         "805325-precip-dailysum-2003-2013.csv")
Downloading from https://ndownloader.figshare.com/files/16371473
Extracted output to /home/alal/earth-analytics/data/colorado-flood/.
In [22]:
# Import data using datetime and no data value
boulder_precip_2003_2013 = pd.read_csv(file_path,
                                       parse_dates=['DATE'],
                                       index_col= ['DATE'],
                                       na_values=['999.99'])

boulder_precip_2003_2013.head()
boulder_precip_2003_2013.info()
Out[22]:
DAILY_PRECIP STATION STATION_NAME ELEVATION LATITUDE LONGITUDE YEAR JULIAN
DATE
2003-01-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2003 1
2003-01-05 NaN COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2003 5
2003-02-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2003 32
2003-02-02 NaN COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2003 33
2003-02-03 0.4 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2003 34
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 792 entries, 2003-01-01 to 2013-12-31
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   DAILY_PRECIP  788 non-null    float64
 1   STATION       792 non-null    object 
 2   STATION_NAME  792 non-null    object 
 3   ELEVATION     792 non-null    float64
 4   LATITUDE      792 non-null    float64
 5   LONGITUDE     792 non-null    float64
 6   YEAR          792 non-null    int64  
 7   JULIAN        792 non-null    int64  
dtypes: float64(4), int64(2), object(2)
memory usage: 55.7+ KB
In [23]:
boulder_precip_2003_2013.describe()
Out[23]:
DAILY_PRECIP ELEVATION LATITUDE LONGITUDE YEAR JULIAN
count 788.000000 792.0 792.000000 792.000000 792.000000 792.000000
mean 0.247843 1650.5 40.033850 -105.281106 2007.967172 175.541667
std 0.462558 0.0 0.000045 0.000005 3.149287 98.536373
min 0.000000 1650.5 40.033800 -105.281110 2003.000000 1.000000
25% 0.100000 1650.5 40.033800 -105.281110 2005.000000 96.000000
50% 0.100000 1650.5 40.033890 -105.281110 2008.000000 167.000000
75% 0.300000 1650.5 40.033890 -105.281100 2011.000000 255.250000
max 9.800000 1650.5 40.033890 -105.281100 2013.000000 365.000000

row-index slices w time

In [25]:
# select 2013 data - datetime index is magic
boulder_precip_2003_2013['2013'].head()
Out[25]:
DAILY_PRECIP STATION STATION_NAME ELEVATION LATITUDE LONGITUDE YEAR JULIAN
DATE
2013-01-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.0338 -105.2811 2013 1
2013-01-28 0.1 COOP:050843 BOULDER 2 CO US 1650.5 40.0338 -105.2811 2013 28
2013-01-29 0.1 COOP:050843 BOULDER 2 CO US 1650.5 40.0338 -105.2811 2013 29
2013-02-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.0338 -105.2811 2013 32
2013-02-14 0.1 COOP:050843 BOULDER 2 CO US 1650.5 40.0338 -105.2811 2013 45
In [26]:
# Select all December data - view first few rows
boulder_precip_2003_2013[boulder_precip_2003_2013.index.month == 12].head()
Out[26]:
DAILY_PRECIP STATION STATION_NAME ELEVATION LATITUDE LONGITUDE YEAR JULIAN
DATE
2003-12-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2003 335
2004-12-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2004 336
2004-12-22 0.2 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2004 357
2004-12-24 0.1 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2004 359
2005-12-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2005 335

Range slice

In [27]:
# Subset data to May-Aug 2005
precip_may_aug_2005 = boulder_precip_2003_2013['2005-05-01':'2005-08-31']
precip_may_aug_2005.head()
Out[27]:
DAILY_PRECIP STATION STATION_NAME ELEVATION LATITUDE LONGITUDE YEAR JULIAN
DATE
2005-05-01 0.1 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2005 121
2005-05-11 1.2 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2005 131
2005-05-30 0.5 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2005 150
2005-05-31 0.1 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2005 151
2005-06-01 0.0 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 2005 152
In [28]:
# Create figure and plot space
fig, ax = plt.subplots(figsize=(8, 8))

# Add x-axis and y-axis
ax.bar(precip_may_aug_2005.index.values,
       precip_may_aug_2005['DAILY_PRECIP'],
       color='purple')

# Set title and labels for axes
ax.set(xlabel="Date",
       ylabel="Precipitation (inches)",
       title="Daily Total Precipitation\nMay - Aug 2005 for Boulder Creek")

# Rotate tick marks on x-axis
plt.setp(ax.get_xticklabels(), rotation=45)

plt.show()
Out[28]:
<BarContainer object of 24 artists>
Out[28]:
[Text(0, 0.5, 'Precipitation (inches)'),
 Text(0.5, 0, 'Date'),
 Text(0.5, 1.0, 'Daily Total Precipitation\nMay - Aug 2005 for Boulder Creek')]
Out[28]:
[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

Resampling/Aggregating

In [7]:
# Define relative path to file with hourly precip
file_path = os.path.join("data", "colorado-flood",
                         "precipitation",
                         "805325-precip-daily-2003-2013.csv")

# Import data using datetime and no data value
precip_2003_2013_hourly = pd.read_csv(file_path,
                                      parse_dates=['DATE'],
                                      index_col=['DATE'],
                                      na_values=['999.99'])

# View first few rows
precip_2003_2013_hourly.head()
precip_2003_2013_hourly.info()
precip_2003_2013_hourly.describe()
Out[7]:
STATION STATION_NAME ELEVATION LATITUDE LONGITUDE HPCP Measurement Flag Quality Flag
DATE
2003-01-01 01:00:00 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 0.0 g
2003-02-01 01:00:00 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 0.0 g
2003-02-02 19:00:00 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 0.2
2003-02-02 22:00:00 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 0.1
2003-02-03 02:00:00 COOP:050843 BOULDER 2 CO US 1650.5 40.03389 -105.28111 0.1
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1840 entries, 2003-01-01 01:00:00 to 2013-12-31 00:00:00
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   STATION           1840 non-null   object 
 1   STATION_NAME      1840 non-null   object 
 2   ELEVATION         1840 non-null   float64
 3   LATITUDE          1840 non-null   float64
 4   LONGITUDE         1840 non-null   float64
 5   HPCP              1746 non-null   float64
 6   Measurement Flag  1840 non-null   object 
 7   Quality Flag      1840 non-null   object 
dtypes: float64(4), object(4)
memory usage: 129.4+ KB
Out[7]:
ELEVATION LATITUDE LONGITUDE HPCP
count 1840.0 1840.000000 1840.000000 1746.000000
mean 1650.5 40.033851 -105.281106 0.111856
std 0.0 0.000045 0.000005 0.093222
min 1650.5 40.033800 -105.281110 0.000000
25% 1650.5 40.033800 -105.281110 0.100000
50% 1650.5 40.033890 -105.281110 0.100000
75% 1650.5 40.033890 -105.281100 0.100000
max 1650.5 40.033890 -105.281100 2.200000
In [8]:
# hourly index
precip_2003_2013_hourly.index
Out[8]:
DatetimeIndex(['2003-01-01 01:00:00', '2003-02-01 01:00:00',
               '2003-02-02 19:00:00', '2003-02-02 22:00:00',
               '2003-02-03 02:00:00', '2003-02-05 02:00:00',
               '2003-02-05 08:00:00', '2003-02-06 00:00:00',
               '2003-02-07 12:00:00', '2003-02-10 13:00:00',
               ...
               '2013-12-01 01:00:00', '2013-12-03 20:00:00',
               '2013-12-04 03:00:00', '2013-12-04 06:00:00',
               '2013-12-04 09:00:00', '2013-12-22 01:00:00',
               '2013-12-23 00:00:00', '2013-12-23 02:00:00',
               '2013-12-29 01:00:00', '2013-12-31 00:00:00'],
              dtype='datetime64[ns]', name='DATE', length=1840, freq=None)
In [26]:
# Resample to daily precip sum and save as new dataframe
precip_2003_2013_daily = precip_2003_2013_hourly.resample('D').sum()
# Resample to monthly precip sum and save as new dataframe
precip_2003_2013_monthly = precip_2003_2013_daily.resample('M').sum()
# Resample to yearly precip sum and save as new dataframe
precip_2003_2013_yearly = precip_2003_2013_hourly.resample('Y').sum()
In [28]:
# Create figure and plot space
fig, ax = plt.subplots(1, 3, figsize=(20, 8))



# Add x-axis and y-axis
ax[0].scatter(precip_2003_2013_daily.index.values,
           precip_2003_2013_daily['HPCP'], s = 2,
           color='purple')
# Set title and labels for axes
ax[0].set(xlabel="Date",
       ylabel="Precipitation (inches)",
       title="Daily Precipitation - Boulder Station\n 2003-2013 : Daily")

# Add x-axis and y-axis
ax[1].scatter(precip_2003_2013_monthly.index.values,
           precip_2003_2013_monthly['HPCP'], s = 2,
           color='green')
# Set title and labels for axes
ax[1].set(xlabel="Date",
       ylabel="Precipitation (inches)",
       title="Daily Precipitation - Boulder Station\n 2003-2013 : Monthly")
# Add x-axis and y-axis
ax[2].scatter(precip_2003_2013_yearly.index.values,
           precip_2003_2013_yearly['HPCP'], s = 2,
           color='blue')
# Set title and labels for axes
ax[2].set(xlabel="Date",
       ylabel="Precipitation (inches)",
       title="Daily Precipitation - Boulder Station\n 2003-2013 : Yearly")
Out[28]:
<matplotlib.collections.PathCollection at 0x7f6154f4bf98>
Out[28]:
[Text(0, 0.5, 'Precipitation (inches)'),
 Text(0.5, 0, 'Date'),
 Text(0.5, 1.0, 'Daily Precipitation - Boulder Station\n 2003-2013 : Daily')]
Out[28]:
<matplotlib.collections.PathCollection at 0x7f6154edc668>
Out[28]:
[Text(0, 0.5, 'Precipitation (inches)'),
 Text(0.5, 0, 'Date'),
 Text(0.5, 1.0, 'Daily Precipitation - Boulder Station\n 2003-2013 : Monthly')]
Out[28]:
<matplotlib.collections.PathCollection at 0x7f6154edcbe0>
Out[28]:
[Text(0, 0.5, 'Precipitation (inches)'),
 Text(0.5, 0, 'Date'),
 Text(0.5, 1.0, 'Daily Precipitation - Boulder Station\n 2003-2013 : Yearly')]

Date Formats for plots

In [37]:
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
In [29]:
# Define relative path to file with daily precip
file_path = os.path.join("data", "colorado-flood",
                         "precipitation",
                         "805325-precip-dailysum-2003-2013.csv")
precip_june_aug_2005 = precip_2003_2013_daily['2005-06-01':'2005-08-31']
precip_june_aug_2005.head()
Out[29]:
ELEVATION LATITUDE LONGITUDE HPCP
DATE
2005-06-01 1650.5 40.03389 -105.28111 0.0
2005-06-02 1650.5 40.03389 -105.28111 0.1
2005-06-03 3301.0 80.06778 -210.56222 0.3
2005-06-04 9903.0 240.20334 -631.68666 0.7
2005-06-05 0.0 0.00000 0.00000 0.0
In [41]:
# Create figure and plot space
fig, ax = plt.subplots(figsize=(12, 8))

# Add x-axis and y-axis
ax.bar(precip_june_aug_2005.index.values,
       precip_june_aug_2005['HPCP'],
       color='purple')

# Set title and labels for axes
ax.set(xlabel="Date",
       ylabel="Precipitation (inches)",
       title="Daily Total Precipitation\nJune - Aug 2005 for Boulder Creek")

date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)

# Ensure a major tick for each week using (interval=1) 
ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=1))
plt.setp(ax.get_xticklabels(), rotation=90)

plt.show()
Out[41]:
<BarContainer object of 92 artists>
Out[41]:
[Text(0, 0.5, 'Precipitation (inches)'),
 Text(0.5, 0, 'Date'),
 Text(0.5, 1.0, 'Daily Total Precipitation\nJune - Aug 2005 for Boulder Creek')]
Out[41]:
[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]
In [2]:
# Get data and set working directory
data = et.data.get_data('spatial-vector-lidar')


# Define path to file
plot_centroid_path = os.path.join("data", "spatial-vector-lidar", 
                                  "california", "neon-sjer-site", 
                                  "vector_data", "SJER_plot_centroids.shp")

# Import shapefile using geopandas
sjer_plot_locations = gpd.read_file(plot_centroid_path)
In [10]:
# View top 6 rows of attribute table
sjer_plot_locations.head(6)
Out[10]:
Plot_ID Point northing easting plot_type geometry
0 SJER1068 center 4111567.818 255852.376 trees POINT (255852.376 4111567.818)
1 SJER112 center 4111298.971 257406.967 trees POINT (257406.967 4111298.971)
2 SJER116 center 4110819.876 256838.760 grass POINT (256838.760 4110819.876)
3 SJER117 center 4108752.026 256176.947 trees POINT (256176.947 4108752.026)
4 SJER120 center 4110476.079 255968.372 grass POINT (255968.372 4110476.079)
5 SJER128 center 4111388.570 257078.867 trees POINT (257078.867 4111388.570)
In [9]:
f, ax = plt.subplots(figsize = (10, 12))
sjer_plot_locations.plot(column = 'plot_type', categorical = True, legend = True,
                         markersize = 45, cmap = "viridis", ax = ax)
ax.set_title('SJER Plot Locations\nMadera County, CA')
ax.set_axis_off()
# f.savefig(out/'location.pdf')
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b75d12128>
Out[9]:
Text(0.5, 1, 'SJER Plot Locations\nMadera County, CA')

Projections/CRS

In [29]:
from shapely.geometry import Point, Polygon
from matplotlib.ticker import ScalarFormatter
In [30]:
sjer_plot_locations.crs
sjer_plot_locations.total_bounds
Out[30]:
<Projected CRS: EPSG:32611>
Name: WGS 84 / UTM zone 11N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: World - N hemisphere - 120°W to 114°W - by country
- bounds: (-120.0, 0.0, -114.0, 84.0)
Coordinate Operation:
- name: UTM zone 11N
- method: Transverse Mercator
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
Out[30]:
array([ 254738.618, 4107527.074,  258497.102, 4112167.778])
In [31]:
# Import world boundary shapefile
worldBound_path = os.path.join("data", "spatial-vector-lidar", "global", 
                               "ne_110m_land", "ne_110m_land.shp")
worldBound = gpd.read_file(worldBound_path)
worldBound.crs
Out[31]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [32]:
# Create numpy array of x,y point locations
add_points = np.array([[-105.2519,   40.0274], 
                       [  10.75  ,   59.95  ], 
                       [   2.9833,   39.6167]])

# Turn points into list of x,y shapely points 
city_locations = [Point(xy) for xy in add_points]
# Create geodataframe using the points
city_locations = gpd.GeoDataFrame(city_locations, 
                                  columns=['geometry'],
                                  crs=worldBound.crs)
In [34]:
# Import graticule & world bounding box shapefile data
graticule_path = os.path.join("data", "spatial-vector-lidar", "global", 
                              "ne_110m_graticules_all", "ne_110m_graticules_15.shp")
graticule = gpd.read_file(graticule_path)

bbox_path = os.path.join("data", "spatial-vector-lidar", "global", 
                         "ne_110m_graticules_all", "ne_110m_wgs84_bounding_box.shp")
bbox = gpd.read_file(bbox_path)

# Create map axis object
fig, ax = plt.subplots(1, 1, figsize=(15, 8))
# Add bounding box and graticule layers
bbox.plot(ax=ax, alpha=.1, color='grey')
graticule.plot(ax=ax, color='lightgrey')
worldBound.plot(ax=ax, color='black')

# Add points to plot 
city_locations.plot(ax=ax, 
                    markersize=60, 
                    color='springgreen',
                    marker='*')
# Add title and axes labels
ax.set(title="World Map - Geographic Coordinate Reference System (long/lat degrees)",
       xlabel="X Coordinates (meters)",
       ylabel="Y Coordinates (meters)");

Robinson

In [35]:
# Reproject the data
worldBound_robin = worldBound.to_crs('+proj=robin')
graticule_robin = graticule.to_crs('+proj=robin')
city_locations_robin = city_locations.to_crs(worldBound_robin.crs)
bbox_robinson = bbox.to_crs('+proj=robin')
In [36]:
# Setup plot with 2 "rows" one for each map and one column
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(13, 12))

# First plot
bbox.plot(ax=ax0,
          alpha=.1,
          color='grey')

graticule.plot(ax=ax0,
               color='lightgrey')

worldBound.plot(ax=ax0,
                color='k')

city_locations.plot(ax=ax0,
                    markersize=100,
                    color='springgreen')

ax0.set(title="World Map - Geographic (long/lat degrees)")

# Second plot
bbox_robinson.plot(ax=ax1,
                   alpha=.1,
                   color='grey')

graticule_robin.plot(ax=ax1,
                        color='lightgrey')

worldBound_robin.plot(ax=ax1,
                      color='k')

city_locations_robin.plot(ax=ax1,
                          markersize=100,
                          color='springgreen')

ax1.set(title="World Map Projected - Robinson (Meters)")

for axis in [ax1.xaxis, ax1.yaxis]:
    formatter = ScalarFormatter()
    formatter.set_scientific(False)
    axis.set_major_formatter(formatter)
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74c6ceb8>
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74c6ceb8>
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74c6ceb8>
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74c6ceb8>
Out[36]:
[Text(0.5, 1, 'World Map - Geographic (long/lat degrees)')]
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74261320>
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74261320>
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74261320>
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74261320>
Out[36]:
[Text(0.5, 1, 'World Map Projected - Robinson (Meters)')]

Reprojecting

In [37]:
# Import the data
sjer_roads_path = os.path.join("data", "spatial-vector-lidar", "california", 
                               "madera-county-roads", "tl_2013_06039_roads.shp")
sjer_roads = gpd.read_file(sjer_roads_path)

# aoi stands for area of interest
sjer_aoi_path = os.path.join("data", "spatial-vector-lidar", "california", 
                             "neon-sjer-site", "vector_data", "SJER_crop.shp")
sjer_aoi = gpd.read_file(sjer_aoi_path)
In [38]:
# View the Coordinate Reference System of both layers 
sjer_roads.crs
sjer_aoi.crs
Out[38]:
<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - NAD83
- bounds: (167.65, 14.92, -47.74, 86.46)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich
Out[38]:
<Projected CRS: EPSG:32611>
Name: WGS 84 / UTM zone 11N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: World - N hemisphere - 120°W to 114°W - by country
- bounds: (-120.0, 0.0, -114.0, 84.0)
Coordinate Operation:
- name: UTM zone 11N
- method: Transverse Mercator
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [39]:
# Reproject the aoi to match the roads layer
sjer_aoi_wgs84  = sjer_aoi.to_crs(epsg=4269)

# Plot the data
fig, ax = plt.subplots(figsize=(12, 8))

sjer_roads.plot(cmap='Greys', ax=ax, alpha=.5)
sjer_aoi_wgs84.plot(ax=ax, markersize=10, color='r')

ax.set_title("Madera County Roads with SJER AOI");

Census

In [40]:
# Import data into geopandas dataframe
state_boundary_us_path = os.path.join("data", "spatial-vector-lidar", 
                                      "usa", "usa-states-census-2014.shp")
state_boundary_us = gpd.read_file(state_boundary_us_path)

# View data structure
type(state_boundary_us)
state_boundary_us.crs
Out[40]:
geopandas.geodataframe.GeoDataFrame
Out[40]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [41]:
# Plot the data
fig, ax = plt.subplots(figsize = (12,8))
state_boundary_us.plot(ax = ax, facecolor = 'white', edgecolor = 'black')

# Add title to map
ax.set(title="Map of Continental US State Boundaries\n United States Census Bureau Data")

# Turn off the axis  
plt.axis('equal')
ax.set_axis_off()

plt.show()
Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4b74492630>
Out[41]:
[Text(0.5, 1, 'Map of Continental US State Boundaries\n United States Census Bureau Data')]
Out[41]:
(-127.6146362, -64.06109779999998, 23.253819649999997, 50.62866934999999)
In [43]:
# Zoom in on just the area 
fig, ax = plt.subplots(figsize = (12,8))

state_boundary_us.plot(ax = ax,
                      linewidth=1,
                      edgecolor="black")

sjer_aoi_wgs84.plot(ax=ax, 
                    color='springgreen',
                   edgecolor = "r")

ax.set(title="Map of Continental US State Boundaries \n with SJER AOI")
ax.set(xlim=[-125, -116], ylim=[35, 40])

# Turn off axis  
ax.set(xticks = [], yticks = []);

Clipping

Points in Extent

In [3]:
# Import all of your data at the top of your notebook to keep things organized.
country_boundary_us_path = os.path.join("data", "spatial-vector-lidar", 
                                        "usa", "usa-boundary-dissolved.shp")
country_boundary_us = gpd.read_file(country_boundary_us_path)

state_boundary_us_path = os.path.join("data", "spatial-vector-lidar", 
                                      "usa", "usa-states-census-2014.shp")
state_boundary_us = gpd.read_file(state_boundary_us_path)

pop_places_path = os.path.join("data", "spatial-vector-lidar", "global", 
                               "ne_110m_populated_places_simple", "ne_110m_populated_places_simple.shp")
pop_places = gpd.read_file(pop_places_path)

# Are the data all in the same crs?
print("country_boundary_us", country_boundary_us.crs)
print("state_boundary_us", state_boundary_us.crs)
print("pop_places", pop_places.crs)
country_boundary_us epsg:4326
state_boundary_us epsg:4326
pop_places epsg:4326
In [4]:
# Plot the data
fig, ax = plt.subplots(figsize=(12, 8))

country_boundary_us.plot(alpha=.5,
                         ax=ax)

state_boundary_us.plot(cmap='Greys',
                       ax=ax,
                       alpha=.5)
pop_places.plot(ax=ax)

plt.axis('equal')
ax.set_axis_off()
plt.show()
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563f968400>
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563f968400>
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563f968400>
Out[4]:
(-193.4122428803924, 197.40832549680465, -47.04230625155682, 69.8923420176043)
In [5]:
# Clip the data using GeoPandas clip
points_clip = gpd.clip(pop_places, country_boundary_us)

# View the first 6 rows and a few select columns
points_clip[['name', 'geometry', 'scalerank', 'natscale', ]].head()
Out[5]:
name geometry scalerank natscale
175 San Francisco POINT (-122.41717 37.76920) 1 300
176 Denver POINT (-104.98596 39.74113) 1 300
177 Houston POINT (-95.34193 29.82192) 1 300
178 Miami POINT (-80.22605 25.78956) 1 300
179 Atlanta POINT (-84.40190 33.83196) 1 300
In [6]:
# Plot the data
fig, ax = plt.subplots(figsize=(12, 8))

country_boundary_us.plot(alpha=1,
                         color="white",
                         edgecolor="black",
                         ax=ax)

state_boundary_us.plot(cmap='Greys',
                       ax=ax,
                       alpha=.5)

points_clip.plot(ax=ax,
                 column='name')
ax.set_axis_off()
plt.axis('equal')

# Label each point - note this is just shown here optionally but is not required for your homework
points_clip.apply(lambda x: ax.annotate(s=x['name'],
                                        xy=x.geometry.coords[0],
                                        xytext=(6, 6), textcoords="offset points",
                                        backgroundcolor="white"),
                  axis=1)
plt.show()
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563f95ac18>
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563f95ac18>
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563f95ac18>
Out[6]:
(-127.61463620000002,
 -64.06109779999998,
 23.253819649999997,
 50.628669349999996)
Out[6]:
175       Annotation(-122.417, 37.7692, 'San Francisco')
176              Annotation(-104.986, 39.7411, 'Denver')
177             Annotation(-95.3419, 29.8219, 'Houston')
178               Annotation(-80.2261, 25.7896, 'Miami')
179              Annotation(-84.4019, 33.832, 'Atlanta')
180              Annotation(-87.752, 41.8319, 'Chicago')
216         Annotation(-118.182, 33.9919, 'Los Angeles')
217    Annotation(-77.0114, 38.9015, 'Washington, D.C.')
218             Annotation(-73.982, 40.7519, 'New York')
dtype: object

Line or Polygon in Extent

In [7]:
# Open the roads layer
ne_roads_path = os.path.join("data", "spatial-vector-lidar", "global", 
                             "ne_10m_roads", "ne_10m_roads.shp")
ne_roads = gpd.read_file(ne_roads_path)

# Are both layers in the same CRS?
if (ne_roads.crs == country_boundary_us.crs):
    print("Both layers are in the same crs!",
          ne_roads.crs, country_boundary_us.crs)
Both layers are in the same crs! epsg:4326 epsg:4326
In [8]:
# Simplify the geometry of the clip extent for faster processing
# Use this with caution as it modifies your data.
country_boundary_us_sim = country_boundary_us.simplify(
    .2, preserve_topology=True)
In [9]:
# Clip data
ne_roads_clip = gpd.clip(ne_roads, country_boundary_us_sim)

# Ignore missing/empty geometries
ne_roads_clip = ne_roads_clip[~ne_roads_clip.is_empty]

print("The clipped data have fewer line objects (represented by rows):",
      ne_roads_clip.shape, ne_roads.shape)
The clipped data have fewer line objects (represented by rows): (7346, 32) (56601, 32)
/home/alal/anaconda3/envs/gds/lib/python3.7/site-packages/geopandas/geoseries.py:381: UserWarning: GeoSeries.notna() previously returned False for both missing (None) and empty geometries. Now, it only returns False for missing values. Since the calling GeoSeries contains empty geometries, the result has changed compared to previous versions of GeoPandas.
Given a GeoSeries 's', you can use '~s.is_empty & s.notna()' to get back the old behaviour.

To further ignore this warning, you can do: 
import warnings; warnings.filterwarnings('ignore', 'GeoSeries.notna', UserWarning)
  return self.notna()
In [11]:
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))

ne_roads.plot(ax=ax1)
country_boundary_us.plot(alpha=1,
                         color="white",
                         edgecolor="black",
                         ax=ax2)
ne_roads_clip.plot(ax=ax2)

ax1.set_title("Unclipped roads")
ax2.set_title("Clipped roads")

ax1.set_axis_off()
ax2.set_axis_off()

plt.axis('equal')
plt.show()
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563bdf02e8>
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b3838d0>
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b3838d0>
Out[11]:
Text(0.5, 1, 'Unclipped roads')
Out[11]:
Text(0.5, 1, 'Clipped roads')
Out[11]:
(-127.6146362, -64.0610978, 23.25381965, 50.62866935000001)

Other Spatial Operations

Simplifying polygons

In [12]:
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))

# Set a larger tolerance yields a blockier polygon
country_boundary_us.simplify(2, preserve_topology=True).plot(ax=ax1)

# Set  a larger tolerance yields a blockier polygon
country_boundary_us.simplify(.2, preserve_topology=True).plot(ax=ax2)

ax1.set_title( "Data with a higher tolerance value will become visually blockier as there are fewer vertices")
ax2.set_title( "Data with a very low tolerance value will look smoother but will take longer to process")

ax1.set_axis_off()
ax2.set_axis_off()
plt.show()
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b318f98>
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563a8fa358>
Out[12]:
Text(0.5, 1, 'Data with a higher tolerance value will become visually blockier as there are fewer vertices')
Out[12]:
Text(0.5, 1, 'Data with a very low tolerance value will look smoother but will take longer to process')

Dissolving Polygons

In [13]:
state_boundary = state_boundary_us[['LSAD', 'geometry']]
cont_usa = state_boundary.dissolve(by='LSAD')

# View the resulting geodataframe
cont_usa
Out[13]:
geometry
LSAD
00 MULTIPOLYGON Z (((-81.81169 24.56874 0.00000, ...
In [14]:
# Select the columns that you wish to retain in the data
state_boundary = state_boundary_us[['region', 'geometry', 'ALAND', 'AWATER']]

# Then summarize the quantative columns by 'sum'
regions_agg = state_boundary.dissolve(by='region', aggfunc='sum')
regions_agg
Out[14]:
geometry ALAND AWATER
region
Midwest MULTIPOLYGON Z (((-82.86334 41.69369 0.00000, ... 1943869253244 184383393833
Northeast MULTIPOLYGON Z (((-76.04621 38.02553 0.00000, ... 869066138232 108922434345
Southeast MULTIPOLYGON Z (((-81.81169 24.56874 0.00000, ... 1364632039655 103876652998
Southwest POLYGON Z ((-94.48587 33.63787 0.00000, -94.41... 1462631530997 24217682268
West MULTIPOLYGON Z (((-118.59403 33.03595 0.00000,... 2432336444730 57568049509
In [24]:
mc.Quantiles(regions_agg.ALAND, k=5).bins
# mc.Quantiles(regions_agg.AWATER, k=5)
Out[24]:
array([1.26551886e+12, 1.42343173e+12, 1.65512662e+12, 2.04156269e+12,
       2.43233644e+12])

Spatial Joins

In [25]:
# Roads within region
roads_region = gpd.sjoin(ne_roads_clip,
                         regions_agg, 
                         how="inner", 
                         op='intersects')

# Notice once you have joins the data - you have attributes 
# from the regions_object (i.e. the region) attached to each road feature
roads_region[["featurecla", "index_right", "ALAND"]].head()
Out[25]:
featurecla index_right ALAND
1 Road Midwest 1943869253244
3 Road Midwest 1943869253244
6 Road Midwest 1943869253244
7 Road Midwest 1943869253244
54 Road Midwest 1943869253244
In [27]:
# Reproject to Albers for plotting
country_albers = cont_usa.to_crs({'init': 'epsg:5070'})
roads_albers = roads_region.to_crs({'init': 'epsg:5070'})

# First, create a dictionary with the attributes of each legend item
road_attrs = {'Midwest': ['black'],
              'Northeast': ['grey'],
              'Southeast': ['m'],
              'Southwest': ['purple'],
              'West': ['green']}

# Plot the data
fig, ax = plt.subplots(figsize=(12, 8))

regions_agg.plot(edgecolor="black",
                 ax=ax)
country_albers.plot(alpha=1,
                    facecolor="none",
                    edgecolor="black",
                    zorder=10,
                    ax=ax)

for ctype, data in roads_albers.groupby('index_right'):
    data.plot(color=road_attrs[ctype][0],
              label=ctype,
              ax=ax)
    
# This approach works to place the legend when you have defined labels
plt.legend(bbox_to_anchor=(1.0, 1), loc=2)
ax.set_axis_off()
plt.axis('equal')
plt.show()
/home/alal/anaconda3/envs/gds/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  return _prepare_from_string(" ".join(pjargs))
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b1e4550>
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b1e4550>
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b1e4550>
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b1e4550>
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b1e4550>
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b1e4550>
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b1e4550>
Out[27]:
<matplotlib.legend.Legend at 0x7f563b1180f0>
Out[27]:
(-2586827.152087886, 2488867.850089355, -158260.34013497925, 3324006.101716564)

Calculate Line Segment Length

In [29]:
# Turn off scientific notation
pd.options.display.float_format = '{:.4f}'.format

# Calculate the total length of road 
road_albers_length = roads_albers[['index_right', 'length_km']]

# Sum existing columns
# roads_albers.groupby('index_right').sum()

roads_albers['rdlength'] = roads_albers.length
sub = roads_albers[['rdlength', 'index_right']].groupby('index_right').sum()
sub
Out[29]:
rdlength
index_right
Midwest 86575018.7717
Northeast 33786036.9011
Southeast 84343073.0339
Southwest 49373104.8209
West 61379832.9460

Rasters

In [50]:
from shapely.geometry import Polygon, mapping, box
import rasterio as rio
from rasterio.mask import mask
from rasterio.plot import show
import pprint

Metadata

In [52]:
# Define relative path to file
lidar_dem_path = os.path.join("data", "colorado-flood", "spatial", 
                              "boulder-leehill-rd", "pre-flood", "lidar",
                              "pre_DTM.tif")

# View crs of raster imported with rasterio
with rio.open(lidar_dem_path) as src:
    print(src.crs)
    print(src.bounds)
    print(src.res)
    print("---")
    pprint.pprint(src.meta)
EPSG:32613
BoundingBox(left=472000.0, bottom=4434000.0, right=476000.0, top=4436000.0)
(1.0, 1.0)
---
{'count': 1,
 'crs': CRS.from_dict(init='epsg:32613'),
 'driver': 'GTiff',
 'dtype': 'float32',
 'height': 2000,
 'nodata': -3.4028234663852886e+38,
 'transform': Affine(1.0, 0.0, 472000.0,
       0.0, -1.0, 4436000.0),
 'width': 4000}
In [33]:
# Each key of the dictionary is an EPSG code
print(list(et.epsg.keys())[:10])

# You can convert to proj4 like so:
proj4 = et.epsg['32613']
print(proj4)
crs_proj4 = rio.crs.CRS.from_string(proj4)
crs_proj4
['29188', '26733', '24600', '32189', '4899', '29189', '26734', '7402', '26951', '29190']
+proj=utm +zone=13 +datum=WGS84 +units=m +no_defs
Out[33]:
CRS.from_dict(init='epsg:32613')

Display

In [36]:
# Open raster data
lidar_dem = rio.open(lidar_dem_path)
# Plot the dem using raster.io
fig, ax = plt.subplots(figsize = (8,3))

show(lidar_dem, 
     title="Lidar Digital Elevation Model (DEM) \n Boulder Flood 2013", 
     ax=ax)

ax.set_axis_off()
lidar_dem.close()
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b05eeb8>
In [54]:
with rio.open(lidar_dem_path) as src:
    # Convert / read the data into a numpy array
    # masked = True turns `nodata` values to nan
    lidar_dem_im = src.read(1, masked=True)
    lidar_dem_mask = src.dataset_mask()
    # Create a spatial extent object using rio.plot.plotting
    spatial_extent = rio.plot.plotting_extent(src)

print("object shape:", lidar_dem_im.shape)
print("object type:", type(lidar_dem_im))
object shape: (2000, 4000)
object type: <class 'numpy.ma.core.MaskedArray'>

Mask

In [55]:
lidar_dem_mask
Out[55]:
array([[  0,   0,   0, ..., 255, 255, 255],
       [  0,   0,   0, ..., 255, 255, 255],
       [  0,   0,   0, ..., 255, 255, 255],
       ...,
       [  0,   0,   0, ..., 255, 255, 255],
       [  0,   0,   0, ..., 255, 255, 255],
       [  0,   0,   0, ..., 255, 255, 255]], dtype=uint8)
In [42]:
ep.plot_bands(lidar_dem_im,
              cmap='Blues',
              extent=spatial_extent,
              title="Digital Elevation Model - Pre 2013 Flood",
              cbar=False)
plt.show()
Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b0725f8>
In [43]:
#### Plot histogram
ep.hist(lidar_dem_im[~lidar_dem_im.mask].ravel(),
        bins=100,
        title="Lee Hill Road - Digital elevation (terrain) model - \nDistribution of elevation values")
plt.show()
Out[43]:
(<Figure size 864x864 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7f563b0300f0>)
In [48]:
### Zoom in
# Define a spatial extent that is "smaller"
# minx, miny, maxx, maxy, ccw=True
zoomed_extent = [472500, 4434000, 473030, 4435030]
zoom_ext_gdf = gpd.GeoDataFrame()
zoom_ext_gdf.loc[0, 'geometry'] = box(*zoomed_extent)
fig, ax = plt.subplots(figsize=(8, 3))

ep.plot_bands(lidar_dem_im,
              extent=spatial_extent,
              title="Lidar Raster Full Spatial Extent w Zoom Box Overlayed",
              ax=ax,
              scale=False)

# Set x and y limits of the plot
ax.set_xlim(zoomed_extent[0], zoomed_extent[2])
ax.set_ylim(zoomed_extent[1], zoomed_extent[3])


ax.set_axis_off()
Out[48]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b145390>
Out[48]:
(472500, 473030)
Out[48]:
(4434000, 4435030)

Raster Processing

In [57]:
from rasterio.plot import plotting_extent

Subtract Arrays

In [58]:
# Define relative path to file
lidar_dem_path = os.path.join("data", "colorado-flood", "spatial", 
                              "boulder-leehill-rd", "pre-flood", "lidar",
                              "pre_DTM.tif")

# Open raster data
with rio.open(lidar_dem_path) as lidar_dem:
    lidar_dem_im = lidar_dem.read(1, masked=True)
    # Get bounds for plotting
    bounds = plotting_extent(lidar_dem)
    
# Define relative path to file
lidar_dsm_path = os.path.join("data", "colorado-flood", "spatial", 
                              "boulder-leehill-rd", "pre-flood", "lidar",
                              "pre_DSM.tif")

with rio.open(lidar_dsm_path) as lidar_dsm:
    lidar_dsm_im = lidar_dsm.read(1, masked=True)
    
# Are the bounds the same?
print("Is the spatial extent the same?", 
      lidar_dem.bounds == lidar_dsm.bounds)

# Is the resolution the same ??
print("Is the resolution the same?", 
      lidar_dem.res == lidar_dsm.res)
Is the spatial extent the same? True
Is the resolution the same? True

Tree height computation $$ CHM = DSM - DEM $$

In [59]:
# Calculate canopy height model
lidar_chm_im = lidar_dsm_im - lidar_dem_im
ep.plot_bands(lidar_chm_im, 
              cmap='viridis',
              title="Lidar Canopy Height Model (CHM)",
              scale=False)
Out[59]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563b142080>
In [60]:
ep.hist(lidar_chm_im, 
        colors = 'purple',
        title="Histogram of CHM Values")
Out[60]:
(<Figure size 864x864 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7f563b1125f8>)
In [61]:
print('CHM minimum value: ', lidar_chm_im.min())
print('CHM max value: ', lidar_chm_im.max())
CHM minimum value:  0.0
CHM max value:  26.930054

Export array as raster

In [64]:
lidar_chm_im
type(lidar_chm_im)
Out[64]:
masked_array(
  data=[[--, --, --, ..., 0.0, 0.1700439453125, 0.9600830078125],
        [--, --, --, ..., 0.0, 0.090087890625, 1.6400146484375],
        [--, --, --, ..., 0.0, 0.0, 0.0799560546875],
        ...,
        [--, --, --, ..., 0.0, 0.0, 0.0],
        [--, --, --, ..., 0.0, 0.0, 0.0],
        [--, --, --, ..., 0.0, 0.0, 0.0]],
  mask=[[ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        ...,
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False]],
  fill_value=-3.402823e+38,
  dtype=float32)
Out[64]:
numpy.ma.core.MaskedArray
In [65]:
lidar_dem.meta
# Create dictionary copy
chm_meta = lidar_dem.meta.copy()
# Update the nodata value to be an easier to use number

# fill the masked pixels with a set no data value
nodatavalue = -999.0
lidar_chm_im_fi = np.ma.filled(lidar_chm_im, fill_value=nodatavalue)
lidar_chm_im_fi.min(), nodatavalue

chm_meta.update({'nodata': nodatavalue})
chm_meta
Out[65]:
{'driver': 'GTiff',
 'dtype': 'float32',
 'nodata': -3.4028234663852886e+38,
 'width': 4000,
 'height': 2000,
 'count': 1,
 'crs': CRS.from_dict(init='epsg:32613'),
 'transform': Affine(1.0, 0.0, 472000.0,
        0.0, -1.0, 4436000.0)}
Out[65]:
(-999.0, -999.0)
Out[65]:
{'driver': 'GTiff',
 'dtype': 'float32',
 'nodata': -999.0,
 'width': 4000,
 'height': 2000,
 'count': 1,
 'crs': CRS.from_dict(init='epsg:32613'),
 'transform': Affine(1.0, 0.0, 472000.0,
        0.0, -1.0, 4436000.0)}
In [67]:
%%time
out_path = "/home/alal/tmp/chm_export.tif"
with rio.open(out_path, 'w', **chm_meta) as outf:
    outf.write(lidar_chm_im_fi, 1)
CPU times: user 112 ms, sys: 104 ms, total: 216 ms
Wall time: 214 ms

Reclassify raster (to account for skew)

In [70]:
# Define bins that you want, and then classify the data
class_bins = [lidar_chm_im.min(), 2, 7, 12, np.iinfo(np.int32).max]

# Classify the original image array, then unravel it again for plotting
lidar_chm_im_class = np.digitize(lidar_chm_im, class_bins)

# Note that you have an extra class in the data (0)
print(np.unique(lidar_chm_im_class))
[0 1 2 3 4]
In [71]:
lidar_chm_class_ma = np.ma.masked_where(lidar_chm_im_class == 0,
                                        lidar_chm_im_class,
                                        copy=True)
lidar_chm_class_ma
Out[71]:
masked_array(
  data=[[--, --, --, ..., 1, 1, 1],
        [--, --, --, ..., 1, 1, 1],
        [--, --, --, ..., 1, 1, 1],
        ...,
        [--, --, --, ..., 1, 1, 1],
        [--, --, --, ..., 1, 1, 1],
        [--, --, --, ..., 1, 1, 1]],
  mask=[[ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        ...,
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False]],
  fill_value=999999)
In [73]:
from matplotlib.colors import ListedColormap, BoundaryNorm
In [74]:
# Plot newly classified and masked raster
# ep.plot_bands(lidar_chm_class_ma,
#               scale=False)
# Plot data using nicer colors
colors = ['linen', 'lightgreen', 'darkgreen', 'maroon']

cmap = ListedColormap(colors)
norm = BoundaryNorm(class_bins, len(colors))

ep.plot_bands(lidar_chm_class_ma,
              cmap=cmap,
              title="Classified Canopy Height Model",
              scale=False,
              norm=norm)
plt.show()
Out[74]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563af0f400>

Clip raster

In [77]:
lidar= rio.open(lidar_dem_path)

aoi = os.path.join("data", "colorado-flood", "spatial",
                   "boulder-leehill-rd", "clip-extent.shp")

# Open crop extent (your study area extent boundary)
crop_extent = gpd.read_file(aoi)
print('crop extent crs: ', crop_extent.crs)
print('lidar crs: ', lidar.crs)
crop extent crs:  epsg:32613
lidar crs:  EPSG:32613
In [79]:
fig, ax = plt.subplots(figsize=(10, 8))

ep.plot_bands(lidar_chm_im, cmap='terrain',
              extent=plotting_extent(lidar),
              ax=ax,
              title="Raster Layer with Shapefile Overlayed",
              cbar=False)

crop_extent.plot(ax=ax, alpha=.8)
/home/alal/anaconda3/envs/gds/lib/python3.7/site-packages/numpy/ma/core.py:1026: RuntimeWarning: overflow encountered in multiply
  result = self.f(da, db, *args, **kwargs)
Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563ad82e48>
Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563ad82e48>
In [81]:
lidar_chm_crop, lidar_chm_crop_meta = et.spatial.crop_image(lidar,crop_extent)
lidar_chm_crop_affine = lidar_chm_crop_meta["transform"]
# Create spatial plotting extent for the cropped layer
lidar_chm_extent = plotting_extent(lidar_chm_crop[0], lidar_chm_crop_affine)
In [82]:
ep.plot_bands(lidar_chm_crop[0],
              extent=lidar_chm_extent,
              cmap='viridis',
              title="Cropped Raster Dataset",
              scale=False)
Out[82]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f563ad19ac8>

Reproject Raster

In [85]:
from rasterio.warp import calculate_default_transform, reproject, Resampling
In [86]:
def reproject_et(inpath, outpath, new_crs):
    dst_crs = new_crs # CRS for web meractor 

    with rio.open(inpath) as src:
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        with rio.open(outpath, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rio.band(src, i),
                    destination=rio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest)
In [90]:
inp = os.path.join("data", "colorado-flood", "spatial", 
                                   "boulder-leehill-rd", "pre-flood", "lidar", "pre_DTM.tif")
out = "/home/alal/tmp/reprojected_lidar.tif"
reproject_et(inpath = inp, 
             outpath = out,
             new_crs = 'EPSG:3857')
In [91]:
lidar_or   = rio.open(inp)
lidar_dem3 = rio.open(out)
pprint.pprint(lidar_or.meta)
pprint.pprint(lidar_dem3.meta)
lidar_or.close()
lidar_dem3.close()
{'count': 1,
 'crs': CRS.from_dict(init='epsg:32613'),
 'driver': 'GTiff',
 'dtype': 'float32',
 'height': 2000,
 'nodata': -3.4028234663852886e+38,
 'transform': Affine(1.0, 0.0, 472000.0,
       0.0, -1.0, 4436000.0),
 'width': 4000}
{'count': 1,
 'crs': CRS.from_dict(init='epsg:3857'),
 'driver': 'GTiff',
 'dtype': 'float32',
 'height': 2020,
 'nodata': -3.4028234663852886e+38,
 'transform': Affine(1.3063652820086313, 0.0, -11725101.307458913,
       0.0, -1.3063652820086313, 4876690.453258085),
 'width': 4004}

Multispectral Rasters

In [9]:
import earthpy.spatial as es

LANDSAT

image.png

In [3]:
%%time
# Download data and set working directory
data = et.data.get_data('cold-springs-fire')
Downloading from https://ndownloader.figshare.com/files/10960109
Extracted output to /home/alal/earth-analytics/data/cold-springs-fire/.
CPU times: user 12.3 s, sys: 2.43 s, total: 14.8 s
Wall time: 27.4 s
In [5]:
import glob
In [13]:
# Get list of all pre-cropped data and sort the data
path = os.path.join("data", "cold-springs-fire", "landsat_collect",
                    "LC080340322016072301T1-SC20180214145802", "crop")
all_landsat_post_bands =  glob.glob(path + '/*band*.tif')
all_landsat_post_bands.sort()
In [14]:
# Create an output array of all the landsat data stacked
landsat_post_fire_path = os.path.join("data", "cold-springs-fire",
                                      "outputs", "landsat_post_fire.tif")

# This will create a new stacked raster with all bands
land_stack, land_meta = es.stack(all_landsat_post_bands,
                                 landsat_post_fire_path)
In [15]:
with rio.open(landsat_post_fire_path) as src:
    landsat_post_fire = src.read()
In [24]:
f, ax = plt.subplots(1, 3, dpi = 140)
ep.plot_rgb(landsat_post_fire,
            rgb=[3, 2, 1],
            title="original", ax = ax[0])
ep.plot_rgb(landsat_post_fire,
            rgb=[3, 2, 1], stretch=True, str_clip= 1,
            title="stretched", ax = ax[1])

ep.plot_rgb(landsat_post_fire,
            rgb=[3, 2, 1], stretch=True, str_clip= 5,
            title="stretched 5", ax = ax[2])
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7acb5387f0>
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7acb241c18>
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7acb11d5f8>
In [25]:
# Plot all band histograms using earthpy
band_titles = ["Band 1", "Blue", "Green", "Red",
               "NIR", "Band 6", "Band7"]

ep.hist(landsat_post_fire,
        title=band_titles)
Out[25]:
(<Figure size 864x864 with 8 Axes>,
 array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f7acb485f28>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acb6a0320>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x7f7acb3ed9b0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acaf29080>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x7f7acaecf710>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acaefada0>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x7f7acaeb0470>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acae5aac8>]],
       dtype=object))

Crop single image

In [26]:
fire_boundary_path = os.path.join("data",
                                  "cold-springs-fire",
                                  "vector_layers",
                                  "fire-boundary-geomac",
                                  "co_cold_springs_20160711_2200_dd83.shp")

fire_boundary = gpd.read_file(fire_boundary_path)

# Open a single band and plot
with rio.open(all_landsat_post_bands[3]) as src:

    # Reproject the fire boundary shapefile to be the same CRS as the Landsat data
    crop_raster_profile = src.profile
    fire_boundary_utmz13 = fire_boundary.to_crs(crop_raster_profile["crs"])

    # Crop the landsat image to the extent of the fire boundary
    landsat_band4, landsat_metadata = es.crop_image(src, fire_boundary_utmz13)

ep.plot_bands(landsat_band4[0],
              title="Landsat Cropped Band 4\nColdsprings Fire Scar",
              scale=False)
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7acad77dd8>

Crop all bands

In [27]:
cropped_folder = os.path.join("data",
                              "cold-springs-fire",
                              "outputs",
                              "cropped-images")

if not os.path.isdir(cropped_folder):
    os.mkdir(cropped_folder)

cropped_file_list = es.crop_all(all_landsat_post_bands,
                                cropped_folder,
                                fire_boundary_utmz13,
                                overwrite=True,
                                verbose=True)

cropped_file_list
Out[27]:
['data/cold-springs-fire/outputs/cropped-images/LC08_L1TP_034032_20160723_20180131_01_T1_sr_band1_crop_crop.tif',
 'data/cold-springs-fire/outputs/cropped-images/LC08_L1TP_034032_20160723_20180131_01_T1_sr_band2_crop_crop.tif',
 'data/cold-springs-fire/outputs/cropped-images/LC08_L1TP_034032_20160723_20180131_01_T1_sr_band3_crop_crop.tif',
 'data/cold-springs-fire/outputs/cropped-images/LC08_L1TP_034032_20160723_20180131_01_T1_sr_band4_crop_crop.tif',
 'data/cold-springs-fire/outputs/cropped-images/LC08_L1TP_034032_20160723_20180131_01_T1_sr_band5_crop_crop.tif',
 'data/cold-springs-fire/outputs/cropped-images/LC08_L1TP_034032_20160723_20180131_01_T1_sr_band6_crop_crop.tif',
 'data/cold-springs-fire/outputs/cropped-images/LC08_L1TP_034032_20160723_20180131_01_T1_sr_band7_crop_crop.tif']
In [28]:
landsat_post_fire_path = os.path.join("data", "cold-springs-fire",
                                      "outputs", "landsat_post_fire.tif")

# This will create a new stacked raster with all bands
land_stack, land_meta = es.stack(cropped_file_list,
                                 landsat_post_fire_path)
In [29]:
# Plot all bands using earthpy
band_titles = ["Band 1", "Blue", "Green", "Red",
               "NIR", "Band 6", "Band7"]

ep.plot_bands(land_stack,
              figsize=(11, 7),
              title=band_titles,
              cbar=False)
Out[29]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f7acac016a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acac292b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acabd9940>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f7acab8f320>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acabb8cc0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acab6b6a0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f7acab21080>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acaac69e8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7acaa83400>]],
      dtype=object)
In [40]:
from shapely.geometry import Polygon, mapping, box
import rasterio as rio
from rasterio.mask import mask
from rasterio.plot import show
from pprint import pprint
In [35]:
naip_csf_path = os.path.join("data", "cold-springs-fire", 
                             "naip", "m_3910505_nw_13_1_20150919", 
                             "crop", "m_3910505_nw_13_1_20150919_crop.tif")

with rio.open(naip_csf_path) as src:
    naip_csf = src.read()
    naip_csf_meta = src.meta
pprint(naip_csf_meta)
{'count': 4,
 'crs': CRS.from_wkt('PROJCS["UTM Zone 13, Northern Hemisphere",GEOGCS["GRS 1980(IUGG, 1980)",DATUM["unknown",SPHEROID["GRS80",6378137,298.257222101],TOWGS84[0,0,0,0,0,0,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-105],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]]]'),
 'driver': 'GTiff',
 'dtype': 'int16',
 'height': 2312,
 'nodata': -32768.0,
 'transform': Affine(1.0, 0.0, 457163.0,
       0.0, -1.0, 4426952.0),
 'width': 4377}
In [41]:
ep.plot_bands(naip_csf[0],
              title="NAIP RGB Imagery - Band 1-Red\nCold Springs Fire Scar",
              cbar=False)
Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7ac53eec18>
In [70]:
titles = ["Red Band", "Green Band", "Blue Band", "Near Infrared (NIR) Band"]
cmaps = ['Reds', 'Greens', 'Blues', 'Greys']
# f, ax = plt.subplots(2, 2, dpi = 140)
f, ax = plt.subplots(1, 4, figsize = (15, 5))
for ax, i in zip(ax, np.arange(0, 4)):
    ax.imshow(naip_csf[i], cmap = cmaps[i])
    ax.set_title(titles[i])
Out[70]:
<matplotlib.image.AxesImage at 0x7f7ac4f48978>
Out[70]:
Text(0.5, 1.0, 'Red Band')
Out[70]:
<matplotlib.image.AxesImage at 0x7f7ac5727f28>
Out[70]:
Text(0.5, 1.0, 'Green Band')
Out[70]:
<matplotlib.image.AxesImage at 0x7f7ac5789978>
Out[70]:
Text(0.5, 1.0, 'Blue Band')
Out[70]:
<matplotlib.image.AxesImage at 0x7f7ab9748748>
Out[70]:
Text(0.5, 1.0, 'Near Infrared (NIR) Band')
In [72]:
colors = ['r', 'g', 'b', 'k']
titles = ['red band', 'green band', 'blue band', 'near-infrared band']

ep.hist(naip_csf, 
        colors=colors, 
        title=titles, 
        cols=2)

plt.show()
Out[72]:
(<Figure size 864x864 with 4 Axes>,
 array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f7ac0c155c0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x7f7ab967bbe0>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x7f7ab97832b0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x7f7ac506d940>]],
       dtype=object))
In [71]:
ep.plot_bands(naip_csf, 
          figsize=(12, 5), 
          cols=2,
          title=titles,
          cbar=False)

ep.plot_rgb(naip_csf,
            rgb=[0, 1, 2],
            title="RGB Composite image - NAIP")
Out[71]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f7ac5544908>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7ac4ef8668>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f7ac51ca048>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f7ac4e5b9e8>]],
      dtype=object)
Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7ac5017588>