Archived
1
0

Created distance calculation.

This commit is contained in:
Shaun Setlock
2022-04-17 21:35:11 -04:00
parent 3a9acf8c6f
commit 56b4c0cb1c
5 changed files with 2456 additions and 2152 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
@@ -11,7 +11,8 @@
"import numpy as np\n",
"\n",
"from great_schools import get_nearby_schools\n",
"from secret import get_key"
"from distance import get_distance\n",
"from secret import get_key\n"
]
},
{
@@ -31,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
@@ -49,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
@@ -64,6 +65,7 @@
" 'web-site',\n",
" 'overview-url',\n",
" 'rating-description',\n",
" 'distance',\n",
"]\n",
"\n",
"# Grab data for Boston.\n",
@@ -73,9 +75,11 @@
" boston_schools = get_nearby_schools(api_key,\"42.3\",\"-71.2\",\"50\")\n",
" boston_df = pd.DataFrame.from_dict(boston_schools)\n",
" boston_df.drop(columns=drops,inplace=True)\n",
" boston_df.to_csv(boston_nearby_schools_file)\n",
" boston_df.to_csv(boston_nearby_schools_file, )\n",
"else:\n",
" boston_df = pd.read_csv(boston_nearby_schools_file)\n",
" boston_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
" boston_df.drop(columns=[\"Unnamed: 0\"], inplace=True)\n",
"\n",
"# Grab data for Buffalo.\n",
"refresh = False\n",
@@ -86,7 +90,276 @@
" buffalo_df.drop(columns=drops,inplace=True)\n",
" buffalo_df.to_csv(buffalo_nearby_schools_file)\n",
"else:\n",
" buffalo_df = pd.read_csv(buffalo_nearby_schools_file)"
" buffalo_df = pd.read_csv(buffalo_nearby_schools_file)\n",
" buffalo_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
" buffalo_df.drop(columns=[\"Unnamed: 0\"], inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Process the `lat` and `lon` columns from the API output into tuples.\n",
"\n",
"Then create two new columns:\n",
"- Distance to Downtown\n",
"- Distance to Work"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"# Form tuple to represent coordinates\n",
"boston_df['coordinates'] = list(zip(boston_df.lat,boston_df.lon))\n",
"#boston_df.drop(columns=['lat', 'lon'], inplace=True)\n",
"\n",
"# Define coordinates of important places\n",
"downtown=(42.3674836866797, -71.07134540735377) # Science Museum\n",
"work=(42.47381059540949, -71.25414135292398) # Hartwell\n",
"\n",
"# Create new columns to tabulate distance to these important places\n",
"boston_df['distance-to-downtown'] = boston_df['coordinates'].apply(func=get_distance,p2=downtown)\n",
"boston_df['distance-to-work'] = boston_df['coordinates'].apply(func=get_distance,p2=work)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For Boston, drop all schools that aren't in Massachusetts."
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>state-id</th>\n",
" <th>name</th>\n",
" <th>type</th>\n",
" <th>level-codes</th>\n",
" <th>level</th>\n",
" <th>city</th>\n",
" <th>state</th>\n",
" <th>zip</th>\n",
" <th>county</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>district-name</th>\n",
" <th>district-id</th>\n",
" <th>rating</th>\n",
" <th>year</th>\n",
" <th>coordinates</th>\n",
" <th>distance-to-downtown</th>\n",
" <th>distance-to-work</th>\n",
" </tr>\n",
" <tr>\n",
" <th>universal-id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2501042</th>\n",
" <td>7050505</td>\n",
" <td>Masconomet Regional High School</td>\n",
" <td>public</td>\n",
" <td>h</td>\n",
" <td>9,10,11,12</td>\n",
" <td>Boxford</td>\n",
" <td>MA</td>\n",
" <td>1921</td>\n",
" <td>Essex County</td>\n",
" <td>42.627754</td>\n",
" <td>-70.974693</td>\n",
" <td>Masconomet School District</td>\n",
" <td>259</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.627754, -70.974693)</td>\n",
" <td>30.005931</td>\n",
" <td>28.583420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2500337</th>\n",
" <td>350380</td>\n",
" <td>Young Achievers Science and Math School</td>\n",
" <td>public</td>\n",
" <td>p,e,m</td>\n",
" <td>PK,KG,1,2,3,4,5,6,7,8</td>\n",
" <td>Mattapan</td>\n",
" <td>MA</td>\n",
" <td>2126</td>\n",
" <td>Suffolk County</td>\n",
" <td>42.282269</td>\n",
" <td>-71.095016</td>\n",
" <td>Boston School District</td>\n",
" <td>99</td>\n",
" <td>2.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.282269, -71.095016)</td>\n",
" <td>9.673200</td>\n",
" <td>24.989359</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2500402</th>\n",
" <td>440017</td>\n",
" <td>Kennedy K-5 Elementary School</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5</td>\n",
" <td>Brockton</td>\n",
" <td>MA</td>\n",
" <td>2301</td>\n",
" <td>Plymouth County</td>\n",
" <td>42.059696</td>\n",
" <td>-71.037262</td>\n",
" <td>Brockton School District</td>\n",
" <td>111</td>\n",
" <td>4.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.059696, -71.037262)</td>\n",
" <td>34.339345</td>\n",
" <td>49.384728</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501682</th>\n",
" <td>3070010</td>\n",
" <td>Boyden</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5</td>\n",
" <td>Walpole</td>\n",
" <td>MA</td>\n",
" <td>2071</td>\n",
" <td>Norfolk County</td>\n",
" <td>42.105808</td>\n",
" <td>-71.258743</td>\n",
" <td>Walpole School District</td>\n",
" <td>426</td>\n",
" <td>6.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.105808, -71.258743)</td>\n",
" <td>32.933990</td>\n",
" <td>40.921772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501507</th>\n",
" <td>2760305</td>\n",
" <td>P. Brent Trottier Middle School</td>\n",
" <td>public</td>\n",
" <td>m</td>\n",
" <td>6,7,8</td>\n",
" <td>Southborough</td>\n",
" <td>MA</td>\n",
" <td>1772</td>\n",
" <td>Worcester County</td>\n",
" <td>42.299240</td>\n",
" <td>-71.542259</td>\n",
" <td>Southborough School District</td>\n",
" <td>387</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.29924, -71.542259)</td>\n",
" <td>39.445654</td>\n",
" <td>30.606258</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" state-id name type \\\n",
"universal-id \n",
"2501042 7050505 Masconomet Regional High School public \n",
"2500337 350380 Young Achievers Science and Math School public \n",
"2500402 440017 Kennedy K-5 Elementary School public \n",
"2501682 3070010 Boyden public \n",
"2501507 2760305 P. Brent Trottier Middle School public \n",
"\n",
" level-codes level city state zip \\\n",
"universal-id \n",
"2501042 h 9,10,11,12 Boxford MA 1921 \n",
"2500337 p,e,m PK,KG,1,2,3,4,5,6,7,8 Mattapan MA 2126 \n",
"2500402 e KG,1,2,3,4,5 Brockton MA 2301 \n",
"2501682 e KG,1,2,3,4,5 Walpole MA 2071 \n",
"2501507 m 6,7,8 Southborough MA 1772 \n",
"\n",
" county lat lon \\\n",
"universal-id \n",
"2501042 Essex County 42.627754 -70.974693 \n",
"2500337 Suffolk County 42.282269 -71.095016 \n",
"2500402 Plymouth County 42.059696 -71.037262 \n",
"2501682 Norfolk County 42.105808 -71.258743 \n",
"2501507 Worcester County 42.299240 -71.542259 \n",
"\n",
" district-name district-id rating year \\\n",
"universal-id \n",
"2501042 Masconomet School District 259 8.0 2021.0 \n",
"2500337 Boston School District 99 2.0 2021.0 \n",
"2500402 Brockton School District 111 4.0 2021.0 \n",
"2501682 Walpole School District 426 6.0 2021.0 \n",
"2501507 Southborough School District 387 8.0 2021.0 \n",
"\n",
" coordinates distance-to-downtown distance-to-work \n",
"universal-id \n",
"2501042 (42.627754, -70.974693) 30.005931 28.583420 \n",
"2500337 (42.282269, -71.095016) 9.673200 24.989359 \n",
"2500402 (42.059696, -71.037262) 34.339345 49.384728 \n",
"2501682 (42.105808, -71.258743) 32.933990 40.921772 \n",
"2501507 (42.29924, -71.542259) 39.445654 30.606258 "
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
"\n",
"boston_df.sample(5)"
]
}
],

31
main/distance.py Normal file
View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
from math import radians, cos, sin, asin, sqrt
# helper to calculate geographical distance using lat, lon
def get_distance(p1: tuple, p2: tuple, radius: float = 3956) -> float:
    '''
    Returns a float that is the great-circle (haversine) distance between
    two (lat,lon) tuples given in decimal degrees.

    Parameters:
        p1 (tuple): point 1 as (lat, lon), in degrees
        p2 (tuple): point 2 as (lat, lon), in degrees
        radius (float): sphere radius that sets the unit of the result.
            The default, 3956, is the Earth's radius in miles; pass 6371
            to get kilometers instead.

    Returns:
        distance (float): distance between the points, in the unit of
            `radius` (miles by default)
    '''
    # The trigonometric functions in the math module work in radians,
    # so convert every coordinate from degrees first.
    lat1, lon1 = radians(p1[0]), radians(p1[1])
    lat2, lon2 = radians(p2[0]), radians(p2[1])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2

    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a
    # hair above 1 for (nearly) antipodal points, which would make asin()
    # raise "ValueError: math domain error". Clamp to the valid domain.
    a = min(1.0, max(0.0, a))

    # Central angle between the two points, in radians.
    c = 2 * asin(sqrt(a))

    # Arc length = central angle * sphere radius.
    return c * radius