392 lines
13 KiB
Plaintext
392 lines
13 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 70,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# imports\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"from great_schools import get_nearby_schools\n",
|
|
"from distance import get_distance\n",
|
|
"from secret import get_key\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Shaun and Daniela's Boston Public School Analysis\n",
|
|
"#### 2021.04.10"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Fetch the API key from the local filesystem."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 71,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# get the API key\n",
|
|
"api_key_file = '../keys/api.key'\n",
|
|
"api_key = get_key(api_key_file)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
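"Use the `nearby_schools` API endpoint to grab raw data for all schools within the maximum radius of Boston and Buffalo. When `refresh` is `False`, the previously saved CSV files are loaded instead of calling the API."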
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 72,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Some columns will be dropped immediately as pre-processing.\n",
|
|
"drops = [\n",
|
|
" 'nces-id',\n",
|
|
" 'school-summary',\n",
|
|
" 'street',\n",
|
|
" 'fipscounty',\n",
|
|
" 'phone',\n",
|
|
" 'fax',\n",
|
|
" 'web-site',\n",
|
|
" 'overview-url',\n",
|
|
" 'rating-description',\n",
|
|
" 'distance',\n",
|
|
"]\n",
|
|
"\n",
|
|
"# Grab data for Boston.\n",
|
|
"refresh = False\n",
|
|
"boston_nearby_schools_file = '../data/nearby_schools/boston.csv'\n",
|
|
"if refresh:\n",
|
|
" boston_schools = get_nearby_schools(api_key,\"42.3\",\"-71.2\",\"50\")\n",
|
|
" boston_df = pd.DataFrame.from_dict(boston_schools)\n",
|
|
" boston_df.drop(columns=drops,inplace=True)\n",
|
|
" boston_df.to_csv(boston_nearby_schools_file, )\n",
|
|
"else:\n",
|
|
" boston_df = pd.read_csv(boston_nearby_schools_file)\n",
|
|
" boston_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
|
|
" boston_df.drop(columns=[\"Unnamed: 0\"], inplace=True)\n",
|
|
"\n",
|
|
"# Grab data for Buffalo.\n",
|
|
"refresh = False\n",
|
|
"buffalo_nearby_schools_file = '../data/nearby_schools/buffalo.csv'\n",
|
|
"if refresh:\n",
|
|
" buffalo_schools = get_nearby_schools(api_key,\"42.9625\",\"-78.7425\",\"50\")\n",
|
|
" buffalo_df = pd.DataFrame.from_dict(buffalo_schools)\n",
|
|
" buffalo_df.drop(columns=drops,inplace=True)\n",
|
|
" buffalo_df.to_csv(buffalo_nearby_schools_file)\n",
|
|
"else:\n",
|
|
" buffalo_df = pd.read_csv(buffalo_nearby_schools_file)\n",
|
|
" buffalo_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
|
|
" buffalo_df.drop(columns=[\"Unnamed: 0\"], inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Process the `lat` and `lon` columns from the API output into tuples.\n",
|
|
"\n",
|
|
"Then create two new columns:\n",
|
|
"- Distance to Downtown\n",
|
|
"- Distance to Work"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 73,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Form (lat, lon) tuples to represent each school's coordinates\n",
|
|
"boston_df['coordinates'] = list(zip(boston_df.lat,boston_df.lon))\n",
|
|
"#boston_df.drop(columns=['lat', 'lon'], inplace=True)\n",
|
|
"\n",
|
|
"# Define coordinates of important places\n",
|
|
"downtown=(42.3674836866797, -71.07134540735377) # Science Museum\n",
|
|
"work=(42.47381059540949, -71.25414135292398) # Hartwell\n",
|
|
"\n",
|
|
"# Create new columns to tabulate distance to these important places\n",
|
|
"boston_df['distance-to-downtown'] = boston_df['coordinates'].apply(func=get_distance,p2=downtown)\n",
|
|
"boston_df['distance-to-work'] = boston_df['coordinates'].apply(func=get_distance,p2=work)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"For Boston, drop all schools that aren't in Massachusetts."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 74,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state-id</th>\n",
|
|
" <th>name</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>level-codes</th>\n",
|
|
" <th>level</th>\n",
|
|
" <th>city</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>zip</th>\n",
|
|
" <th>county</th>\n",
|
|
" <th>lat</th>\n",
|
|
" <th>lon</th>\n",
|
|
" <th>district-name</th>\n",
|
|
" <th>district-id</th>\n",
|
|
" <th>rating</th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>coordinates</th>\n",
|
|
" <th>distance-to-downtown</th>\n",
|
|
" <th>distance-to-work</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>universal-id</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2501042</th>\n",
|
|
" <td>7050505</td>\n",
|
|
" <td>Masconomet Regional High School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>h</td>\n",
|
|
" <td>9,10,11,12</td>\n",
|
|
" <td>Boxford</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1921</td>\n",
|
|
" <td>Essex County</td>\n",
|
|
" <td>42.627754</td>\n",
|
|
" <td>-70.974693</td>\n",
|
|
" <td>Masconomet School District</td>\n",
|
|
" <td>259</td>\n",
|
|
" <td>8.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.627754, -70.974693)</td>\n",
|
|
" <td>30.005931</td>\n",
|
|
" <td>28.583420</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2500337</th>\n",
|
|
" <td>350380</td>\n",
|
|
" <td>Young Achievers Science and Math School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>p,e,m</td>\n",
|
|
" <td>PK,KG,1,2,3,4,5,6,7,8</td>\n",
|
|
" <td>Mattapan</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>2126</td>\n",
|
|
" <td>Suffolk County</td>\n",
|
|
" <td>42.282269</td>\n",
|
|
" <td>-71.095016</td>\n",
|
|
" <td>Boston School District</td>\n",
|
|
" <td>99</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.282269, -71.095016)</td>\n",
|
|
" <td>9.673200</td>\n",
|
|
" <td>24.989359</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2500402</th>\n",
|
|
" <td>440017</td>\n",
|
|
" <td>Kennedy K-5 Elementary School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>e</td>\n",
|
|
" <td>KG,1,2,3,4,5</td>\n",
|
|
" <td>Brockton</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>2301</td>\n",
|
|
" <td>Plymouth County</td>\n",
|
|
" <td>42.059696</td>\n",
|
|
" <td>-71.037262</td>\n",
|
|
" <td>Brockton School District</td>\n",
|
|
" <td>111</td>\n",
|
|
" <td>4.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.059696, -71.037262)</td>\n",
|
|
" <td>34.339345</td>\n",
|
|
" <td>49.384728</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2501682</th>\n",
|
|
" <td>3070010</td>\n",
|
|
" <td>Boyden</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>e</td>\n",
|
|
" <td>KG,1,2,3,4,5</td>\n",
|
|
" <td>Walpole</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>2071</td>\n",
|
|
" <td>Norfolk County</td>\n",
|
|
" <td>42.105808</td>\n",
|
|
" <td>-71.258743</td>\n",
|
|
" <td>Walpole School District</td>\n",
|
|
" <td>426</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.105808, -71.258743)</td>\n",
|
|
" <td>32.933990</td>\n",
|
|
" <td>40.921772</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2501507</th>\n",
|
|
" <td>2760305</td>\n",
|
|
" <td>P. Brent Trottier Middle School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>m</td>\n",
|
|
" <td>6,7,8</td>\n",
|
|
" <td>Southborough</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1772</td>\n",
|
|
" <td>Worcester County</td>\n",
|
|
" <td>42.299240</td>\n",
|
|
" <td>-71.542259</td>\n",
|
|
" <td>Southborough School District</td>\n",
|
|
" <td>387</td>\n",
|
|
" <td>8.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.29924, -71.542259)</td>\n",
|
|
" <td>39.445654</td>\n",
|
|
" <td>30.606258</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" state-id name type \\\n",
|
|
"universal-id \n",
|
|
"2501042 7050505 Masconomet Regional High School public \n",
|
|
"2500337 350380 Young Achievers Science and Math School public \n",
|
|
"2500402 440017 Kennedy K-5 Elementary School public \n",
|
|
"2501682 3070010 Boyden public \n",
|
|
"2501507 2760305 P. Brent Trottier Middle School public \n",
|
|
"\n",
|
|
" level-codes level city state zip \\\n",
|
|
"universal-id \n",
|
|
"2501042 h 9,10,11,12 Boxford MA 1921 \n",
|
|
"2500337 p,e,m PK,KG,1,2,3,4,5,6,7,8 Mattapan MA 2126 \n",
|
|
"2500402 e KG,1,2,3,4,5 Brockton MA 2301 \n",
|
|
"2501682 e KG,1,2,3,4,5 Walpole MA 2071 \n",
|
|
"2501507 m 6,7,8 Southborough MA 1772 \n",
|
|
"\n",
|
|
" county lat lon \\\n",
|
|
"universal-id \n",
|
|
"2501042 Essex County 42.627754 -70.974693 \n",
|
|
"2500337 Suffolk County 42.282269 -71.095016 \n",
|
|
"2500402 Plymouth County 42.059696 -71.037262 \n",
|
|
"2501682 Norfolk County 42.105808 -71.258743 \n",
|
|
"2501507 Worcester County 42.299240 -71.542259 \n",
|
|
"\n",
|
|
" district-name district-id rating year \\\n",
|
|
"universal-id \n",
|
|
"2501042 Masconomet School District 259 8.0 2021.0 \n",
|
|
"2500337 Boston School District 99 2.0 2021.0 \n",
|
|
"2500402 Brockton School District 111 4.0 2021.0 \n",
|
|
"2501682 Walpole School District 426 6.0 2021.0 \n",
|
|
"2501507 Southborough School District 387 8.0 2021.0 \n",
|
|
"\n",
|
|
" coordinates distance-to-downtown distance-to-work \n",
|
|
"universal-id \n",
|
|
"2501042 (42.627754, -70.974693) 30.005931 28.583420 \n",
|
|
"2500337 (42.282269, -71.095016) 9.673200 24.989359 \n",
|
|
"2500402 (42.059696, -71.037262) 34.339345 49.384728 \n",
|
|
"2501682 (42.105808, -71.258743) 32.933990 40.921772 \n",
|
|
"2501507 (42.29924, -71.542259) 39.445654 30.606258 "
|
|
]
|
|
},
|
|
"execution_count": 74,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
|
|
"\n",
|
|
"boston_df.sample(5)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "4fc861b332db140b7b363b167627eee6a3238262e7c99e0237067fec0875fee7"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3.8.10 ('venv': venv)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.10"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|