Archived
1
0
This repository has been archived on 2025-04-27. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
schools/main/analysis.ipynb
2022-05-29 22:43:39 -04:00

675 lines
28 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from great_schools import get_nearby_schools, get_demographics\n",
"from distance import get_distance\n",
"from secret import get_key\n",
"from district_score import get_overall_rating"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Shaun and Daniela's Boston Public School Analysis\n",
"#### 2021.04.10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fetch the API key from the local filesystem."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# get the API key\n",
"api_key_file = '../keys/api.key'\n",
"api_key = get_key(api_key_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use the `nearby_schools` API endpoint to grab raw data of all schools within the maximum radius"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Some columns will dropped immediately as pre-processing.\n",
"drops = [\n",
" 'nces-id',\n",
" 'school-summary',\n",
" 'street',\n",
" 'fipscounty',\n",
" 'phone',\n",
" 'fax',\n",
" 'web-site',\n",
" 'overview-url',\n",
" 'rating-description',\n",
" 'distance',\n",
"]\n",
"\n",
"# Grab data for Boston.\n",
"refresh = False\n",
"boston_nearby_schools_file = '../data/nearby_schools/boston.csv'\n",
"if refresh:\n",
" boston_schools = get_nearby_schools(api_key,\"42.3\",\"-71.2\",\"50\")\n",
" boston_df = pd.DataFrame.from_dict(boston_schools)\n",
" boston_df.drop(columns=drops,inplace=True)\n",
" boston_df.to_csv(boston_nearby_schools_file, )\n",
"else:\n",
" boston_df = pd.read_csv(boston_nearby_schools_file)\n",
" boston_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
" boston_df.drop(columns=[\"Unnamed: 0\"], inplace=True)\n",
"\n",
"# Grab data for Buffalo.\n",
"refresh = False\n",
"buffalo_nearby_schools_file = '../data/nearby_schools/buffalo.csv'\n",
"if refresh:\n",
" buffalo_schools = get_nearby_schools(api_key,\"42.9625\",\"-78.7425\",\"50\")\n",
" buffalo_df = pd.DataFrame.from_dict(buffalo_schools)\n",
" buffalo_df.drop(columns=drops,inplace=True)\n",
" buffalo_df.to_csv(buffalo_nearby_schools_file)\n",
"else:\n",
" buffalo_df = pd.read_csv(buffalo_nearby_schools_file)\n",
" buffalo_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
" buffalo_df.drop(columns=[\"Unnamed: 0\"], inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Process the `lat` and `lon` columns from the API output into tuples.\n",
"\n",
"Then create two new columns:\n",
"- Distance to Downtown\n",
"- Distance to Work"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Form tuple to represent coordinates\n",
"boston_df['coordinates'] = list(zip(boston_df.lat,boston_df.lon))\n",
"#boston_df.drop(columns=['lat', 'lon'], inplace=True)\n",
"\n",
"# Define coordinates of important places\n",
"downtown=(42.3674836866797, -71.07134540735377) # Science Museum\n",
"work=(42.47381059540949, -71.25414135292398) # Hartwell\n",
"\n",
"# Create new columns to tabulate distance to these important places\n",
"boston_df['distance-to-downtown'] = boston_df['coordinates'].apply(func=get_distance,p2=downtown)\n",
"boston_df['distance-to-work'] = boston_df['coordinates'].apply(func=get_distance,p2=work)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We should definitely removal all schools that aren't in Massachusetts."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 1789 schools from the original API results.\n",
"Allowing only schools from Massachusetts reduces the dataset to 1375 schools.\n"
]
}
],
"source": [
"print(f'There are {len(boston_df)} schools from the original API results.')\n",
"\n",
"# only allow from MA\n",
"boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
"print(f'Allowing only schools from Massachusetts reduces the dataset to {len(boston_df)} schools.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many unique district id's are there?"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"There are 230 unique school districts.\n",
"\n"
]
}
],
"source": [
"# get unique districts\n",
"districts = boston_df[\"district-id\"].unique()\n",
"print(f'\\nThere are {len(districts)} unique school districts.\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Which of these districts are close to both work and downtown boston?"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"There are 90 school districts within reasonable proximity to downtown and work.\n",
"\n",
"There are 699 schools within these proximal districts.\n",
"\n"
]
}
],
"source": [
"# calculate distance to PoI using geo-center of districts\n",
"distances_to_downtown = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-downtown']}\n",
"distances_to_work = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-work']}\n",
"\n",
"df_downtown = pd.DataFrame.from_dict(distances_to_downtown, orient='index')\n",
"df_work = pd.DataFrame.from_dict(distances_to_work, orient='index')\n",
"\n",
"# merge these new columns\n",
"both_df = pd.merge(left=df_downtown, right=df_work, how='inner', left_index=True, right_index=True)\n",
"both_df.rename(columns={'0_x': \"downtown\", '0_y': \"work\"}, inplace=True)\n",
"\n",
"both_df = both_df[both_df[\"downtown\"] < 35.0]\n",
"both_df = both_df[both_df[\"work\"] < 20.0]\n",
"\n",
"print(f'\\nThere are {len(both_df)} school districts within reasonable proximity to downtown and work.\\n')\n",
"\n",
"# filter out all schools which aren't in proximal districts\n",
"proximal_district_ids = list(both_df.index)\n",
"boston_df = boston_df[boston_df['district-id'].isin(proximal_district_ids)]\n",
"\n",
"print(f'There are {len(boston_df)} schools within these proximal districts.\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's drop any districts that have an average rating below the school population mean."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Of the remaining 699 schools, the average rating is 5.664546899841017.\n",
"\n",
"There are 56 districts remaining after pruning districts whose collective average is below the population mean rating.\n",
"\n",
"Which are, ['Acton-Boxborough School District', 'Andover School District', 'Arlington Public Schools', 'Ashland School District', 'Assabet Valley Regional Vocational Technical School District', 'Bedford School District', 'Belmont School District', 'Billerica School District', 'Boxford School District', 'Brookline School District', 'Burlington School District', 'Cambridge School District', 'Carlisle School District', 'Chelmsford School District', 'Concord School District', 'Concord-Carlisle School District', 'Dover School District', 'Dover-Sherborn School District', 'Dracut School District', 'Essex North Shore Agricultural and Technical School District', 'Groton-Dunstable School District', 'Harvard School District', 'Lexington School District', 'Lincoln-Sudbury School District', 'Littleton School District', 'Lynnfield School District', 'Marblehead School District', 'Masconomet School District', 'Melrose School District', 'Middleton School District', 'Milton School District', 'Nahant School District', 'Nashoba School District', 'Natick School District', 'Needham School District', 'Newton School District', 'North Andover School District', 'North Reading School District', 'Norwood School District', 'Quincy School District', 'Reading School District', 'Shawsheen Valley Regional Vocational Technical School District', 'Sherborn School District', 'Southborough School District', 'Stoneham School District', 'Sudbury School District', 'Topsfield School District', 'Tyngsborough School District', 'Wakefield School District', 'Wayland School District', 'Wellesley School District', 'Westford School District', 'Weston School District', 'Westwood School District', 'Wilmington School District', 'Winchester School District']\n"
]
}
],
"source": [
"# get the mean rating from the entire population of schools\n",
"mean_rating = boston_df['rating'].mean()\n",
"std_rating = boston_df['rating'].std()\n",
"\n",
"print(f'\\nOf the remaining {len(boston_df)} schools, the average rating is {mean_rating}.')\n",
"\n",
"# compute the average rating for each district\n",
"ave_ratings = {k: np.mean(v) for k, v in boston_df.groupby(by='district-id')['rating']}\n",
"\n",
"# keep only districts that are above the population mean\n",
"not_low_performing = [k for k, v in ave_ratings.items() if v > mean_rating]\n",
"boston_df = boston_df[boston_df['district-id'].isin(not_low_performing)]\n",
"\n",
"districts = sorted(list(boston_df['district-name'].unique()))\n",
"print(f'\\nThere are {len(districts)} districts remaining after pruning districts whose collective average is below the population mean rating.\\n')\n",
"print(f'Which are, {districts}')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>universal-id</th>\n",
" <th>state-id</th>\n",
" <th>type</th>\n",
" <th>level-codes</th>\n",
" <th>level</th>\n",
" <th>city</th>\n",
" <th>state</th>\n",
" <th>zip</th>\n",
" <th>county</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>district-id</th>\n",
" <th>rating</th>\n",
" <th>year</th>\n",
" <th>coordinates</th>\n",
" <th>distance-to-downtown</th>\n",
" <th>distance-to-work</th>\n",
" </tr>\n",
" <tr>\n",
" <th>district-name</th>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Quincy School District</th>\n",
" <th>Central Middle School</th>\n",
" <td>2501385</td>\n",
" <td>2430315</td>\n",
" <td>public</td>\n",
" <td>m</td>\n",
" <td>6,7,8</td>\n",
" <td>Quincy</td>\n",
" <td>MA</td>\n",
" <td>2170</td>\n",
" <td>Norfolk County</td>\n",
" <td>42.261284</td>\n",
" <td>-71.011436</td>\n",
" <td>349</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.261284, -71.011436)</td>\n",
" <td>7.944981</td>\n",
" <td>19.199411</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Natick School District</th>\n",
" <th>Brown</th>\n",
" <td>2501156</td>\n",
" <td>1980010</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4</td>\n",
" <td>Natick</td>\n",
" <td>MA</td>\n",
" <td>1760</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.292339</td>\n",
" <td>-71.384941</td>\n",
" <td>294</td>\n",
" <td>7.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.292339, -71.384941)</td>\n",
" <td>16.826936</td>\n",
" <td>14.194884</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wakefield School District</th>\n",
" <th>Dolbeare Elementary School</th>\n",
" <td>2501667</td>\n",
" <td>3050005</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4</td>\n",
" <td>Wakefield</td>\n",
" <td>MA</td>\n",
" <td>1880</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.518574</td>\n",
" <td>-71.065102</td>\n",
" <td>424</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.518574, -71.065102)</td>\n",
" <td>10.436914</td>\n",
" <td>10.107841</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Southborough School District</th>\n",
" <th>Albert S. Woodward Memorial School</th>\n",
" <td>2503218</td>\n",
" <td>2760050</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>2,3</td>\n",
" <td>Southborough</td>\n",
" <td>MA</td>\n",
" <td>1772</td>\n",
" <td>Worcester County</td>\n",
" <td>42.304043</td>\n",
" <td>-71.527367</td>\n",
" <td>387</td>\n",
" <td>NaN</td>\n",
" <td>2020.0</td>\n",
" <td>(42.304043, -71.527367)</td>\n",
" <td>23.683438</td>\n",
" <td>18.208098</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wilmington School District</th>\n",
" <th>Wildwood</th>\n",
" <td>2501816</td>\n",
" <td>3420015</td>\n",
" <td>public</td>\n",
" <td>p,e</td>\n",
" <td>PK,KG</td>\n",
" <td>Wilmington</td>\n",
" <td>MA</td>\n",
" <td>1887</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.550678</td>\n",
" <td>-71.153496</td>\n",
" <td>462</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>(42.550678, -71.153496)</td>\n",
" <td>13.322938</td>\n",
" <td>7.376087</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" universal-id \\\n",
"district-name name \n",
"Quincy School District Central Middle School 2501385 \n",
"Natick School District Brown 2501156 \n",
"Wakefield School District Dolbeare Elementary School 2501667 \n",
"Southborough School District Albert S. Woodward Memorial School 2503218 \n",
"Wilmington School District Wildwood 2501816 \n",
"\n",
" state-id \\\n",
"district-name name \n",
"Quincy School District Central Middle School 2430315 \n",
"Natick School District Brown 1980010 \n",
"Wakefield School District Dolbeare Elementary School 3050005 \n",
"Southborough School District Albert S. Woodward Memorial School 2760050 \n",
"Wilmington School District Wildwood 3420015 \n",
"\n",
" type \\\n",
"district-name name \n",
"Quincy School District Central Middle School public \n",
"Natick School District Brown public \n",
"Wakefield School District Dolbeare Elementary School public \n",
"Southborough School District Albert S. Woodward Memorial School public \n",
"Wilmington School District Wildwood public \n",
"\n",
" level-codes \\\n",
"district-name name \n",
"Quincy School District Central Middle School m \n",
"Natick School District Brown e \n",
"Wakefield School District Dolbeare Elementary School e \n",
"Southborough School District Albert S. Woodward Memorial School e \n",
"Wilmington School District Wildwood p,e \n",
"\n",
" level \\\n",
"district-name name \n",
"Quincy School District Central Middle School 6,7,8 \n",
"Natick School District Brown KG,1,2,3,4 \n",
"Wakefield School District Dolbeare Elementary School KG,1,2,3,4 \n",
"Southborough School District Albert S. Woodward Memorial School 2,3 \n",
"Wilmington School District Wildwood PK,KG \n",
"\n",
" city \\\n",
"district-name name \n",
"Quincy School District Central Middle School Quincy \n",
"Natick School District Brown Natick \n",
"Wakefield School District Dolbeare Elementary School Wakefield \n",
"Southborough School District Albert S. Woodward Memorial School Southborough \n",
"Wilmington School District Wildwood Wilmington \n",
"\n",
" state zip \\\n",
"district-name name \n",
"Quincy School District Central Middle School MA 2170 \n",
"Natick School District Brown MA 1760 \n",
"Wakefield School District Dolbeare Elementary School MA 1880 \n",
"Southborough School District Albert S. Woodward Memorial School MA 1772 \n",
"Wilmington School District Wildwood MA 1887 \n",
"\n",
" county \\\n",
"district-name name \n",
"Quincy School District Central Middle School Norfolk County \n",
"Natick School District Brown Middlesex County \n",
"Wakefield School District Dolbeare Elementary School Middlesex County \n",
"Southborough School District Albert S. Woodward Memorial School Worcester County \n",
"Wilmington School District Wildwood Middlesex County \n",
"\n",
" lat \\\n",
"district-name name \n",
"Quincy School District Central Middle School 42.261284 \n",
"Natick School District Brown 42.292339 \n",
"Wakefield School District Dolbeare Elementary School 42.518574 \n",
"Southborough School District Albert S. Woodward Memorial School 42.304043 \n",
"Wilmington School District Wildwood 42.550678 \n",
"\n",
" lon \\\n",
"district-name name \n",
"Quincy School District Central Middle School -71.011436 \n",
"Natick School District Brown -71.384941 \n",
"Wakefield School District Dolbeare Elementary School -71.065102 \n",
"Southborough School District Albert S. Woodward Memorial School -71.527367 \n",
"Wilmington School District Wildwood -71.153496 \n",
"\n",
" district-id \\\n",
"district-name name \n",
"Quincy School District Central Middle School 349 \n",
"Natick School District Brown 294 \n",
"Wakefield School District Dolbeare Elementary School 424 \n",
"Southborough School District Albert S. Woodward Memorial School 387 \n",
"Wilmington School District Wildwood 462 \n",
"\n",
" rating \\\n",
"district-name name \n",
"Quincy School District Central Middle School 8.0 \n",
"Natick School District Brown 7.0 \n",
"Wakefield School District Dolbeare Elementary School 8.0 \n",
"Southborough School District Albert S. Woodward Memorial School NaN \n",
"Wilmington School District Wildwood NaN \n",
"\n",
" year \\\n",
"district-name name \n",
"Quincy School District Central Middle School 2021.0 \n",
"Natick School District Brown 2021.0 \n",
"Wakefield School District Dolbeare Elementary School 2021.0 \n",
"Southborough School District Albert S. Woodward Memorial School 2020.0 \n",
"Wilmington School District Wildwood NaN \n",
"\n",
" coordinates \\\n",
"district-name name \n",
"Quincy School District Central Middle School (42.261284, -71.011436) \n",
"Natick School District Brown (42.292339, -71.384941) \n",
"Wakefield School District Dolbeare Elementary School (42.518574, -71.065102) \n",
"Southborough School District Albert S. Woodward Memorial School (42.304043, -71.527367) \n",
"Wilmington School District Wildwood (42.550678, -71.153496) \n",
"\n",
" distance-to-downtown \\\n",
"district-name name \n",
"Quincy School District Central Middle School 7.944981 \n",
"Natick School District Brown 16.826936 \n",
"Wakefield School District Dolbeare Elementary School 10.436914 \n",
"Southborough School District Albert S. Woodward Memorial School 23.683438 \n",
"Wilmington School District Wildwood 13.322938 \n",
"\n",
" distance-to-work \n",
"district-name name \n",
"Quincy School District Central Middle School 19.199411 \n",
"Natick School District Brown 14.194884 \n",
"Wakefield School District Dolbeare Elementary School 10.107841 \n",
"Southborough School District Albert S. Woodward Memorial School 18.208098 \n",
"Wilmington School District Wildwood 7.376087 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"boston_df.reset_index(inplace=True)\n",
"boston_df.set_index(['district-name','name'],inplace=True)\n",
"boston_df.sort_index(inplace=True)\n",
"boston_df.sample(5)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#print(boston_df['level-codes'].unique()) "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"#boston_df.loc[\"Quincy School District\"]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from district_score import get_overall_rating\n",
"\n",
"overall_df = get_overall_rating(boston_df)\n",
"overall_df = overall_df.set_index(keys='district-name', drop=True)\n",
"overall_df.sort_values(by='weighted_ave', ascending=False, inplace=True)\n",
"overall_df.reset_index(inplace=True)\n",
"overall_df.index += 1\n",
"#print(overall_df.to_markdown())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'message': 'Too Many Requests'}\n"
]
}
],
"source": [
"boston_schools = get_demographics(api_key,'2500919')"
]
}
],
"metadata": {
"interpreter": {
"hash": "4fc861b332db140b7b363b167627eee6a3238262e7c99e0237067fec0875fee7"
},
"kernelspec": {
"display_name": "Python 3.8.10 ('venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}