Archived
1
0
This repository has been archived on 2025-04-27. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
schools/main/analysis.ipynb
2022-05-15 10:19:28 -04:00

1007 lines
55 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from great_schools import get_nearby_schools\n",
"from distance import get_distance\n",
"from secret import get_key\n",
"from district_score import get_overall_rating"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Shaun and Daniela's Boston Public School Analysis\n",
"#### 2021.04.10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fetch the API key from the local filesystem."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# get the API key\n",
"api_key_file = '../keys/api.key'\n",
"api_key = get_key(api_key_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use the `nearby_schools` API endpoint to fetch raw data for all schools within the maximum radius."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Some columns will be dropped immediately as pre-processing.\n",
"drops = [\n",
" 'nces-id',\n",
" 'school-summary',\n",
" 'street',\n",
" 'fipscounty',\n",
" 'phone',\n",
" 'fax',\n",
" 'web-site',\n",
" 'overview-url',\n",
" 'rating-description',\n",
" 'distance',\n",
"]\n",
"\n",
"# Grab data for Boston.\n",
"refresh = False\n",
"boston_nearby_schools_file = '../data/nearby_schools/boston.csv'\n",
"if refresh:\n",
" boston_schools = get_nearby_schools(api_key,\"42.3\",\"-71.2\",\"50\")\n",
" boston_df = pd.DataFrame.from_dict(boston_schools)\n",
" boston_df.drop(columns=drops,inplace=True)\n",
" boston_df.to_csv(boston_nearby_schools_file, )\n",
"else:\n",
" boston_df = pd.read_csv(boston_nearby_schools_file)\n",
" boston_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
" boston_df.drop(columns=[\"Unnamed: 0\"], inplace=True)\n",
"\n",
"# Grab data for Buffalo.\n",
"refresh = False\n",
"buffalo_nearby_schools_file = '../data/nearby_schools/buffalo.csv'\n",
"if refresh:\n",
" buffalo_schools = get_nearby_schools(api_key,\"42.9625\",\"-78.7425\",\"50\")\n",
" buffalo_df = pd.DataFrame.from_dict(buffalo_schools)\n",
" buffalo_df.drop(columns=drops,inplace=True)\n",
" buffalo_df.to_csv(buffalo_nearby_schools_file)\n",
"else:\n",
" buffalo_df = pd.read_csv(buffalo_nearby_schools_file)\n",
" buffalo_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
" buffalo_df.drop(columns=[\"Unnamed: 0\"], inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Process the `lat` and `lon` columns from the API output into tuples.\n",
"\n",
"Then create two new columns:\n",
"- Distance to Downtown\n",
"- Distance to Work"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Form tuple to represent coordinates\n",
"boston_df['coordinates'] = list(zip(boston_df.lat,boston_df.lon))\n",
"#boston_df.drop(columns=['lat', 'lon'], inplace=True)\n",
"\n",
"# Define coordinates of important places\n",
"downtown=(42.3674836866797, -71.07134540735377) # Science Museum\n",
"work=(42.47381059540949, -71.25414135292398) # Hartwell\n",
"\n",
"# Create new columns to tabulate distance to these important places\n",
"boston_df['distance-to-downtown'] = boston_df['coordinates'].apply(func=get_distance,p2=downtown)\n",
"boston_df['distance-to-work'] = boston_df['coordinates'].apply(func=get_distance,p2=work)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We should definitely remove all schools that aren't in Massachusetts."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 1789 schools from the original API results.\n",
"Allowing only schools from Massachusetts reduces the dataset to 1375 schools.\n"
]
}
],
"source": [
"print(f'There are {len(boston_df)} schools from the original API results.')\n",
"\n",
"# only allow from MA\n",
"boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
"print(f'Allowing only schools from Massachusetts reduces the dataset to {len(boston_df)} schools.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many unique district IDs are there?"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"There are 230 unique school districts.\n",
"\n"
]
}
],
"source": [
"# get unique districts\n",
"districts = boston_df[\"district-id\"].unique()\n",
"print(f'\\nThere are {len(districts)} unique school districts.\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Which of these districts are close to both work and downtown Boston?"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"There are 90 school districts within reasonable proximity to downtown and work.\n",
"\n",
"There are 699 schools within these proximal districts.\n",
"\n"
]
}
],
"source": [
"# approximate each district's distance to each PoI as the mean of its schools' distances\n",
"distances_to_downtown = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-downtown']}\n",
"distances_to_work = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-work']}\n",
"\n",
"df_downtown = pd.DataFrame.from_dict(distances_to_downtown, orient='index')\n",
"df_work = pd.DataFrame.from_dict(distances_to_work, orient='index')\n",
"\n",
"# merge these new columns\n",
"both_df = pd.merge(left=df_downtown, right=df_work, how='inner', left_index=True, right_index=True)\n",
"both_df.rename(columns={'0_x': \"downtown\", '0_y': \"work\"}, inplace=True)\n",
"\n",
"both_df = both_df[both_df[\"downtown\"] < 35.0]\n",
"both_df = both_df[both_df[\"work\"] < 20.0]\n",
"\n",
"print(f'\\nThere are {len(both_df)} school districts within reasonable proximity to downtown and work.\\n')\n",
"\n",
"# filter out all schools which aren't in proximal districts\n",
"proximal_district_ids = list(both_df.index)\n",
"boston_df = boston_df[boston_df['district-id'].isin(proximal_district_ids)]\n",
"\n",
"print(f'There are {len(boston_df)} schools within these proximal districts.\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's drop any districts whose average school rating is not above the mean rating of all remaining schools."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Of the remaining 699 schools, the average rating is 5.664546899841017.\n",
"\n",
"There are 56 districts remaining after pruning districts whose collective average is below the population mean rating.\n",
"\n",
"Which are, ['Acton-Boxborough School District', 'Andover School District', 'Arlington Public Schools', 'Ashland School District', 'Assabet Valley Regional Vocational Technical School District', 'Bedford School District', 'Belmont School District', 'Billerica School District', 'Boxford School District', 'Brookline School District', 'Burlington School District', 'Cambridge School District', 'Carlisle School District', 'Chelmsford School District', 'Concord School District', 'Concord-Carlisle School District', 'Dover School District', 'Dover-Sherborn School District', 'Dracut School District', 'Essex North Shore Agricultural and Technical School District', 'Groton-Dunstable School District', 'Harvard School District', 'Lexington School District', 'Lincoln-Sudbury School District', 'Littleton School District', 'Lynnfield School District', 'Marblehead School District', 'Masconomet School District', 'Melrose School District', 'Middleton School District', 'Milton School District', 'Nahant School District', 'Nashoba School District', 'Natick School District', 'Needham School District', 'Newton School District', 'North Andover School District', 'North Reading School District', 'Norwood School District', 'Quincy School District', 'Reading School District', 'Shawsheen Valley Regional Vocational Technical School District', 'Sherborn School District', 'Southborough School District', 'Stoneham School District', 'Sudbury School District', 'Topsfield School District', 'Tyngsborough School District', 'Wakefield School District', 'Wayland School District', 'Wellesley School District', 'Westford School District', 'Weston School District', 'Westwood School District', 'Wilmington School District', 'Winchester School District']\n"
]
}
],
"source": [
"# get the mean rating from the entire population of schools\n",
"mean_rating = boston_df['rating'].mean()\n",
"std_rating = boston_df['rating'].std()\n",
"\n",
"print(f'\\nOf the remaining {len(boston_df)} schools, the average rating is {mean_rating}.')\n",
"\n",
"# compute the average rating for each district\n",
"ave_ratings = {k: np.mean(v) for k, v in boston_df.groupby(by='district-id')['rating']}\n",
"\n",
"# keep only districts that are above the population mean\n",
"not_low_performing = [k for k, v in ave_ratings.items() if v > mean_rating]\n",
"boston_df = boston_df[boston_df['district-id'].isin(not_low_performing)]\n",
"\n",
"districts = sorted(list(boston_df['district-name'].unique()))\n",
"print(f'\\nThere are {len(districts)} districts remaining after pruning districts whose collective average is below the population mean rating.\\n')\n",
"print(f'Which are, {districts}')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>state-id</th>\n",
" <th>type</th>\n",
" <th>level-codes</th>\n",
" <th>level</th>\n",
" <th>city</th>\n",
" <th>state</th>\n",
" <th>zip</th>\n",
" <th>county</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>district-id</th>\n",
" <th>rating</th>\n",
" <th>year</th>\n",
" <th>coordinates</th>\n",
" <th>distance-to-downtown</th>\n",
" <th>distance-to-work</th>\n",
" </tr>\n",
" <tr>\n",
" <th>district-name</th>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">Acton-Boxborough School District</th>\n",
" <th>Acton-Boxborough Regional High School</th>\n",
" <td>6000505</td>\n",
" <td>public</td>\n",
" <td>h</td>\n",
" <td>9,10,11,12,UG</td>\n",
" <td>Acton</td>\n",
" <td>MA</td>\n",
" <td>1720</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.479694</td>\n",
" <td>-71.458084</td>\n",
" <td>59</td>\n",
" <td>9.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.479694, -71.458084)</td>\n",
" <td>21.179084</td>\n",
" <td>10.393596</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Blanchard Memorial School</th>\n",
" <td>6000005</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5,6</td>\n",
" <td>Boxborough</td>\n",
" <td>MA</td>\n",
" <td>1719</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.482014</td>\n",
" <td>-71.505814</td>\n",
" <td>59</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.482014, -71.505814)</td>\n",
" <td>23.513033</td>\n",
" <td>12.828547</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C.T. Douglas Elementary School</th>\n",
" <td>6000020</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5,6</td>\n",
" <td>Acton</td>\n",
" <td>MA</td>\n",
" <td>1720</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.481873</td>\n",
" <td>-71.471588</td>\n",
" <td>59</td>\n",
" <td>6.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.481873, -71.471588)</td>\n",
" <td>21.874635</td>\n",
" <td>11.087128</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Luther Conant School</th>\n",
" <td>6000030</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5,6</td>\n",
" <td>Acton</td>\n",
" <td>MA</td>\n",
" <td>1720</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.475239</td>\n",
" <td>-71.436340</td>\n",
" <td>59</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.475239, -71.43634)</td>\n",
" <td>20.036107</td>\n",
" <td>9.279210</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mccarthy-Towne School</th>\n",
" <td>6000015</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5,6</td>\n",
" <td>Acton</td>\n",
" <td>MA</td>\n",
" <td>1720</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.476936</td>\n",
" <td>-71.453590</td>\n",
" <td>59</td>\n",
" <td>5.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.476936, -71.45359)</td>\n",
" <td>20.896860</td>\n",
" <td>10.159317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">Winchester School District</th>\n",
" <th>Lynch Elementary School</th>\n",
" <td>3440020</td>\n",
" <td>public</td>\n",
" <td>p,e</td>\n",
" <td>PK,KG,1,2,3,4,5</td>\n",
" <td>Winchester</td>\n",
" <td>MA</td>\n",
" <td>1890</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.460964</td>\n",
" <td>-71.150330</td>\n",
" <td>464</td>\n",
" <td>7.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.460964, -71.15033)</td>\n",
" <td>7.607210</td>\n",
" <td>5.361206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mccall Middle School</th>\n",
" <td>3440305</td>\n",
" <td>public</td>\n",
" <td>m</td>\n",
" <td>6,7,8</td>\n",
" <td>Winchester</td>\n",
" <td>MA</td>\n",
" <td>1890</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.449566</td>\n",
" <td>-71.134735</td>\n",
" <td>464</td>\n",
" <td>7.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.449566, -71.134735)</td>\n",
" <td>6.523996</td>\n",
" <td>6.308317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Muraco Elementary School</th>\n",
" <td>3440040</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5</td>\n",
" <td>Winchester</td>\n",
" <td>MA</td>\n",
" <td>1890</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.463272</td>\n",
" <td>-71.131409</td>\n",
" <td>464</td>\n",
" <td>9.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.463272, -71.131409)</td>\n",
" <td>7.288029</td>\n",
" <td>6.293099</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Vinson-Owen Elementary School</th>\n",
" <td>3440025</td>\n",
" <td>public</td>\n",
" <td>p,e</td>\n",
" <td>PK,KG,1,2,3,4,5</td>\n",
" <td>Winchester</td>\n",
" <td>MA</td>\n",
" <td>1890</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.449741</td>\n",
" <td>-71.175018</td>\n",
" <td>464</td>\n",
" <td>9.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.449741, -71.175018)</td>\n",
" <td>7.758212</td>\n",
" <td>4.359471</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Winchester High School</th>\n",
" <td>3440505</td>\n",
" <td>public</td>\n",
" <td>h</td>\n",
" <td>9,10,11,12</td>\n",
" <td>Winchester</td>\n",
" <td>MA</td>\n",
" <td>1890</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.455719</td>\n",
" <td>-71.134201</td>\n",
" <td>464</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.455719, -71.134201)</td>\n",
" <td>6.883474</td>\n",
" <td>6.235460</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>325 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" state-id \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 6000505 \n",
" Blanchard Memorial School 6000005 \n",
" C.T. Douglas Elementary School 6000020 \n",
" Luther Conant School 6000030 \n",
" Mccarthy-Towne School 6000015 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 3440020 \n",
" Mccall Middle School 3440305 \n",
" Muraco Elementary School 3440040 \n",
" Vinson-Owen Elementary School 3440025 \n",
" Winchester High School 3440505 \n",
"\n",
" type \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School public \n",
" Blanchard Memorial School public \n",
" C.T. Douglas Elementary School public \n",
" Luther Conant School public \n",
" Mccarthy-Towne School public \n",
"... ... \n",
"Winchester School District Lynch Elementary School public \n",
" Mccall Middle School public \n",
" Muraco Elementary School public \n",
" Vinson-Owen Elementary School public \n",
" Winchester High School public \n",
"\n",
" level-codes \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School h \n",
" Blanchard Memorial School e \n",
" C.T. Douglas Elementary School e \n",
" Luther Conant School e \n",
" Mccarthy-Towne School e \n",
"... ... \n",
"Winchester School District Lynch Elementary School p,e \n",
" Mccall Middle School m \n",
" Muraco Elementary School e \n",
" Vinson-Owen Elementary School p,e \n",
" Winchester High School h \n",
"\n",
" level \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 9,10,11,12,UG \n",
" Blanchard Memorial School KG,1,2,3,4,5,6 \n",
" C.T. Douglas Elementary School KG,1,2,3,4,5,6 \n",
" Luther Conant School KG,1,2,3,4,5,6 \n",
" Mccarthy-Towne School KG,1,2,3,4,5,6 \n",
"... ... \n",
"Winchester School District Lynch Elementary School PK,KG,1,2,3,4,5 \n",
" Mccall Middle School 6,7,8 \n",
" Muraco Elementary School KG,1,2,3,4,5 \n",
" Vinson-Owen Elementary School PK,KG,1,2,3,4,5 \n",
" Winchester High School 9,10,11,12 \n",
"\n",
" city \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School Acton \n",
" Blanchard Memorial School Boxborough \n",
" C.T. Douglas Elementary School Acton \n",
" Luther Conant School Acton \n",
" Mccarthy-Towne School Acton \n",
"... ... \n",
"Winchester School District Lynch Elementary School Winchester \n",
" Mccall Middle School Winchester \n",
" Muraco Elementary School Winchester \n",
" Vinson-Owen Elementary School Winchester \n",
" Winchester High School Winchester \n",
"\n",
" state \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School MA \n",
" Blanchard Memorial School MA \n",
" C.T. Douglas Elementary School MA \n",
" Luther Conant School MA \n",
" Mccarthy-Towne School MA \n",
"... ... \n",
"Winchester School District Lynch Elementary School MA \n",
" Mccall Middle School MA \n",
" Muraco Elementary School MA \n",
" Vinson-Owen Elementary School MA \n",
" Winchester High School MA \n",
"\n",
" zip \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 1720 \n",
" Blanchard Memorial School 1719 \n",
" C.T. Douglas Elementary School 1720 \n",
" Luther Conant School 1720 \n",
" Mccarthy-Towne School 1720 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 1890 \n",
" Mccall Middle School 1890 \n",
" Muraco Elementary School 1890 \n",
" Vinson-Owen Elementary School 1890 \n",
" Winchester High School 1890 \n",
"\n",
" county \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School Middlesex County \n",
" Blanchard Memorial School Middlesex County \n",
" C.T. Douglas Elementary School Middlesex County \n",
" Luther Conant School Middlesex County \n",
" Mccarthy-Towne School Middlesex County \n",
"... ... \n",
"Winchester School District Lynch Elementary School Middlesex County \n",
" Mccall Middle School Middlesex County \n",
" Muraco Elementary School Middlesex County \n",
" Vinson-Owen Elementary School Middlesex County \n",
" Winchester High School Middlesex County \n",
"\n",
" lat \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 42.479694 \n",
" Blanchard Memorial School 42.482014 \n",
" C.T. Douglas Elementary School 42.481873 \n",
" Luther Conant School 42.475239 \n",
" Mccarthy-Towne School 42.476936 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 42.460964 \n",
" Mccall Middle School 42.449566 \n",
" Muraco Elementary School 42.463272 \n",
" Vinson-Owen Elementary School 42.449741 \n",
" Winchester High School 42.455719 \n",
"\n",
" lon \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School -71.458084 \n",
" Blanchard Memorial School -71.505814 \n",
" C.T. Douglas Elementary School -71.471588 \n",
" Luther Conant School -71.436340 \n",
" Mccarthy-Towne School -71.453590 \n",
"... ... \n",
"Winchester School District Lynch Elementary School -71.150330 \n",
" Mccall Middle School -71.134735 \n",
" Muraco Elementary School -71.131409 \n",
" Vinson-Owen Elementary School -71.175018 \n",
" Winchester High School -71.134201 \n",
"\n",
" district-id \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 59 \n",
" Blanchard Memorial School 59 \n",
" C.T. Douglas Elementary School 59 \n",
" Luther Conant School 59 \n",
" Mccarthy-Towne School 59 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 464 \n",
" Mccall Middle School 464 \n",
" Muraco Elementary School 464 \n",
" Vinson-Owen Elementary School 464 \n",
" Winchester High School 464 \n",
"\n",
" rating \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 9.0 \n",
" Blanchard Memorial School 8.0 \n",
" C.T. Douglas Elementary School 6.0 \n",
" Luther Conant School 8.0 \n",
" Mccarthy-Towne School 5.0 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 7.0 \n",
" Mccall Middle School 7.0 \n",
" Muraco Elementary School 9.0 \n",
" Vinson-Owen Elementary School 9.0 \n",
" Winchester High School 8.0 \n",
"\n",
" year \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 2021.0 \n",
" Blanchard Memorial School 2021.0 \n",
" C.T. Douglas Elementary School 2021.0 \n",
" Luther Conant School 2021.0 \n",
" Mccarthy-Towne School 2021.0 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 2021.0 \n",
" Mccall Middle School 2021.0 \n",
" Muraco Elementary School 2021.0 \n",
" Vinson-Owen Elementary School 2021.0 \n",
" Winchester High School 2021.0 \n",
"\n",
" coordinates \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School (42.479694, -71.458084) \n",
" Blanchard Memorial School (42.482014, -71.505814) \n",
" C.T. Douglas Elementary School (42.481873, -71.471588) \n",
" Luther Conant School (42.475239, -71.43634) \n",
" Mccarthy-Towne School (42.476936, -71.45359) \n",
"... ... \n",
"Winchester School District Lynch Elementary School (42.460964, -71.15033) \n",
" Mccall Middle School (42.449566, -71.134735) \n",
" Muraco Elementary School (42.463272, -71.131409) \n",
" Vinson-Owen Elementary School (42.449741, -71.175018) \n",
" Winchester High School (42.455719, -71.134201) \n",
"\n",
" distance-to-downtown \\\n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 21.179084 \n",
" Blanchard Memorial School 23.513033 \n",
" C.T. Douglas Elementary School 21.874635 \n",
" Luther Conant School 20.036107 \n",
" Mccarthy-Towne School 20.896860 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 7.607210 \n",
" Mccall Middle School 6.523996 \n",
" Muraco Elementary School 7.288029 \n",
" Vinson-Owen Elementary School 7.758212 \n",
" Winchester High School 6.883474 \n",
"\n",
" distance-to-work \n",
"district-name name \n",
"Acton-Boxborough School District Acton-Boxborough Regional High School 10.393596 \n",
" Blanchard Memorial School 12.828547 \n",
" C.T. Douglas Elementary School 11.087128 \n",
" Luther Conant School 9.279210 \n",
" Mccarthy-Towne School 10.159317 \n",
"... ... \n",
"Winchester School District Lynch Elementary School 5.361206 \n",
" Mccall Middle School 6.308317 \n",
" Muraco Elementary School 6.293099 \n",
" Vinson-Owen Elementary School 4.359471 \n",
" Winchester High School 6.235460 \n",
"\n",
"[325 rows x 16 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"boston_df.set_index(['district-name','name'],inplace=True)\n",
"boston_df.sort_index(inplace=True)\n",
"boston_df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['h' 'e' 'm' 'e,m' 'p,h' 'p,e' 'p,e,m' 'p,e,h' 'm,h']\n"
]
}
],
"source": [
"print(boston_df['level-codes'].unique()) "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>state-id</th>\n",
" <th>type</th>\n",
" <th>level-codes</th>\n",
" <th>level</th>\n",
" <th>city</th>\n",
" <th>state</th>\n",
" <th>zip</th>\n",
" <th>county</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>district-id</th>\n",
" <th>rating</th>\n",
" <th>year</th>\n",
" <th>coordinates</th>\n",
" <th>distance-to-downtown</th>\n",
" <th>distance-to-work</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Harry Lee Cole</th>\n",
" <td>380005</td>\n",
" <td>public</td>\n",
" <td>p,e</td>\n",
" <td>PK,KG,1,2</td>\n",
" <td>Boxford</td>\n",
" <td>MA</td>\n",
" <td>1921</td>\n",
" <td>Essex County</td>\n",
" <td>42.660408</td>\n",
" <td>-71.001770</td>\n",
" <td>102</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>(42.660408, -71.00177)</td>\n",
" <td>20.532659</td>\n",
" <td>18.184645</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Spofford Pond</th>\n",
" <td>380013</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>3,4,5,6</td>\n",
" <td>Boxford</td>\n",
" <td>MA</td>\n",
" <td>1921</td>\n",
" <td>Essex County</td>\n",
" <td>42.697018</td>\n",
" <td>-71.017365</td>\n",
" <td>102</td>\n",
" <td>7.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.697018, -71.017365)</td>\n",
" <td>22.917933</td>\n",
" <td>19.554889</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" state-id type level-codes level city state zip \\\n",
"name \n",
"Harry Lee Cole 380005 public p,e PK,KG,1,2 Boxford MA 1921 \n",
"Spofford Pond 380013 public e 3,4,5,6 Boxford MA 1921 \n",
"\n",
" county lat lon district-id rating \\\n",
"name \n",
"Harry Lee Cole Essex County 42.660408 -71.001770 102 NaN \n",
"Spofford Pond Essex County 42.697018 -71.017365 102 7.0 \n",
"\n",
" year coordinates distance-to-downtown \\\n",
"name \n",
"Harry Lee Cole NaN (42.660408, -71.00177) 20.532659 \n",
"Spofford Pond 2021.0 (42.697018, -71.017365) 22.917933 \n",
"\n",
" distance-to-work \n",
"name \n",
"Harry Lee Cole 18.184645 \n",
"Spofford Pond 19.554889 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"boston_df.loc[\"Boxford School District\"]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "If using all scalar values, you must pass an index",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/shaun/Code/git/schools/main/analysis.ipynb Cell 20'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/shaun/Code/git/schools/main/analysis.ipynb#ch0000020?line=0'>1</a>\u001b[0m get_overall_rating(boston_df)\n",
"File \u001b[0;32m~/Code/git/schools/main/district_score.py:56\u001b[0m, in \u001b[0;36mget_overall_rating\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/main/district_score.py?line=53'>54</a>\u001b[0m district_dict \u001b[39m=\u001b[39m {\u001b[39m'\u001b[39m\u001b[39mdistrict-name\u001b[39m\u001b[39m'\u001b[39m: district}\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/main/district_score.py?line=54'>55</a>\u001b[0m district_dict \u001b[39m=\u001b[39m {\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mdistrict_dict, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mstats}\n\u001b[0;32m---> <a href='file:///home/shaun/Code/git/schools/main/district_score.py?line=55'>56</a>\u001b[0m overall \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mDataFrame(district_dict)\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/main/district_score.py?line=56'>57</a>\u001b[0m \u001b[39mprint\u001b[39m(overall)\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/main/district_score.py?line=57'>58</a>\u001b[0m \u001b[39mreturn\u001b[39;00m\n",
"File \u001b[0;32m~/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py:636\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=629'>630</a>\u001b[0m mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_mgr(\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=630'>631</a>\u001b[0m data, axes\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m: index, \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: columns}, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=631'>632</a>\u001b[0m )\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=633'>634</a>\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, \u001b[39mdict\u001b[39m):\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=634'>635</a>\u001b[0m \u001b[39m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n\u001b[0;32m--> <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=635'>636</a>\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(data, index, columns, dtype\u001b[39m=\u001b[39;49mdtype, copy\u001b[39m=\u001b[39;49mcopy, typ\u001b[39m=\u001b[39;49mmanager)\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=636'>637</a>\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, 
ma\u001b[39m.\u001b[39mMaskedArray):\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/frame.py?line=637'>638</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mma\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmrecords\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mmrecords\u001b[39;00m\n",
"File \u001b[0;32m~/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py:502\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[0;34m(data, index, columns, dtype, typ, copy)\u001b[0m\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=493'>494</a>\u001b[0m arrays \u001b[39m=\u001b[39m [\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=494'>495</a>\u001b[0m x\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=495'>496</a>\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mhasattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mor\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(x\u001b[39m.\u001b[39mdtype, ExtensionDtype)\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=496'>497</a>\u001b[0m \u001b[39melse\u001b[39;00m x\u001b[39m.\u001b[39mcopy()\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=497'>498</a>\u001b[0m \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m arrays\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=498'>499</a>\u001b[0m ]\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=499'>500</a>\u001b[0m \u001b[39m# TODO: can we get rid of the dt64tz special case above?\u001b[39;00m\n\u001b[0;32m--> <a 
href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=501'>502</a>\u001b[0m \u001b[39mreturn\u001b[39;00m arrays_to_mgr(arrays, columns, index, dtype\u001b[39m=\u001b[39;49mdtype, typ\u001b[39m=\u001b[39;49mtyp, consolidate\u001b[39m=\u001b[39;49mcopy)\n",
"File \u001b[0;32m~/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py:120\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=116'>117</a>\u001b[0m \u001b[39mif\u001b[39;00m verify_integrity:\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=117'>118</a>\u001b[0m \u001b[39m# figure out the index, if necessary\u001b[39;00m\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=118'>119</a>\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=119'>120</a>\u001b[0m index \u001b[39m=\u001b[39m _extract_index(arrays)\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=120'>121</a>\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=121'>122</a>\u001b[0m index \u001b[39m=\u001b[39m ensure_index(index)\n",
"File \u001b[0;32m~/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py:664\u001b[0m, in \u001b[0;36m_extract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=660'>661</a>\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mPer-column arrays must each be 1-dimensional\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=662'>663</a>\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m indexes \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m raw_lengths:\n\u001b[0;32m--> <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=663'>664</a>\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mIf using all scalar values, you must pass an index\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=665'>666</a>\u001b[0m \u001b[39melif\u001b[39;00m have_series:\n\u001b[1;32m <a href='file:///home/shaun/Code/git/schools/venv/lib64/python3.10/site-packages/pandas/core/internals/construction.py?line=666'>667</a>\u001b[0m index \u001b[39m=\u001b[39m union_indexes(indexes)\n",
"\u001b[0;31mValueError\u001b[0m: If using all scalar values, you must pass an index"
]
}
],
"source": [
"get_overall_rating(boston_df)"
]
}
],
"metadata": {
"interpreter": {
"hash": "dae3ff348533214ca96918f3de2a71cf7a825bd7ff89c94afcc9465db0c546d9"
},
"kernelspec": {
"display_name": "Python 3.10.4 ('venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}