diff --git a/main/analysis.ipynb b/main/analysis.ipynb
index 8c6ca6c..853617f 100644
--- a/main/analysis.ipynb
+++ b/main/analysis.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@@ -108,7 +108,7 @@
},
{
"cell_type": "code",
- "execution_count": 73,
+ "execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
@@ -129,14 +129,81 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "For Boston, drop all schools that aren't in Massachusetts."
+ "We should definitely remove all schools that aren't in Massachusetts."
]
},
{
"cell_type": "code",
- "execution_count": 74,
+ "execution_count": 36,
"metadata": {},
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "There are 1789 schools from the original API results.\n",
+ "Allowing only schools from Massachusetts reduces the dataset to 1375 schools.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'There are {len(boston_df)} schools from the original API results.')\n",
+ "\n",
+ "# keep only the rows whose 'state' value is 'MA'; all other rows are dropped\n",
+ "boston_df = boston_df.loc[boston_df['state'].eq(\"MA\")]\n",
+ "print(f'Allowing only schools from Massachusetts reduces the dataset to {len(boston_df)} schools.')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "How many unique district IDs are there?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "There are 230 unique school districts.\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# distinct values found in the 'district-id' column\n",
+ "districts = boston_df.loc[:, \"district-id\"].unique()\n",
+ "print(f'\\nThere are {len(districts)} unique school districts.\\n')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Which of these districts are close to both work and downtown Boston?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "There are 116 school districts within reasonable proximity to downtown and work.\n",
+ "\n",
+ "There are 820 schools within these proximal districts.\n",
+ "\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -201,165 +268,342 @@
" \n",
"
\n",
" \n",
- " | 2501042 | \n",
- " 7050505 | \n",
- " Masconomet Regional High School | \n",
+ " 2500363 | \n",
+ " 380013 | \n",
+ " Spofford Pond | \n",
" public | \n",
- " h | \n",
- " 9,10,11,12 | \n",
+ " e | \n",
+ " 3,4,5,6 | \n",
" Boxford | \n",
" MA | \n",
" 1921 | \n",
" Essex County | \n",
- " 42.627754 | \n",
- " -70.974693 | \n",
- " Masconomet School District | \n",
- " 259 | \n",
- " 8.0 | \n",
+ " 42.697018 | \n",
+ " -71.017365 | \n",
+ " Boxford School District | \n",
+ " 102 | \n",
+ " 7.0 | \n",
" 2021.0 | \n",
- " (42.627754, -70.974693) | \n",
- " 30.005931 | \n",
- " 28.583420 | \n",
+ " (42.697018, -71.017365) | \n",
+ " 22.917933 | \n",
+ " 19.554889 | \n",
"
\n",
" \n",
- " | 2500337 | \n",
- " 350380 | \n",
- " Young Achievers Science and Math School | \n",
+ " 2506356 | \n",
+ " 100305 | \n",
+ " Gibbs School | \n",
" public | \n",
- " p,e,m | \n",
- " PK,KG,1,2,3,4,5,6,7,8 | \n",
- " Mattapan | \n",
+ " e,m | \n",
+ " 6 | \n",
+ " Arlington | \n",
" MA | \n",
- " 2126 | \n",
- " Suffolk County | \n",
- " 42.282269 | \n",
- " -71.095016 | \n",
- " Boston School District | \n",
- " 99 | \n",
- " 2.0 | \n",
+ " 2474 | \n",
+ " Middlesex County | \n",
+ " 42.410576 | \n",
+ " -71.145081 | \n",
+ " Arlington Public Schools | \n",
+ " 69 | \n",
+ " 7.0 | \n",
" 2021.0 | \n",
- " (42.282269, -71.095016) | \n",
- " 9.673200 | \n",
- " 24.989359 | \n",
+ " (42.410576, -71.145081) | \n",
+ " 4.794958 | \n",
+ " 7.066929 | \n",
"
\n",
" \n",
- " | 2500402 | \n",
- " 440017 | \n",
- " Kennedy K-5 Elementary School | \n",
- " public | \n",
- " e | \n",
- " KG,1,2,3,4,5 | \n",
- " Brockton | \n",
- " MA | \n",
- " 2301 | \n",
- " Plymouth County | \n",
- " 42.059696 | \n",
- " -71.037262 | \n",
- " Brockton School District | \n",
- " 111 | \n",
- " 4.0 | \n",
- " 2021.0 | \n",
- " (42.059696, -71.037262) | \n",
- " 34.339345 | \n",
- " 49.384728 | \n",
- "
\n",
- " \n",
- " | 2501682 | \n",
- " 3070010 | \n",
- " Boyden | \n",
- " public | \n",
- " e | \n",
- " KG,1,2,3,4,5 | \n",
- " Walpole | \n",
- " MA | \n",
- " 2071 | \n",
- " Norfolk County | \n",
- " 42.105808 | \n",
- " -71.258743 | \n",
- " Walpole School District | \n",
- " 426 | \n",
- " 6.0 | \n",
- " 2021.0 | \n",
- " (42.105808, -71.258743) | \n",
- " 32.933990 | \n",
- " 40.921772 | \n",
- "
\n",
- " \n",
- " | 2501507 | \n",
- " 2760305 | \n",
- " P. Brent Trottier Middle School | \n",
+ " 2501835 | \n",
+ " 3470410 | \n",
+ " Daniel L Joyce Middle School | \n",
" public | \n",
" m | \n",
" 6,7,8 | \n",
- " Southborough | \n",
+ " Woburn | \n",
" MA | \n",
- " 1772 | \n",
- " Worcester County | \n",
- " 42.299240 | \n",
- " -71.542259 | \n",
- " Southborough School District | \n",
- " 387 | \n",
+ " 1801 | \n",
+ " Middlesex County | \n",
+ " 42.477467 | \n",
+ " -71.175484 | \n",
+ " Woburn School District | \n",
+ " 467 | \n",
+ " 4.0 | \n",
+ " 2021.0 | \n",
+ " (42.477467, -71.175484) | \n",
+ " 9.264922 | \n",
+ " 4.013598 | \n",
+ "
\n",
+ " \n",
+ " | 2501714 | \n",
+ " 3150005 | \n",
+ " Claypit Hill School | \n",
+ " public | \n",
+ " e | \n",
+ " KG,1,2,3,4,5 | \n",
+ " Wayland | \n",
+ " MA | \n",
+ " 1778 | \n",
+ " Middlesex County | \n",
+ " 42.373108 | \n",
+ " -71.344765 | \n",
+ " Wayland School District | \n",
+ " 434 | \n",
" 8.0 | \n",
" 2021.0 | \n",
- " (42.29924, -71.542259) | \n",
- " 39.445654 | \n",
- " 30.606258 | \n",
+ " (42.373108, -71.344765) | \n",
+ " 13.952791 | \n",
+ " 8.347379 | \n",
+ "
\n",
+ " \n",
+ " | 2502631 | \n",
+ " 1810055 | \n",
+ " Tenney Grammar School | \n",
+ " public | \n",
+ " p,e,m | \n",
+ " PK,KG,1,2,3,4,5,6,7,8 | \n",
+ " Methuen | \n",
+ " MA | \n",
+ " 1844 | \n",
+ " Essex County | \n",
+ " 42.732357 | \n",
+ " -71.177345 | \n",
+ " Methuen School District | \n",
+ " 270 | \n",
+ " 3.0 | \n",
+ " 2021.0 | \n",
+ " (42.732357, -71.177345) | \n",
+ " 25.763243 | \n",
+ " 18.273064 | \n",
+ "
\n",
+ " \n",
+ " | 2500515 | \n",
+ " 710505 | \n",
+ " Danvers High School | \n",
+ " public | \n",
+ " h | \n",
+ " 9,10,11,12,UG | \n",
+ " Danvers | \n",
+ " MA | \n",
+ " 1923 | \n",
+ " Essex County | \n",
+ " 42.582523 | \n",
+ " -70.931618 | \n",
+ " Danvers School District | \n",
+ " 141 | \n",
+ " 6.0 | \n",
+ " 2021.0 | \n",
+ " (42.582523, -70.931618) | \n",
+ " 16.464503 | \n",
+ " 18.045917 | \n",
+ "
\n",
+ " \n",
+ " | 2501498 | \n",
+ " 2740410 | \n",
+ " Next Wave Junior High School | \n",
+ " public | \n",
+ " m | \n",
+ " 7,8 | \n",
+ " Somerville | \n",
+ " MA | \n",
+ " 2145 | \n",
+ " Middlesex County | \n",
+ " 42.387581 | \n",
+ " -71.087326 | \n",
+ " Somerville School District | \n",
+ " 383 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " (42.387581, -71.087326) | \n",
+ " 1.609308 | \n",
+ " 10.378716 | \n",
+ "
\n",
+ " \n",
+ " | 2501384 | \n",
+ " 2430310 | \n",
+ " Broad Meadows Middle School | \n",
+ " public | \n",
+ " m | \n",
+ " 6,7,8 | \n",
+ " Quincy | \n",
+ " MA | \n",
+ " 2169 | \n",
+ " Norfolk County | \n",
+ " 42.259659 | \n",
+ " -70.985237 | \n",
+ " Quincy School District | \n",
+ " 349 | \n",
+ " 4.0 | \n",
+ " 2021.0 | \n",
+ " (42.259659, -70.985237) | \n",
+ " 8.646003 | \n",
+ " 20.169491 | \n",
+ "
\n",
+ " \n",
+ " | 2500916 | \n",
+ " 1570006 | \n",
+ " Hanscom Primary School | \n",
+ " public | \n",
+ " p,e | \n",
+ " PK,KG,1,2,3 | \n",
+ " Hanscom Air Force Bs | \n",
+ " MA | \n",
+ " 1731 | \n",
+ " Middlesex County | \n",
+ " 42.456898 | \n",
+ " -71.278549 | \n",
+ " Lincoln School District | \n",
+ " 242 | \n",
+ " 3.0 | \n",
+ " 2021.0 | \n",
+ " (42.456898, -71.278549) | \n",
+ " 12.234463 | \n",
+ " 1.705602 | \n",
+ "
\n",
+ " \n",
+ " | 2501788 | \n",
+ " 3360065 | \n",
+ " Lawrence W Pingree | \n",
+ " public | \n",
+ " e | \n",
+ " KG,1,2,3,4 | \n",
+ " Weymouth | \n",
+ " MA | \n",
+ " 2189 | \n",
+ " Norfolk County | \n",
+ " 42.217670 | \n",
+ " -70.925240 | \n",
+ " Weymouth School District | \n",
+ " 455 | \n",
+ " 8.0 | \n",
+ " 2021.0 | \n",
+ " (42.21767, -70.92524) | \n",
+ " 12.754639 | \n",
+ " 24.381842 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " state-id name type \\\n",
- "universal-id \n",
- "2501042 7050505 Masconomet Regional High School public \n",
- "2500337 350380 Young Achievers Science and Math School public \n",
- "2500402 440017 Kennedy K-5 Elementary School public \n",
- "2501682 3070010 Boyden public \n",
- "2501507 2760305 P. Brent Trottier Middle School public \n",
+ " state-id name type level-codes \\\n",
+ "universal-id \n",
+ "2500363 380013 Spofford Pond public e \n",
+ "2506356 100305 Gibbs School public e,m \n",
+ "2501835 3470410 Daniel L Joyce Middle School public m \n",
+ "2501714 3150005 Claypit Hill School public e \n",
+ "2502631 1810055 Tenney Grammar School public p,e,m \n",
+ "2500515 710505 Danvers High School public h \n",
+ "2501498 2740410 Next Wave Junior High School public m \n",
+ "2501384 2430310 Broad Meadows Middle School public m \n",
+ "2500916 1570006 Hanscom Primary School public p,e \n",
+ "2501788 3360065 Lawrence W Pingree public e \n",
"\n",
- " level-codes level city state zip \\\n",
- "universal-id \n",
- "2501042 h 9,10,11,12 Boxford MA 1921 \n",
- "2500337 p,e,m PK,KG,1,2,3,4,5,6,7,8 Mattapan MA 2126 \n",
- "2500402 e KG,1,2,3,4,5 Brockton MA 2301 \n",
- "2501682 e KG,1,2,3,4,5 Walpole MA 2071 \n",
- "2501507 m 6,7,8 Southborough MA 1772 \n",
+ " level city state zip \\\n",
+ "universal-id \n",
+ "2500363 3,4,5,6 Boxford MA 1921 \n",
+ "2506356 6 Arlington MA 2474 \n",
+ "2501835 6,7,8 Woburn MA 1801 \n",
+ "2501714 KG,1,2,3,4,5 Wayland MA 1778 \n",
+ "2502631 PK,KG,1,2,3,4,5,6,7,8 Methuen MA 1844 \n",
+ "2500515 9,10,11,12,UG Danvers MA 1923 \n",
+ "2501498 7,8 Somerville MA 2145 \n",
+ "2501384 6,7,8 Quincy MA 2169 \n",
+ "2500916 PK,KG,1,2,3 Hanscom Air Force Bs MA 1731 \n",
+ "2501788 KG,1,2,3,4 Weymouth MA 2189 \n",
"\n",
" county lat lon \\\n",
"universal-id \n",
- "2501042 Essex County 42.627754 -70.974693 \n",
- "2500337 Suffolk County 42.282269 -71.095016 \n",
- "2500402 Plymouth County 42.059696 -71.037262 \n",
- "2501682 Norfolk County 42.105808 -71.258743 \n",
- "2501507 Worcester County 42.299240 -71.542259 \n",
+ "2500363 Essex County 42.697018 -71.017365 \n",
+ "2506356 Middlesex County 42.410576 -71.145081 \n",
+ "2501835 Middlesex County 42.477467 -71.175484 \n",
+ "2501714 Middlesex County 42.373108 -71.344765 \n",
+ "2502631 Essex County 42.732357 -71.177345 \n",
+ "2500515 Essex County 42.582523 -70.931618 \n",
+ "2501498 Middlesex County 42.387581 -71.087326 \n",
+ "2501384 Norfolk County 42.259659 -70.985237 \n",
+ "2500916 Middlesex County 42.456898 -71.278549 \n",
+ "2501788 Norfolk County 42.217670 -70.925240 \n",
"\n",
- " district-name district-id rating year \\\n",
- "universal-id \n",
- "2501042 Masconomet School District 259 8.0 2021.0 \n",
- "2500337 Boston School District 99 2.0 2021.0 \n",
- "2500402 Brockton School District 111 4.0 2021.0 \n",
- "2501682 Walpole School District 426 6.0 2021.0 \n",
- "2501507 Southborough School District 387 8.0 2021.0 \n",
+ " district-name district-id rating year \\\n",
+ "universal-id \n",
+ "2500363 Boxford School District 102 7.0 2021.0 \n",
+ "2506356 Arlington Public Schools 69 7.0 2021.0 \n",
+ "2501835 Woburn School District 467 4.0 2021.0 \n",
+ "2501714 Wayland School District 434 8.0 2021.0 \n",
+ "2502631 Methuen School District 270 3.0 2021.0 \n",
+ "2500515 Danvers School District 141 6.0 2021.0 \n",
+ "2501498 Somerville School District 383 NaN NaN \n",
+ "2501384 Quincy School District 349 4.0 2021.0 \n",
+ "2500916 Lincoln School District 242 3.0 2021.0 \n",
+ "2501788 Weymouth School District 455 8.0 2021.0 \n",
"\n",
" coordinates distance-to-downtown distance-to-work \n",
"universal-id \n",
- "2501042 (42.627754, -70.974693) 30.005931 28.583420 \n",
- "2500337 (42.282269, -71.095016) 9.673200 24.989359 \n",
- "2500402 (42.059696, -71.037262) 34.339345 49.384728 \n",
- "2501682 (42.105808, -71.258743) 32.933990 40.921772 \n",
- "2501507 (42.29924, -71.542259) 39.445654 30.606258 "
+ "2500363 (42.697018, -71.017365) 22.917933 19.554889 \n",
+ "2506356 (42.410576, -71.145081) 4.794958 7.066929 \n",
+ "2501835 (42.477467, -71.175484) 9.264922 4.013598 \n",
+ "2501714 (42.373108, -71.344765) 13.952791 8.347379 \n",
+ "2502631 (42.732357, -71.177345) 25.763243 18.273064 \n",
+ "2500515 (42.582523, -70.931618) 16.464503 18.045917 \n",
+ "2501498 (42.387581, -71.087326) 1.609308 10.378716 \n",
+ "2501384 (42.259659, -70.985237) 8.646003 20.169491 \n",
+ "2500916 (42.456898, -71.278549) 12.234463 1.705602 \n",
+ "2501788 (42.21767, -70.92524) 12.754639 24.381842 "
]
},
- "execution_count": 74,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
+ "# per-district distance to each PoI, taken as the mean of that district's per-school distances\n",
+ "distances_to_downtown = {district_id: np.mean(list(dists)) for district_id, dists in boston_df.groupby('district-id')['distance-to-downtown']}\n",
+ "distances_to_work = {district_id: np.mean(list(dists)) for district_id, dists in boston_df.groupby('district-id')['distance-to-work']}\n",
"\n",
- "boston_df.sample(5)"
+ "# distance cut-offs, expressed in the same units as the distance-to-* columns\n",
+ "MAX_DISTANCE_TO_DOWNTOWN = 35.0\n",
+ "MAX_DISTANCE_TO_WORK = 25.0\n",
+ "\n",
+ "# one single-column frame per PoI, indexed by the dict keys (district ids); naming the column\n",
+ "# here avoids depending on the '0_x'/'0_y' labels that merge would otherwise generate\n",
+ "df_downtown = pd.DataFrame.from_dict(distances_to_downtown, orient='index', columns=['downtown'])\n",
+ "df_work = pd.DataFrame.from_dict(distances_to_work, orient='index', columns=['work'])\n",
+ "both_df = pd.merge(left=df_downtown, right=df_work, how='inner', left_index=True, right_index=True)\n",
+ "\n",
+ "# keep only the districts whose mean distances are under both cut-offs\n",
+ "both_df = both_df[(both_df['downtown'] < MAX_DISTANCE_TO_DOWNTOWN) & (both_df['work'] < MAX_DISTANCE_TO_WORK)]\n",
+ "print(f'There are {len(both_df)} school districts within reasonable proximity to downtown and work.\\n')\n",
+ "\n",
+ "# drop every school whose district did not pass the proximity filter\n",
+ "proximal_district_ids = list(both_df.index)\n",
+ "boston_df = boston_df[boston_df['district-id'].isin(proximal_district_ids)]\n",
+ "print(f'There are {len(boston_df)} schools within these proximal districts.\\n')\n",
+ "boston_df.sample(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Some of these districts don't have enough rating data. Those should be dropped."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "boston_df.groupby('district-id')  # NOTE(review): only builds a GroupBy object; no district is dropped here yet"
]
}
],