Archived
1
0

Working on jupyter notebook, finished proximity filtering.

This commit is contained in:
Shaun Setlock
2022-04-24 21:32:26 -04:00
parent 56b4c0cb1c
commit 3668c65cb9

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@@ -108,7 +108,7 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
@@ -129,14 +129,81 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"For Boston, drop all schools that aren't in Massachusetts."
"We should definitely remove all schools that aren't in Massachusetts."
]
},
{
"cell_type": "code",
"execution_count": 74,
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 1789 schools from the original API results.\n",
"Allowing only schools from Massachusetts reduces the dataset to 1375 schools.\n"
]
}
],
"source": [
"print(f'There are {len(boston_df)} schools from the original API results.')\n",
"\n",
"# only allow from MA\n",
"boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
"print(f'Allowing only schools from Massachusetts reduces the dataset to {len(boston_df)} schools.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many unique district IDs are there?"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"There are 230 unique school districts.\n",
"\n"
]
}
],
"source": [
"# get unique districts\n",
"districts = boston_df[\"district-id\"].unique()\n",
"print(f'\\nThere are {len(districts)} unique school districts.\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Which of these districts are close to both work and downtown Boston?"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 116 school districts within reasonable proximity to downtown and work.\n",
"\n",
"There are 820 schools within these proximal districts.\n",
"\n"
]
},
{
"data": {
"text/html": [
@@ -201,165 +268,342 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2501042</th>\n",
" <td>7050505</td>\n",
" <td>Masconomet Regional High School</td>\n",
" <th>2500363</th>\n",
" <td>380013</td>\n",
" <td>Spofford Pond</td>\n",
" <td>public</td>\n",
" <td>h</td>\n",
" <td>9,10,11,12</td>\n",
" <td>e</td>\n",
" <td>3,4,5,6</td>\n",
" <td>Boxford</td>\n",
" <td>MA</td>\n",
" <td>1921</td>\n",
" <td>Essex County</td>\n",
" <td>42.627754</td>\n",
" <td>-70.974693</td>\n",
" <td>Masconomet School District</td>\n",
" <td>259</td>\n",
" <td>8.0</td>\n",
" <td>42.697018</td>\n",
" <td>-71.017365</td>\n",
" <td>Boxford School District</td>\n",
" <td>102</td>\n",
" <td>7.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.627754, -70.974693)</td>\n",
" <td>30.005931</td>\n",
" <td>28.583420</td>\n",
" <td>(42.697018, -71.017365)</td>\n",
" <td>22.917933</td>\n",
" <td>19.554889</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2500337</th>\n",
" <td>350380</td>\n",
" <td>Young Achievers Science and Math School</td>\n",
" <th>2506356</th>\n",
" <td>100305</td>\n",
" <td>Gibbs School</td>\n",
" <td>public</td>\n",
" <td>p,e,m</td>\n",
" <td>PK,KG,1,2,3,4,5,6,7,8</td>\n",
" <td>Mattapan</td>\n",
" <td>e,m</td>\n",
" <td>6</td>\n",
" <td>Arlington</td>\n",
" <td>MA</td>\n",
" <td>2126</td>\n",
" <td>Suffolk County</td>\n",
" <td>42.282269</td>\n",
" <td>-71.095016</td>\n",
" <td>Boston School District</td>\n",
" <td>99</td>\n",
" <td>2.0</td>\n",
" <td>2474</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.410576</td>\n",
" <td>-71.145081</td>\n",
" <td>Arlington Public Schools</td>\n",
" <td>69</td>\n",
" <td>7.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.282269, -71.095016)</td>\n",
" <td>9.673200</td>\n",
" <td>24.989359</td>\n",
" <td>(42.410576, -71.145081)</td>\n",
" <td>4.794958</td>\n",
" <td>7.066929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2500402</th>\n",
" <td>440017</td>\n",
" <td>Kennedy K-5 Elementary School</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5</td>\n",
" <td>Brockton</td>\n",
" <td>MA</td>\n",
" <td>2301</td>\n",
" <td>Plymouth County</td>\n",
" <td>42.059696</td>\n",
" <td>-71.037262</td>\n",
" <td>Brockton School District</td>\n",
" <td>111</td>\n",
" <td>4.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.059696, -71.037262)</td>\n",
" <td>34.339345</td>\n",
" <td>49.384728</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501682</th>\n",
" <td>3070010</td>\n",
" <td>Boyden</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5</td>\n",
" <td>Walpole</td>\n",
" <td>MA</td>\n",
" <td>2071</td>\n",
" <td>Norfolk County</td>\n",
" <td>42.105808</td>\n",
" <td>-71.258743</td>\n",
" <td>Walpole School District</td>\n",
" <td>426</td>\n",
" <td>6.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.105808, -71.258743)</td>\n",
" <td>32.933990</td>\n",
" <td>40.921772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501507</th>\n",
" <td>2760305</td>\n",
" <td>P. Brent Trottier Middle School</td>\n",
" <th>2501835</th>\n",
" <td>3470410</td>\n",
" <td>Daniel L Joyce Middle School</td>\n",
" <td>public</td>\n",
" <td>m</td>\n",
" <td>6,7,8</td>\n",
" <td>Southborough</td>\n",
" <td>Woburn</td>\n",
" <td>MA</td>\n",
" <td>1772</td>\n",
" <td>Worcester County</td>\n",
" <td>42.299240</td>\n",
" <td>-71.542259</td>\n",
" <td>Southborough School District</td>\n",
" <td>387</td>\n",
" <td>1801</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.477467</td>\n",
" <td>-71.175484</td>\n",
" <td>Woburn School District</td>\n",
" <td>467</td>\n",
" <td>4.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.477467, -71.175484)</td>\n",
" <td>9.264922</td>\n",
" <td>4.013598</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501714</th>\n",
" <td>3150005</td>\n",
" <td>Claypit Hill School</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4,5</td>\n",
" <td>Wayland</td>\n",
" <td>MA</td>\n",
" <td>1778</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.373108</td>\n",
" <td>-71.344765</td>\n",
" <td>Wayland School District</td>\n",
" <td>434</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.29924, -71.542259)</td>\n",
" <td>39.445654</td>\n",
" <td>30.606258</td>\n",
" <td>(42.373108, -71.344765)</td>\n",
" <td>13.952791</td>\n",
" <td>8.347379</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2502631</th>\n",
" <td>1810055</td>\n",
" <td>Tenney Grammar School</td>\n",
" <td>public</td>\n",
" <td>p,e,m</td>\n",
" <td>PK,KG,1,2,3,4,5,6,7,8</td>\n",
" <td>Methuen</td>\n",
" <td>MA</td>\n",
" <td>1844</td>\n",
" <td>Essex County</td>\n",
" <td>42.732357</td>\n",
" <td>-71.177345</td>\n",
" <td>Methuen School District</td>\n",
" <td>270</td>\n",
" <td>3.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.732357, -71.177345)</td>\n",
" <td>25.763243</td>\n",
" <td>18.273064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2500515</th>\n",
" <td>710505</td>\n",
" <td>Danvers High School</td>\n",
" <td>public</td>\n",
" <td>h</td>\n",
" <td>9,10,11,12,UG</td>\n",
" <td>Danvers</td>\n",
" <td>MA</td>\n",
" <td>1923</td>\n",
" <td>Essex County</td>\n",
" <td>42.582523</td>\n",
" <td>-70.931618</td>\n",
" <td>Danvers School District</td>\n",
" <td>141</td>\n",
" <td>6.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.582523, -70.931618)</td>\n",
" <td>16.464503</td>\n",
" <td>18.045917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501498</th>\n",
" <td>2740410</td>\n",
" <td>Next Wave Junior High School</td>\n",
" <td>public</td>\n",
" <td>m</td>\n",
" <td>7,8</td>\n",
" <td>Somerville</td>\n",
" <td>MA</td>\n",
" <td>2145</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.387581</td>\n",
" <td>-71.087326</td>\n",
" <td>Somerville School District</td>\n",
" <td>383</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>(42.387581, -71.087326)</td>\n",
" <td>1.609308</td>\n",
" <td>10.378716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501384</th>\n",
" <td>2430310</td>\n",
" <td>Broad Meadows Middle School</td>\n",
" <td>public</td>\n",
" <td>m</td>\n",
" <td>6,7,8</td>\n",
" <td>Quincy</td>\n",
" <td>MA</td>\n",
" <td>2169</td>\n",
" <td>Norfolk County</td>\n",
" <td>42.259659</td>\n",
" <td>-70.985237</td>\n",
" <td>Quincy School District</td>\n",
" <td>349</td>\n",
" <td>4.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.259659, -70.985237)</td>\n",
" <td>8.646003</td>\n",
" <td>20.169491</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2500916</th>\n",
" <td>1570006</td>\n",
" <td>Hanscom Primary School</td>\n",
" <td>public</td>\n",
" <td>p,e</td>\n",
" <td>PK,KG,1,2,3</td>\n",
" <td>Hanscom Air Force Bs</td>\n",
" <td>MA</td>\n",
" <td>1731</td>\n",
" <td>Middlesex County</td>\n",
" <td>42.456898</td>\n",
" <td>-71.278549</td>\n",
" <td>Lincoln School District</td>\n",
" <td>242</td>\n",
" <td>3.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.456898, -71.278549)</td>\n",
" <td>12.234463</td>\n",
" <td>1.705602</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2501788</th>\n",
" <td>3360065</td>\n",
" <td>Lawrence W Pingree</td>\n",
" <td>public</td>\n",
" <td>e</td>\n",
" <td>KG,1,2,3,4</td>\n",
" <td>Weymouth</td>\n",
" <td>MA</td>\n",
" <td>2189</td>\n",
" <td>Norfolk County</td>\n",
" <td>42.217670</td>\n",
" <td>-70.925240</td>\n",
" <td>Weymouth School District</td>\n",
" <td>455</td>\n",
" <td>8.0</td>\n",
" <td>2021.0</td>\n",
" <td>(42.21767, -70.92524)</td>\n",
" <td>12.754639</td>\n",
" <td>24.381842</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" state-id name type \\\n",
" state-id name type level-codes \\\n",
"universal-id \n",
"2501042 7050505 Masconomet Regional High School public \n",
"2500337 350380 Young Achievers Science and Math School public \n",
"2500402 440017 Kennedy K-5 Elementary School public \n",
"2501682 3070010 Boyden public \n",
"2501507 2760305 P. Brent Trottier Middle School public \n",
"2500363 380013 Spofford Pond public e \n",
"2506356 100305 Gibbs School public e,m \n",
"2501835 3470410 Daniel L Joyce Middle School public m \n",
"2501714 3150005 Claypit Hill School public e \n",
"2502631 1810055 Tenney Grammar School public p,e,m \n",
"2500515 710505 Danvers High School public h \n",
"2501498 2740410 Next Wave Junior High School public m \n",
"2501384 2430310 Broad Meadows Middle School public m \n",
"2500916 1570006 Hanscom Primary School public p,e \n",
"2501788 3360065 Lawrence W Pingree public e \n",
"\n",
" level-codes level city state zip \\\n",
" level city state zip \\\n",
"universal-id \n",
"2501042 h 9,10,11,12 Boxford MA 1921 \n",
"2500337 p,e,m PK,KG,1,2,3,4,5,6,7,8 Mattapan MA 2126 \n",
"2500402 e KG,1,2,3,4,5 Brockton MA 2301 \n",
"2501682 e KG,1,2,3,4,5 Walpole MA 2071 \n",
"2501507 m 6,7,8 Southborough MA 1772 \n",
"2500363 3,4,5,6 Boxford MA 1921 \n",
"2506356 6 Arlington MA 2474 \n",
"2501835 6,7,8 Woburn MA 1801 \n",
"2501714 KG,1,2,3,4,5 Wayland MA 1778 \n",
"2502631 PK,KG,1,2,3,4,5,6,7,8 Methuen MA 1844 \n",
"2500515 9,10,11,12,UG Danvers MA 1923 \n",
"2501498 7,8 Somerville MA 2145 \n",
"2501384 6,7,8 Quincy MA 2169 \n",
"2500916 PK,KG,1,2,3 Hanscom Air Force Bs MA 1731 \n",
"2501788 KG,1,2,3,4 Weymouth MA 2189 \n",
"\n",
" county lat lon \\\n",
"universal-id \n",
"2501042 Essex County 42.627754 -70.974693 \n",
"2500337 Suffolk County 42.282269 -71.095016 \n",
"2500402 Plymouth County 42.059696 -71.037262 \n",
"2501682 Norfolk County 42.105808 -71.258743 \n",
"2501507 Worcester County 42.299240 -71.542259 \n",
"2500363 Essex County 42.697018 -71.017365 \n",
"2506356 Middlesex County 42.410576 -71.145081 \n",
"2501835 Middlesex County 42.477467 -71.175484 \n",
"2501714 Middlesex County 42.373108 -71.344765 \n",
"2502631 Essex County 42.732357 -71.177345 \n",
"2500515 Essex County 42.582523 -70.931618 \n",
"2501498 Middlesex County 42.387581 -71.087326 \n",
"2501384 Norfolk County 42.259659 -70.985237 \n",
"2500916 Middlesex County 42.456898 -71.278549 \n",
"2501788 Norfolk County 42.217670 -70.925240 \n",
"\n",
" district-name district-id rating year \\\n",
"universal-id \n",
"2501042 Masconomet School District 259 8.0 2021.0 \n",
"2500337 Boston School District 99 2.0 2021.0 \n",
"2500402 Brockton School District 111 4.0 2021.0 \n",
"2501682 Walpole School District 426 6.0 2021.0 \n",
"2501507 Southborough School District 387 8.0 2021.0 \n",
"2500363 Boxford School District 102 7.0 2021.0 \n",
"2506356 Arlington Public Schools 69 7.0 2021.0 \n",
"2501835 Woburn School District 467 4.0 2021.0 \n",
"2501714 Wayland School District 434 8.0 2021.0 \n",
"2502631 Methuen School District 270 3.0 2021.0 \n",
"2500515 Danvers School District 141 6.0 2021.0 \n",
"2501498 Somerville School District 383 NaN NaN \n",
"2501384 Quincy School District 349 4.0 2021.0 \n",
"2500916 Lincoln School District 242 3.0 2021.0 \n",
"2501788 Weymouth School District 455 8.0 2021.0 \n",
"\n",
" coordinates distance-to-downtown distance-to-work \n",
"universal-id \n",
"2501042 (42.627754, -70.974693) 30.005931 28.583420 \n",
"2500337 (42.282269, -71.095016) 9.673200 24.989359 \n",
"2500402 (42.059696, -71.037262) 34.339345 49.384728 \n",
"2501682 (42.105808, -71.258743) 32.933990 40.921772 \n",
"2501507 (42.29924, -71.542259) 39.445654 30.606258 "
"2500363 (42.697018, -71.017365) 22.917933 19.554889 \n",
"2506356 (42.410576, -71.145081) 4.794958 7.066929 \n",
"2501835 (42.477467, -71.175484) 9.264922 4.013598 \n",
"2501714 (42.373108, -71.344765) 13.952791 8.347379 \n",
"2502631 (42.732357, -71.177345) 25.763243 18.273064 \n",
"2500515 (42.582523, -70.931618) 16.464503 18.045917 \n",
"2501498 (42.387581, -71.087326) 1.609308 10.378716 \n",
"2501384 (42.259659, -70.985237) 8.646003 20.169491 \n",
"2500916 (42.456898, -71.278549) 12.234463 1.705602 \n",
"2501788 (42.21767, -70.92524) 12.754639 24.381842 "
]
},
"execution_count": 74,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
"# estimate each district's distance to each point of interest as the mean of its schools' distances\n",
"distances_to_downtown = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-downtown']}\n",
"distances_to_work = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-work']}\n",
"\n",
"boston_df.sample(5)"
"df_downtown = pd.DataFrame.from_dict(distances_to_downtown, orient='index')\n",
"df_work = pd.DataFrame.from_dict(distances_to_work, orient='index')\n",
"\n",
"# merge these new columns\n",
"both_df = pd.merge(left=df_downtown, right=df_work, how='inner', left_index=True, right_index=True)\n",
"both_df.rename(columns={'0_x': \"downtown\", '0_y': \"work\"}, inplace=True)\n",
"\n",
"both_df = both_df[both_df[\"downtown\"] < 35.0]\n",
"both_df = both_df[both_df[\"work\"] < 25.0]\n",
"\n",
"print(f'There are {len(both_df)} school districts within reasonable proximity to downtown and work.\\n')\n",
"\n",
"# filter out all schools which aren't in proximal districts\n",
"proximal_district_ids = list(both_df.index)\n",
"boston_df = boston_df[boston_df['district-id'].isin(proximal_district_ids)]\n",
"\n",
"print(f'There are {len(boston_df)} schools within these proximal districts.\\n')\n",
"\n",
"boston_df.sample(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Some of these districts don't have enough rating data. Those should be dropped."
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f54f95addf0>"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"boston_df.groupby(['district-id'])"
]
}
],