{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from great_schools import get_nearby_schools\n",
    "from distance import get_distance\n",
    "from secret import get_key\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shaun and Daniela's Boston Public School Analysis\n",
    "#### 2021.04.10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fetch the API key from the local filesystem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the API key\n",
    "api_key_file = '../keys/api.key'\n",
    "api_key = get_key(api_key_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use the `nearby_schools` API endpoint to grab raw data of all schools within the maximum radius.\n",
    "\n",
    "Results are cached as CSV files; set `refresh = True` to query the API again and overwrite the cache."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Some columns will be dropped immediately as pre-processing.\n",
    "drops = [\n",
    "    'nces-id',\n",
    "    'school-summary',\n",
    "    'street',\n",
    "    'fipscounty',\n",
    "    'phone',\n",
    "    'fax',\n",
    "    'web-site',\n",
    "    'overview-url',\n",
    "    'rating-description',\n",
    "    'distance',\n",
    "]\n",
    "\n",
    "# Set to True to re-query the API and overwrite the cached CSV files.\n",
    "refresh = False\n",
    "\n",
    "\n",
    "def load_nearby_schools(cache_file, lat, lon, radius=\"50\", refresh=False):\n",
    "    \"\"\"Return the schools cached in `cache_file`, indexed by `universal-id`.\n",
    "\n",
    "    When `refresh` is true, `get_nearby_schools` is first called with\n",
    "    `api_key`, `lat`, `lon` and `radius`, the columns named in `drops` are\n",
    "    removed, and the result overwrites `cache_file`. The frame is then read\n",
    "    back from `cache_file` in every case, so a refreshed run and a cached run\n",
    "    produce a frame with the same index and columns.\n",
    "    \"\"\"\n",
    "    if refresh:\n",
    "        schools = get_nearby_schools(api_key, lat, lon, radius)\n",
    "        pd.DataFrame.from_dict(schools).drop(columns=drops).to_csv(cache_file)\n",
    "\n",
    "    # The cache is written with the default integer index, which comes back\n",
    "    # from read_csv as an \"Unnamed: 0\" column; drop it after re-indexing.\n",
    "    # NOTE(review): the saved sample output shows `zip` as 1921 / 2126, so it\n",
    "    # looks like ZIP codes are parsed as integers and lose their leading\n",
    "    # zero - confirm whether `zip` should be read as text instead.\n",
    "    return (\n",
    "        pd.read_csv(cache_file)\n",
    "        .set_index(\"universal-id\")\n",
    "        .drop(columns=[\"Unnamed: 0\"])\n",
    "    )\n",
    "\n",
    "\n",
    "# Grab data for Boston.\n",
    "boston_nearby_schools_file = '../data/nearby_schools/boston.csv'\n",
    "boston_df = load_nearby_schools(\n",
    "    boston_nearby_schools_file, \"42.3\", \"-71.2\", \"50\", refresh=refresh\n",
    ")\n",
    "\n",
    "# Grab data for Buffalo.\n",
    "buffalo_nearby_schools_file = '../data/nearby_schools/buffalo.csv'\n",
    "buffalo_df = load_nearby_schools(\n",
    "    buffalo_nearby_schools_file, \"42.9625\", \"-78.7425\", \"50\", refresh=refresh\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Process the `lat` and `lon` columns from the API output into tuples.\n",
    "\n",
    "Then create two new columns:\n",
    "- Distance to Downtown\n",
    "- Distance to Work"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Form tuple to represent coordinates\n",
    "boston_df['coordinates'] = list(zip(boston_df['lat'], boston_df['lon']))\n",
    "\n",
    "# Define coordinates of important places\n",
    "downtown = (42.3674836866797, -71.07134540735377)  # Science Museum\n",
    "work = (42.47381059540949, -71.25414135292398)  # Hartwell\n",
    "\n",
    "# Create new columns to tabulate distance to these important places.\n",
    "# Each coordinate tuple is passed to get_distance as the first argument,\n",
    "# with the fixed reference point supplied through the `p2` keyword.\n",
    "boston_df['distance-to-downtown'] = boston_df['coordinates'].apply(func=get_distance, p2=downtown)\n",
    "boston_df['distance-to-work'] = boston_df['coordinates'].apply(func=get_distance, p2=work)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For Boston, drop all schools that aren't in Massachusetts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state-idnametypelevel-codeslevelcitystatezipcountylatlondistrict-namedistrict-idratingyearcoordinatesdistance-to-downtowndistance-to-work
universal-id
25010427050505Masconomet Regional High Schoolpublich9,10,11,12BoxfordMA1921Essex County42.627754-70.974693Masconomet School District2598.02021.0(42.627754, -70.974693)30.00593128.583420
2500337350380Young Achievers Science and Math Schoolpublicp,e,mPK,KG,1,2,3,4,5,6,7,8MattapanMA2126Suffolk County42.282269-71.095016Boston School District992.02021.0(42.282269, -71.095016)9.67320024.989359
2500402440017Kennedy K-5 Elementary SchoolpubliceKG,1,2,3,4,5BrocktonMA2301Plymouth County42.059696-71.037262Brockton School District1114.02021.0(42.059696, -71.037262)34.33934549.384728
25016823070010BoydenpubliceKG,1,2,3,4,5WalpoleMA2071Norfolk County42.105808-71.258743Walpole School District4266.02021.0(42.105808, -71.258743)32.93399040.921772
25015072760305P. Brent Trottier Middle Schoolpublicm6,7,8SouthboroughMA1772Worcester County42.299240-71.542259Southborough School District3878.02021.0(42.29924, -71.542259)39.44565430.606258
\n", "
" ], "text/plain": [ " state-id name type \\\n", "universal-id \n", "2501042 7050505 Masconomet Regional High School public \n", "2500337 350380 Young Achievers Science and Math School public \n", "2500402 440017 Kennedy K-5 Elementary School public \n", "2501682 3070010 Boyden public \n", "2501507 2760305 P. Brent Trottier Middle School public \n", "\n", " level-codes level city state zip \\\n", "universal-id \n", "2501042 h 9,10,11,12 Boxford MA 1921 \n", "2500337 p,e,m PK,KG,1,2,3,4,5,6,7,8 Mattapan MA 2126 \n", "2500402 e KG,1,2,3,4,5 Brockton MA 2301 \n", "2501682 e KG,1,2,3,4,5 Walpole MA 2071 \n", "2501507 m 6,7,8 Southborough MA 1772 \n", "\n", " county lat lon \\\n", "universal-id \n", "2501042 Essex County 42.627754 -70.974693 \n", "2500337 Suffolk County 42.282269 -71.095016 \n", "2500402 Plymouth County 42.059696 -71.037262 \n", "2501682 Norfolk County 42.105808 -71.258743 \n", "2501507 Worcester County 42.299240 -71.542259 \n", "\n", " district-name district-id rating year \\\n", "universal-id \n", "2501042 Masconomet School District 259 8.0 2021.0 \n", "2500337 Boston School District 99 2.0 2021.0 \n", "2500402 Brockton School District 111 4.0 2021.0 \n", "2501682 Walpole School District 426 6.0 2021.0 \n", "2501507 Southborough School District 387 8.0 2021.0 \n", "\n", " coordinates distance-to-downtown distance-to-work \n", "universal-id \n", "2501042 (42.627754, -70.974693) 30.005931 28.583420 \n", "2500337 (42.282269, -71.095016) 9.673200 24.989359 \n", "2500402 (42.059696, -71.037262) 34.339345 49.384728 \n", "2501682 (42.105808, -71.258743) 32.933990 40.921772 \n", "2501507 (42.29924, -71.542259) 39.445654 30.606258 " ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the Massachusetts rows. The copy makes the result an independent\n", "# frame, so later column assignments do not act on a slice of the original.\n", "boston_df = boston_df[boston_df['state'] == \"MA\"].copy()\n", "\n", "# Fixed seed so the same five rows are shown on every run.\n", "boston_df.sample(5, random_state=0)" ] } ], "metadata": { "interpreter": { "hash": "4fc861b332db140b7b363b167627eee6a3238262e7c99e0237067fec0875fee7" }, "kernelspec": { "display_name": "Python 
3.8.10 ('venv': venv)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }