{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Getting Started" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This module provides cached loading of open datasets from Faculty. To view\n", "the available datasets:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from faculty_extras import opendata" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['higgs_boson/README.md',\n", " 'higgs_boson/higgs.csv',\n", " 'higgs_boson/higgs_test.csv',\n", " 'higgs_boson/higgs_train.csv',\n", " 'higgs_boson/higgs_validate.csv',\n", " 'tutorials/supermarkets/.ipynb_checkpoints/modify-checkpoint.ipynb',\n", " 'tutorials/supermarkets/README.md',\n", " 'tutorials/supermarkets/lidl.csv',\n", " 'tutorials/supermarkets/waitrose.csv',\n", " 'uk_2011_census/census_by_outputarea.csv',\n", " 'uk_2011_census/census_variable_info.csv',\n", " 'uk_2011_census/outputarea_localauthority_mapping.csv',\n", " 'uk_2011_census/outputarea_lsoa_msoa_mapping.csv',\n", " 'uk_2011_census/outputarea_parliamentaryconstituency_mapping.csv',\n", " 'uk_2011_census/postcode_outputarea_mapping.csv',\n", " 'uk_2011_census/ukpostcodes.csv',\n", " 'uk_statistical_boundaries/geojson/local_authorities.json',\n", " 'uk_statistical_boundaries/geojson/lower_super_output_areas.json',\n", " 'uk_statistical_boundaries/geojson/middle_super_output_areas.json',\n", " 'uk_statistical_boundaries/geojson/output_areas.json',\n", " 'uk_statistical_boundaries/geojson/parliamentary_constituencies.json',\n", " 'uk_statistical_boundaries/topojson/uk_statistical_boundaries.json',\n", " 'us_flights/README.md',\n", " 'us_flights/us_flights_1987.csv',\n", " 'us_flights/us_flights_1988.csv',\n", " 'us_flights/us_flights_1989.csv',\n", " 'us_flights/us_flights_1990.csv',\n", " 'us_flights/us_flights_1991.csv',\n", " 'us_flights/us_flights_1992.csv',\n", " 'us_flights/us_flights_1993.csv',\n", " 
'us_flights/us_flights_1994.csv',\n", " 'us_flights/us_flights_1995.csv',\n", " 'us_flights/us_flights_1996.csv',\n", " 'us_flights/us_flights_1997.csv',\n", " 'us_flights/us_flights_1998.csv',\n", " 'us_flights/us_flights_1999.csv',\n", " 'us_flights/us_flights_2000.csv',\n", " 'us_flights/us_flights_2001.csv',\n", " 'us_flights/us_flights_2002.csv',\n", " 'us_flights/us_flights_2003.csv',\n", " 'us_flights/us_flights_2004.csv',\n", " 'us_flights/us_flights_2005.csv',\n", " 'us_flights/us_flights_2006.csv',\n", " 'us_flights/us_flights_2007.csv',\n", " 'us_flights/us_flights_2008.csv',\n", " 'us_flights/us_flights_dtypes.json']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendata.ls()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To load one of the datasets into a pandas DataFrame:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = opendata.load(\"uk_2011_census/census_by_outputarea.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OATotal_PopulationTotal_HouseholdsTotal_DwellingsTotal_Household_SpacesTotal_Population_16_and_overTotal_Population_16_to_74Total_Pop_No_NI_Students_16_to_74Total_Employment_16_to_74Total_Pop_in_Housesholds_16_and_over...u158u159u160u161u162u163u164u165u166u167
0E0000000119499115115173148148102173...6185714922000
1E00000003250112125125218199199147218...10247432621215
2E00000005367217241241337304304241337...1637117521279304
3E000000071238310310311311111186113...4183620902001
4E000000101027879799786865997...12111616605005
\n", "

5 rows × 178 columns

\n", "
" ], "text/plain": [ " OA Total_Population Total_Households Total_Dwellings \\\n", "0 E00000001 194 99 115 \n", "1 E00000003 250 112 125 \n", "2 E00000005 367 217 241 \n", "3 E00000007 123 83 103 \n", "4 E00000010 102 78 79 \n", "\n", " Total_Household_Spaces Total_Population_16_and_over \\\n", "0 115 173 \n", "1 125 218 \n", "2 241 337 \n", "3 103 113 \n", "4 79 97 \n", "\n", " Total_Population_16_to_74 Total_Pop_No_NI_Students_16_to_74 \\\n", "0 148 148 \n", "1 199 199 \n", "2 304 304 \n", "3 111 111 \n", "4 86 86 \n", "\n", " Total_Employment_16_to_74 Total_Pop_in_Housesholds_16_and_over ... \\\n", "0 102 173 ... \n", "1 147 218 ... \n", "2 241 337 ... \n", "3 86 113 ... \n", "4 59 97 ... \n", "\n", " u158 u159 u160 u161 u162 u163 u164 u165 u166 u167 \n", "0 6 18 57 14 9 2 2 0 0 0 \n", "1 10 24 74 32 6 2 1 2 1 5 \n", "2 16 37 117 52 12 7 9 3 0 4 \n", "3 4 18 36 20 9 0 2 0 0 1 \n", "4 12 11 16 16 6 0 5 0 0 5 \n", "\n", "[5 rows x 178 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That's it! The data will be cached on disk so that it is not downloaded again.\n", "In addition, it will be cached in memory for performance. If the file gets\n", "updated on Faculty, this module ensures you always have the latest version." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }