{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Partitioning and formatting of the raw vioscreen information.\n",
"\n",
"This notebook consumes the raw vioscreen data, and splits it up into a series of pivot tables.\n",
"\n",
"The pivot tables are indexed by the survey ID. A survey ID corresponds to a single instance of a vioscreen FFQ. Separate mappings, not included at present, relate the survey ID to a host subject identifier and/or sample barcode.\n",
"\n",
"All headers are scrubbed to avoid annoying characters like spaces. This is done for the benefit of tools like QIIME, however it does reduce readability in some cases.\n",
"\n",
"A roundtrip test going from raw data to the summarized micro / macronutrients is included at the end of this notebook."
]
},
{
"cell_type": "code",
"execution_count": 283,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"import numpy as np\n",
"from collections import defaultdict"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# full raw vioscreen data\n",
"full_vios = pd.read_csv('vioscreen_dump.tsv.gz', sep='\\t', dtype=str)"
]
},
{
"cell_type": "code",
"execution_count": 355,
"metadata": {},
"outputs": [],
"source": [
"# a raw dump of the ag.source_barcodes_surveys table\n",
"survey_sample_map_raw = pd.read_csv('sid_bc.tsv', \n",
" sep='\\t', dtype=str).set_index('survey_id')['barcode']\n",
"survey_sample_map = defaultdict(list)\n",
"for i, s in survey_sample_map_raw.items():\n",
" survey_sample_map[i].append('10317.%s' % s)"
]
},
{
"cell_type": "code",
"execution_count": 339,
"metadata": {},
"outputs": [],
"source": [
"def drop_null_cols(df):\n",
" \"\"\"Drop a column that is entirely null\"\"\"\n",
" drop = []\n",
" n = len(df)\n",
" for c in df.columns:\n",
" if df[c].isnull().sum() == n:\n",
" drop.append(c)\n",
" return df.drop(columns=drop)\n",
"\n",
"replacer = re.compile(\"[ \\-,()%':]\")\n",
"def sanitize_columns(df):\n",
" \"\"\"Remove possibly annoying characters\"\"\"\n",
" df.columns = [replacer.sub('_', c) for c in df.columns]\n",
" return df\n",
"\n",
"def remap_to_sample_id(df):\n",
" new_rows = []\n",
" sample_ids = []\n",
" for _, row in df.reset_index().iterrows():\n",
" for id_ in survey_sample_map.get(row['survey_id'], []):\n",
" new_rows.append(row.copy())\n",
" sample_ids.append(id_)\n",
" new_df = pd.DataFrame(new_rows)\n",
" new_df['#SampleID'] = sample_ids\n",
" return new_df.set_index('#SampleID')"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"# partition the raw vioscreen data into logically related units.\n",
"# within each partition, columns that are entirely null are removed.\n",
"\n",
"# a few specific values are used for this:\n",
"# - \"code\" should correspond to a specific micro/macro nutrient or summarized \n",
"# value (e.g., % calories from alcohol)\n",
"# - \"data\" contains the raw micro/macro nutrient information for the given food\n",
"# entry for the given survey scaled (I believe) \"servingFrequencyText\" and the \"servingSizeText\". \n",
"\n",
"# where the \"code\" column is not null\n",
"vios_coded = drop_null_cols(full_vios[~full_vios.code.isnull()])\n",
"\n",
"# what should be food components (e.g., % protein)\n",
"vios_coded_percents = drop_null_cols(vios_coded[~vios_coded.foodComponentType.isnull()])\n",
"\n",
"# what should be only micro and macronutriet data\n",
"vios_coded_micromacro = drop_null_cols(vios_coded[np.logical_and(vios_coded.foodComponentType.isnull(),\n",
" vios_coded.description != 'Eating Pattern')])\n",
"\n",
"# what vioscreen terms eating patterns (e.g., added fats per day)\n",
"vios_coded_eatingpattern = drop_null_cols(vios_coded[vios_coded.description == 'Eating Pattern'])\n",
"\n",
"# entries that are not coded\n",
"vios_not_coded = drop_null_cols(full_vios[full_vios.code.isnull()])\n",
"\n",
"# contains the raw vioscreen json output (more below)\n",
"vios_with_data = drop_null_cols(vios_not_coded[~vios_not_coded.data.isnull()])\n",
"\n",
"# the vioscreen scores (e.g., Total Fruits)\n",
"vios_no_data = drop_null_cols(vios_not_coded[vios_not_coded.data.isnull()])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting food components"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" foodComponentType | \n",
" precision | \n",
" shortDescription | \n",
" survey_id | \n",
" units | \n",
"
\n",
" \n",
" \n",
" \n",
" 153 | \n",
" 15.49482152820182 | \n",
" %protein | \n",
" Percent of calories from Protein | \n",
" 1.0 | \n",
" 0.0 | \n",
" Protein | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 154 | \n",
" 44.07939989979141 | \n",
" %fat | \n",
" Percent of calories from Fat | \n",
" 1.0 | \n",
" 0.0 | \n",
" Fat | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 155 | \n",
" 40.42577857200676 | \n",
" %carbo | \n",
" Percent of calories from Carbohydrate | \n",
" 1.0 | \n",
" 0.0 | \n",
" Carbohydrate | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 156 | \n",
" 0.0 | \n",
" %alcohol | \n",
" Percent of calories from Alcohol | \n",
" 1.0 | \n",
" 0.0 | \n",
" Alcohol | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 157 | \n",
" 16.382593411285388 | \n",
" %sfatot | \n",
" Percent of calories from Saturated Fat | \n",
" 1.0 | \n",
" 0.0 | \n",
" Saturated Fat | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount code description \\\n",
"153 15.49482152820182 %protein Percent of calories from Protein \n",
"154 44.07939989979141 %fat Percent of calories from Fat \n",
"155 40.42577857200676 %carbo Percent of calories from Carbohydrate \n",
"156 0.0 %alcohol Percent of calories from Alcohol \n",
"157 16.382593411285388 %sfatot Percent of calories from Saturated Fat \n",
"\n",
" foodComponentType precision shortDescription survey_id units \n",
"153 1.0 0.0 Protein 80043f5209506497 % \n",
"154 1.0 0.0 Fat 80043f5209506497 % \n",
"155 1.0 0.0 Carbohydrate 80043f5209506497 % \n",
"156 1.0 0.0 Alcohol 80043f5209506497 % \n",
"157 1.0 0.0 Saturated Fat 80043f5209506497 % "
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_percents.head()"
]
},
{
"cell_type": "code",
"execution_count": 356,
"metadata": {},
"outputs": [],
"source": [
"vios_coded_percents_pivot = vios_coded_percents.pivot(index='survey_id', columns='description', values='amount')\n",
"vios_coded_percents_pivot = remap_to_sample_id(sanitize_columns(vios_coded_percents_pivot))\n",
"vios_coded_percents_pivot.to_csv('vioscreen_component_percents.tsv', sep='\\t', index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 357,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" Percent_of_calories_from_Added_Sugar | \n",
" Percent_of_calories_from_Alcohol | \n",
" Percent_of_calories_from_Carbohydrate | \n",
" Percent_of_calories_from_Fat | \n",
" Percent_of_calories_from_Monounsaturated_Fat | \n",
" Percent_of_calories_from_Polyunsaturated_Fat | \n",
" Percent_of_calories_from_Protein | \n",
" Percent_of_calories_from_Saturated_Fat | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 0.5278094148896552 | \n",
" 4.405707930741896 | \n",
" 27.628189670463787 | \n",
" 45.084477618669254 | \n",
" 20.95621331900778 | \n",
" 12.559912015883315 | \n",
" 22.881624780125065 | \n",
" 10.631030850565056 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 0.46065971747342294 | \n",
" 0.0 | \n",
" 23.474779328659373 | \n",
" 55.745954131731786 | \n",
" 22.489476549272094 | \n",
" 8.219601812309241 | \n",
" 20.77926653960884 | \n",
" 22.733751192146702 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 0.8657785646763633 | \n",
" 0.00023962357002141254 | \n",
" 37.5279841722288 | \n",
" 43.04814628870771 | \n",
" 21.854142563950166 | \n",
" 10.351928530673 | \n",
" 19.42362991549346 | \n",
" 9.169527931007428 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 0.864436614273603 | \n",
" 7.643624875670007 | \n",
" 38.41905488083203 | \n",
" 41.82753132412285 | \n",
" 21.020284676343078 | \n",
" 8.6645383097221 | \n",
" 12.109788919375113 | \n",
" 11.298838478082244 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 0.864436614273603 | \n",
" 7.643624875670007 | \n",
" 38.41905488083203 | \n",
" 41.82753132412285 | \n",
" 21.020284676343078 | \n",
" 8.6645383097221 | \n",
" 12.109788919375113 | \n",
" 11.298838478082244 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id Percent_of_calories_from_Added_Sugar \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 0.5278094148896552 \n",
"10317.000107687 000fc7ac57776579 0.46065971747342294 \n",
"10317.000087663 0012cf765bb386ea 0.8657785646763633 \n",
"10317.000030578 001b4c3e1c848f58 0.864436614273603 \n",
"10317.000030579 001b4c3e1c848f58 0.864436614273603 \n",
"\n",
" Percent_of_calories_from_Alcohol \\\n",
"#SampleID \n",
"10317.000072291 4.405707930741896 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.00023962357002141254 \n",
"10317.000030578 7.643624875670007 \n",
"10317.000030579 7.643624875670007 \n",
"\n",
" Percent_of_calories_from_Carbohydrate \\\n",
"#SampleID \n",
"10317.000072291 27.628189670463787 \n",
"10317.000107687 23.474779328659373 \n",
"10317.000087663 37.5279841722288 \n",
"10317.000030578 38.41905488083203 \n",
"10317.000030579 38.41905488083203 \n",
"\n",
" Percent_of_calories_from_Fat \\\n",
"#SampleID \n",
"10317.000072291 45.084477618669254 \n",
"10317.000107687 55.745954131731786 \n",
"10317.000087663 43.04814628870771 \n",
"10317.000030578 41.82753132412285 \n",
"10317.000030579 41.82753132412285 \n",
"\n",
" Percent_of_calories_from_Monounsaturated_Fat \\\n",
"#SampleID \n",
"10317.000072291 20.95621331900778 \n",
"10317.000107687 22.489476549272094 \n",
"10317.000087663 21.854142563950166 \n",
"10317.000030578 21.020284676343078 \n",
"10317.000030579 21.020284676343078 \n",
"\n",
" Percent_of_calories_from_Polyunsaturated_Fat \\\n",
"#SampleID \n",
"10317.000072291 12.559912015883315 \n",
"10317.000107687 8.219601812309241 \n",
"10317.000087663 10.351928530673 \n",
"10317.000030578 8.6645383097221 \n",
"10317.000030579 8.6645383097221 \n",
"\n",
" Percent_of_calories_from_Protein \\\n",
"#SampleID \n",
"10317.000072291 22.881624780125065 \n",
"10317.000107687 20.77926653960884 \n",
"10317.000087663 19.42362991549346 \n",
"10317.000030578 12.109788919375113 \n",
"10317.000030579 12.109788919375113 \n",
"\n",
" Percent_of_calories_from_Saturated_Fat \n",
"#SampleID \n",
"10317.000072291 10.631030850565056 \n",
"10317.000107687 22.733751192146702 \n",
"10317.000087663 9.169527931007428 \n",
"10317.000030578 11.298838478082244 \n",
"10317.000030579 11.298838478082244 "
]
},
"execution_count": 357,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_percents_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting micro and macro nutrients\n",
"\n",
"NOTE: headers are augmented to include the unit of measurement (e.g., grams)."
]
},
{
"cell_type": "code",
"execution_count": 358,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" survey_id | \n",
" units | \n",
" valueType | \n",
" description_augmented | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 80.4870014190674 | \n",
" acesupot | \n",
" Acesulfame Potassium | \n",
" 80043f5209506497 | \n",
" mg | \n",
" Amount | \n",
" Acesulfame Potassium_in_mg | \n",
"
\n",
" \n",
" 1 | \n",
" 23.9946937142986 | \n",
" addsugar | \n",
" Added Sugars (by Available Carbohydrate) | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
" Added Sugars (by Available Carbohydrate)_in_g | \n",
"
\n",
" \n",
" 2 | \n",
" 22.221421156844 | \n",
" adsugtot | \n",
" Added Sugars (by Total Sugars) | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
" Added Sugars (by Total Sugars)_in_g | \n",
"
\n",
" \n",
" 3 | \n",
" 2.82355429149195 | \n",
" alanine | \n",
" Alanine | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
" Alanine_in_g | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" alcohol | \n",
" Alcohol | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
" Alcohol_in_g | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount code description \\\n",
"0 80.4870014190674 acesupot Acesulfame Potassium \n",
"1 23.9946937142986 addsugar Added Sugars (by Available Carbohydrate) \n",
"2 22.221421156844 adsugtot Added Sugars (by Total Sugars) \n",
"3 2.82355429149195 alanine Alanine \n",
"4 0.0 alcohol Alcohol \n",
"\n",
" survey_id units valueType \\\n",
"0 80043f5209506497 mg Amount \n",
"1 80043f5209506497 g Amount \n",
"2 80043f5209506497 g Amount \n",
"3 80043f5209506497 g Amount \n",
"4 80043f5209506497 g Amount \n",
"\n",
" description_augmented \n",
"0 Acesulfame Potassium_in_mg \n",
"1 Added Sugars (by Available Carbohydrate)_in_g \n",
"2 Added Sugars (by Total Sugars)_in_g \n",
"3 Alanine_in_g \n",
"4 Alcohol_in_g "
]
},
"execution_count": 358,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_micromacro.head()"
]
},
{
"cell_type": "code",
"execution_count": 359,
"metadata": {},
"outputs": [],
"source": [
"vios_coded_micromacro = vios_coded_micromacro.copy()\n",
"vios_coded_micromacro['description_augmented'] = [\"%s_in_%s\" % (desc, u)\n",
" for desc, u in zip(vios_coded_micromacro['description'],\n",
" vios_coded_micromacro['units'])]\n",
"vios_coded_micromacro_pivot = vios_coded_micromacro.pivot(index='survey_id', \n",
" columns='description_augmented', \n",
" values='amount')\n",
"vios_coded_micromacro_pivot = remap_to_sample_id(sanitize_columns(vios_coded_micromacro_pivot))\n",
"vios_coded_micromacro_pivot.to_csv('vioscreen_micromacro.tsv', sep='\\t', index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 360,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" 3_Methylhistidine_in_mg | \n",
" Acesulfame_Potassium_in_mg | \n",
" Added_Sugars__by_Available_Carbohydrate__in_g | \n",
" Added_Sugars__by_Total_Sugars__in_g | \n",
" Alanine_in_g | \n",
" Alcohol_in_g | \n",
" Alpha_Carotene__provitamin_A_carotenoid__in_mcg | \n",
" Alpha_Tocopherol_in_mg | \n",
" Animal_Protein_in_g | \n",
" ... | \n",
" Vitamin_D__calciferol__in_mcg | \n",
" Vitamin_D2__ergocalciferol__in_mcg | \n",
" Vitamin_D3__cholecalciferol__in_mcg | \n",
" Vitamin_D_in_IU | \n",
" Vitamin_E_in_IU | \n",
" Vitamin_K__phylloquinone__in_mcg | \n",
" Water_in_g | \n",
" Whole_Grains__ounce_equivalents__in_oz_eq | \n",
" Xylitol_in_g | \n",
" Zinc_in_mg | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 27.1747373814452 | \n",
" 0.0 | \n",
" 13.4562635816126 | \n",
" 12.4120058199281 | \n",
" 5.51489136698938 | \n",
" 12.3349310056407 | \n",
" 1138.36933677242 | \n",
" 28.5702428804122 | \n",
" 65.1852377337893 | \n",
" ... | \n",
" 12.4742957697532 | \n",
" 0.200216711868774 | \n",
" 12.2740790684938 | \n",
" 498.971830790127 | \n",
" 50.9459459997824 | \n",
" 224.194887651232 | \n",
" 2149.69964033154 | \n",
" 0.617644916526757 | \n",
" 0.0367491587143106 | \n",
" 12.8837308617062 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 28.2962962869096 | \n",
" 0.0 | \n",
" 12.5258816620258 | \n",
" 11.6483954938307 | \n",
" 6.42827878920708 | \n",
" 0.0 | \n",
" 3101.36641244233 | \n",
" 15.1735223541489 | \n",
" 94.5589348006414 | \n",
" ... | \n",
" 8.9346003949231 | \n",
" 0.0 | \n",
" 8.9346003949231 | \n",
" 357.384015796924 | \n",
" 22.6390968368538 | \n",
" 931.249354337616 | \n",
" 3629.797712835 | \n",
" 0.431386317945506 | \n",
" 0.0211558774590833 | \n",
" 14.6265771027999 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 18.2712649474405 | \n",
" 0.0 | \n",
" 20.7296489630817 | \n",
" 15.8732604976236 | \n",
" 3.50057925906138 | \n",
" 0.000514735905194579 | \n",
" 565.525633238059 | \n",
" 15.836220214478 | \n",
" 39.3703097824671 | \n",
" ... | \n",
" 2.22169212308429 | \n",
" 0.0 | \n",
" 2.22169212308429 | \n",
" 88.8676849233717 | \n",
" 23.6360169491142 | \n",
" 162.85388100445 | \n",
" 4291.30569341481 | \n",
" 0.0 | \n",
" 0.0236280034849236 | \n",
" 10.289681270179 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 2.60684169804397 | \n",
" 0.0 | \n",
" 25.4372404710912 | \n",
" 20.4762200586827 | \n",
" 2.14788575606249 | \n",
" 17.7944461190833 | \n",
" 2840.69044111298 | \n",
" 11.3451129336108 | \n",
" 16.6596165030072 | \n",
" ... | \n",
" 3.58194392826441 | \n",
" 1.31924199302752 | \n",
" 2.26270193523689 | \n",
" 143.277757130576 | \n",
" 16.909548815799 | \n",
" 201.226017515073 | \n",
" 3558.75827935317 | \n",
" 1.69406249866094 | \n",
" 0.0342438208502127 | \n",
" 8.10011550652904 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 2.60684169804397 | \n",
" 0.0 | \n",
" 25.4372404710912 | \n",
" 20.4762200586827 | \n",
" 2.14788575606249 | \n",
" 17.7944461190833 | \n",
" 2840.69044111298 | \n",
" 11.3451129336108 | \n",
" 16.6596165030072 | \n",
" ... | \n",
" 3.58194392826441 | \n",
" 1.31924199302752 | \n",
" 2.26270193523689 | \n",
" 143.277757130576 | \n",
" 16.909548815799 | \n",
" 201.226017515073 | \n",
" 3558.75827935317 | \n",
" 1.69406249866094 | \n",
" 0.0342438208502127 | \n",
" 8.10011550652904 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 194 columns
\n",
"
"
],
"text/plain": [
" survey_id 3_Methylhistidine_in_mg \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 27.1747373814452 \n",
"10317.000107687 000fc7ac57776579 28.2962962869096 \n",
"10317.000087663 0012cf765bb386ea 18.2712649474405 \n",
"10317.000030578 001b4c3e1c848f58 2.60684169804397 \n",
"10317.000030579 001b4c3e1c848f58 2.60684169804397 \n",
"\n",
" Acesulfame_Potassium_in_mg \\\n",
"#SampleID \n",
"10317.000072291 0.0 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 0.0 \n",
"10317.000030579 0.0 \n",
"\n",
" Added_Sugars__by_Available_Carbohydrate__in_g \\\n",
"#SampleID \n",
"10317.000072291 13.4562635816126 \n",
"10317.000107687 12.5258816620258 \n",
"10317.000087663 20.7296489630817 \n",
"10317.000030578 25.4372404710912 \n",
"10317.000030579 25.4372404710912 \n",
"\n",
" Added_Sugars__by_Total_Sugars__in_g Alanine_in_g \\\n",
"#SampleID \n",
"10317.000072291 12.4120058199281 5.51489136698938 \n",
"10317.000107687 11.6483954938307 6.42827878920708 \n",
"10317.000087663 15.8732604976236 3.50057925906138 \n",
"10317.000030578 20.4762200586827 2.14788575606249 \n",
"10317.000030579 20.4762200586827 2.14788575606249 \n",
"\n",
" Alcohol_in_g \\\n",
"#SampleID \n",
"10317.000072291 12.3349310056407 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.000514735905194579 \n",
"10317.000030578 17.7944461190833 \n",
"10317.000030579 17.7944461190833 \n",
"\n",
" Alpha_Carotene__provitamin_A_carotenoid__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 1138.36933677242 \n",
"10317.000107687 3101.36641244233 \n",
"10317.000087663 565.525633238059 \n",
"10317.000030578 2840.69044111298 \n",
"10317.000030579 2840.69044111298 \n",
"\n",
" Alpha_Tocopherol_in_mg Animal_Protein_in_g ... \\\n",
"#SampleID ... \n",
"10317.000072291 28.5702428804122 65.1852377337893 ... \n",
"10317.000107687 15.1735223541489 94.5589348006414 ... \n",
"10317.000087663 15.836220214478 39.3703097824671 ... \n",
"10317.000030578 11.3451129336108 16.6596165030072 ... \n",
"10317.000030579 11.3451129336108 16.6596165030072 ... \n",
"\n",
" Vitamin_D__calciferol__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 12.4742957697532 \n",
"10317.000107687 8.9346003949231 \n",
"10317.000087663 2.22169212308429 \n",
"10317.000030578 3.58194392826441 \n",
"10317.000030579 3.58194392826441 \n",
"\n",
" Vitamin_D2__ergocalciferol__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 0.200216711868774 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 1.31924199302752 \n",
"10317.000030579 1.31924199302752 \n",
"\n",
" Vitamin_D3__cholecalciferol__in_mcg Vitamin_D_in_IU \\\n",
"#SampleID \n",
"10317.000072291 12.2740790684938 498.971830790127 \n",
"10317.000107687 8.9346003949231 357.384015796924 \n",
"10317.000087663 2.22169212308429 88.8676849233717 \n",
"10317.000030578 2.26270193523689 143.277757130576 \n",
"10317.000030579 2.26270193523689 143.277757130576 \n",
"\n",
" Vitamin_E_in_IU Vitamin_K__phylloquinone__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 50.9459459997824 224.194887651232 \n",
"10317.000107687 22.6390968368538 931.249354337616 \n",
"10317.000087663 23.6360169491142 162.85388100445 \n",
"10317.000030578 16.909548815799 201.226017515073 \n",
"10317.000030579 16.909548815799 201.226017515073 \n",
"\n",
" Water_in_g Whole_Grains__ounce_equivalents__in_oz_eq \\\n",
"#SampleID \n",
"10317.000072291 2149.69964033154 0.617644916526757 \n",
"10317.000107687 3629.797712835 0.431386317945506 \n",
"10317.000087663 4291.30569341481 0.0 \n",
"10317.000030578 3558.75827935317 1.69406249866094 \n",
"10317.000030579 3558.75827935317 1.69406249866094 \n",
"\n",
" Xylitol_in_g Zinc_in_mg \n",
"#SampleID \n",
"10317.000072291 0.0367491587143106 12.8837308617062 \n",
"10317.000107687 0.0211558774590833 14.6265771027999 \n",
"10317.000087663 0.0236280034849236 10.289681270179 \n",
"10317.000030578 0.0342438208502127 8.10011550652904 \n",
"10317.000030579 0.0342438208502127 8.10011550652904 \n",
"\n",
"[5 rows x 194 columns]"
]
},
"execution_count": 360,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_micromacro_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting eating patterns\n",
"\n",
"NOTE: headers are augmented to include the time unit (e.g., per day)."
]
},
{
"cell_type": "code",
"execution_count": 361,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" survey_id | \n",
" units | \n",
" valueType | \n",
" code_augmented | \n",
"
\n",
" \n",
" \n",
" \n",
" 201 | \n",
" 6.83359985351562 | \n",
" ADDEDFATS | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
" ADDEDFATS_PerDay | \n",
"
\n",
" \n",
" 202 | \n",
" 0.0 | \n",
" ALCOHOLSERV | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
" ALCOHOLSERV_PerDay | \n",
"
\n",
" \n",
" 203 | \n",
" 1.24581536668154 | \n",
" ANIMALPROTEIN | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
" ANIMALPROTEIN_PerDay | \n",
"
\n",
" \n",
" 204 | \n",
" 1.8388000404998 | \n",
" CALCDAIRYSERV | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
" CALCDAIRYSERV_PerDay | \n",
"
\n",
" \n",
" 205 | \n",
" 3.4041772177448 | \n",
" CALCSERV | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
" CALCSERV_PerDay | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount code description survey_id \\\n",
"201 6.83359985351562 ADDEDFATS Eating Pattern 80043f5209506497 \n",
"202 0.0 ALCOHOLSERV Eating Pattern 80043f5209506497 \n",
"203 1.24581536668154 ANIMALPROTEIN Eating Pattern 80043f5209506497 \n",
"204 1.8388000404998 CALCDAIRYSERV Eating Pattern 80043f5209506497 \n",
"205 3.4041772177448 CALCSERV Eating Pattern 80043f5209506497 \n",
"\n",
" units valueType code_augmented \n",
"201 PerDay Amount ADDEDFATS_PerDay \n",
"202 PerDay Amount ALCOHOLSERV_PerDay \n",
"203 PerDay Amount ANIMALPROTEIN_PerDay \n",
"204 PerDay Amount CALCDAIRYSERV_PerDay \n",
"205 PerDay Amount CALCSERV_PerDay "
]
},
"execution_count": 361,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_eatingpattern.head()"
]
},
{
"cell_type": "code",
"execution_count": 362,
"metadata": {},
"outputs": [],
"source": [
"vios_coded_eatingpattern = vios_coded_eatingpattern.copy()\n",
"vios_coded_eatingpattern['code_augmented'] = ['%s_%s' % (c, u)\n",
" for c, u in zip(vios_coded_eatingpattern['code'],\n",
" vios_coded_eatingpattern['units'])]\n",
"vios_coded_eatingpattern_pivot = vios_coded_eatingpattern.pivot(index='survey_id', \n",
" columns='code_augmented', \n",
" values='amount')\n",
"vios_coded_eatingpattern_pivot = remap_to_sample_id(sanitize_columns(vios_coded_eatingpattern_pivot))\n",
"vios_coded_eatingpattern_pivot.to_csv('vioscreen_eatingpatterns.tsv', sep='\\t', index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 363,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" ADDEDFATS_PerDay | \n",
" ALCOHOLSERV_PerDay | \n",
" ANIMALPROTEIN_PerDay | \n",
" CALCDAIRYSERV_PerDay | \n",
" CALCSERV_PerDay | \n",
" FISHSERV_PerWeek | \n",
" FRIEDFISH_PerWeek | \n",
" FRTSUMM_PerDay | \n",
" GRAINSERV_PerDay | \n",
" JUICESERV_PerDay | \n",
" LOWFATDAIRYSERV_PerDay | \n",
" NOFRYFISHSERV_PerWeek | \n",
" NONFATDAIRY_PerDay | \n",
" PLANTPROTEIN_PerDay | \n",
" SALADSERV_PerDay | \n",
" SOYFOODS_PerDay | \n",
" VEGSUMM_PerDay | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 13.7111325807085 | \n",
" 0.811984630480205 | \n",
" 3.12011660969654 | \n",
" 0.588968275238422 | \n",
" 2.43930048364662 | \n",
" 0.757184681663774 | \n",
" 0.0 | \n",
" 3.9613478141107 | \n",
" 0.203613660210178 | \n",
" 1.56846226815002 | \n",
" 0.10680657461898 | \n",
" 0.757184681663774 | \n",
" 0.0 | \n",
" 0.788205997952043 | \n",
" 4.27983387202433 | \n",
" 0.019353452018679 | \n",
" 6.78067953556929 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 16.9556567945546 | \n",
" 0.0 | \n",
" 5.55031021924401 | \n",
" 1.02710044945619 | \n",
" 2.85550994853688 | \n",
" 0.429514730793156 | \n",
" 0.0 | \n",
" 1.09213853647856 | \n",
" 0.547190726293276 | \n",
" 0.0 | \n",
" 0.373823011166429 | \n",
" 0.429514730793156 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.05764470002423 | \n",
" 0.0 | \n",
" 8.40219293535005 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 10.460175704561 | \n",
" 0.0 | \n",
" 1.75556821008173 | \n",
" 0.143241657294231 | \n",
" 1.7933502932502 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.27800895759504 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0412194728851318 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0961474784433025 | \n",
" 6.61006004091812 | \n",
" 0.202395007014275 | \n",
" 8.88691729763619 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 9.4877522015245 | \n",
" 1.18359554067103 | \n",
" 1.35898643631758 | \n",
" 0.508965186039879 | \n",
" 2.37677246201077 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 1.45998473791023 | \n",
" 1.76516303281359 | \n",
" 0.0 | \n",
" 0.10680657461898 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 0.712004397299192 | \n",
" 2.13061859428066 | \n",
" 0.450974494893012 | \n",
" 5.41913946629678 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 9.4877522015245 | \n",
" 1.18359554067103 | \n",
" 1.35898643631758 | \n",
" 0.508965186039879 | \n",
" 2.37677246201077 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 1.45998473791023 | \n",
" 1.76516303281359 | \n",
" 0.0 | \n",
" 0.10680657461898 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 0.712004397299192 | \n",
" 2.13061859428066 | \n",
" 0.450974494893012 | \n",
" 5.41913946629678 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id ADDEDFATS_PerDay ALCOHOLSERV_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 13.7111325807085 0.811984630480205 \n",
"10317.000107687 000fc7ac57776579 16.9556567945546 0.0 \n",
"10317.000087663 0012cf765bb386ea 10.460175704561 0.0 \n",
"10317.000030578 001b4c3e1c848f58 9.4877522015245 1.18359554067103 \n",
"10317.000030579 001b4c3e1c848f58 9.4877522015245 1.18359554067103 \n",
"\n",
" ANIMALPROTEIN_PerDay CALCDAIRYSERV_PerDay CALCSERV_PerDay \\\n",
"#SampleID \n",
"10317.000072291 3.12011660969654 0.588968275238422 2.43930048364662 \n",
"10317.000107687 5.55031021924401 1.02710044945619 2.85550994853688 \n",
"10317.000087663 1.75556821008173 0.143241657294231 1.7933502932502 \n",
"10317.000030578 1.35898643631758 0.508965186039879 2.37677246201077 \n",
"10317.000030579 1.35898643631758 0.508965186039879 2.37677246201077 \n",
"\n",
" FISHSERV_PerWeek FRIEDFISH_PerWeek FRTSUMM_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0.757184681663774 0.0 3.9613478141107 \n",
"10317.000107687 0.429514730793156 0.0 1.09213853647856 \n",
"10317.000087663 0.0 0.0 3.27800895759504 \n",
"10317.000030578 0.0330395946763966 0.0 1.45998473791023 \n",
"10317.000030579 0.0330395946763966 0.0 1.45998473791023 \n",
"\n",
" GRAINSERV_PerDay JUICESERV_PerDay LOWFATDAIRYSERV_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0.203613660210178 1.56846226815002 0.10680657461898 \n",
"10317.000107687 0.547190726293276 0.0 0.373823011166429 \n",
"10317.000087663 0.0 0.0 0.0412194728851318 \n",
"10317.000030578 1.76516303281359 0.0 0.10680657461898 \n",
"10317.000030579 1.76516303281359 0.0 0.10680657461898 \n",
"\n",
" NOFRYFISHSERV_PerWeek NONFATDAIRY_PerDay PLANTPROTEIN_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0.757184681663774 0.0 0.788205997952043 \n",
"10317.000107687 0.429514730793156 0.0 0.0 \n",
"10317.000087663 0.0 0.0 0.0961474784433025 \n",
"10317.000030578 0.0330395946763966 0.0 0.712004397299192 \n",
"10317.000030579 0.0330395946763966 0.0 0.712004397299192 \n",
"\n",
" SALADSERV_PerDay SOYFOODS_PerDay VEGSUMM_PerDay \n",
"#SampleID \n",
"10317.000072291 4.27983387202433 0.019353452018679 6.78067953556929 \n",
"10317.000107687 1.05764470002423 0.0 8.40219293535005 \n",
"10317.000087663 6.61006004091812 0.202395007014275 8.88691729763619 \n",
"10317.000030578 2.13061859428066 0.450974494893012 5.41913946629678 \n",
"10317.000030579 2.13061859428066 0.450974494893012 5.41913946629678 "
]
},
"execution_count": 363,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_eatingpattern_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting vioscreen scores\n",
"\n",
"NOTE: a \"bounds\" file is also produced here as vioscreen provides upper and lower limits."
]
},
{
"cell_type": "code",
"execution_count": 364,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" lowerLimit | \n",
" name | \n",
" score | \n",
" survey_id | \n",
" type | \n",
" upperLimit | \n",
"
\n",
" \n",
" \n",
" \n",
" 227 | \n",
" 0.0 | \n",
" Total Vegetables | \n",
" 2.3880099595267104 | \n",
" 80043f5209506497 | \n",
" TotalVegetables | \n",
" 5.0 | \n",
"
\n",
" \n",
" 228 | \n",
" 0.0 | \n",
" Greens and Beans | \n",
" 0.0 | \n",
" 80043f5209506497 | \n",
" GreensAndBeans | \n",
" 5.0 | \n",
"
\n",
" \n",
" 229 | \n",
" 0.0 | \n",
" Total Fruit | \n",
" 5.0 | \n",
" 80043f5209506497 | \n",
" TotalFruit | \n",
" 5.0 | \n",
"
\n",
" \n",
" 230 | \n",
" 0.0 | \n",
" Whole Fruit | \n",
" 5.0 | \n",
" 80043f5209506497 | \n",
" WholeFruit | \n",
" 5.0 | \n",
"
\n",
" \n",
" 231 | \n",
" 0.0 | \n",
" Whole Grains | \n",
" 5.6810242143311065 | \n",
" 80043f5209506497 | \n",
" WholeGrains | \n",
" 10.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" lowerLimit name score survey_id \\\n",
"227 0.0 Total Vegetables 2.3880099595267104 80043f5209506497 \n",
"228 0.0 Greens and Beans 0.0 80043f5209506497 \n",
"229 0.0 Total Fruit 5.0 80043f5209506497 \n",
"230 0.0 Whole Fruit 5.0 80043f5209506497 \n",
"231 0.0 Whole Grains 5.6810242143311065 80043f5209506497 \n",
"\n",
" type upperLimit \n",
"227 TotalVegetables 5.0 \n",
"228 GreensAndBeans 5.0 \n",
"229 TotalFruit 5.0 \n",
"230 WholeFruit 5.0 \n",
"231 WholeGrains 10.0 "
]
},
"execution_count": 364,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_no_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 366,
"metadata": {},
"outputs": [],
"source": [
"vios_no_data_pivot = vios_no_data.pivot(index='survey_id', columns='type', values='score')\n",
"vios_no_data_pivot = remap_to_sample_id(sanitize_columns(vios_no_data_pivot))\n",
"vios_no_data_pivot.to_csv('vioscreen_scores.tsv', sep='\\t', index=True, header=True)\n",
"\n",
"vios_no_data_pivot_desc = vios_no_data[['type', 'lowerLimit', 'upperLimit']]\n",
"vios_no_data_pivot_desc = vios_no_data_pivot_desc[~vios_no_data_pivot_desc.duplicated()]\n",
"vios_no_data_pivot_desc = sanitize_columns(vios_no_data_pivot_desc)\n",
"vios_no_data_pivot_desc.to_csv('vioscreen_scores_bounds.tsv', sep='\\t', index=False, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 367,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" Dairy | \n",
" EmptyCalories | \n",
" FattyAcids | \n",
" GreensAndBeans | \n",
" RefinedGrains | \n",
" SeafoodAndPlantProteins | \n",
" Sodium | \n",
" TotalFruit | \n",
" TotalProteins | \n",
" TotalScore | \n",
" TotalVegetables | \n",
" WholeFruit | \n",
" WholeGrains | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 2.773913251852978 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 2.9116395144107985 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 76.42011495192892 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 0.7345621856651521 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 3.4494368436822747 | \n",
" 10.414684587341299 | \n",
" 1.0945527410632858 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 1.4900690155742815 | \n",
" 5.0 | \n",
" 51.00709571319465 | \n",
" 5.0 | \n",
" 2.965681423347652 | \n",
" 1.5926711021858588 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 0.7657591447261929 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 70.76575914472619 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 3.744019504399623 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 2.8927585756303893 | \n",
" 5.0 | \n",
" 79.09795635014056 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 7.461178270110556 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 3.744019504399623 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 2.8927585756303893 | \n",
" 5.0 | \n",
" 79.09795635014056 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 7.461178270110556 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id Dairy EmptyCalories \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 2.773913251852978 20.0 \n",
"10317.000107687 000fc7ac57776579 3.4494368436822747 10.414684587341299 \n",
"10317.000087663 0012cf765bb386ea 0.7657591447261929 20.0 \n",
"10317.000030578 001b4c3e1c848f58 3.744019504399623 20.0 \n",
"10317.000030579 001b4c3e1c848f58 3.744019504399623 20.0 \n",
"\n",
" FattyAcids GreensAndBeans RefinedGrains \\\n",
"#SampleID \n",
"10317.000072291 10.0 5.0 10.0 \n",
"10317.000107687 1.0945527410632858 5.0 10.0 \n",
"10317.000087663 10.0 5.0 10.0 \n",
"10317.000030578 10.0 5.0 10.0 \n",
"10317.000030579 10.0 5.0 10.0 \n",
"\n",
" SeafoodAndPlantProteins Sodium \\\n",
"#SampleID \n",
"10317.000072291 5.0 2.9116395144107985 \n",
"10317.000107687 5.0 0.0 \n",
"10317.000087663 5.0 0.0 \n",
"10317.000030578 5.0 0.0 \n",
"10317.000030579 5.0 0.0 \n",
"\n",
" TotalFruit TotalProteins TotalScore \\\n",
"#SampleID \n",
"10317.000072291 5.0 5.0 76.42011495192892 \n",
"10317.000107687 1.4900690155742815 5.0 51.00709571319465 \n",
"10317.000087663 5.0 5.0 70.76575914472619 \n",
"10317.000030578 2.8927585756303893 5.0 79.09795635014056 \n",
"10317.000030579 2.8927585756303893 5.0 79.09795635014056 \n",
"\n",
" TotalVegetables WholeFruit WholeGrains \n",
"#SampleID \n",
"10317.000072291 5.0 5.0 0.7345621856651521 \n",
"10317.000107687 5.0 2.965681423347652 1.5926711021858588 \n",
"10317.000087663 5.0 5.0 0.0 \n",
"10317.000030578 5.0 5.0 7.461178270110556 \n",
"10317.000030579 5.0 5.0 7.461178270110556 "
]
},
"execution_count": 367,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_no_data_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Format frequency information on the foods consumed\n",
"\n",
"NOTE: two entries are unexpectedly duplicated and are removed."
]
},
{
"cell_type": "code",
"execution_count": 368,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" consumptionAdjustment | \n",
" created | \n",
" data | \n",
" description | \n",
" foodCode | \n",
" foodGroup | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" survey_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 218 | \n",
" 3.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Enchiladas and tamales | \n",
" 40076 | \n",
" Asian, Mexican and Soy Foods | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 219 | \n",
" 2.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Fresh garlic, including in cooking | \n",
" 30043 | \n",
" Sauces and Seasonings | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 cloves | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 220 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Lard, bacon fat or meat drippings (Fat used in... | \n",
" 10005 | \n",
" Oil or Fat Used in Cooking | \n",
" 1460.0 | \n",
" 4 per day | \n",
" 2 teaspoons | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 221 | \n",
" 1.5 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Meal replacement drinks and shakes such as Sli... | \n",
" 90015 | \n",
" Meal Replacement Drinks, Sports and Granola Bars | \n",
" 365.0 | \n",
" 1 per day | \n",
" 1 1/2 cups (12 oz) | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 222 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Other candy, such as Lifesavers, licorice and ... | \n",
" 80015 | \n",
" Sweets | \n",
" 104.0 | \n",
" 2 per week | \n",
" 4 pieces hard candy, 10 Life Savers<SUP>®<... | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount consumptionAdjustment created \\\n",
"218 3.0 1.0 2017-07-29T06:56:24.553 \n",
"219 2.0 1.0 2017-07-29T06:56:24.553 \n",
"220 1.0 1.0 2017-07-29T06:56:24.553 \n",
"221 1.5 1.0 2017-07-29T06:56:24.553 \n",
"222 1.0 1.0 2017-07-29T06:56:24.553 \n",
"\n",
" data \\\n",
"218 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"219 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"220 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"221 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"222 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"\n",
" description foodCode \\\n",
"218 Enchiladas and tamales 40076 \n",
"219 Fresh garlic, including in cooking 30043 \n",
"220 Lard, bacon fat or meat drippings (Fat used in... 10005 \n",
"221 Meal replacement drinks and shakes such as Sli... 90015 \n",
"222 Other candy, such as Lifesavers, licorice and ... 80015 \n",
"\n",
" foodGroup frequency \\\n",
"218 Asian, Mexican and Soy Foods 286.0 \n",
"219 Sauces and Seasonings 365.0 \n",
"220 Oil or Fat Used in Cooking 1460.0 \n",
"221 Meal Replacement Drinks, Sports and Granola Bars 365.0 \n",
"222 Sweets 104.0 \n",
"\n",
" servingFrequencyText servingSizeText \\\n",
"218 5-6 per week 3 enchiladas or tamales \n",
"219 1 per day 2 cloves \n",
"220 4 per day 2 teaspoons \n",
"221 1 per day 1 1/2 cups (12 oz) \n",
"222 2 per week 4 pieces hard candy, 10 Life Savers®<... \n",
"\n",
" survey_id \n",
"218 80043f5209506497 \n",
"219 80043f5209506497 \n",
"220 80043f5209506497 \n",
"221 80043f5209506497 \n",
"222 80043f5209506497 "
]
},
"execution_count": 368,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_with_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"847b5821f05f211f\n",
"d95b890fe63c21d0\n"
]
}
],
"source": [
"duplicated = []\n",
"for i, grp in vios_with_data.groupby('survey_id'):\n",
" if len(grp.description) != len(grp.description.unique()):\n",
" print(i)\n",
" for d, dup in grp.groupby('description'):\n",
" if len(dup) > 1:\n",
" duplicated.append(dup)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" consumptionAdjustment | \n",
" created | \n",
" data | \n",
" description | \n",
" foodCode | \n",
" foodGroup | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" survey_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 1016257 | \n",
" 0.5 | \n",
" 1.0 | \n",
" 2018-05-09T15:52:12.827 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/2 cup (4 oz) | \n",
" 847b5821f05f211f | \n",
"
\n",
" \n",
" 1016259 | \n",
" 0.666 | \n",
" 0.5 | \n",
" 2018-05-09T15:52:12.827 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2/3 cup | \n",
" 847b5821f05f211f | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount consumptionAdjustment created \\\n",
"1016257 0.5 1.0 2018-05-09T15:52:12.827 \n",
"1016259 0.666 0.5 2018-05-09T15:52:12.827 \n",
"\n",
" data \\\n",
"1016257 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"1016259 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"\n",
" description foodCode \\\n",
"1016257 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"1016259 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"\n",
" foodGroup frequency servingFrequencyText servingSizeText \\\n",
"1016257 Cereals and Breads 52.0 1 per week 1/2 cup (4 oz) \n",
"1016259 Cereals and Breads 52.0 1 per week 2/3 cup \n",
"\n",
" survey_id \n",
"1016257 847b5821f05f211f \n",
"1016259 847b5821f05f211f "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicated[0]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" consumptionAdjustment | \n",
" created | \n",
" data | \n",
" description | \n",
" foodCode | \n",
" foodGroup | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" survey_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 459488 | \n",
" 0.5 | \n",
" 1.0 | \n",
" 2017-07-29T02:28:12.49 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 104.0 | \n",
" 2-6 per week | \n",
" 1/2 cup (4 oz) | \n",
" d95b890fe63c21d0 | \n",
"
\n",
" \n",
" 459489 | \n",
" 0.666 | \n",
" 1.0 | \n",
" 2017-07-29T02:28:12.49 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 104.0 | \n",
" 2-6 per week | \n",
" 2/3 cup | \n",
" d95b890fe63c21d0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount consumptionAdjustment created \\\n",
"459488 0.5 1.0 2017-07-29T02:28:12.49 \n",
"459489 0.666 1.0 2017-07-29T02:28:12.49 \n",
"\n",
" data \\\n",
"459488 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"459489 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"\n",
" description foodCode \\\n",
"459488 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"459489 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"\n",
" foodGroup frequency servingFrequencyText servingSizeText \\\n",
"459488 Cereals and Breads 104.0 2-6 per week 1/2 cup (4 oz) \n",
"459489 Cereals and Breads 104.0 2-6 per week 2/3 cup \n",
"\n",
" survey_id \n",
"459488 d95b890fe63c21d0 \n",
"459489 d95b890fe63c21d0 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicated[1]"
]
},
{
"cell_type": "code",
"execution_count": 369,
"metadata": {},
"outputs": [],
"source": [
"# manually ignoring the duplicated index values above,\n",
"# otherwise would need to filter by grouped survey ids and that seems\n",
"# excessive for such a small impact (and likely bug on vioscreens side)\n",
"vios_with_data_nodup = vios_with_data.loc[[i for i in vios_with_data.index if i not in [1016259, 459489]]]\n",
"\n",
"# \"frequency\" is almost certainly the number of instances estimated per year.\n",
"# For example, when someone consumes tea once per day, the frequency is 365 whereas\n",
"# once per week it is 52. \n",
"vios_with_data_nodup_pivot = vios_with_data_nodup.pivot(index='survey_id', \n",
" columns='description', \n",
" values='frequency').fillna(0.0)\n",
"\n",
"vios_with_data_nodup_pivot = remap_to_sample_id(sanitize_columns(vios_with_data_nodup_pivot))\n",
"vios_with_data_nodup_pivot.to_csv('vioscreen_foods_consumed_frequency_units_per_year.tsv', sep='\\t',\n",
" index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 370,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" All_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meat | \n",
" All_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cooking | \n",
" All_other_fruits | \n",
" All_other_lunch_meat_such_as_bologna__salami_and_Spam | \n",
" All_tea | \n",
" Apples__applesauce_and_pears | \n",
" Apricots___dried | \n",
" Apricots___fresh_or_canned | \n",
" Asian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai | \n",
" ... | \n",
" White_Rice | \n",
" White_breads__including_bagels__rolls_and_English_muffins | \n",
" White_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_cod | \n",
" White_or_rosé_wine | \n",
" Whole_grain_breads__including_bagels_and_rolls | \n",
" Whole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_ | \n",
" Whole_kernel_grains_such_as_brown_rice | \n",
" Winter_squash_such_as_acorn__butternut_and_pumpkin | \n",
" Yams_and_sweet_potatoes | \n",
" Yogurt__all_types_except_frozen | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 0 | \n",
" 182.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 52.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 52.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 28.0 | \n",
" 28.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 52.0 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 1642.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 182.0 | \n",
" 0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 182.0 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 182.0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 912.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 182.0 | \n",
" 0 | \n",
" 12.0 | \n",
" 24.0 | \n",
" 182.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 28.0 | \n",
" 0 | \n",
" 52.0 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 182.0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 912.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 182.0 | \n",
" 0 | \n",
" 12.0 | \n",
" 24.0 | \n",
" 182.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 28.0 | \n",
" 0 | \n",
" 52.0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 260 columns
\n",
"
"
],
"text/plain": [
" survey_id \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c \n",
"10317.000107687 000fc7ac57776579 \n",
"10317.000087663 0012cf765bb386ea \n",
"10317.000030578 001b4c3e1c848f58 \n",
"10317.000030579 001b4c3e1c848f58 \n",
"\n",
" All_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meat \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 52.0 \n",
"10317.000030578 182.0 \n",
"10317.000030579 182.0 \n",
"\n",
" All_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cooking \\\n",
"#SampleID \n",
"10317.000072291 182.0 \n",
"10317.000107687 104.0 \n",
"10317.000087663 0 \n",
"10317.000030578 104.0 \n",
"10317.000030579 104.0 \n",
"\n",
" All_other_fruits \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" All_other_lunch_meat_such_as_bologna__salami_and_Spam All_tea \\\n",
"#SampleID \n",
"10317.000072291 0 0 \n",
"10317.000107687 0 1642.0 \n",
"10317.000087663 0 0 \n",
"10317.000030578 0 912.0 \n",
"10317.000030579 0 912.0 \n",
"\n",
" Apples__applesauce_and_pears Apricots___dried \\\n",
"#SampleID \n",
"10317.000072291 52.0 182.0 \n",
"10317.000107687 52.0 0 \n",
"10317.000087663 52.0 0 \n",
"10317.000030578 182.0 0 \n",
"10317.000030579 182.0 0 \n",
"\n",
" Apricots___fresh_or_canned \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" Asian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" ... White_Rice \\\n",
"#SampleID ... \n",
"10317.000072291 ... 0 \n",
"10317.000107687 ... 182.0 \n",
"10317.000087663 ... 0 \n",
"10317.000030578 ... 182.0 \n",
"10317.000030579 ... 182.0 \n",
"\n",
" White_breads__including_bagels__rolls_and_English_muffins \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" White_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_cod \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 104.0 \n",
"10317.000087663 0 \n",
"10317.000030578 12.0 \n",
"10317.000030579 12.0 \n",
"\n",
" White_or_rosé_wine \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 24.0 \n",
"10317.000030579 24.0 \n",
"\n",
" Whole_grain_breads__including_bagels_and_rolls \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 182.0 \n",
"10317.000030579 182.0 \n",
"\n",
" Whole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_ \\\n",
"#SampleID \n",
"10317.000072291 28.0 \n",
"10317.000107687 104.0 \n",
"10317.000087663 0 \n",
"10317.000030578 182.0 \n",
"10317.000030579 182.0 \n",
"\n",
" Whole_kernel_grains_such_as_brown_rice \\\n",
"#SampleID \n",
"10317.000072291 28.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" Winter_squash_such_as_acorn__butternut_and_pumpkin \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 28.0 \n",
"10317.000030579 28.0 \n",
"\n",
" Yams_and_sweet_potatoes Yogurt__all_types_except_frozen \n",
"#SampleID \n",
"10317.000072291 0 52.0 \n",
"10317.000107687 0 182.0 \n",
"10317.000087663 0 0 \n",
"10317.000030578 0 52.0 \n",
"10317.000030579 0 52.0 \n",
"\n",
"[5 rows x 260 columns]"
]
},
"execution_count": 370,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_with_data_nodup_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting raw food information\n",
"\n",
"This resulting DataFrame is massive at 39M rows at the time of creating this notebook. Each row represents a micro or macronutrients of each food in its corresponding serving size that the person consumed.\n",
"\n",
"Due to the size, this frame is written out compressed (gzip). We are also not remapping to sample ID here as it would replicate a lot of data in what is already a pretty verbose structure.\n",
"\n",
"**WARNING: the first cell will run for 5-10 minutes**"
]
},
{
"cell_type": "code",
"execution_count": 239,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"434.7384750843048\n"
]
}
],
"source": [
"# an example of loading the data and what the contents look like.\n",
"# The \"code\" values *should* correspond to the non-null code values\n",
"# in the full raw file\n",
"\n",
"# WARNING: this will run for 5-10 minutes\n",
"items = []\n",
"parsed = {}\n",
"start = time.time()\n",
"for _, row in vios_with_data.iterrows(): \n",
" # we're using a memoization pattern to minimize parsing JSON. \n",
" # Of the ~250,000 rows in vios_with_data, only about 50,000 have a unique \n",
" # \"data\" JSON object. Parsing JSON is relatively expensive and this memoization\n",
" # shaves a pretty large amount of time off.\n",
" datablock = row['data']\n",
" if datablock in parsed:\n",
" data_parsed = parsed[datablock].copy()\n",
" else:\n",
" data_parsed = pd.read_json(datablock.replace(\"'\", '\"'), dtype=False, \n",
" convert_axes=False, convert_dates=False)\n",
" \n",
" parsed[datablock] = data_parsed\n",
" \n",
" # make sure relevant information about the survey etc are retained\n",
" # some is probably extraneous\n",
" for c in ['survey_id', \n",
" 'description', \n",
" 'frequency', \n",
" 'servingFrequencyText', \n",
" 'servingSizeText', \n",
" 'foodCode']:\n",
" data_parsed[c] = row[c]\n",
" items.append(data_parsed)\n",
"meal_data = pd.concat(items, ignore_index=True)\n",
"meal_data.set_index('survey_id', inplace=True)\n",
"print(time.time() - start) "
]
},
{
"cell_type": "code",
"execution_count": 240,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" units | \n",
" valueType | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" foodCode | \n",
"
\n",
" \n",
" survey_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 80043f5209506497 | \n",
" 0.000000 | \n",
" acesupot | \n",
" Enchiladas and tamales | \n",
" mg | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" 0.000000 | \n",
" addsugar | \n",
" Enchiladas and tamales | \n",
" g | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" 0.000000 | \n",
" adsugtot | \n",
" Enchiladas and tamales | \n",
" g | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" 1.905583 | \n",
" alanine | \n",
" Enchiladas and tamales | \n",
" g | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" 0.000000 | \n",
" alcohol | \n",
" Enchiladas and tamales | \n",
" g | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount code description units valueType \\\n",
"survey_id \n",
"80043f5209506497 0.000000 acesupot Enchiladas and tamales mg Amount \n",
"80043f5209506497 0.000000 addsugar Enchiladas and tamales g Amount \n",
"80043f5209506497 0.000000 adsugtot Enchiladas and tamales g Amount \n",
"80043f5209506497 1.905583 alanine Enchiladas and tamales g Amount \n",
"80043f5209506497 0.000000 alcohol Enchiladas and tamales g Amount \n",
"\n",
" frequency servingFrequencyText servingSizeText \\\n",
"survey_id \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"\n",
" foodCode \n",
"survey_id \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 "
]
},
"execution_count": 240,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"meal_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 245,
"metadata": {},
"outputs": [],
"source": [
"meal_data.to_csv('vioscreen_expanded_meal_data.tsv.gz', sep='\\t', index=True, header=True,\n",
" compression='gzip')"
]
},
{
"cell_type": "code",
"execution_count": 246,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(39486728, 9)"
]
},
"execution_count": 246,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"meal_data.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Round trip test\n",
"\n",
"Note the codes in the meal data do not have a 1-1 correspondence with the micromacro frame. In that frame, we used the description field which is more human readable than the code.\n",
"\n",
"It also appears that some codes like GLAC are not obviously represented in the micromacro frame. Similarly, there are some entities like animal protein that do not appear in the meal data frame so that may be inferred by vioscreen elsewhere. "
]
},
{
"cell_type": "code",
"execution_count": 372,
"metadata": {},
"outputs": [],
"source": [
"test_survey = '001b4c3e1c848f58'\n",
"test = meal_data.loc[test_survey]"
]
},
{
"cell_type": "code",
"execution_count": 378,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" units | \n",
" valueType | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" foodCode | \n",
"
\n",
" \n",
" survey_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" All other cheese, such as American, cheddar or... | \n",
" unit | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 1 slice (1 oz), 1/4 cup shredded, 2 tablespoon... | \n",
" 70005 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.910150 | \n",
" GLAC | \n",
" Apples, applesauce and pears | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 apple or pear, 1/4 cup | \n",
" 20001 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Avocado and guacamole | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/2 avocado, 1/2 cup | \n",
" 30006 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 3.096284 | \n",
" GLAC | \n",
" Bananas | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 banana | \n",
" 20002 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Beef, pork, ham and lamb - with fat | \n",
" unit | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 3 slices, 2 chops, small steak (4-5 oz) | \n",
" 40002 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 3.568904 | \n",
" GLAC | \n",
" Beer (all types) | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 day per week | \n",
" 16 oz (1 pint) | \n",
" 90002 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Broccoli | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/4 cup | \n",
" 30012 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.437427 | \n",
" GLAC | \n",
" Burritos, tacos, tostadas and quesadillas | \n",
" unit | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 1 taco or tostada, 1 small burrito, 1 quesadilla | \n",
" 40202 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Butter (Cereals and Breads) | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 4 teaspoons (4 pats) | \n",
" 10004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.930636 | \n",
" GLAC | \n",
" Carrots - cooked | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 cup | \n",
" 30041 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.164987 | \n",
" GLAC | \n",
" Carrots - raw | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 carrot, 4 baby carrots, 1/4 cup | \n",
" 30040 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Cauliflower, cabbage and Brussels sprouts | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/4 cup | \n",
" 30018 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.960348 | \n",
" GLAC | \n",
" Chocolate, candy bars, and toffee | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 regular bar, 8-10 bite sized-pieces (1.5 oz) | \n",
" 80008 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Coffee (not lattes or mochas) | \n",
" unit | \n",
" Index | \n",
" 912.0 | \n",
" 2-3 per day | \n",
" 1 small cup (6 oz) | \n",
" 90008 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Coleslaw | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 3/4 cup | \n",
" 30017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 2.233787 | \n",
" GLAC | \n",
" Cookies and cakes - regular | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2 medium cookies, 1 large cookie, 1 small piec... | \n",
" 80005 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 4.057757 | \n",
" GLAC | \n",
" Whole grain breads, including bagels and rolls | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 2 slices bread, 1 medium roll, 1 English muffi... | \n",
" 120413 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Eggs | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 2 eggs | \n",
" 50004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Fresh garlic, including in cooking | \n",
" unit | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 1 clove | \n",
" 30043 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.544489 | \n",
" GLAC | \n",
" Fresh tomatoes | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 4 slices, 1 medium tomato | \n",
" 30008 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.851028 | \n",
" GLAC | \n",
" Grapes, fresh | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 8-10 grapes, 1/2 cup | \n",
" 20024 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Green or string beans | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/4 cup | \n",
" 30001 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Green peppers and green chilies, cooked | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/8 cup | \n",
" 30054 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Green salad (Lettuce or spinach) | \n",
" unit | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 1 cup (medium bowl) | \n",
" 30021 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.711497 | \n",
" GLAC | \n",
" Latte (whole milk), mocha or hot chocolate | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 regular cup (8-10 oz) | \n",
" 90007 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Lunch meats such as ham, turkey and lowfat bol... | \n",
" unit | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 2 slices (2 oz) | \n",
" 40028 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.636527 | \n",
" GLAC | \n",
" Muffins, scones, croissants and biscuits | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" Small muffin, small croissant, small biscuit | \n",
" 60028 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Oil, canola (Fat used in cooking) | \n",
" unit | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10011 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Oil, olive (Cereals and Breads) | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 4 teaspoons (4 pats) | \n",
" 10010 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Oil, other such as corn, soybean, safflower or... | \n",
" unit | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10007 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Red peppers and red chilies, cooked | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/8 cup | \n",
" 30057 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Red peppers and red chilies, raw | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/4 pepper, 4-6 strips, 1/4 cup | \n",
" 30056 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Red Wine | \n",
" unit | \n",
" Index | \n",
" 104.0 | \n",
" 2 days per week | \n",
" 4 oz (about 1/8th bottle) | \n",
" 90018 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.702932 | \n",
" GLAC | \n",
" Regular potato, tortilla chips, corn chips and... | \n",
" unit | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 2 handfuls, 1 small bag (1 oz) | \n",
" 60031 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.801028 | \n",
" GLAC | \n",
" Spaghetti, lasagna and other pasta with tomato... | \n",
" unit | \n",
" Index | \n",
" 12.0 | \n",
" 1 per month | \n",
" 1 cup (medium bowl) | \n",
" 40017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.252476 | \n",
" GLAC | \n",
" Stew, pot pie, curries and casseroles with mea... | \n",
" unit | \n",
" Index | \n",
" 12.0 | \n",
" 1 per month | \n",
" 1 cup (medium bowl) | \n",
" 40004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Summer squash and zucchini | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/4 cup | \n",
" 30046 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" All tea | \n",
" unit | \n",
" Index | \n",
" 912.0 | \n",
" 2-3 per day | \n",
" 1 large glass (12 oz) | \n",
" 90009 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.538358 | \n",
" GLAC | \n",
" Vegetable, minestrone and tomato soup | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 1/2 cups (large bowl) | \n",
" 40033 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Water (tap or bottled) | \n",
" unit | \n",
" Index | \n",
" 1642.0 | \n",
" 4-5 per day | \n",
" 1 regular glass (8 oz) | \n",
" 90017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" White fish (broiled or baked) such as sole, ha... | \n",
" unit | \n",
" Index | \n",
" 12.0 | \n",
" 1 per month | \n",
" 1 medium piece (4-5 oz) | \n",
" 40073 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" White or rosé wine | \n",
" unit | \n",
" Index | \n",
" 24.0 | \n",
" 2-3 days per month | \n",
" 6 oz (about 1/6th bottle) | \n",
" 90019 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.665620 | \n",
" GLAC | \n",
" Winter squash such as acorn, butternut and pum... | \n",
" unit | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 3/4 cup | \n",
" 30047 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.336993 | \n",
" GLAC | \n",
" Yogurt, all types except frozen | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 3/4 (6 oz) (regular container) | \n",
" 70017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Butter (Fat used in cooking) | \n",
" unit | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Oil, olive (Fat used in cooking) | \n",
" unit | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10010 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Oil, olive (Fats used on vegetables) | \n",
" unit | \n",
" Index | \n",
" 730.0 | \n",
" 2 per day | \n",
" 2 teaspoons | \n",
" 10010 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Salt | \n",
" unit | \n",
" Index | \n",
" 1095.0 | \n",
" 3 per day | \n",
" 21-40 shakes (1/4 teaspoon) | \n",
" 10100 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Veggie soy or tofu burgers or ground meat subs... | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 burger or 3 oz | \n",
" 120191 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Tofu | \n",
" unit | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 4-5 pieces (4 oz) | \n",
" 120206 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Tempeh | \n",
" unit | \n",
" Index | \n",
" 24.0 | \n",
" 1-3 per month | \n",
" 3 pieces (3 oz) | \n",
" 120207 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Soy sauce, tamari, teriyaki sauce, Szechwan sa... | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2 teaspoons | \n",
" 120198 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.391782 | \n",
" GLAC | \n",
" Corn tortillas | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2 (6\" diameter) | \n",
" 120123 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 7.016067 | \n",
" GLAC | \n",
" White Rice | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 cup | \n",
" 120414 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 0.000000 | \n",
" GLAC | \n",
" Cooked greens, such as spinach, swiss chard an... | \n",
" unit | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 3/4 cup | \n",
" 120438 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 3.030604 | \n",
" GLAC | \n",
" Whole grain breads, including bagels and rolls... | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 2 slices bread, 1 medium roll, 1 English muffi... | \n",
" 120412 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 5.485402 | \n",
" GLAC | \n",
" Cooked whole grain cereals | \n",
" unit | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 1/2 cup (small bowl) | \n",
" 120410 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 3.923203 | \n",
" GLAC | \n",
" All other beans such as baked beans, lima bean... | \n",
" unit | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 3/4 cup | \n",
" 30044 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 1.430008 | \n",
" GLAC | \n",
" Soy milk, fortified (Milk in cooked cereal) | \n",
" unit | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 2/3 cup | \n",
" 120560 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" 4.556126 | \n",
" GLAC | \n",
" Rice milk (Milk in cooked cereal) | \n",
" unit | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 2/3 cup | \n",
" 70018 | \n",
"
\n",
" \n",
"
\n",
"
64 rows × 9 columns
\n",
"
"
],
"text/plain": [
" amount code \\\n",
"survey_id \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 1.910150 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 3.096284 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 3.568904 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 1.437427 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.930636 GLAC \n",
"001b4c3e1c848f58 0.164987 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 1.960348 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 2.233787 GLAC \n",
"001b4c3e1c848f58 4.057757 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.544489 GLAC \n",
"001b4c3e1c848f58 0.851028 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.711497 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 1.636527 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"... ... ... \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.702932 GLAC \n",
"001b4c3e1c848f58 0.801028 GLAC \n",
"001b4c3e1c848f58 0.252476 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 1.538358 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.665620 GLAC \n",
"001b4c3e1c848f58 1.336993 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 1.391782 GLAC \n",
"001b4c3e1c848f58 7.016067 GLAC \n",
"001b4c3e1c848f58 0.000000 GLAC \n",
"001b4c3e1c848f58 3.030604 GLAC \n",
"001b4c3e1c848f58 5.485402 GLAC \n",
"001b4c3e1c848f58 3.923203 GLAC \n",
"001b4c3e1c848f58 1.430008 GLAC \n",
"001b4c3e1c848f58 4.556126 GLAC \n",
"\n",
" description units \\\n",
"survey_id \n",
"001b4c3e1c848f58 All other cheese, such as American, cheddar or... unit \n",
"001b4c3e1c848f58 Apples, applesauce and pears unit \n",
"001b4c3e1c848f58 Avocado and guacamole unit \n",
"001b4c3e1c848f58 Bananas unit \n",
"001b4c3e1c848f58 Beef, pork, ham and lamb - with fat unit \n",
"001b4c3e1c848f58 Beer (all types) unit \n",
"001b4c3e1c848f58 Broccoli unit \n",
"001b4c3e1c848f58 Burritos, tacos, tostadas and quesadillas unit \n",
"001b4c3e1c848f58 Butter (Cereals and Breads) unit \n",
"001b4c3e1c848f58 Carrots - cooked unit \n",
"001b4c3e1c848f58 Carrots - raw unit \n",
"001b4c3e1c848f58 Cauliflower, cabbage and Brussels sprouts unit \n",
"001b4c3e1c848f58 Chocolate, candy bars, and toffee unit \n",
"001b4c3e1c848f58 Coffee (not lattes or mochas) unit \n",
"001b4c3e1c848f58 Coleslaw unit \n",
"001b4c3e1c848f58 Cookies and cakes - regular unit \n",
"001b4c3e1c848f58 Whole grain breads, including bagels and rolls unit \n",
"001b4c3e1c848f58 Eggs unit \n",
"001b4c3e1c848f58 Fresh garlic, including in cooking unit \n",
"001b4c3e1c848f58 Fresh tomatoes unit \n",
"001b4c3e1c848f58 Grapes, fresh unit \n",
"001b4c3e1c848f58 Green or string beans unit \n",
"001b4c3e1c848f58 Green peppers and green chilies, cooked unit \n",
"001b4c3e1c848f58 Green salad (Lettuce or spinach) unit \n",
"001b4c3e1c848f58 Latte (whole milk), mocha or hot chocolate unit \n",
"001b4c3e1c848f58 Lunch meats such as ham, turkey and lowfat bol... unit \n",
"001b4c3e1c848f58 Muffins, scones, croissants and biscuits unit \n",
"001b4c3e1c848f58 Oil, canola (Fat used in cooking) unit \n",
"001b4c3e1c848f58 Oil, olive (Cereals and Breads) unit \n",
"001b4c3e1c848f58 Oil, other such as corn, soybean, safflower or... unit \n",
"... ... ... \n",
"001b4c3e1c848f58 Red peppers and red chilies, cooked unit \n",
"001b4c3e1c848f58 Red peppers and red chilies, raw unit \n",
"001b4c3e1c848f58 Red Wine unit \n",
"001b4c3e1c848f58 Regular potato, tortilla chips, corn chips and... unit \n",
"001b4c3e1c848f58 Spaghetti, lasagna and other pasta with tomato... unit \n",
"001b4c3e1c848f58 Stew, pot pie, curries and casseroles with mea... unit \n",
"001b4c3e1c848f58 Summer squash and zucchini unit \n",
"001b4c3e1c848f58 All tea unit \n",
"001b4c3e1c848f58 Vegetable, minestrone and tomato soup unit \n",
"001b4c3e1c848f58 Water (tap or bottled) unit \n",
"001b4c3e1c848f58 White fish (broiled or baked) such as sole, ha... unit \n",
"001b4c3e1c848f58 White or rosé wine unit \n",
"001b4c3e1c848f58 Winter squash such as acorn, butternut and pum... unit \n",
"001b4c3e1c848f58 Yogurt, all types except frozen unit \n",
"001b4c3e1c848f58 Butter (Fat used in cooking) unit \n",
"001b4c3e1c848f58 Oil, olive (Fat used in cooking) unit \n",
"001b4c3e1c848f58 Oil, olive (Fats used on vegetables) unit \n",
"001b4c3e1c848f58 Salt unit \n",
"001b4c3e1c848f58 Veggie soy or tofu burgers or ground meat subs... unit \n",
"001b4c3e1c848f58 Tofu unit \n",
"001b4c3e1c848f58 Tempeh unit \n",
"001b4c3e1c848f58 Soy sauce, tamari, teriyaki sauce, Szechwan sa... unit \n",
"001b4c3e1c848f58 Corn tortillas unit \n",
"001b4c3e1c848f58 White Rice unit \n",
"001b4c3e1c848f58 Cooked greens, such as spinach, swiss chard an... unit \n",
"001b4c3e1c848f58 Whole grain breads, including bagels and rolls... unit \n",
"001b4c3e1c848f58 Cooked whole grain cereals unit \n",
"001b4c3e1c848f58 All other beans such as baked beans, lima bean... unit \n",
"001b4c3e1c848f58 Soy milk, fortified (Milk in cooked cereal) unit \n",
"001b4c3e1c848f58 Rice milk (Milk in cooked cereal) unit \n",
"\n",
" valueType frequency servingFrequencyText \\\n",
"survey_id \n",
"001b4c3e1c848f58 Index 104.0 2 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 28.0 2-3 per month \n",
"001b4c3e1c848f58 Index 52.0 1 day per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 104.0 2 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 912.0 2-3 per day \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 104.0 2 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 286.0 5-6 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 28.0 2-3 per month \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 365.0 1 per day \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 365.0 1 per day \n",
"... ... ... ... \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 104.0 2 days per week \n",
"001b4c3e1c848f58 Index 28.0 2-3 per month \n",
"001b4c3e1c848f58 Index 12.0 1 per month \n",
"001b4c3e1c848f58 Index 12.0 1 per month \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 912.0 2-3 per day \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 1642.0 4-5 per day \n",
"001b4c3e1c848f58 Index 12.0 1 per month \n",
"001b4c3e1c848f58 Index 24.0 2-3 days per month \n",
"001b4c3e1c848f58 Index 28.0 2-3 per month \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 365.0 1 per day \n",
"001b4c3e1c848f58 Index 365.0 1 per day \n",
"001b4c3e1c848f58 Index 730.0 2 per day \n",
"001b4c3e1c848f58 Index 1095.0 3 per day \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 104.0 2 per week \n",
"001b4c3e1c848f58 Index 24.0 1-3 per month \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 52.0 1 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 286.0 5-6 per week \n",
"001b4c3e1c848f58 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 Index 286.0 5-6 per week \n",
"001b4c3e1c848f58 Index 286.0 5-6 per week \n",
"\n",
" servingSizeText foodCode \n",
"survey_id \n",
"001b4c3e1c848f58 1 slice (1 oz), 1/4 cup shredded, 2 tablespoon... 70005 \n",
"001b4c3e1c848f58 1/2 apple or pear, 1/4 cup 20001 \n",
"001b4c3e1c848f58 1/2 avocado, 1/2 cup 30006 \n",
"001b4c3e1c848f58 1/2 banana 20002 \n",
"001b4c3e1c848f58 3 slices, 2 chops, small steak (4-5 oz) 40002 \n",
"001b4c3e1c848f58 16 oz (1 pint) 90002 \n",
"001b4c3e1c848f58 1/4 cup 30012 \n",
"001b4c3e1c848f58 1 taco or tostada, 1 small burrito, 1 quesadilla 40202 \n",
"001b4c3e1c848f58 4 teaspoons (4 pats) 10004 \n",
"001b4c3e1c848f58 1/2 cup 30041 \n",
"001b4c3e1c848f58 1/2 carrot, 4 baby carrots, 1/4 cup 30040 \n",
"001b4c3e1c848f58 1/4 cup 30018 \n",
"001b4c3e1c848f58 1 regular bar, 8-10 bite sized-pieces (1.5 oz) 80008 \n",
"001b4c3e1c848f58 1 small cup (6 oz) 90008 \n",
"001b4c3e1c848f58 3/4 cup 30017 \n",
"001b4c3e1c848f58 2 medium cookies, 1 large cookie, 1 small piec... 80005 \n",
"001b4c3e1c848f58 2 slices bread, 1 medium roll, 1 English muffi... 120413 \n",
"001b4c3e1c848f58 2 eggs 50004 \n",
"001b4c3e1c848f58 1 clove 30043 \n",
"001b4c3e1c848f58 4 slices, 1 medium tomato 30008 \n",
"001b4c3e1c848f58 8-10 grapes, 1/2 cup 20024 \n",
"001b4c3e1c848f58 1/4 cup 30001 \n",
"001b4c3e1c848f58 1/8 cup 30054 \n",
"001b4c3e1c848f58 1 cup (medium bowl) 30021 \n",
"001b4c3e1c848f58 1 regular cup (8-10 oz) 90007 \n",
"001b4c3e1c848f58 2 slices (2 oz) 40028 \n",
"001b4c3e1c848f58 Small muffin, small croissant, small biscuit 60028 \n",
"001b4c3e1c848f58 2 teaspoons 10011 \n",
"001b4c3e1c848f58 4 teaspoons (4 pats) 10010 \n",
"001b4c3e1c848f58 2 teaspoons 10007 \n",
"... ... ... \n",
"001b4c3e1c848f58 1/8 cup 30057 \n",
"001b4c3e1c848f58 1/4 pepper, 4-6 strips, 1/4 cup 30056 \n",
"001b4c3e1c848f58 4 oz (about 1/8th bottle) 90018 \n",
"001b4c3e1c848f58 2 handfuls, 1 small bag (1 oz) 60031 \n",
"001b4c3e1c848f58 1 cup (medium bowl) 40017 \n",
"001b4c3e1c848f58 1 cup (medium bowl) 40004 \n",
"001b4c3e1c848f58 1/4 cup 30046 \n",
"001b4c3e1c848f58 1 large glass (12 oz) 90009 \n",
"001b4c3e1c848f58 1 1/2 cups (large bowl) 40033 \n",
"001b4c3e1c848f58 1 regular glass (8 oz) 90017 \n",
"001b4c3e1c848f58 1 medium piece (4-5 oz) 40073 \n",
"001b4c3e1c848f58 6 oz (about 1/6th bottle) 90019 \n",
"001b4c3e1c848f58 3/4 cup 30047 \n",
"001b4c3e1c848f58 3/4 (6 oz) (regular container) 70017 \n",
"001b4c3e1c848f58 2 teaspoons 10004 \n",
"001b4c3e1c848f58 2 teaspoons 10010 \n",
"001b4c3e1c848f58 2 teaspoons 10010 \n",
"001b4c3e1c848f58 21-40 shakes (1/4 teaspoon) 10100 \n",
"001b4c3e1c848f58 1 burger or 3 oz 120191 \n",
"001b4c3e1c848f58 4-5 pieces (4 oz) 120206 \n",
"001b4c3e1c848f58 3 pieces (3 oz) 120207 \n",
"001b4c3e1c848f58 2 teaspoons 120198 \n",
"001b4c3e1c848f58 2 (6\" diameter) 120123 \n",
"001b4c3e1c848f58 1/2 cup 120414 \n",
"001b4c3e1c848f58 3/4 cup 120438 \n",
"001b4c3e1c848f58 2 slices bread, 1 medium roll, 1 English muffi... 120412 \n",
"001b4c3e1c848f58 1/2 cup (small bowl) 120410 \n",
"001b4c3e1c848f58 3/4 cup 30044 \n",
"001b4c3e1c848f58 2/3 cup 120560 \n",
"001b4c3e1c848f58 2/3 cup 70018 \n",
"\n",
"[64 rows x 9 columns]"
]
},
"execution_count": 378,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test[test.code == 'GLAC']"
]
},
{
"cell_type": "code",
"execution_count": 373,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
"
\n",
" \n",
" code | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" GLAC | \n",
" 62.397260 | \n",
"
\n",
" \n",
" GLTC | \n",
" 69.876978 | \n",
"
\n",
" \n",
" LineGi | \n",
" 336.912520 | \n",
"
\n",
" \n",
" acesupot | \n",
" 0.000000 | \n",
"
\n",
" \n",
" addsugar | \n",
" 25.437240 | \n",
"
\n",
" \n",
" adsugtot | \n",
" 20.476220 | \n",
"
\n",
" \n",
" alanine | \n",
" 2.147886 | \n",
"
\n",
" \n",
" alcohol | \n",
" 17.794446 | \n",
"
\n",
" \n",
" alphacar | \n",
" 2840.690441 | \n",
"
\n",
" \n",
" alphtoce | \n",
" 12.646316 | \n",
"
\n",
" \n",
" alphtoco | \n",
" 11.345113 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount\n",
"code \n",
"GLAC 62.397260\n",
"GLTC 69.876978\n",
"LineGi 336.912520\n",
"acesupot 0.000000\n",
"addsugar 25.437240\n",
"adsugtot 20.476220\n",
"alanine 2.147886\n",
"alcohol 17.794446\n",
"alphacar 2840.690441\n",
"alphtoce 12.646316\n",
"alphtoco 11.345113"
]
},
"execution_count": 373,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test.groupby('code').agg('sum').head(11)"
]
},
{
"cell_type": "code",
"execution_count": 375,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"survey_id 001b4c3e1c848f58\n",
"3_Methylhistidine_in_mg 2.60684169804397\n",
"Acesulfame_Potassium_in_mg 0.0\n",
"Added_Sugars__by_Available_Carbohydrate__in_g 25.4372404710912\n",
"Added_Sugars__by_Total_Sugars__in_g 20.4762200586827\n",
"Alanine_in_g 2.14788575606249\n",
"Alcohol_in_g 17.7944461190833\n",
"Alpha_Carotene__provitamin_A_carotenoid__in_mcg 2840.69044111298\n",
"Alpha_Tocopherol_in_mg 11.3451129336108\n",
"Animal_Protein_in_g 16.6596165030072\n",
"Arginine_in_g 2.87293720209445\n",
"Name: 10317.000030578, dtype: object"
]
},
"execution_count": 375,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_micromacro_pivot[vios_coded_micromacro_pivot.survey_id == test_survey].iloc[0].head(11)"
]
},
{
"cell_type": "code",
"execution_count": 376,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"survey_id 001b4c3e1c848f58\n",
"Percent_of_calories_from_Added_Sugar 0.864436614273603\n",
"Percent_of_calories_from_Alcohol 7.643624875670007\n",
"Percent_of_calories_from_Carbohydrate 38.41905488083203\n",
"Percent_of_calories_from_Fat 41.82753132412285\n",
"Percent_of_calories_from_Monounsaturated_Fat 21.020284676343078\n",
"Percent_of_calories_from_Polyunsaturated_Fat 8.6645383097221\n",
"Percent_of_calories_from_Protein 12.109788919375113\n",
"Percent_of_calories_from_Saturated_Fat 11.298838478082244\n",
"Name: 10317.000030578, dtype: object"
]
},
"execution_count": 376,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_percents_pivot[vios_coded_percents_pivot.survey_id == test_survey].iloc[0]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}