{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Partitioning and formatting of the raw vioscreen information.\n",
"\n",
"This notebook consumes the raw vioscreen data, and splits it up into a series of pivot tables.\n",
"\n",
"The pivot tables are indexed by the survey ID. A survey ID corresponds to a single instance of a vioscreen FFQ. Separate mappings, not included at present, relate the survey ID to a host subject identifier and/or sample barcode.\n",
"\n",
"All headers are scrubbed to avoid annoying characters like spaces. This is done for the benefit of tools like QIIME, however it does reduce readability in some cases.\n",
"\n",
"Code modified to include the output of food items consumption in grams and exclude one vioscreen_id (1758d77b00b1f67b) for which host_subject_id affiliation of samples could not be confirmed.\n",
"\n",
"This code reads and writes files in the current working directory.\n",
"A roundtrip test going from raw data to the summarized micro / macronutrients is included at the end of this notebook."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"import numpy as np\n",
"import time\n",
"import os\n",
"from collections import defaultdict"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# full raw vioscreen data\n",
"full_vios = pd.read_csv('vioscreen_dump.tsv.gz', sep='\\t', dtype=str)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# a raw dump of the ag.source_barcodes_surveys table\n",
"survey_sample_map_raw = pd.read_csv('sid_bc.tsv', \n",
" sep='\\t', dtype=str).set_index('survey_id')['barcode']\n",
"survey_sample_map = defaultdict(list)\n",
"for i, s in survey_sample_map_raw.items():\n",
" survey_sample_map[i].append('10317.%s' % s)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Explicitely removing vioscreen_id 1758d77b00b1f67b and its associated samples from both datasets\n",
"full_vios=full_vios[full_vios.survey_id != \"1758d77b00b1f67b\"]\n",
"del survey_sample_map['1758d77b00b1f67b']"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"def drop_null_cols(df):\n",
" \"\"\"Drop a column that is entirely null\"\"\"\n",
" drop = []\n",
" n = len(df)\n",
" for c in df.columns:\n",
" if df[c].isnull().sum() == n:\n",
" drop.append(c)\n",
" return df.drop(columns=drop)\n",
"\n",
"replacer = re.compile(\"[ \\-,()%':]\")\n",
"def sanitize_columns(df):\n",
" \"\"\"Remove possibly annoying characters\"\"\"\n",
" df.columns = [replacer.sub('_', c) for c in df.columns]\n",
" return df\n",
"\n",
"def remap_to_sample_id(df):\n",
" new_rows = []\n",
" sample_ids = []\n",
" for _, row in df.reset_index().iterrows():\n",
" for id_ in survey_sample_map.get(row['survey_id'], []):\n",
" new_rows.append(row.copy())\n",
" sample_ids.append(id_)\n",
" new_df = pd.DataFrame(new_rows)\n",
" new_df['#SampleID'] = sample_ids\n",
" return new_df.set_index('#SampleID')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# partition the raw vioscreen data into logically related units.\n",
"# within each partition, columns that are entirely null are removed.\n",
"\n",
"# a few specific values are used for this:\n",
"# - \"code\" should correspond to a specific micro/macro nutrient or summarized \n",
"# value (e.g., % calories from alcohol)\n",
"# - \"data\" contains the raw micro/macro nutrient information for the given food\n",
"# entry for the given survey scaled (I believe) \"servingFrequencyText\" and the \"servingSizeText\". \n",
"\n",
"# where the \"code\" column is not null\n",
"vios_coded = drop_null_cols(full_vios[~full_vios.code.isnull()])\n",
"\n",
"# what should be food components (e.g., % protein)\n",
"vios_coded_percents = drop_null_cols(vios_coded[~vios_coded.foodComponentType.isnull()])\n",
"\n",
"# what should be only micro and macronutriet data\n",
"vios_coded_micromacro = drop_null_cols(vios_coded[np.logical_and(vios_coded.foodComponentType.isnull(),\n",
" vios_coded.description != 'Eating Pattern')])\n",
"\n",
"# what vioscreen terms eating patterns (e.g., added fats per day)\n",
"vios_coded_eatingpattern = drop_null_cols(vios_coded[vios_coded.description == 'Eating Pattern'])\n",
"\n",
"# entries that are not coded\n",
"vios_not_coded = drop_null_cols(full_vios[full_vios.code.isnull()])\n",
"\n",
"# contains the raw vioscreen json output (more below)\n",
"vios_with_data = drop_null_cols(vios_not_coded[~vios_not_coded.data.isnull()])\n",
"\n",
"# the vioscreen scores (e.g., Total Fruits)\n",
"vios_no_data = drop_null_cols(vios_not_coded[vios_not_coded.data.isnull()])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting food components"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" foodComponentType | \n",
" precision | \n",
" shortDescription | \n",
" survey_id | \n",
" units | \n",
"
\n",
" \n",
" \n",
" \n",
" 153 | \n",
" 15.49482152820182 | \n",
" %protein | \n",
" Percent of calories from Protein | \n",
" 1.0 | \n",
" 0.0 | \n",
" Protein | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 154 | \n",
" 44.07939989979141 | \n",
" %fat | \n",
" Percent of calories from Fat | \n",
" 1.0 | \n",
" 0.0 | \n",
" Fat | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 155 | \n",
" 40.42577857200676 | \n",
" %carbo | \n",
" Percent of calories from Carbohydrate | \n",
" 1.0 | \n",
" 0.0 | \n",
" Carbohydrate | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 156 | \n",
" 0.0 | \n",
" %alcohol | \n",
" Percent of calories from Alcohol | \n",
" 1.0 | \n",
" 0.0 | \n",
" Alcohol | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
" 157 | \n",
" 16.382593411285388 | \n",
" %sfatot | \n",
" Percent of calories from Saturated Fat | \n",
" 1.0 | \n",
" 0.0 | \n",
" Saturated Fat | \n",
" 80043f5209506497 | \n",
" % | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount code description \\\n",
"153 15.49482152820182 %protein Percent of calories from Protein \n",
"154 44.07939989979141 %fat Percent of calories from Fat \n",
"155 40.42577857200676 %carbo Percent of calories from Carbohydrate \n",
"156 0.0 %alcohol Percent of calories from Alcohol \n",
"157 16.382593411285388 %sfatot Percent of calories from Saturated Fat \n",
"\n",
" foodComponentType precision shortDescription survey_id units \n",
"153 1.0 0.0 Protein 80043f5209506497 % \n",
"154 1.0 0.0 Fat 80043f5209506497 % \n",
"155 1.0 0.0 Carbohydrate 80043f5209506497 % \n",
"156 1.0 0.0 Alcohol 80043f5209506497 % \n",
"157 1.0 0.0 Saturated Fat 80043f5209506497 % "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_percents.head()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"vios_coded_percents_pivot = vios_coded_percents.pivot(index='survey_id', columns='description', values='amount')\n",
"vios_coded_percents_pivot = remap_to_sample_id(sanitize_columns(vios_coded_percents_pivot))\n",
"vios_coded_percents_pivot.to_csv('vioscreen_component_percents.tsv', sep='\\t', index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" Percent_of_calories_from_Added_Sugar | \n",
" Percent_of_calories_from_Alcohol | \n",
" Percent_of_calories_from_Carbohydrate | \n",
" Percent_of_calories_from_Fat | \n",
" Percent_of_calories_from_Monounsaturated_Fat | \n",
" Percent_of_calories_from_Polyunsaturated_Fat | \n",
" Percent_of_calories_from_Protein | \n",
" Percent_of_calories_from_Saturated_Fat | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 0.5278094148896552 | \n",
" 4.405707930741896 | \n",
" 27.628189670463787 | \n",
" 45.084477618669254 | \n",
" 20.95621331900778 | \n",
" 12.559912015883315 | \n",
" 22.881624780125065 | \n",
" 10.631030850565056 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 0.46065971747342294 | \n",
" 0.0 | \n",
" 23.474779328659373 | \n",
" 55.745954131731786 | \n",
" 22.489476549272094 | \n",
" 8.219601812309241 | \n",
" 20.77926653960884 | \n",
" 22.733751192146702 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 0.8657785646763633 | \n",
" 0.00023962357002141254 | \n",
" 37.5279841722288 | \n",
" 43.04814628870771 | \n",
" 21.854142563950166 | \n",
" 10.351928530673 | \n",
" 19.42362991549346 | \n",
" 9.169527931007428 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 0.864436614273603 | \n",
" 7.643624875670007 | \n",
" 38.41905488083203 | \n",
" 41.82753132412285 | \n",
" 21.020284676343078 | \n",
" 8.6645383097221 | \n",
" 12.109788919375113 | \n",
" 11.298838478082244 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 0.864436614273603 | \n",
" 7.643624875670007 | \n",
" 38.41905488083203 | \n",
" 41.82753132412285 | \n",
" 21.020284676343078 | \n",
" 8.6645383097221 | \n",
" 12.109788919375113 | \n",
" 11.298838478082244 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id Percent_of_calories_from_Added_Sugar \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 0.5278094148896552 \n",
"10317.000107687 000fc7ac57776579 0.46065971747342294 \n",
"10317.000087663 0012cf765bb386ea 0.8657785646763633 \n",
"10317.000030578 001b4c3e1c848f58 0.864436614273603 \n",
"10317.000030579 001b4c3e1c848f58 0.864436614273603 \n",
"\n",
" Percent_of_calories_from_Alcohol \\\n",
"#SampleID \n",
"10317.000072291 4.405707930741896 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.00023962357002141254 \n",
"10317.000030578 7.643624875670007 \n",
"10317.000030579 7.643624875670007 \n",
"\n",
" Percent_of_calories_from_Carbohydrate \\\n",
"#SampleID \n",
"10317.000072291 27.628189670463787 \n",
"10317.000107687 23.474779328659373 \n",
"10317.000087663 37.5279841722288 \n",
"10317.000030578 38.41905488083203 \n",
"10317.000030579 38.41905488083203 \n",
"\n",
" Percent_of_calories_from_Fat \\\n",
"#SampleID \n",
"10317.000072291 45.084477618669254 \n",
"10317.000107687 55.745954131731786 \n",
"10317.000087663 43.04814628870771 \n",
"10317.000030578 41.82753132412285 \n",
"10317.000030579 41.82753132412285 \n",
"\n",
" Percent_of_calories_from_Monounsaturated_Fat \\\n",
"#SampleID \n",
"10317.000072291 20.95621331900778 \n",
"10317.000107687 22.489476549272094 \n",
"10317.000087663 21.854142563950166 \n",
"10317.000030578 21.020284676343078 \n",
"10317.000030579 21.020284676343078 \n",
"\n",
" Percent_of_calories_from_Polyunsaturated_Fat \\\n",
"#SampleID \n",
"10317.000072291 12.559912015883315 \n",
"10317.000107687 8.219601812309241 \n",
"10317.000087663 10.351928530673 \n",
"10317.000030578 8.6645383097221 \n",
"10317.000030579 8.6645383097221 \n",
"\n",
" Percent_of_calories_from_Protein \\\n",
"#SampleID \n",
"10317.000072291 22.881624780125065 \n",
"10317.000107687 20.77926653960884 \n",
"10317.000087663 19.42362991549346 \n",
"10317.000030578 12.109788919375113 \n",
"10317.000030579 12.109788919375113 \n",
"\n",
" Percent_of_calories_from_Saturated_Fat \n",
"#SampleID \n",
"10317.000072291 10.631030850565056 \n",
"10317.000107687 22.733751192146702 \n",
"10317.000087663 9.169527931007428 \n",
"10317.000030578 11.298838478082244 \n",
"10317.000030579 11.298838478082244 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_percents_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting micro and macro nutrients\n",
"\n",
"NOTE: headers are augmented to include the unit of measurement (e.g., grams)."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" survey_id | \n",
" units | \n",
" valueType | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 80.4870014190674 | \n",
" acesupot | \n",
" Acesulfame Potassium | \n",
" 80043f5209506497 | \n",
" mg | \n",
" Amount | \n",
"
\n",
" \n",
" 1 | \n",
" 23.9946937142986 | \n",
" addsugar | \n",
" Added Sugars (by Available Carbohydrate) | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
"
\n",
" \n",
" 2 | \n",
" 22.221421156844 | \n",
" adsugtot | \n",
" Added Sugars (by Total Sugars) | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
"
\n",
" \n",
" 3 | \n",
" 2.82355429149195 | \n",
" alanine | \n",
" Alanine | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" alcohol | \n",
" Alcohol | \n",
" 80043f5209506497 | \n",
" g | \n",
" Amount | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount code description \\\n",
"0 80.4870014190674 acesupot Acesulfame Potassium \n",
"1 23.9946937142986 addsugar Added Sugars (by Available Carbohydrate) \n",
"2 22.221421156844 adsugtot Added Sugars (by Total Sugars) \n",
"3 2.82355429149195 alanine Alanine \n",
"4 0.0 alcohol Alcohol \n",
"\n",
" survey_id units valueType \n",
"0 80043f5209506497 mg Amount \n",
"1 80043f5209506497 g Amount \n",
"2 80043f5209506497 g Amount \n",
"3 80043f5209506497 g Amount \n",
"4 80043f5209506497 g Amount "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_micromacro.head()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"vios_coded_micromacro = vios_coded_micromacro.copy()\n",
"vios_coded_micromacro['description_augmented'] = [\"%s_in_%s\" % (desc, u)\n",
" for desc, u in zip(vios_coded_micromacro['description'],\n",
" vios_coded_micromacro['units'])]\n",
"vios_coded_micromacro_pivot = vios_coded_micromacro.pivot(index='survey_id', \n",
" columns='description_augmented', \n",
" values='amount')\n",
"vios_coded_micromacro_pivot = remap_to_sample_id(sanitize_columns(vios_coded_micromacro_pivot))\n",
"vios_coded_micromacro_pivot.to_csv('vioscreen_micromacro.tsv', sep='\\t', index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" 3_Methylhistidine_in_mg | \n",
" Acesulfame_Potassium_in_mg | \n",
" Added_Sugars__by_Available_Carbohydrate__in_g | \n",
" Added_Sugars__by_Total_Sugars__in_g | \n",
" Alanine_in_g | \n",
" Alcohol_in_g | \n",
" Alpha_Carotene__provitamin_A_carotenoid__in_mcg | \n",
" Alpha_Tocopherol_in_mg | \n",
" Animal_Protein_in_g | \n",
" ... | \n",
" Vitamin_D__calciferol__in_mcg | \n",
" Vitamin_D2__ergocalciferol__in_mcg | \n",
" Vitamin_D3__cholecalciferol__in_mcg | \n",
" Vitamin_D_in_IU | \n",
" Vitamin_E_in_IU | \n",
" Vitamin_K__phylloquinone__in_mcg | \n",
" Water_in_g | \n",
" Whole_Grains__ounce_equivalents__in_oz_eq | \n",
" Xylitol_in_g | \n",
" Zinc_in_mg | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 27.1747373814452 | \n",
" 0.0 | \n",
" 13.4562635816126 | \n",
" 12.4120058199281 | \n",
" 5.51489136698938 | \n",
" 12.3349310056407 | \n",
" 1138.36933677242 | \n",
" 28.5702428804122 | \n",
" 65.1852377337893 | \n",
" ... | \n",
" 12.4742957697532 | \n",
" 0.200216711868774 | \n",
" 12.2740790684938 | \n",
" 498.971830790127 | \n",
" 50.9459459997824 | \n",
" 224.194887651232 | \n",
" 2149.69964033154 | \n",
" 0.617644916526757 | \n",
" 0.0367491587143106 | \n",
" 12.8837308617062 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 28.2962962869096 | \n",
" 0.0 | \n",
" 12.5258816620258 | \n",
" 11.6483954938307 | \n",
" 6.42827878920708 | \n",
" 0.0 | \n",
" 3101.36641244233 | \n",
" 15.1735223541489 | \n",
" 94.5589348006414 | \n",
" ... | \n",
" 8.9346003949231 | \n",
" 0.0 | \n",
" 8.9346003949231 | \n",
" 357.384015796924 | \n",
" 22.6390968368538 | \n",
" 931.249354337616 | \n",
" 3629.797712835 | \n",
" 0.431386317945506 | \n",
" 0.0211558774590833 | \n",
" 14.6265771027999 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 18.2712649474405 | \n",
" 0.0 | \n",
" 20.7296489630817 | \n",
" 15.8732604976236 | \n",
" 3.50057925906138 | \n",
" 0.000514735905194579 | \n",
" 565.525633238059 | \n",
" 15.836220214478 | \n",
" 39.3703097824671 | \n",
" ... | \n",
" 2.22169212308429 | \n",
" 0.0 | \n",
" 2.22169212308429 | \n",
" 88.8676849233717 | \n",
" 23.6360169491142 | \n",
" 162.85388100445 | \n",
" 4291.30569341481 | \n",
" 0.0 | \n",
" 0.0236280034849236 | \n",
" 10.289681270179 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 2.60684169804397 | \n",
" 0.0 | \n",
" 25.4372404710912 | \n",
" 20.4762200586827 | \n",
" 2.14788575606249 | \n",
" 17.7944461190833 | \n",
" 2840.69044111298 | \n",
" 11.3451129336108 | \n",
" 16.6596165030072 | \n",
" ... | \n",
" 3.58194392826441 | \n",
" 1.31924199302752 | \n",
" 2.26270193523689 | \n",
" 143.277757130576 | \n",
" 16.909548815799 | \n",
" 201.226017515073 | \n",
" 3558.75827935317 | \n",
" 1.69406249866094 | \n",
" 0.0342438208502127 | \n",
" 8.10011550652904 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 2.60684169804397 | \n",
" 0.0 | \n",
" 25.4372404710912 | \n",
" 20.4762200586827 | \n",
" 2.14788575606249 | \n",
" 17.7944461190833 | \n",
" 2840.69044111298 | \n",
" 11.3451129336108 | \n",
" 16.6596165030072 | \n",
" ... | \n",
" 3.58194392826441 | \n",
" 1.31924199302752 | \n",
" 2.26270193523689 | \n",
" 143.277757130576 | \n",
" 16.909548815799 | \n",
" 201.226017515073 | \n",
" 3558.75827935317 | \n",
" 1.69406249866094 | \n",
" 0.0342438208502127 | \n",
" 8.10011550652904 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 194 columns
\n",
"
"
],
"text/plain": [
" survey_id 3_Methylhistidine_in_mg \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 27.1747373814452 \n",
"10317.000107687 000fc7ac57776579 28.2962962869096 \n",
"10317.000087663 0012cf765bb386ea 18.2712649474405 \n",
"10317.000030578 001b4c3e1c848f58 2.60684169804397 \n",
"10317.000030579 001b4c3e1c848f58 2.60684169804397 \n",
"\n",
" Acesulfame_Potassium_in_mg \\\n",
"#SampleID \n",
"10317.000072291 0.0 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 0.0 \n",
"10317.000030579 0.0 \n",
"\n",
" Added_Sugars__by_Available_Carbohydrate__in_g \\\n",
"#SampleID \n",
"10317.000072291 13.4562635816126 \n",
"10317.000107687 12.5258816620258 \n",
"10317.000087663 20.7296489630817 \n",
"10317.000030578 25.4372404710912 \n",
"10317.000030579 25.4372404710912 \n",
"\n",
" Added_Sugars__by_Total_Sugars__in_g Alanine_in_g \\\n",
"#SampleID \n",
"10317.000072291 12.4120058199281 5.51489136698938 \n",
"10317.000107687 11.6483954938307 6.42827878920708 \n",
"10317.000087663 15.8732604976236 3.50057925906138 \n",
"10317.000030578 20.4762200586827 2.14788575606249 \n",
"10317.000030579 20.4762200586827 2.14788575606249 \n",
"\n",
" Alcohol_in_g \\\n",
"#SampleID \n",
"10317.000072291 12.3349310056407 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.000514735905194579 \n",
"10317.000030578 17.7944461190833 \n",
"10317.000030579 17.7944461190833 \n",
"\n",
" Alpha_Carotene__provitamin_A_carotenoid__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 1138.36933677242 \n",
"10317.000107687 3101.36641244233 \n",
"10317.000087663 565.525633238059 \n",
"10317.000030578 2840.69044111298 \n",
"10317.000030579 2840.69044111298 \n",
"\n",
" Alpha_Tocopherol_in_mg Animal_Protein_in_g ... \\\n",
"#SampleID ... \n",
"10317.000072291 28.5702428804122 65.1852377337893 ... \n",
"10317.000107687 15.1735223541489 94.5589348006414 ... \n",
"10317.000087663 15.836220214478 39.3703097824671 ... \n",
"10317.000030578 11.3451129336108 16.6596165030072 ... \n",
"10317.000030579 11.3451129336108 16.6596165030072 ... \n",
"\n",
" Vitamin_D__calciferol__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 12.4742957697532 \n",
"10317.000107687 8.9346003949231 \n",
"10317.000087663 2.22169212308429 \n",
"10317.000030578 3.58194392826441 \n",
"10317.000030579 3.58194392826441 \n",
"\n",
" Vitamin_D2__ergocalciferol__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 0.200216711868774 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 1.31924199302752 \n",
"10317.000030579 1.31924199302752 \n",
"\n",
" Vitamin_D3__cholecalciferol__in_mcg Vitamin_D_in_IU \\\n",
"#SampleID \n",
"10317.000072291 12.2740790684938 498.971830790127 \n",
"10317.000107687 8.9346003949231 357.384015796924 \n",
"10317.000087663 2.22169212308429 88.8676849233717 \n",
"10317.000030578 2.26270193523689 143.277757130576 \n",
"10317.000030579 2.26270193523689 143.277757130576 \n",
"\n",
" Vitamin_E_in_IU Vitamin_K__phylloquinone__in_mcg \\\n",
"#SampleID \n",
"10317.000072291 50.9459459997824 224.194887651232 \n",
"10317.000107687 22.6390968368538 931.249354337616 \n",
"10317.000087663 23.6360169491142 162.85388100445 \n",
"10317.000030578 16.909548815799 201.226017515073 \n",
"10317.000030579 16.909548815799 201.226017515073 \n",
"\n",
" Water_in_g Whole_Grains__ounce_equivalents__in_oz_eq \\\n",
"#SampleID \n",
"10317.000072291 2149.69964033154 0.617644916526757 \n",
"10317.000107687 3629.797712835 0.431386317945506 \n",
"10317.000087663 4291.30569341481 0.0 \n",
"10317.000030578 3558.75827935317 1.69406249866094 \n",
"10317.000030579 3558.75827935317 1.69406249866094 \n",
"\n",
" Xylitol_in_g Zinc_in_mg \n",
"#SampleID \n",
"10317.000072291 0.0367491587143106 12.8837308617062 \n",
"10317.000107687 0.0211558774590833 14.6265771027999 \n",
"10317.000087663 0.0236280034849236 10.289681270179 \n",
"10317.000030578 0.0342438208502127 8.10011550652904 \n",
"10317.000030579 0.0342438208502127 8.10011550652904 \n",
"\n",
"[5 rows x 194 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_micromacro_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting eating patterns\n",
"\n",
"NOTE: headers are augmented to include the time unit (e.g., per day)."
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" code | \n",
" description | \n",
" survey_id | \n",
" units | \n",
" valueType | \n",
"
\n",
" \n",
" \n",
" \n",
" 201 | \n",
" 6.83359985351562 | \n",
" ADDEDFATS | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
"
\n",
" \n",
" 202 | \n",
" 0.0 | \n",
" ALCOHOLSERV | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
"
\n",
" \n",
" 203 | \n",
" 1.24581536668154 | \n",
" ANIMALPROTEIN | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
"
\n",
" \n",
" 204 | \n",
" 1.8388000404998 | \n",
" CALCDAIRYSERV | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
"
\n",
" \n",
" 205 | \n",
" 3.4041772177448 | \n",
" CALCSERV | \n",
" Eating Pattern | \n",
" 80043f5209506497 | \n",
" PerDay | \n",
" Amount | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount code description survey_id \\\n",
"201 6.83359985351562 ADDEDFATS Eating Pattern 80043f5209506497 \n",
"202 0.0 ALCOHOLSERV Eating Pattern 80043f5209506497 \n",
"203 1.24581536668154 ANIMALPROTEIN Eating Pattern 80043f5209506497 \n",
"204 1.8388000404998 CALCDAIRYSERV Eating Pattern 80043f5209506497 \n",
"205 3.4041772177448 CALCSERV Eating Pattern 80043f5209506497 \n",
"\n",
" units valueType \n",
"201 PerDay Amount \n",
"202 PerDay Amount \n",
"203 PerDay Amount \n",
"204 PerDay Amount \n",
"205 PerDay Amount "
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_eatingpattern.head()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"vios_coded_eatingpattern = vios_coded_eatingpattern.copy()\n",
"vios_coded_eatingpattern['code_augmented'] = ['%s_%s' % (c, u)\n",
" for c, u in zip(vios_coded_eatingpattern['code'],\n",
" vios_coded_eatingpattern['units'])]\n",
"vios_coded_eatingpattern_pivot = vios_coded_eatingpattern.pivot(index='survey_id', \n",
" columns='code_augmented', \n",
" values='amount')\n",
"vios_coded_eatingpattern_pivot = remap_to_sample_id(sanitize_columns(vios_coded_eatingpattern_pivot))\n",
"vios_coded_eatingpattern_pivot.to_csv('vioscreen_eatingpatterns.tsv', sep='\\t', index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" ADDEDFATS_PerDay | \n",
" ALCOHOLSERV_PerDay | \n",
" ANIMALPROTEIN_PerDay | \n",
" CALCDAIRYSERV_PerDay | \n",
" CALCSERV_PerDay | \n",
" FISHSERV_PerWeek | \n",
" FRIEDFISH_PerWeek | \n",
" FRTSUMM_PerDay | \n",
" GRAINSERV_PerDay | \n",
" JUICESERV_PerDay | \n",
" LOWFATDAIRYSERV_PerDay | \n",
" NOFRYFISHSERV_PerWeek | \n",
" NONFATDAIRY_PerDay | \n",
" PLANTPROTEIN_PerDay | \n",
" SALADSERV_PerDay | \n",
" SOYFOODS_PerDay | \n",
" VEGSUMM_PerDay | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 13.7111325807085 | \n",
" 0.811984630480205 | \n",
" 3.12011660969654 | \n",
" 0.588968275238422 | \n",
" 2.43930048364662 | \n",
" 0.757184681663774 | \n",
" 0.0 | \n",
" 3.9613478141107 | \n",
" 0.203613660210178 | \n",
" 1.56846226815002 | \n",
" 0.10680657461898 | \n",
" 0.757184681663774 | \n",
" 0.0 | \n",
" 0.788205997952043 | \n",
" 4.27983387202433 | \n",
" 0.019353452018679 | \n",
" 6.78067953556929 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 16.9556567945546 | \n",
" 0.0 | \n",
" 5.55031021924401 | \n",
" 1.02710044945619 | \n",
" 2.85550994853688 | \n",
" 0.429514730793156 | \n",
" 0.0 | \n",
" 1.09213853647856 | \n",
" 0.547190726293276 | \n",
" 0.0 | \n",
" 0.373823011166429 | \n",
" 0.429514730793156 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.05764470002423 | \n",
" 0.0 | \n",
" 8.40219293535005 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 10.460175704561 | \n",
" 0.0 | \n",
" 1.75556821008173 | \n",
" 0.143241657294231 | \n",
" 1.7933502932502 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.27800895759504 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0412194728851318 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0961474784433025 | \n",
" 6.61006004091812 | \n",
" 0.202395007014275 | \n",
" 8.88691729763619 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 9.4877522015245 | \n",
" 1.18359554067103 | \n",
" 1.35898643631758 | \n",
" 0.508965186039879 | \n",
" 2.37677246201077 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 1.45998473791023 | \n",
" 1.76516303281359 | \n",
" 0.0 | \n",
" 0.10680657461898 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 0.712004397299192 | \n",
" 2.13061859428066 | \n",
" 0.450974494893012 | \n",
" 5.41913946629678 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 9.4877522015245 | \n",
" 1.18359554067103 | \n",
" 1.35898643631758 | \n",
" 0.508965186039879 | \n",
" 2.37677246201077 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 1.45998473791023 | \n",
" 1.76516303281359 | \n",
" 0.0 | \n",
" 0.10680657461898 | \n",
" 0.0330395946763966 | \n",
" 0.0 | \n",
" 0.712004397299192 | \n",
" 2.13061859428066 | \n",
" 0.450974494893012 | \n",
" 5.41913946629678 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id ADDEDFATS_PerDay ALCOHOLSERV_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 13.7111325807085 0.811984630480205 \n",
"10317.000107687 000fc7ac57776579 16.9556567945546 0.0 \n",
"10317.000087663 0012cf765bb386ea 10.460175704561 0.0 \n",
"10317.000030578 001b4c3e1c848f58 9.4877522015245 1.18359554067103 \n",
"10317.000030579 001b4c3e1c848f58 9.4877522015245 1.18359554067103 \n",
"\n",
" ANIMALPROTEIN_PerDay CALCDAIRYSERV_PerDay CALCSERV_PerDay \\\n",
"#SampleID \n",
"10317.000072291 3.12011660969654 0.588968275238422 2.43930048364662 \n",
"10317.000107687 5.55031021924401 1.02710044945619 2.85550994853688 \n",
"10317.000087663 1.75556821008173 0.143241657294231 1.7933502932502 \n",
"10317.000030578 1.35898643631758 0.508965186039879 2.37677246201077 \n",
"10317.000030579 1.35898643631758 0.508965186039879 2.37677246201077 \n",
"\n",
" FISHSERV_PerWeek FRIEDFISH_PerWeek FRTSUMM_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0.757184681663774 0.0 3.9613478141107 \n",
"10317.000107687 0.429514730793156 0.0 1.09213853647856 \n",
"10317.000087663 0.0 0.0 3.27800895759504 \n",
"10317.000030578 0.0330395946763966 0.0 1.45998473791023 \n",
"10317.000030579 0.0330395946763966 0.0 1.45998473791023 \n",
"\n",
" GRAINSERV_PerDay JUICESERV_PerDay LOWFATDAIRYSERV_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0.203613660210178 1.56846226815002 0.10680657461898 \n",
"10317.000107687 0.547190726293276 0.0 0.373823011166429 \n",
"10317.000087663 0.0 0.0 0.0412194728851318 \n",
"10317.000030578 1.76516303281359 0.0 0.10680657461898 \n",
"10317.000030579 1.76516303281359 0.0 0.10680657461898 \n",
"\n",
" NOFRYFISHSERV_PerWeek NONFATDAIRY_PerDay PLANTPROTEIN_PerDay \\\n",
"#SampleID \n",
"10317.000072291 0.757184681663774 0.0 0.788205997952043 \n",
"10317.000107687 0.429514730793156 0.0 0.0 \n",
"10317.000087663 0.0 0.0 0.0961474784433025 \n",
"10317.000030578 0.0330395946763966 0.0 0.712004397299192 \n",
"10317.000030579 0.0330395946763966 0.0 0.712004397299192 \n",
"\n",
" SALADSERV_PerDay SOYFOODS_PerDay VEGSUMM_PerDay \n",
"#SampleID \n",
"10317.000072291 4.27983387202433 0.019353452018679 6.78067953556929 \n",
"10317.000107687 1.05764470002423 0.0 8.40219293535005 \n",
"10317.000087663 6.61006004091812 0.202395007014275 8.88691729763619 \n",
"10317.000030578 2.13061859428066 0.450974494893012 5.41913946629678 \n",
"10317.000030579 2.13061859428066 0.450974494893012 5.41913946629678 "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_eatingpattern_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting vioscreen scores\n",
"\n",
"NOTE: a \"bounds\" file is also produced here as vioscreen provides upper and lower limits."
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" lowerLimit | \n",
" name | \n",
" score | \n",
" survey_id | \n",
" type | \n",
" upperLimit | \n",
"
\n",
" \n",
" \n",
" \n",
" 227 | \n",
" 0.0 | \n",
" Total Vegetables | \n",
" 2.3880099595267104 | \n",
" 80043f5209506497 | \n",
" TotalVegetables | \n",
" 5.0 | \n",
"
\n",
" \n",
" 228 | \n",
" 0.0 | \n",
" Greens and Beans | \n",
" 0.0 | \n",
" 80043f5209506497 | \n",
" GreensAndBeans | \n",
" 5.0 | \n",
"
\n",
" \n",
" 229 | \n",
" 0.0 | \n",
" Total Fruit | \n",
" 5.0 | \n",
" 80043f5209506497 | \n",
" TotalFruit | \n",
" 5.0 | \n",
"
\n",
" \n",
" 230 | \n",
" 0.0 | \n",
" Whole Fruit | \n",
" 5.0 | \n",
" 80043f5209506497 | \n",
" WholeFruit | \n",
" 5.0 | \n",
"
\n",
" \n",
" 231 | \n",
" 0.0 | \n",
" Whole Grains | \n",
" 5.6810242143311065 | \n",
" 80043f5209506497 | \n",
" WholeGrains | \n",
" 10.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" lowerLimit name score survey_id \\\n",
"227 0.0 Total Vegetables 2.3880099595267104 80043f5209506497 \n",
"228 0.0 Greens and Beans 0.0 80043f5209506497 \n",
"229 0.0 Total Fruit 5.0 80043f5209506497 \n",
"230 0.0 Whole Fruit 5.0 80043f5209506497 \n",
"231 0.0 Whole Grains 5.6810242143311065 80043f5209506497 \n",
"\n",
" type upperLimit \n",
"227 TotalVegetables 5.0 \n",
"228 GreensAndBeans 5.0 \n",
"229 TotalFruit 5.0 \n",
"230 WholeFruit 5.0 \n",
"231 WholeGrains 10.0 "
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_no_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"vios_no_data_pivot = vios_no_data.pivot(index='survey_id', columns='type', values='score')\n",
"vios_no_data_pivot = remap_to_sample_id(sanitize_columns(vios_no_data_pivot))\n",
"vios_no_data_pivot.to_csv('vioscreen_scores.tsv', sep='\\t', index=True, header=True)\n",
"\n",
"vios_no_data_pivot_desc = vios_no_data[['type', 'lowerLimit', 'upperLimit']]\n",
"vios_no_data_pivot_desc = vios_no_data_pivot_desc[~vios_no_data_pivot_desc.duplicated()]\n",
"vios_no_data_pivot_desc = sanitize_columns(vios_no_data_pivot_desc)\n",
"vios_no_data_pivot_desc.to_csv('vioscreen_scores_bounds.tsv', sep='\\t', index=False, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" Dairy | \n",
" EmptyCalories | \n",
" FattyAcids | \n",
" GreensAndBeans | \n",
" RefinedGrains | \n",
" SeafoodAndPlantProteins | \n",
" Sodium | \n",
" TotalFruit | \n",
" TotalProteins | \n",
" TotalScore | \n",
" TotalVegetables | \n",
" WholeFruit | \n",
" WholeGrains | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 2.773913251852978 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 2.9116395144107985 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 76.42011495192892 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 0.7345621856651521 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 3.4494368436822747 | \n",
" 10.414684587341299 | \n",
" 1.0945527410632858 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 1.4900690155742815 | \n",
" 5.0 | \n",
" 51.00709571319465 | \n",
" 5.0 | \n",
" 2.965681423347652 | \n",
" 1.5926711021858588 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 0.7657591447261929 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 70.76575914472619 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 3.744019504399623 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 2.8927585756303893 | \n",
" 5.0 | \n",
" 79.09795635014056 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 7.461178270110556 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 3.744019504399623 | \n",
" 20.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 10.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 2.8927585756303893 | \n",
" 5.0 | \n",
" 79.09795635014056 | \n",
" 5.0 | \n",
" 5.0 | \n",
" 7.461178270110556 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id Dairy EmptyCalories \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c 2.773913251852978 20.0 \n",
"10317.000107687 000fc7ac57776579 3.4494368436822747 10.414684587341299 \n",
"10317.000087663 0012cf765bb386ea 0.7657591447261929 20.0 \n",
"10317.000030578 001b4c3e1c848f58 3.744019504399623 20.0 \n",
"10317.000030579 001b4c3e1c848f58 3.744019504399623 20.0 \n",
"\n",
" FattyAcids GreensAndBeans RefinedGrains \\\n",
"#SampleID \n",
"10317.000072291 10.0 5.0 10.0 \n",
"10317.000107687 1.0945527410632858 5.0 10.0 \n",
"10317.000087663 10.0 5.0 10.0 \n",
"10317.000030578 10.0 5.0 10.0 \n",
"10317.000030579 10.0 5.0 10.0 \n",
"\n",
" SeafoodAndPlantProteins Sodium \\\n",
"#SampleID \n",
"10317.000072291 5.0 2.9116395144107985 \n",
"10317.000107687 5.0 0.0 \n",
"10317.000087663 5.0 0.0 \n",
"10317.000030578 5.0 0.0 \n",
"10317.000030579 5.0 0.0 \n",
"\n",
" TotalFruit TotalProteins TotalScore \\\n",
"#SampleID \n",
"10317.000072291 5.0 5.0 76.42011495192892 \n",
"10317.000107687 1.4900690155742815 5.0 51.00709571319465 \n",
"10317.000087663 5.0 5.0 70.76575914472619 \n",
"10317.000030578 2.8927585756303893 5.0 79.09795635014056 \n",
"10317.000030579 2.8927585756303893 5.0 79.09795635014056 \n",
"\n",
" TotalVegetables WholeFruit WholeGrains \n",
"#SampleID \n",
"10317.000072291 5.0 5.0 0.7345621856651521 \n",
"10317.000107687 5.0 2.965681423347652 1.5926711021858588 \n",
"10317.000087663 5.0 5.0 0.0 \n",
"10317.000030578 5.0 5.0 7.461178270110556 \n",
"10317.000030579 5.0 5.0 7.461178270110556 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_no_data_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Format frequency information on the foods consumed\n",
"\n",
"NOTE: two entries are unexpectedly duplicated and are removed."
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" consumptionAdjustment | \n",
" created | \n",
" data | \n",
" description | \n",
" foodCode | \n",
" foodGroup | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" survey_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 218 | \n",
" 3.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Enchiladas and tamales | \n",
" 40076 | \n",
" Asian, Mexican and Soy Foods | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 219 | \n",
" 2.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Fresh garlic, including in cooking | \n",
" 30043 | \n",
" Sauces and Seasonings | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 cloves | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 220 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Lard, bacon fat or meat drippings (Fat used in... | \n",
" 10005 | \n",
" Oil or Fat Used in Cooking | \n",
" 1460.0 | \n",
" 4 per day | \n",
" 2 teaspoons | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 221 | \n",
" 1.5 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Meal replacement drinks and shakes such as Sli... | \n",
" 90015 | \n",
" Meal Replacement Drinks, Sports and Granola Bars | \n",
" 365.0 | \n",
" 1 per day | \n",
" 1 1/2 cups (12 oz) | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
" 222 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 2017-07-29T06:56:24.553 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Other candy, such as Lifesavers, licorice and ... | \n",
" 80015 | \n",
" Sweets | \n",
" 104.0 | \n",
" 2 per week | \n",
" 4 pieces hard candy, 10 Life Savers<SUP>®<... | \n",
" 80043f5209506497 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount consumptionAdjustment created \\\n",
"218 3.0 1.0 2017-07-29T06:56:24.553 \n",
"219 2.0 1.0 2017-07-29T06:56:24.553 \n",
"220 1.0 1.0 2017-07-29T06:56:24.553 \n",
"221 1.5 1.0 2017-07-29T06:56:24.553 \n",
"222 1.0 1.0 2017-07-29T06:56:24.553 \n",
"\n",
" data \\\n",
"218 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"219 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"220 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"221 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"222 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"\n",
" description foodCode \\\n",
"218 Enchiladas and tamales 40076 \n",
"219 Fresh garlic, including in cooking 30043 \n",
"220 Lard, bacon fat or meat drippings (Fat used in... 10005 \n",
"221 Meal replacement drinks and shakes such as Sli... 90015 \n",
"222 Other candy, such as Lifesavers, licorice and ... 80015 \n",
"\n",
" foodGroup frequency \\\n",
"218 Asian, Mexican and Soy Foods 286.0 \n",
"219 Sauces and Seasonings 365.0 \n",
"220 Oil or Fat Used in Cooking 1460.0 \n",
"221 Meal Replacement Drinks, Sports and Granola Bars 365.0 \n",
"222 Sweets 104.0 \n",
"\n",
" servingFrequencyText servingSizeText \\\n",
"218 5-6 per week 3 enchiladas or tamales \n",
"219 1 per day 2 cloves \n",
"220 4 per day 2 teaspoons \n",
"221 1 per day 1 1/2 cups (12 oz) \n",
"222 2 per week 4 pieces hard candy, 10 Life Savers®<... \n",
"\n",
" survey_id \n",
"218 80043f5209506497 \n",
"219 80043f5209506497 \n",
"220 80043f5209506497 \n",
"221 80043f5209506497 \n",
"222 80043f5209506497 "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_with_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"847b5821f05f211f\n",
"d95b890fe63c21d0\n"
]
}
],
"source": [
"duplicated = []\n",
"for i, grp in vios_with_data.groupby('survey_id'):\n",
" if len(grp.description) != len(grp.description.unique()):\n",
" print(i)\n",
" for d, dup in grp.groupby('description'):\n",
" if len(dup) > 1:\n",
" duplicated.append(dup)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" consumptionAdjustment | \n",
" created | \n",
" data | \n",
" description | \n",
" foodCode | \n",
" foodGroup | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" survey_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 1016257 | \n",
" 0.5 | \n",
" 1.0 | \n",
" 2018-05-09T15:52:12.827 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/2 cup (4 oz) | \n",
" 847b5821f05f211f | \n",
"
\n",
" \n",
" 1016259 | \n",
" 0.666 | \n",
" 0.5 | \n",
" 2018-05-09T15:52:12.827 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2/3 cup | \n",
" 847b5821f05f211f | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount consumptionAdjustment created \\\n",
"1016257 0.5 1.0 2018-05-09T15:52:12.827 \n",
"1016259 0.666 0.5 2018-05-09T15:52:12.827 \n",
"\n",
" data \\\n",
"1016257 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"1016259 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"\n",
" description foodCode \\\n",
"1016257 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"1016259 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"\n",
" foodGroup frequency servingFrequencyText servingSizeText \\\n",
"1016257 Cereals and Breads 52.0 1 per week 1/2 cup (4 oz) \n",
"1016259 Cereals and Breads 52.0 1 per week 2/3 cup \n",
"\n",
" survey_id \n",
"1016257 847b5821f05f211f \n",
"1016259 847b5821f05f211f "
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicated[0]"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
" consumptionAdjustment | \n",
" created | \n",
" data | \n",
" description | \n",
" foodCode | \n",
" foodGroup | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" survey_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 459488 | \n",
" 0.5 | \n",
" 1.0 | \n",
" 2017-07-29T02:28:12.49 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 104.0 | \n",
" 2-6 per week | \n",
" 1/2 cup (4 oz) | \n",
" d95b890fe63c21d0 | \n",
"
\n",
" \n",
" 459489 | \n",
" 0.666 | \n",
" 1.0 | \n",
" 2017-07-29T02:28:12.49 | \n",
" [{'code': 'acesupot', 'description': 'Acesulfa... | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" 120561 | \n",
" Cereals and Breads | \n",
" 104.0 | \n",
" 2-6 per week | \n",
" 2/3 cup | \n",
" d95b890fe63c21d0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount consumptionAdjustment created \\\n",
"459488 0.5 1.0 2017-07-29T02:28:12.49 \n",
"459489 0.666 1.0 2017-07-29T02:28:12.49 \n",
"\n",
" data \\\n",
"459488 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"459489 [{'code': 'acesupot', 'description': 'Acesulfa... \n",
"\n",
" description foodCode \\\n",
"459488 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"459489 Soy milk, not fortified (Milk on cooked cereal) 120561 \n",
"\n",
" foodGroup frequency servingFrequencyText servingSizeText \\\n",
"459488 Cereals and Breads 104.0 2-6 per week 1/2 cup (4 oz) \n",
"459489 Cereals and Breads 104.0 2-6 per week 2/3 cup \n",
"\n",
" survey_id \n",
"459488 d95b890fe63c21d0 \n",
"459489 d95b890fe63c21d0 "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicated[1]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# manually ignoring the duplicated index values above,\n",
"# otherwise would need to filter by grouped survey ids and that seems\n",
"# excessive for such a small impact (and likely bug on vioscreens side)\n",
"vios_with_data_nodup = vios_with_data.loc[[i for i in vios_with_data.index if i not in [1016259, 459489]]]\n",
"\n",
"# \"frequency\" is almost certainly the number of instances estimated per year.\n",
"# For example, when someone consumes tea once per day, the frequency is 365 whereas\n",
"# once per week it is 52. \n",
"vios_with_data_nodup_pivot = vios_with_data_nodup.pivot(index='survey_id', \n",
" columns='description', \n",
" values='frequency').fillna(0.0)\n",
"\n",
"vios_with_data_nodup_pivot = remap_to_sample_id(sanitize_columns(vios_with_data_nodup_pivot))\n",
"vios_with_data_nodup_pivot.to_csv('vioscreen_foods_consumed_frequency_units_per_year.tsv', sep='\\t',\n",
" index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" All_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meat | \n",
" All_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cooking | \n",
" All_other_fruits | \n",
" All_other_lunch_meat_such_as_bologna__salami_and_Spam | \n",
" All_tea | \n",
" Apples__applesauce_and_pears | \n",
" Apricots___dried | \n",
" Apricots___fresh_or_canned | \n",
" Asian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai | \n",
" ... | \n",
" White_Rice | \n",
" White_breads__including_bagels__rolls_and_English_muffins | \n",
" White_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_cod | \n",
" White_or_rosé_wine | \n",
" Whole_grain_breads__including_bagels_and_rolls | \n",
" Whole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_ | \n",
" Whole_kernel_grains_such_as_brown_rice | \n",
" Winter_squash_such_as_acorn__butternut_and_pumpkin | \n",
" Yams_and_sweet_potatoes | \n",
" Yogurt__all_types_except_frozen | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 0 | \n",
" 182.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 52.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 52.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 28.0 | \n",
" 28.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 52.0 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 1642.0 | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 182.0 | \n",
" 0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 182.0 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 52.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 182.0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 912.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 182.0 | \n",
" 0 | \n",
" 12.0 | \n",
" 24.0 | \n",
" 182.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 28.0 | \n",
" 0 | \n",
" 52.0 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 182.0 | \n",
" 104.0 | \n",
" 0 | \n",
" 0 | \n",
" 912.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 182.0 | \n",
" 0 | \n",
" 12.0 | \n",
" 24.0 | \n",
" 182.0 | \n",
" 182.0 | \n",
" 0 | \n",
" 28.0 | \n",
" 0 | \n",
" 52.0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 260 columns
\n",
"
"
],
"text/plain": [
" survey_id \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c \n",
"10317.000107687 000fc7ac57776579 \n",
"10317.000087663 0012cf765bb386ea \n",
"10317.000030578 001b4c3e1c848f58 \n",
"10317.000030579 001b4c3e1c848f58 \n",
"\n",
" All_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meat \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 52.0 \n",
"10317.000030578 182.0 \n",
"10317.000030579 182.0 \n",
"\n",
" All_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cooking \\\n",
"#SampleID \n",
"10317.000072291 182.0 \n",
"10317.000107687 104.0 \n",
"10317.000087663 0 \n",
"10317.000030578 104.0 \n",
"10317.000030579 104.0 \n",
"\n",
" All_other_fruits \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" All_other_lunch_meat_such_as_bologna__salami_and_Spam All_tea \\\n",
"#SampleID \n",
"10317.000072291 0 0 \n",
"10317.000107687 0 1642.0 \n",
"10317.000087663 0 0 \n",
"10317.000030578 0 912.0 \n",
"10317.000030579 0 912.0 \n",
"\n",
" Apples__applesauce_and_pears Apricots___dried \\\n",
"#SampleID \n",
"10317.000072291 52.0 182.0 \n",
"10317.000107687 52.0 0 \n",
"10317.000087663 52.0 0 \n",
"10317.000030578 182.0 0 \n",
"10317.000030579 182.0 0 \n",
"\n",
" Apricots___fresh_or_canned \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" Asian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" ... White_Rice \\\n",
"#SampleID ... \n",
"10317.000072291 ... 0 \n",
"10317.000107687 ... 182.0 \n",
"10317.000087663 ... 0 \n",
"10317.000030578 ... 182.0 \n",
"10317.000030579 ... 182.0 \n",
"\n",
" White_breads__including_bagels__rolls_and_English_muffins \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" White_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_cod \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 104.0 \n",
"10317.000087663 0 \n",
"10317.000030578 12.0 \n",
"10317.000030579 12.0 \n",
"\n",
" White_or_rosé_wine \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 24.0 \n",
"10317.000030579 24.0 \n",
"\n",
" Whole_grain_breads__including_bagels_and_rolls \\\n",
"#SampleID \n",
"10317.000072291 0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 182.0 \n",
"10317.000030579 182.0 \n",
"\n",
" Whole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_ \\\n",
"#SampleID \n",
"10317.000072291 28.0 \n",
"10317.000107687 104.0 \n",
"10317.000087663 0 \n",
"10317.000030578 182.0 \n",
"10317.000030579 182.0 \n",
"\n",
" Whole_kernel_grains_such_as_brown_rice \\\n",
"#SampleID \n",
"10317.000072291 28.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 0 \n",
"10317.000030579 0 \n",
"\n",
" Winter_squash_such_as_acorn__butternut_and_pumpkin \\\n",
"#SampleID \n",
"10317.000072291 52.0 \n",
"10317.000107687 0 \n",
"10317.000087663 0 \n",
"10317.000030578 28.0 \n",
"10317.000030579 28.0 \n",
"\n",
" Yams_and_sweet_potatoes Yogurt__all_types_except_frozen \n",
"#SampleID \n",
"10317.000072291 0 52.0 \n",
"10317.000107687 0 182.0 \n",
"10317.000087663 0 0 \n",
"10317.000030578 0 52.0 \n",
"10317.000030579 0 52.0 \n",
"\n",
"[5 rows x 260 columns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_with_data_nodup_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Formatting raw food information\n",
"\n",
"This resulting DataFrame is massive at 39M rows at the time of creating this notebook. Each row represents a micro or macronutrients of each food in its corresponding serving size that the person consumed.\n",
"\n",
"Due to the size, this frame is written out compressed (gzip). We are also not remapping to sample ID here as it would replicate a lot of data in what is already a pretty verbose structure.\n",
"\n",
"This DataFrame will be used to get access to the grams information for food items.\n",
"\n",
"**WARNING: the first cell will run for 5-10 minutes**"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"783.0043518543243\n"
]
}
],
"source": [
"# an example of loading the data and what the contents look like.\n",
"# The \"code\" values *should* correspond to the non-null code values\n",
"# in the full raw file\n",
"\n",
"# WARNING: this will run for 5-10 minutes\n",
"items = []\n",
"parsed = {}\n",
"start = time.time()\n",
"for _, row in vios_with_data.iterrows(): \n",
" # we're using a memoization pattern to minimize parsing JSON. \n",
" # Of the ~250,000 rows in vios_with_data, only about 50,000 have a unique \n",
" # \"data\" JSON object. Parsing JSON is relatively expensive and this memoization\n",
" # shaves a pretty large amount of time off.\n",
" datablock = row['data']\n",
" if datablock in parsed:\n",
" data_parsed = parsed[datablock].copy()\n",
" else:\n",
" data_parsed = pd.read_json(datablock.replace(\"'\", '\"'), dtype=False, \n",
" convert_axes=False, convert_dates=False)\n",
" \n",
" parsed[datablock] = data_parsed\n",
" \n",
" # make sure relevant information about the survey etc are retained\n",
" # some is probably extraneous\n",
" for c in ['survey_id', \n",
" 'description', \n",
" 'frequency', \n",
" 'servingFrequencyText', \n",
" 'servingSizeText', \n",
" 'foodCode']:\n",
" data_parsed[c] = row[c]\n",
" items.append(data_parsed)\n",
"meal_data = pd.concat(items, ignore_index=True)\n",
"meal_data.set_index('survey_id', inplace=True)\n",
"print(time.time() - start) "
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" code | \n",
" description | \n",
" units | \n",
" amount | \n",
" valueType | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" foodCode | \n",
"
\n",
" \n",
" survey_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 80043f5209506497 | \n",
" acesupot | \n",
" Enchiladas and tamales | \n",
" mg | \n",
" 0.000000 | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" addsugar | \n",
" Enchiladas and tamales | \n",
" g | \n",
" 0.000000 | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" adsugtot | \n",
" Enchiladas and tamales | \n",
" g | \n",
" 0.000000 | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" alanine | \n",
" Enchiladas and tamales | \n",
" g | \n",
" 1.905583 | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
" 80043f5209506497 | \n",
" alcohol | \n",
" Enchiladas and tamales | \n",
" g | \n",
" 0.000000 | \n",
" Amount | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 3 enchiladas or tamales | \n",
" 40076 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" code description units amount valueType \\\n",
"survey_id \n",
"80043f5209506497 acesupot Enchiladas and tamales mg 0.000000 Amount \n",
"80043f5209506497 addsugar Enchiladas and tamales g 0.000000 Amount \n",
"80043f5209506497 adsugtot Enchiladas and tamales g 0.000000 Amount \n",
"80043f5209506497 alanine Enchiladas and tamales g 1.905583 Amount \n",
"80043f5209506497 alcohol Enchiladas and tamales g 0.000000 Amount \n",
"\n",
" frequency servingFrequencyText servingSizeText \\\n",
"survey_id \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n",
"\n",
" foodCode \n",
"survey_id \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 \n",
"80043f5209506497 40076 "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"meal_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"meal_data.to_csv('vioscreen_expanded_meal_data.tsv.gz', sep='\\t', index=True, header=True,\n",
" compression='gzip')"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(39476142, 9)"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"meal_data.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Format grams information on the foods consumed\n",
"\n",
"NOTE: the same two entries as before that are unexpectedly duplicated and are removed."
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(249849, 10)"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only grams in the meal_data DataFrame and add row number\n",
"meal_data_grams = meal_data.loc[meal_data.code==\"grams\"]\n",
"meal_data_grams = meal_data_grams.reset_index()\n",
"meal_data_grams.shape"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"847b5821f05f211f\n",
"d95b890fe63c21d0\n"
]
}
],
"source": [
"duplicated2 = []\n",
"for i, grp in meal_data_grams.groupby('survey_id'):\n",
" if len(grp.description) != len(grp.description.unique()):\n",
" print(i)\n",
" for d, dup in grp.groupby('description'):\n",
" if len(dup) > 1:\n",
" duplicated2.append(dup)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" code | \n",
" description | \n",
" units | \n",
" amount | \n",
" valueType | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" foodCode | \n",
"
\n",
" \n",
" \n",
" \n",
" 208832 | \n",
" 847b5821f05f211f | \n",
" grams | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" - | \n",
" 17.309589 | \n",
" Amount | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/2 cup (4 oz) | \n",
" 120561 | \n",
"
\n",
" \n",
" 208834 | \n",
" 847b5821f05f211f | \n",
" grams | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" - | \n",
" 11.528186 | \n",
" Amount | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2/3 cup | \n",
" 120561 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id code \\\n",
"208832 847b5821f05f211f grams \n",
"208834 847b5821f05f211f grams \n",
"\n",
" description units amount \\\n",
"208832 Soy milk, not fortified (Milk on cooked cereal) - 17.309589 \n",
"208834 Soy milk, not fortified (Milk on cooked cereal) - 11.528186 \n",
"\n",
" valueType frequency servingFrequencyText servingSizeText foodCode \n",
"208832 Amount 52.0 1 per week 1/2 cup (4 oz) 120561 \n",
"208834 Amount 52.0 1 per week 2/3 cup 120561 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicated2[0]"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" code | \n",
" description | \n",
" units | \n",
" amount | \n",
" valueType | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" foodCode | \n",
"
\n",
" \n",
" \n",
" \n",
" 94713 | \n",
" d95b890fe63c21d0 | \n",
" grams | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" - | \n",
" 34.619178 | \n",
" Amount | \n",
" 104.0 | \n",
" 2-6 per week | \n",
" 1/2 cup (4 oz) | \n",
" 120561 | \n",
"
\n",
" \n",
" 94714 | \n",
" d95b890fe63c21d0 | \n",
" grams | \n",
" Soy milk, not fortified (Milk on cooked cereal) | \n",
" - | \n",
" 46.112745 | \n",
" Amount | \n",
" 104.0 | \n",
" 2-6 per week | \n",
" 2/3 cup | \n",
" 120561 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survey_id code \\\n",
"94713 d95b890fe63c21d0 grams \n",
"94714 d95b890fe63c21d0 grams \n",
"\n",
" description units amount \\\n",
"94713 Soy milk, not fortified (Milk on cooked cereal) - 34.619178 \n",
"94714 Soy milk, not fortified (Milk on cooked cereal) - 46.112745 \n",
"\n",
" valueType frequency servingFrequencyText servingSizeText foodCode \n",
"94713 Amount 104.0 2-6 per week 1/2 cup (4 oz) 120561 \n",
"94714 Amount 104.0 2-6 per week 2/3 cup 120561 "
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicated2[1]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"# manually ignoring the duplicated Row_nb values above,\n",
"# otherwise would need to filter by grouped survey ids and that seems\n",
"# excessive for such a small impact (and likely bug on vioscreens side)\n",
"meal_data_grams_nodup = meal_data_grams.loc[[i for i in meal_data_grams.index if i not in [208834, 94714]]]\n",
"\n",
"# \"amount\" is the estimated quantity in grams, almost certainly per day.\n",
"meal_data_grams_nodup_pivot = meal_data_grams_nodup.pivot(index='survey_id', \n",
" columns='description', \n",
" values='amount').fillna(0.0)\n",
"\n",
"meal_data_grams_nodup_pivot = remap_to_sample_id(sanitize_columns(meal_data_grams_nodup_pivot))\n",
"meal_data_grams_nodup_pivot.to_csv('vioscreen_foods_consumed_grams_per_day.tsv', sep='\\t',\n",
" index=True, header=True)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survey_id | \n",
" All_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meat | \n",
" All_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cooking | \n",
" All_other_fruits | \n",
" All_other_lunch_meat_such_as_bologna__salami_and_Spam | \n",
" All_tea | \n",
" Apples__applesauce_and_pears | \n",
" Apricots___dried | \n",
" Apricots___fresh_or_canned | \n",
" Asian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai | \n",
" ... | \n",
" White_Rice | \n",
" White_breads__including_bagels__rolls_and_English_muffins | \n",
" White_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_cod | \n",
" White_or_rosé_wine | \n",
" Whole_grain_breads__including_bagels_and_rolls | \n",
" Whole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_ | \n",
" Whole_kernel_grains_such_as_brown_rice | \n",
" Winter_squash_such_as_acorn__butternut_and_pumpkin | \n",
" Yams_and_sweet_potatoes | \n",
" Yogurt__all_types_except_frozen | \n",
"
\n",
" \n",
" #SampleID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 10317.000072291 | \n",
" 0000904d9779a86c | \n",
" 0.000000 | \n",
" 28.272329 | \n",
" 10.601222 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 10.191718 | \n",
" 5.189409 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 27.262121 | \n",
" 50.261916 | \n",
" 0.000000 | \n",
" 2.174795 | \n",
" 7.479489 | \n",
" 32.544828 | \n",
" 0.0 | \n",
" 26.178082 | \n",
"
\n",
" \n",
" 10317.000107687 | \n",
" 000fc7ac57776579 | \n",
" 0.000000 | \n",
" 16.155617 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 1597.913445 | \n",
" 35.988632 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 59.087671 | \n",
" 0.0 | \n",
" 54.524242 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 16.155617 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 91.623288 | \n",
"
\n",
" \n",
" 10317.000087663 | \n",
" 0012cf765bb386ea | \n",
" 15.172959 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 5.141233 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 10317.000030578 | \n",
" 001b4c3e1c848f58 | \n",
" 79.658036 | \n",
" 8.077808 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 887.513436 | \n",
" 35.988632 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 39.391978 | \n",
" 0.0 | \n",
" 4.194172 | \n",
" 11.598904 | \n",
" 14.460274 | \n",
" 14.136165 | \n",
" 0.000000 | \n",
" 15.565084 | \n",
" 0.0 | \n",
" 26.178082 | \n",
"
\n",
" \n",
" 10317.000030579 | \n",
" 001b4c3e1c848f58 | \n",
" 79.658036 | \n",
" 8.077808 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 887.513436 | \n",
" 35.988632 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 39.391978 | \n",
" 0.0 | \n",
" 4.194172 | \n",
" 11.598904 | \n",
" 14.460274 | \n",
" 14.136165 | \n",
" 0.000000 | \n",
" 15.565084 | \n",
" 0.0 | \n",
" 26.178082 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 260 columns
\n",
"
"
],
"text/plain": [
" survey_id \\\n",
"#SampleID \n",
"10317.000072291 0000904d9779a86c \n",
"10317.000107687 000fc7ac57776579 \n",
"10317.000087663 0012cf765bb386ea \n",
"10317.000030578 001b4c3e1c848f58 \n",
"10317.000030579 001b4c3e1c848f58 \n",
"\n",
" All_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meat \\\n",
"#SampleID \n",
"10317.000072291 0.000000 \n",
"10317.000107687 0.000000 \n",
"10317.000087663 15.172959 \n",
"10317.000030578 79.658036 \n",
"10317.000030579 79.658036 \n",
"\n",
" All_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cooking \\\n",
"#SampleID \n",
"10317.000072291 28.272329 \n",
"10317.000107687 16.155617 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 8.077808 \n",
"10317.000030579 8.077808 \n",
"\n",
" All_other_fruits \\\n",
"#SampleID \n",
"10317.000072291 10.601222 \n",
"10317.000107687 0.000000 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 0.000000 \n",
"10317.000030579 0.000000 \n",
"\n",
" All_other_lunch_meat_such_as_bologna__salami_and_Spam \\\n",
"#SampleID \n",
"10317.000072291 0.0 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 0.0 \n",
"10317.000030579 0.0 \n",
"\n",
" All_tea Apples__applesauce_and_pears Apricots___dried \\\n",
"#SampleID \n",
"10317.000072291 0.000000 10.191718 5.189409 \n",
"10317.000107687 1597.913445 35.988632 0.000000 \n",
"10317.000087663 0.000000 5.141233 0.000000 \n",
"10317.000030578 887.513436 35.988632 0.000000 \n",
"10317.000030579 887.513436 35.988632 0.000000 \n",
"\n",
" Apricots___fresh_or_canned \\\n",
"#SampleID \n",
"10317.000072291 0.0 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 0.0 \n",
"10317.000030579 0.0 \n",
"\n",
" Asian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai \\\n",
"#SampleID \n",
"10317.000072291 0.0 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 0.0 \n",
"10317.000030579 0.0 \n",
"\n",
" ... White_Rice \\\n",
"#SampleID ... \n",
"10317.000072291 ... 0.000000 \n",
"10317.000107687 ... 59.087671 \n",
"10317.000087663 ... 0.000000 \n",
"10317.000030578 ... 39.391978 \n",
"10317.000030579 ... 39.391978 \n",
"\n",
" White_breads__including_bagels__rolls_and_English_muffins \\\n",
"#SampleID \n",
"10317.000072291 0.0 \n",
"10317.000107687 0.0 \n",
"10317.000087663 0.0 \n",
"10317.000030578 0.0 \n",
"10317.000030579 0.0 \n",
"\n",
" White_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_cod \\\n",
"#SampleID \n",
"10317.000072291 27.262121 \n",
"10317.000107687 54.524242 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 4.194172 \n",
"10317.000030579 4.194172 \n",
"\n",
" White_or_rosé_wine \\\n",
"#SampleID \n",
"10317.000072291 50.261916 \n",
"10317.000107687 0.000000 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 11.598904 \n",
"10317.000030579 11.598904 \n",
"\n",
" Whole_grain_breads__including_bagels_and_rolls \\\n",
"#SampleID \n",
"10317.000072291 0.000000 \n",
"10317.000107687 0.000000 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 14.460274 \n",
"10317.000030579 14.460274 \n",
"\n",
" Whole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_ \\\n",
"#SampleID \n",
"10317.000072291 2.174795 \n",
"10317.000107687 16.155617 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 14.136165 \n",
"10317.000030579 14.136165 \n",
"\n",
" Whole_kernel_grains_such_as_brown_rice \\\n",
"#SampleID \n",
"10317.000072291 7.479489 \n",
"10317.000107687 0.000000 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 0.000000 \n",
"10317.000030579 0.000000 \n",
"\n",
" Winter_squash_such_as_acorn__butternut_and_pumpkin \\\n",
"#SampleID \n",
"10317.000072291 32.544828 \n",
"10317.000107687 0.000000 \n",
"10317.000087663 0.000000 \n",
"10317.000030578 15.565084 \n",
"10317.000030579 15.565084 \n",
"\n",
" Yams_and_sweet_potatoes Yogurt__all_types_except_frozen \n",
"#SampleID \n",
"10317.000072291 0.0 26.178082 \n",
"10317.000107687 0.0 91.623288 \n",
"10317.000087663 0.0 0.000000 \n",
"10317.000030578 0.0 26.178082 \n",
"10317.000030579 0.0 26.178082 \n",
"\n",
"[5 rows x 260 columns]"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"meal_data_grams_nodup_pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Round trip test\n",
"\n",
"Note the codes in the meal data do not have a 1-1 correspondence with the micromacro frame. In that frame, we used the description field which is more human readable than the code.\n",
"\n",
"It also appears that some codes like GLAC are not obviously represented in the micromacro frame. Similarly, there are some entities like animal protein that do not appear in the meal data frame so that may be inferred by vioscreen elsewhere. "
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"test_survey = '001b4c3e1c848f58'\n",
"test = meal_data.loc[test_survey]"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" code | \n",
" description | \n",
" units | \n",
" amount | \n",
" valueType | \n",
" frequency | \n",
" servingFrequencyText | \n",
" servingSizeText | \n",
" foodCode | \n",
"
\n",
" \n",
" survey_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" All other cheese, such as American, cheddar or... | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 1 slice (1 oz), 1/4 cup shredded, 2 tablespoon... | \n",
" 70005 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Apples, applesauce and pears | \n",
" unit | \n",
" 1.910150 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 apple or pear, 1/4 cup | \n",
" 20001 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Avocado and guacamole | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/2 avocado, 1/2 cup | \n",
" 30006 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Bananas | \n",
" unit | \n",
" 3.096284 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 banana | \n",
" 20002 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Beef, pork, ham and lamb - with fat | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 3 slices, 2 chops, small steak (4-5 oz) | \n",
" 40002 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Beer (all types) | \n",
" unit | \n",
" 3.568904 | \n",
" Index | \n",
" 52.0 | \n",
" 1 day per week | \n",
" 16 oz (1 pint) | \n",
" 90002 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Broccoli | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/4 cup | \n",
" 30012 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Burritos, tacos, tostadas and quesadillas | \n",
" unit | \n",
" 1.437427 | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 1 taco or tostada, 1 small burrito, 1 quesadilla | \n",
" 40202 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Butter (Cereals and Breads) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 4 teaspoons (4 pats) | \n",
" 10004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Carrots - cooked | \n",
" unit | \n",
" 0.930636 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 cup | \n",
" 30041 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Carrots - raw | \n",
" unit | \n",
" 0.164987 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 carrot, 4 baby carrots, 1/4 cup | \n",
" 30040 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Cauliflower, cabbage and Brussels sprouts | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/4 cup | \n",
" 30018 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Chocolate, candy bars, and toffee | \n",
" unit | \n",
" 1.960348 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 regular bar, 8-10 bite sized-pieces (1.5 oz) | \n",
" 80008 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Coffee (not lattes or mochas) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 912.0 | \n",
" 2-3 per day | \n",
" 1 small cup (6 oz) | \n",
" 90008 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Coleslaw | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 3/4 cup | \n",
" 30017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Cookies and cakes - regular | \n",
" unit | \n",
" 2.233787 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2 medium cookies, 1 large cookie, 1 small piec... | \n",
" 80005 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Whole grain breads, including bagels and rolls | \n",
" unit | \n",
" 4.057757 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 2 slices bread, 1 medium roll, 1 English muffi... | \n",
" 120413 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Eggs | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 2 eggs | \n",
" 50004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Fresh garlic, including in cooking | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 1 clove | \n",
" 30043 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Fresh tomatoes | \n",
" unit | \n",
" 0.544489 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 4 slices, 1 medium tomato | \n",
" 30008 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Grapes, fresh | \n",
" unit | \n",
" 0.851028 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 8-10 grapes, 1/2 cup | \n",
" 20024 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Green or string beans | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/4 cup | \n",
" 30001 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Green peppers and green chilies, cooked | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/8 cup | \n",
" 30054 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Green salad (Lettuce or spinach) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 1 cup (medium bowl) | \n",
" 30021 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Latte (whole milk), mocha or hot chocolate | \n",
" unit | \n",
" 0.711497 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 regular cup (8-10 oz) | \n",
" 90007 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Lunch meats such as ham, turkey and lowfat bol... | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 2 slices (2 oz) | \n",
" 40028 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Muffins, scones, croissants and biscuits | \n",
" unit | \n",
" 1.636527 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" Small muffin, small croissant, small biscuit | \n",
" 60028 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Oil, canola (Fat used in cooking) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10011 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Oil, olive (Cereals and Breads) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 4 teaspoons (4 pats) | \n",
" 10010 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Oil, other such as corn, soybean, safflower or... | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10007 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Red peppers and red chilies, cooked | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/8 cup | \n",
" 30057 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Red peppers and red chilies, raw | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1/4 pepper, 4-6 strips, 1/4 cup | \n",
" 30056 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Red Wine | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 104.0 | \n",
" 2 days per week | \n",
" 4 oz (about 1/8th bottle) | \n",
" 90018 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Regular potato, tortilla chips, corn chips and... | \n",
" unit | \n",
" 0.702932 | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 2 handfuls, 1 small bag (1 oz) | \n",
" 60031 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Spaghetti, lasagna and other pasta with tomato... | \n",
" unit | \n",
" 0.801028 | \n",
" Index | \n",
" 12.0 | \n",
" 1 per month | \n",
" 1 cup (medium bowl) | \n",
" 40017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Stew, pot pie, curries and casseroles with mea... | \n",
" unit | \n",
" 0.252476 | \n",
" Index | \n",
" 12.0 | \n",
" 1 per month | \n",
" 1 cup (medium bowl) | \n",
" 40004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Summer squash and zucchini | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/4 cup | \n",
" 30046 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" All tea | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 912.0 | \n",
" 2-3 per day | \n",
" 1 large glass (12 oz) | \n",
" 90009 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Vegetable, minestrone and tomato soup | \n",
" unit | \n",
" 1.538358 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 1/2 cups (large bowl) | \n",
" 40033 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Water (tap or bottled) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 1642.0 | \n",
" 4-5 per day | \n",
" 1 regular glass (8 oz) | \n",
" 90017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" White fish (broiled or baked) such as sole, ha... | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 12.0 | \n",
" 1 per month | \n",
" 1 medium piece (4-5 oz) | \n",
" 40073 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" White or rosé wine | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 24.0 | \n",
" 2-3 days per month | \n",
" 6 oz (about 1/6th bottle) | \n",
" 90019 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Winter squash such as acorn, butternut and pum... | \n",
" unit | \n",
" 0.665620 | \n",
" Index | \n",
" 28.0 | \n",
" 2-3 per month | \n",
" 3/4 cup | \n",
" 30047 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Yogurt, all types except frozen | \n",
" unit | \n",
" 1.336993 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 3/4 (6 oz) (regular container) | \n",
" 70017 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Butter (Fat used in cooking) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10004 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Oil, olive (Fat used in cooking) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 365.0 | \n",
" 1 per day | \n",
" 2 teaspoons | \n",
" 10010 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Oil, olive (Fats used on vegetables) | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 730.0 | \n",
" 2 per day | \n",
" 2 teaspoons | \n",
" 10010 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Salt | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 1095.0 | \n",
" 3 per day | \n",
" 21-40 shakes (1/4 teaspoon) | \n",
" 10100 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Veggie soy or tofu burgers or ground meat subs... | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 1 burger or 3 oz | \n",
" 120191 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Tofu | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 104.0 | \n",
" 2 per week | \n",
" 4-5 pieces (4 oz) | \n",
" 120206 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Tempeh | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 24.0 | \n",
" 1-3 per month | \n",
" 3 pieces (3 oz) | \n",
" 120207 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Soy sauce, tamari, teriyaki sauce, Szechwan sa... | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2 teaspoons | \n",
" 120198 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Corn tortillas | \n",
" unit | \n",
" 1.391782 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 2 (6\" diameter) | \n",
" 120123 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" White Rice | \n",
" unit | \n",
" 7.016067 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 1/2 cup | \n",
" 120414 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Cooked greens, such as spinach, swiss chard an... | \n",
" unit | \n",
" 0.000000 | \n",
" Index | \n",
" 52.0 | \n",
" 1 per week | \n",
" 3/4 cup | \n",
" 120438 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Whole grain breads, including bagels and rolls... | \n",
" unit | \n",
" 3.030604 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 2 slices bread, 1 medium roll, 1 English muffi... | \n",
" 120412 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Cooked whole grain cereals | \n",
" unit | \n",
" 5.485402 | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 1/2 cup (small bowl) | \n",
" 120410 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" All other beans such as baked beans, lima bean... | \n",
" unit | \n",
" 3.923203 | \n",
" Index | \n",
" 182.0 | \n",
" 3-4 per week | \n",
" 3/4 cup | \n",
" 30044 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Soy milk, fortified (Milk in cooked cereal) | \n",
" unit | \n",
" 1.430008 | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 2/3 cup | \n",
" 120560 | \n",
"
\n",
" \n",
" 001b4c3e1c848f58 | \n",
" GLAC | \n",
" Rice milk (Milk in cooked cereal) | \n",
" unit | \n",
" 4.556126 | \n",
" Index | \n",
" 286.0 | \n",
" 5-6 per week | \n",
" 2/3 cup | \n",
" 70018 | \n",
"
\n",
" \n",
"
\n",
"
64 rows × 9 columns
\n",
"
"
],
"text/plain": [
" code description \\\n",
"survey_id \n",
"001b4c3e1c848f58 GLAC All other cheese, such as American, cheddar or... \n",
"001b4c3e1c848f58 GLAC Apples, applesauce and pears \n",
"001b4c3e1c848f58 GLAC Avocado and guacamole \n",
"001b4c3e1c848f58 GLAC Bananas \n",
"001b4c3e1c848f58 GLAC Beef, pork, ham and lamb - with fat \n",
"... ... ... \n",
"001b4c3e1c848f58 GLAC Whole grain breads, including bagels and rolls... \n",
"001b4c3e1c848f58 GLAC Cooked whole grain cereals \n",
"001b4c3e1c848f58 GLAC All other beans such as baked beans, lima bean... \n",
"001b4c3e1c848f58 GLAC Soy milk, fortified (Milk in cooked cereal) \n",
"001b4c3e1c848f58 GLAC Rice milk (Milk in cooked cereal) \n",
"\n",
" units amount valueType frequency servingFrequencyText \\\n",
"survey_id \n",
"001b4c3e1c848f58 unit 0.000000 Index 104.0 2 per week \n",
"001b4c3e1c848f58 unit 1.910150 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 unit 0.000000 Index 52.0 1 per week \n",
"001b4c3e1c848f58 unit 3.096284 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 unit 0.000000 Index 28.0 2-3 per month \n",
"... ... ... ... ... ... \n",
"001b4c3e1c848f58 unit 3.030604 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 unit 5.485402 Index 286.0 5-6 per week \n",
"001b4c3e1c848f58 unit 3.923203 Index 182.0 3-4 per week \n",
"001b4c3e1c848f58 unit 1.430008 Index 286.0 5-6 per week \n",
"001b4c3e1c848f58 unit 4.556126 Index 286.0 5-6 per week \n",
"\n",
" servingSizeText foodCode \n",
"survey_id \n",
"001b4c3e1c848f58 1 slice (1 oz), 1/4 cup shredded, 2 tablespoon... 70005 \n",
"001b4c3e1c848f58 1/2 apple or pear, 1/4 cup 20001 \n",
"001b4c3e1c848f58 1/2 avocado, 1/2 cup 30006 \n",
"001b4c3e1c848f58 1/2 banana 20002 \n",
"001b4c3e1c848f58 3 slices, 2 chops, small steak (4-5 oz) 40002 \n",
"... ... ... \n",
"001b4c3e1c848f58 2 slices bread, 1 medium roll, 1 English muffi... 120412 \n",
"001b4c3e1c848f58 1/2 cup (small bowl) 120410 \n",
"001b4c3e1c848f58 3/4 cup 30044 \n",
"001b4c3e1c848f58 2/3 cup 120560 \n",
"001b4c3e1c848f58 2/3 cup 70018 \n",
"\n",
"[64 rows x 9 columns]"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test[test.code == 'GLAC']"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" amount | \n",
"
\n",
" \n",
" code | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" GLAC | \n",
" 62.397260 | \n",
"
\n",
" \n",
" GLTC | \n",
" 69.876978 | \n",
"
\n",
" \n",
" LineGi | \n",
" 336.912520 | \n",
"
\n",
" \n",
" acesupot | \n",
" 0.000000 | \n",
"
\n",
" \n",
" addsugar | \n",
" 25.437240 | \n",
"
\n",
" \n",
" adsugtot | \n",
" 20.476220 | \n",
"
\n",
" \n",
" alanine | \n",
" 2.147886 | \n",
"
\n",
" \n",
" alcohol | \n",
" 17.794446 | \n",
"
\n",
" \n",
" alphacar | \n",
" 2840.690441 | \n",
"
\n",
" \n",
" alphtoce | \n",
" 12.646316 | \n",
"
\n",
" \n",
" alphtoco | \n",
" 11.345113 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" amount\n",
"code \n",
"GLAC 62.397260\n",
"GLTC 69.876978\n",
"LineGi 336.912520\n",
"acesupot 0.000000\n",
"addsugar 25.437240\n",
"adsugtot 20.476220\n",
"alanine 2.147886\n",
"alcohol 17.794446\n",
"alphacar 2840.690441\n",
"alphtoce 12.646316\n",
"alphtoco 11.345113"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test.groupby('code').agg('sum').head(11)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"survey_id 001b4c3e1c848f58\n",
"3_Methylhistidine_in_mg 2.60684169804397\n",
"Acesulfame_Potassium_in_mg 0.0\n",
"Added_Sugars__by_Available_Carbohydrate__in_g 25.4372404710912\n",
"Added_Sugars__by_Total_Sugars__in_g 20.4762200586827\n",
"Alanine_in_g 2.14788575606249\n",
"Alcohol_in_g 17.7944461190833\n",
"Alpha_Carotene__provitamin_A_carotenoid__in_mcg 2840.69044111298\n",
"Alpha_Tocopherol_in_mg 11.3451129336108\n",
"Animal_Protein_in_g 16.6596165030072\n",
"Arginine_in_g 2.87293720209445\n",
"Name: 10317.000030578, dtype: object"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_micromacro_pivot[vios_coded_micromacro_pivot.survey_id == test_survey].iloc[0].head(11)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"survey_id 001b4c3e1c848f58\n",
"Percent_of_calories_from_Added_Sugar 0.864436614273603\n",
"Percent_of_calories_from_Alcohol 7.643624875670007\n",
"Percent_of_calories_from_Carbohydrate 38.41905488083203\n",
"Percent_of_calories_from_Fat 41.82753132412285\n",
"Percent_of_calories_from_Monounsaturated_Fat 21.020284676343078\n",
"Percent_of_calories_from_Polyunsaturated_Fat 8.6645383097221\n",
"Percent_of_calories_from_Protein 12.109788919375113\n",
"Percent_of_calories_from_Saturated_Fat 11.298838478082244\n",
"Name: 10317.000030578, dtype: object"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vios_coded_percents_pivot[vios_coded_percents_pivot.survey_id == test_survey].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}