Find Average Food Intake (Food Groups) by Age
Published in
6 min read · Mar 2, 2021
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import os
# Folder that holds the per-food-group, per-gender intake CSV files.
data_folder = './csvdietfiles/'
# os.listdir() returns entries in arbitrary order; sort them so the list
# (and anything built from it) is the same on every run.
diet_files = sorted(os.listdir(data_folder))
# Notebook output of `diet_files`:
# ['dairy_intakes.txt_Female.csv',
#  'dairy_intakes.txt_Male.csv',
#  'dark_green_vegetables_subgroup.txt_Female.csv',
#  'dark_green_vegetables_subgroup.txt_Male.csv',
#  'fruits_intake.txt_Female.csv',
#  'fruits_intake.txt_Male.csv',
#  'grains_intake.txt_Female.csv',
#  'grains_intake.txt_Male.csv',
#  'legumes_beans_and_peas_subgroup.txt_Female.csv',
#  'legumes_beans_and_peas_subgroup.txt_Male.csv',
#  'meat_poultry_and_eggs_subgroup.txt_Female.csv',
#  'meat_poultry_and_eggs_subgroup.txt_Male.csv',
#  'nuts_seeds_and_soy_products_subgroup.txt_Female.csv',
#  'nuts_seeds_and_soy_products_subgroup.txt_Male.csv',
#  'other_vegetables_subgroup.txt_Female.csv',
#  'other_vegetables_subgroup.txt_Male.csv',
#  'protein_intake.txt_Female.csv',
#  'protein_intake.txt_Male.csv',
#  'red_and_orange_vegetables_subgroup.txt_Female.csv',
#  'red_and_orange_vegetables_subgroup.txt_Male.csv',
#  'seafood_subgroup.txt_Female.csv',
#  'seafood_subgroup.txt_Male.csv',
#  'starchy_vegetables_subgroup.txt_Female.csv',
#  'starchy_vegetables_subgroup.txt_Male.csv',
#  'vegetable_intake.txt_Female.csv',
#  'vegetable_intake.txt_Male.csv']


def write_to_file(filename, data):
    """Write intake rows to ``filename`` as CSV text and echo the text.

    The output starts with a fixed header line naming five columns
    (age_from, age_to, average_intake, recommended_intake_low,
    recommended_intake_high), followed by one comma-separated line per row.

    A row with exactly four values gets ``120`` inserted as its second
    field, i.e. under the ``age_to`` column.
    NOTE(review): presumably such rows are open-ended age groups that lack
    an upper bound -- confirm against the data.

    Args:
        filename: Path of the file to create or overwrite.
        data: Iterable of rows; every value of a row is converted with
            ``str()`` before it is written.
    """
    header = 'age_from, age_to, average_intake, recommended_intake_low, recommended_intake_high\n'
    lines = [header]
    for row in data:
        fields = [str(value) for value in row]
        if len(fields) == 4:
            # Fill in the missing second column (see docstring).
            fields.insert(1, '120')
        lines.append(','.join(fields) + '\n')

    file_data = ''.join(lines)
    print(file_data)
    # The context manager closes the file even if the write fails.
    with open(filename, 'w') as out_file:
        out_file.write(file_data)


#print('Select a health measure/aspect to visualize\n')
# create the interactive interface
def f(diet_file):
    """Return the chosen file name unchanged (callback for the widget)."""
    return diet_file


print('Select a measure:')
# Build a selection widget over `diet_files`; the current choice is read
# back below through `diet_file.result`.
diet_file = interactive(f, diet_file=diet_files)
display(diet_file)
# Output:
# Select a measure:
# interactive(children=(Dropdown(description='diet_file', options=('dairy_intakes.txt_Female.csv', 'dairy_intake…

path = data_folder + diet_file.result
print ('Selected: ' + path)
# Pair the selection with the female file of the same food group instead of
# a hard-coded file name, so both inputs always describe the same measure.
# If a "_Female" file is selected, both paths point at that same file.
path_f = data_folder + diet_file.result.replace('_Male.csv', '_Female.csv')
print ('Selected: ' + path_f)
# Output:
# Selected: ./csvdietfiles/starchy_vegetables_subgroup.txt_Male.csv
# Selected: ./csvdietfiles/starchy_vegetables_subgroup.txt_Female.csv

import pandas as pd

df = pd.read_csv(path)
df_f = pd.read_csv(path_f)
# A bare expression is only rendered when it is the last one in a notebook
# cell, so show both previews explicitly.
display(df.head())
display(df_f.head())
import os.path
import re

# Expand each age-range row of the two selected intake files (`path` and
# `path_f`) into one output row per single year of age. Each output row is
# "age,age,low,high", where low/high are the mean of the two files'
# recommended-intake columns (fields 3 and 4), rounded to 2 decimals and
# then multiplied by `intake_scale`.

# Both inputs are read in lock-step, so both must exist before continuing.
if not (os.path.exists(path) and os.path.exists(path_f)):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

csv_diet_folder = './agescsvdietfiles/subgroup/'
file_name = csv_diet_folder + diet_file.result
print(file_name)

# NOTE(review): the meaning of the factor 150 is not stated anywhere in this
# file -- confirm the intended unit conversion.
intake_scale = 150

# The context manager closes all three files even if a row fails to parse.
with open(path, 'r') as file, open(path_f, 'r') as file_f, \
        open(file_name, 'w') as file_write:
    # Skip the header line of both inputs.
    next(file, None)
    next(file_f, None)
    # zip() stops at the shorter input, so a missing partner row can never
    # be parsed as an empty line.
    for line, line_f in zip(file, file_f):
        if not line.strip() or not line_f.strip():
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        s = line.split(',')
        s_f = line_f.split(',')
        age_from = int(s[0])
        age_to = int(s[1])
        recom_low = float(s[3])
        recom_high = float(s[4])
        recom_low_f = float(s_f[3])
        recom_high_f = float(s_f[4])
        # The averages do not depend on the individual age, so compute them
        # once per input row.
        recom_l = round((recom_low + recom_low_f) / 2, 2) * intake_scale
        recom_h = round((recom_high + recom_high_f) / 2, 2) * intake_scale
        for a in range(age_from, age_to + 1):
            d = str(a) + ',' + str(a) + ',' + str(recom_l) + ',' + str(recom_h) + '\n'
            file_write.write(d)
# Output:
# ./agescsvdietfiles/subgroup/starchy_vegetables_subgroup.txt_Male.csv

### process_mortality_data
data_folder = './process_mortality_data/'
# os.listdir() returns entries in arbitrary order; sort them so the list is
# stable. The listing gets its own name so that `mortality_file` below only
# ever refers to the selection widget.
mortality_files = sorted(os.listdir(data_folder))
# Notebook output of the listing (recorded from the original, unsorted run):
# ['hellomortality_h_1_Total patient deaths_ ESRD patients.csv',
#  'hellomortality_h_1_Total patient deaths_ ESRD_patients.csv',
#  'mortality_h_1_Total patient deaths_ ESRD patients.xlsx',
#  'mortality_h_1_Total patient deaths_ ESRD_patients.csv',
#  '_hellomortality_h_1_Total patient deaths_ ESRD_patients.csv']

#print('Select a health measure/aspect to visualize\n')
# create the interactive interface
def f(mortality_file):
    """Return the chosen file name unchanged (callback for the widget)."""
    return mortality_file


print('Select a measure:')
# The current choice is read back below through `mortality_file.result`.
mortality_file = interactive(f, mortality_file=mortality_files)
display(mortality_file)
# Output:
# Select a measure:
# interactive(children=(Dropdown(description='mortality_file', options=('hellomortality_h_1_Total patient deaths…

path = data_folder + mortality_file.result
print ('Selected: ' + path)
# Output:
# Selected: ./process_mortality_data/mortality_h_1_Total patient deaths_ ESRD_patients.csv

import os.path
import re

# Expand each age-group row of the selected mortality file (`path`) into one
# output row per single year of age: "age,age,value", where value is field 3
# of the input row divided by `ages_per_group`.

# open the file for reading
if not os.path.exists(path):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

csv_diet_folder = './process_mortality_data/output/'
# Name the output after the selected *mortality* file. The previous code
# used diet_file.result here, which does not match the recorded output path
# shown at the end of this block.
file_name = csv_diet_folder + mortality_file.result
print(file_name)

# NOTE(review): every group's value is divided by 5, including open-ended
# "N+" groups that are expanded up to age 120 below -- confirm that this is
# the intended divisor for those groups.
ages_per_group = 5

# The context manager closes both files even if a row fails to parse.
with open(path, 'r') as file, open(file_name, 'w') as file_write:
    # Skip the header line.
    next(file, None)
    for line in file:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        s_l = line.split(',')
        print(s_l)
        s = s_l[0].split('-')
        try:
            age_from = int(s[0])
            age_to = int(s[1])
        except (ValueError, IndexError):
            # The first field is not "from-to": read it as "N+" and cap the
            # range at age 120.
            age_from = int(s[0].replace('+', ''))
            age_to = 120
        # divide by the number of ages
        recom_low = float(s_l[3]) / ages_per_group
        for a in range(age_from, age_to + 1):
            print(a, a, recom_low)
            d = str(a) + ',' + str(a) + ',' + str(recom_low) + '\n'
            file_write.write(d)
# Output:
# ./process_mortality_data/output/mortality_h_1_Total patient deaths_ ESRD_patients.csv
References:
Ignore the code below:
# list to store the found dates, hashes, and events (named accordingly)
# interpretation of the problem: three different outputs
date_list = []
hash_list = []
event_list = []

# assumption: dates are in a consistent format such as yyyy-mm-dd
# times are ignored; they are assumed not to be part of the date output
# year: 4 digits, each 0 to 9 (9999 is the maximum year allowed)
# month: always 2 digits, 01 to 12 (the previous pattern [01][1-2] only
#        matched 01, 02, 11 and 12)
# day: 2 digits, first digit 0 to 3, second digit 0 to 9
date_pattern = re.compile(r'[0-9]{4}-(?:0[1-9]|1[0-2])-[0-3][0-9]')
token_pattern = re.compile(r'[a-zA-Z0-9]+')

# NOTE(review): `file` is not opened in this block; it must already be an
# open, readable handle on the log file when this code runs -- confirm.
for line in file:
    fields = line.split()
    if len(fields) < 7:
        # Blank or short lines lack the date/hash/event fields read below.
        continue
    # date: first whitespace-separated field
    date_list.append(date_pattern.findall(fields[0]))
    # hash part: second field
    hash_list.append(token_pattern.findall(fields[1]))
    # event part: seventh field
    event_list.append(token_pattern.findall(fields[6]))
# close the file
file.close()
#print the list with all dates
print('All dates found')
print(date_list)
print('\nAll hashes found')
print(hash_list)
print('\nAll events found')
print(event_list)
#print('\nFile content for verification')
#file = open('logs.txt','r')
#print(file.read())
# reference
# https://docs.python.org/2/library/os.path.html

# Output:
# All dates found
# []
#
# All hashes found
# []
#
# All events found
# []

# 1.3 method 2 : output line by line
import os.path
import re

# open the file for reading
path = 'logs.txt'
if not os.path.exists(path):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

# list to store the found dates, hashes, and events (named accordingly)
# interpretation of the problem: three different outputs
date_list = []
hash_list = []
event_list = []
# one [date, hash, event] entry per fully parsed log line
line_by_line = []

# assumption: dates are in a consistent format such as yyyy-mm-dd
# times are ignored; they are assumed not to be part of the date output
# year: 4 digits, each 0 to 9 (9999 is the maximum year allowed)
# month: always 2 digits, 01 to 12 (the previous pattern [01][1-2] only
#        matched 01, 02, 11 and 12)
# day: 2 digits, first digit 0 to 3, second digit 0 to 9
date_pattern = re.compile(r'[0-9]{4}-(?:0[1-9]|1[0-2])-[0-3][0-9]')
token_pattern = re.compile(r'[a-zA-Z0-9]+')

# The context manager closes the file even if a line fails to parse.
with open(path, 'r') as file:
    for line in file:
        fields = line.split()
        if len(fields) < 7:
            # Blank or short lines lack the date/hash/event fields.
            continue
        date_part = date_pattern.findall(fields[0])
        hash_tokens = token_pattern.findall(fields[1])
        event_tokens = token_pattern.findall(fields[6])
        date_list.append(date_part)
        hash_list.append(hash_tokens)
        event_list.append(event_tokens)
        # Only emit a combined row when all three parts were found;
        # indexing an empty match list would raise IndexError.
        if date_part and hash_tokens and event_tokens:
            line_by_line.append([date_part[0], hash_tokens[0], event_tokens[0]])

# print the combined rows
print(line_by_line)
print('\nFile content for verification')
with open(path, 'r') as file:
    print(file.read())
# reference
# https://docs.python.org/2/library/os.path.html

Sample Data File Content
age_from, age_to, average_intake, recommended_intake_low, recommended_intake_high
1,3,2.5,2.0,2.5
4,8,2.2,2.5,3.0
9,13,2.4,2.9,3.1
14,18,2.5,2.9,3.1
19,30,1.9,2.9,3.1
31,50,1.8,2.9,3.1
51,70,1.7,2.9,3.1
71,120,1.6,2.9,3.1
The data were copied and pasted into files from a web page; some manual cleanup may have been done.