Find Average Food Intake (Food Groups) by Age

Published in

Data Science Project Development

6 min read · Mar 2, 2021

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import os

# Folder that holds the per-food-group intake CSV files.
data_folder = './csvdietfiles/'
# File names found in that folder; they are offered as the dropdown options below.
diet_files = os.listdir(data_folder)
diet_files
['dairy_intakes.txt_Female.csv',
 'dairy_intakes.txt_Male.csv',
 'dark_green_vegetables_subgroup.txt_Female.csv',
 'dark_green_vegetables_subgroup.txt_Male.csv',
 'fruits_intake.txt_Female.csv',
 'fruits_intake.txt_Male.csv',
 'grains_intake.txt_Female.csv',
 'grains_intake.txt_Male.csv',
 'legumes_beans_and_peas_subgroup.txt_Female.csv',
 'legumes_beans_and_peas_subgroup.txt_Male.csv',
 'meat_poultry_and_eggs_subgroup.txt_Female.csv',
 'meat_poultry_and_eggs_subgroup.txt_Male.csv',
 'nuts_seeds_and_soy_products_subgroup.txt_Female.csv',
 'nuts_seeds_and_soy_products_subgroup.txt_Male.csv',
 'other_vegetables_subgroup.txt_Female.csv',
 'other_vegetables_subgroup.txt_Male.csv',
 'protein_intake.txt_Female.csv',
 'protein_intake.txt_Male.csv',
 'red_and_orange_vegetables_subgroup.txt_Female.csv',
 'red_and_orange_vegetables_subgroup.txt_Male.csv',
 'seafood_subgroup.txt_Female.csv',
 'seafood_subgroup.txt_Male.csv',
 'starchy_vegetables_subgroup.txt_Female.csv',
 'starchy_vegetables_subgroup.txt_Male.csv',
 'vegetable_intake.txt_Female.csv',
 'vegetable_intake.txt_Male.csv']def write_to_file(filename, data):
    file = open(filename,'w')
    
    #file_data = data #[ aNumber.strip() for aRow in data for aNumber in aRow ]
    file_data = 'age_from, age_to, average_intake, recommended_intake_low, recommended_intake_high\n'
    for aRow in data:
        element_count = len(aRow)
        for i, aNumber in enumerate(aRow):
            if (element_count == 4) and (i==1) :
                file_data += '120,'
                
            file_data += aNumber
            
            if (i < len(aRow) - 1):
                file_data += ','
            
        file_data += '\n'
        
    #file_data = str(file_data)
    print(file_data)
    file.write(file_data)
    file.close()#print('Select a health measure/aspect to visualize\n')
# create the interactive interface
def f(diet_file):
    """Return the chosen file name unchanged (callback handed to ``interactive``)."""
    return diet_file

print('Select a measure:')
# Build the widget from f and the diet_files list.  From here on the name
# diet_file refers to the widget; later code reads the selection via .result.
diet_file = interactive(f, diet_file = diet_files);
display(diet_file)
Select a measure:



interactive(children=(Dropdown(description='diet_file', options=('dairy_intakes.txt_Female.csv', 'dairy_intake…

path = data_folder + diet_file.result
print ('Selected: ' + path)

# Pair the selected file with the Female file of the same food group instead
# of always using the starchy-vegetables file, so that the two files averaged
# later describe the same measure.  (If a Female file is selected, path_f
# simply equals path.)
path_f = data_folder + diet_file.result.replace('_Male.csv', '_Female.csv')
print ('Selected: ' + path_f)
Selected: ./csvdietfiles/starchy_vegetables_subgroup.txt_Male.csv
Selected: ./csvdietfiles/starchy_vegetables_subgroup.txt_Female.csv

import pandas as pd
# Load the selected file into a DataFrame and take a look at its first rows.
df = pd.read_csv(path)
df.head()

# Same for the Female counterpart file.
df_f = pd.read_csv(path_f)
df_f.head()

import os.path
import re

# Expand the age-bracket rows of the selected file into one output row per
# single year of age, averaging the recommended-intake bounds of the selected
# file (path) and the Female file (path_f).

# Both inputs must exist; checking only the first one lets the second open() fail.
for required_path in (path, path_f):
    if not os.path.exists(required_path):
        print('Fatal error: file does not exist')
        raise SystemExit(1)

csv_diet_folder = './agescsvdietfiles/subgroup/'

file_name = csv_diet_folder + diet_file.result
print(file_name)

# Context managers close all three files even if a row fails to parse.
with open(path, 'r') as file, open(path_f, 'r') as file_f, \
        open(file_name, 'w') as file_write:
    # Skip the header line of each input.
    next(file, None)
    next(file_f, None)

    for line in file:
        # The two inputs are read in lockstep: row N of one pairs with row N of the other.
        line_f = file_f.readline()
        if not line.strip():
            # Blank line (e.g. at the end of the file) carries no data.
            continue
        if not line_f.strip():
            raise ValueError('No matching row in ' + path_f + ' for: ' + line.strip())

        s = line.split(',')
        s_f = line_f.split(',')

        # Columns used: 0 = age_from, 1 = age_to, 3 = recommended low, 4 = recommended high.
        age_from = int(s[0])
        age_to = int(s[1])
        recom_low = float(s[3])
        recom_high = float(s[4])
        recom_low_f = float(s_f[3])
        recom_high_f = float(s_f[4])

        # The averages do not depend on the age, so compute them once per bracket.
        # NOTE(review): the purpose of the 150 scaling factor is not documented here.
        recom_l = round((recom_low + recom_low_f) / 2, 2) * 150
        recom_h = round((recom_high + recom_high_f) / 2, 2) * 150

        # One output row per year of age: age, age, scaled low, scaled high.
        for a in range(age_from, age_to + 1):
            file_write.write(str(a) + ',' + str(a) + ',' + str(recom_l) + ',' + str(recom_h) + '\n')
file_write.close()
./agescsvdietfiles/subgroup/starchy_vegetables_subgroup.txt_Male.csv

###  process_mortality_data
# Switch to the mortality data folder.  NOTE: this rebinds data_folder, the
# same name the diet cells above use for their input folder.
data_folder = './process_mortality_data/'
# File names found in that folder; they are offered as the dropdown options below.
mortality_file = os.listdir(data_folder)
mortality_file
['hellomortality_h_1_Total patient deaths_ ESRD patients.csv',
 'hellomortality_h_1_Total patient deaths_ ESRD_patients.csv',
 'mortality_h_1_Total patient deaths_ ESRD patients.xlsx',
 'mortality_h_1_Total patient deaths_ ESRD_patients.csv',
 '_hellomortality_h_1_Total patient deaths_ ESRD_patients.csv']

#print('Select a health measure/aspect to visualize\n')
# create the interactive interface
def f(mortality_file):
    """Return the chosen file name unchanged (callback handed to ``interactive``)."""
    return mortality_file

print('Select a measure:')
# mortality_file is rebound here from the list of file names to the widget;
# later code reads the selection via its .result attribute.
mortality_file = interactive(f, mortality_file = mortality_file);
display(mortality_file)
Select a measure:



interactive(children=(Dropdown(description='mortality_file', options=('hellomortality_h_1_Total patient deaths…

path = data_folder + mortality_file.result
print ('Selected: ' + path)
Selected: ./process_mortality_data/mortality_h_1_Total patient deaths_ ESRD_patients.csv

import os.path
import re

# Expand each age bracket of the selected mortality file into one output row
# per single year of age.
if not os.path.exists(path):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

csv_diet_folder = './process_mortality_data/output/'

# Name the output after the mortality file that is actually being read.
# Using diet_file.result here would pick up the selection of the unrelated
# diet dropdown instead.
file_name = csv_diet_folder + os.path.basename(path)
print(file_name)

# Context managers close both files even if a row fails to parse.
with open(path, 'r') as file, open(file_name, 'w') as file_write:
    # Skip the header line.
    next(file, None)

    for line in file:
        if not line.strip():
            # Blank line (e.g. at the end of the file) carries no data.
            continue

        s_l = line.split(',')
        print(s_l)
        # The first column is the age bracket: "from-to", or "from+" when open-ended.
        s = s_l[0].split('-')

        try:
            age_from = int(s[0])
            age_to = int(s[1])
        except (ValueError, IndexError):
            # No upper bound in the bracket: strip the '+' and cap the range at 120.
            age_from = int(s[0].replace('+', ''))
            age_to = 120

        # Spread the bracket value over an assumed five ages per bracket.
        # NOTE(review): an open-ended bracket expands to more than five ages
        # below but is still divided by 5 - confirm this is intended.
        recom_low = float(s_l[3]) / 5

        # One output row per year of age: age, age, per-age value.
        for a in range(age_from, age_to + 1):
            print(a, a, recom_low)
            file_write.write(str(a) + ',' + str(a) + ',' + str(recom_low) + '\n')
file_write.close()
./process_mortality_data/output/mortality_h_1_Total patient deaths_ ESRD_patients.csv

References:

2015-2020 Dietary Guidelines

The 2015-2020 Dietary Guidelines was designed to help Americans eat a healthier diet. Intended for policymakers and…

health.gov

Ignore the code below:

# Lists that collect the dates, hashes and events found (named accordingly).
# Interpretation of the problem: three different outputs.
date_list = []
hash_list = []
event_list = []

# Patterns are compiled once instead of on every line.
# Date assumptions: consistent yyyy-mm-dd format, no invalid dates, times ignored.
#   year : four digits, each 0-9
#   month: two digits, first digit 0 or 1, second digit 0-9
#          (a second-digit class of [1-2] would only match months 01, 02, 11, 12)
#   day  : two digits, first digit 0-3, second digit 0-9
date_pattern = re.compile('[0-9]{4}-[01][0-9]-[0-3][0-9]')
token_pattern = re.compile('[a-zA-Z0-9]+')

# NOTE: `file` must already be an open file object when this code runs.
for line in file:
    # Split once and reuse the fields.
    fields = line.split()
    if not fields:
        # Blank line: nothing to extract.
        continue

    # Field 0 holds the date, field 1 the hash and field 6 the event.
    # Each list entry is the list of matches found for that line.
    date_list.append(date_pattern.findall(fields[0]))
    hash_list.append(token_pattern.findall(fields[1]))
    event_list.append(token_pattern.findall(fields[6]))

# close the file
file.close()


# print the lists
print('All dates found')
print(date_list)

print('\nAll hashes found')
print(hash_list)

print('\nAll events found')
print(event_list)


# reference
# https://docs.python.org/2/library/os.path.html
All dates found
[]

All hashes found
[]

All events found
[]

# 1.3 method 2 : output line by line
import os.path
import re

# open the file for reading
path = 'logs.txt'
if not os.path.exists(path):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

# Lists that collect the dates, hashes and events found (named accordingly).
# Interpretation of the problem: three different outputs.
date_list = []
hash_list = []
event_list = []

# One [date, hash, event] triple per log line.
line_by_line = []

# Patterns are compiled once instead of on every line.
# Date assumptions: consistent yyyy-mm-dd format, no invalid dates, times ignored.
#   year : four digits, each 0-9
#   month: two digits, first digit 0 or 1, second digit 0-9
#          (a second-digit class of [1-2] would only match months 01, 02, 11, 12
#          and leave date_part empty for every other month)
#   day  : two digits, first digit 0-3, second digit 0-9
date_pattern = re.compile('[0-9]{4}-[01][0-9]-[0-3][0-9]')
token_pattern = re.compile('[a-zA-Z0-9]+')

# The context manager closes the file even if a line fails to parse.
with open(path, 'r') as file:
    for line in file:
        # Split once and reuse the fields.
        fields = line.split()
        if not fields:
            # Blank line: nothing to extract.
            continue

        # Field 0 holds the date, field 1 the hash and field 6 the event.
        date_part = date_pattern.findall(fields[0])
        hash_tokens = token_pattern.findall(fields[1])
        event = token_pattern.findall(fields[6])

        date_list.append(date_part)
        hash_list.append(hash_tokens)
        event_list.append(event)

        # Keep only the first match of each field for the per-line output.
        line_by_line.append([date_part[0], hash_tokens[0], event[0]])


# print the per-line results
print(line_by_line)


print('\nFile content for verification')
with open(path, 'r') as file:
    print(file.read())


# reference
# https://docs.python.org/2/library/os.path.html

Sample Data File Content

age_from, age_to, average_intake, recommended_intake_low, recommended_intake_high
1,3,2.5,2.0,2.5
4,8,2.2,2.5,3.0
9,13,2.4,2.9,3.1
14,18,2.5,2.9,3.1
19,30,1.9,2.9,3.1
31,50,1.8,2.9,3.1
51,70,1.7,2.9,3.1
71,120,1.6,2.9,3.1


The data were copied and pasted into files from the web page; some manual cleanup may have been done afterwards.

Find Average Food Intake (Food Groups) by Age

References:

2015-2020 Dietary Guidelines

The 2015-2020 Dietary Guidelines was designed to help Americans eat a healthier diet. Intended for policymakers and…

Ignore the below code:

Written by Justetc Social Services (non-profit)