# Written by: Jean Bouchat
using CSV
using DataFrames
using Plots
using LinearAlgebra
using Statistics

# Load the dataset. `normalizenames=true` asks CSV.jl to turn the column headers
# into valid Julia identifiers, which is what allows `df.Life_expectancy` below.
df = DataFrame(CSV.File("./Life Expectancy Data.csv", normalizenames=true))

# Keep only complete rows (the least-squares solve below cannot handle missing
# values), then restrict the analysis to the year 2012.
new_df = dropmissing(df)
new_df = new_df[new_df.Year .== 2012, :]

# Data
y = new_df.Life_expectancy
A = new_df.BMI

# Plot the data
scatter(A, y, label="Data")

# Add a column of ones to the right of A (the ones column carries the intercept)
new_A = hcat(A, ones(length(A)))

# Solve the system Ax=y to find x, i.e., the coefficients of the linear regression model
coefs = new_A \ y  # Cannot handle missing values

# Plot the regression line.
# Sample the line on an evenly spaced grid that always reaches maximum(A); a unit-step
# range `minimum(A):maximum(A)` can stop short of the maximum for non-integer data.
x_plot = range(minimum(A), maximum(A); length=100)
y_plot = coefs[1] .* x_plot .+ coefs[2]
label = string(coefs[1], "*x + ", coefs[2])
println("Regression line: $(label)")
println("R = $(cor(new_df.Life_expectancy, new_df.BMI))")
plot!(x_plot, y_plot, label=label)
# List the column names of the full table (handy for choosing features).
names(df)

features = ["Life_expectancy", "Total_expenditure", "GDP",
    "Income_composition_of_resources", "Adult_Mortality", "Schooling"]

"""
    plot_feature_regression(x, y; xlabel, ylabel)

Fit the simple linear model `y ≈ slope * x + intercept` by least squares, display the
fitted line and the correlation coefficient between `y` and the model predictions, and
display a scatter plot of the data overlaid with the regression line.

Returns the coefficient vector `[slope, intercept]`.

Keeping the work in a function makes every intermediate value local, so running the
file as a script does not hit the ambiguous soft-scope assignment to existing globals
(`y`, `A`, `coefs`, ...) that a bare top-level loop would.
"""
function plot_feature_regression(x, y; xlabel, ylabel)
    # Preparing the figure
    fig = plot()

    # Plot the data
    scatter!(fig, x, y, label="Data")

    # Add a column of ones to the right of x (the ones column carries the intercept)
    design = hcat(x, ones(length(x)))

    # Solve the system Ax=y to find the coefficients of the linear regression model
    coefs = design \ y  # Cannot handle missing values

    # Plot the regression line
    x_plot = range(minimum(x), maximum(x); length=100)
    y_plot = coefs[1] .* x_plot .+ coefs[2]
    label = string(coefs[1], "*x + ", coefs[2])
    display("Regression line: $(label)")

    # Predicting y from the feature using the regression model
    y_hat = coefs[1] .* x .+ coefs[2]

    # Computing correlation coefficient
    display("R = $(cor(y, y_hat))")

    plot!(fig, x_plot, y_plot, label=label, xlabel=xlabel, ylabel=ylabel)

    # Displaying the figure
    display(fig)
    return coefs
end

for feature in features
    # Skip the columns that are not regressed on; this matters if `features` is
    # replaced by the full list of column names.
    feature in ("Country", "Year", "Status") && continue

    plot_feature_regression(
        new_df[:, feature],
        new_df.Life_expectancy;
        xlabel=feature,
        ylabel="Life expectancy in 2012",
    )
end

# Data
y = new_df.Life_expectancy

# A is the key to go from single to multiple linear regression!
# Each predictor gets its own column; the trailing column of ones carries the intercept.
A = hcat(new_df.Adult_Mortality, new_df.Schooling, ones(size(new_df, 1)))

# Solve the system Ax=y to find x, i.e., the coefficients of the linear regression model
coefs = A \ y  # Cannot handle missing values
println("Regression line: $(coefs[1])*x1 + $(coefs[2])*x2 + $(coefs[3])")

# Use the regression model to predict Life_expectancy from Adult_Mortality and Schooling
y_hat = coefs[1] .* new_df.Adult_Mortality .+ coefs[2] .* new_df.Schooling .+ coefs[3]

# Compute correlation coefficient
println("R = $(cor(y, y_hat))")

# Resources for the JuMP part that follows:
# - https://github.com/odow/jump-training-materials/blob/master/getting_started_with_JuMP.ipynb
#   NOTE(review): underscores in this file name were restored from a garbled link — verify.
# - JuMP documentation: https://jump.dev/JuMP.jl/stable/
# - HiGHS documentation: https://www.maths.ed.ac.uk/hall/HiGHS/
# Install the optimization packages. NOTE(review): this runs on every execution;
# drop these lines once JuMP and HiGHS are part of the active environment.
using Pkg
Pkg.add("JuMP")
Pkg.add("HiGHS")

using JuMP
using HiGHS  # Solver

# Simple linear regression via least squares.
# Formulation (reconstructed from the model built below):
#
#     minimize    sum_{i=1}^{N} (y_i - r_i)^2
#     such that   r_i = a * x_i + b,   i = 1, ..., N
y = new_df.Life_expectancy
x = new_df.Schooling
N = length(x)

model = Model(HiGHS.Optimizer)
@variable(model, a)  # slope
@variable(model, b)  # intercept

# r[i] is the model's value for observation i
@expression(model, r[i = 1:N], a * x[i] + b)

# Sum of squared differences between observations and model values
@objective(model, Min, sum((y[i] - r[i])^2 for i in 1:N))
optimize!(model)

@show value(a)
@show value(b);

# The diet problem was one of the first problems in optimization. George Joseph Stigler
# formulated the problem of the optimal diet in the late 1930s in an attempt to satisfy
# the concern of the North American army to find the most economical way to feed its
# troops while at the same time ensuring certain nutritional requirements.
# Your homework consists of solving the diet problem described in Czyzyk and Wisniewski
# (1996), Section 2, pp. 2-3, using JuMP.
#
# This page was generated using Literate.jl.