---
title: "attendance by early years foundation stage attainment"
author: "Giles Robinson"
date: 2024-07-09
editor: visual
format:
html:
code-tools: true
code-fold: true
toc: true
toc-location: left
toc-depth: 4
number-sections: true
number-depth: 4
fig-cap-location: top
execute:
warning: false
message: false
knitr:
opts_chunk:
out.width: "100%"
---
# Introduction
This report details the findings of analysis of how school attendance varies by the 'school readiness' scores which are reported by teachers at Early Years Foundation Stage (EYFS), and how that relationship varies with other characteristics. This is a replication of a research study (Wood et al, 2024). The study took data on 62,000 children in Bradford to establish a link between school readiness (as assessed in the EYFS scores) and persistent school absence later in life.
EYFS measures school readiness across a number of domains ....
This article gives an overview of the Bradford study: https://www.nurseryworld.co.uk/news/article/link-between-meeting-early-learning-goals-in-eyfs-and-later-school-absence-research
And the full research paper can be found in the PDF https://royalsocietypublishing.org/doi/pdf/10.1098/rsos.240272
The Bradford study found ....
This document shows the findings of a similar approach using local Sheffield data. We have around 44,000 results for EYFS in Sheffield between \*\*\*\*\* and \*\*\*\*\*
```{r setup}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
# clear the environment
remove(list = ls())
# load packages
library(tidyverse)
library(janitor)
library(lubridate)
library(ggtext)
library(ggrepel)
library(gghighlight)
library(kableExtra)
library(MetBrewer)
library(corrplot)
library(ggcorrplot)
#library(shadowtext)
library(readxl)
library(ggstatsplot)
library(geosphere)
library(ggridges)
library(forecast)
library(tsibble)
library(gt)
# specify data folder
data_folder <- str_c("S:/Public Health/Policy Performance Communications/Business Intelligence/Projects/EIP/data/inclusion/")
# copy to excel function
copy_excel <- function(input)
{write.table(input, file = "clipboard-20000", sep = "\t", row.names = F)}
# ggplot themes
eb <- element_blank()
# Set default ggplot theme
theme_set(
theme_classic() +
theme(
#plot.title = element_text(),
plot.subtitle = element_text(size = 9, face = "italic"),
plot.caption = element_text(size = 8, face = "italic"),
plot.title.position = "plot",
plot.title = element_markdown(size = 12),
strip.background = eb
)
)
# theme for minimal bar charts
barplottheme_minimal <- theme(
axis.title.y = eb,
axis.line.y = eb,
axis.ticks.y = eb,
axis.line.x = eb,
axis.ticks.x = eb
)
gannt_theme <- theme_classic() +
theme(
plot.title = element_text(size = 12),
plot.subtitle = element_text(size = 8, face = "italic"),
plot.caption = element_text(size = 8, face = "italic"),
plot.title.position = "plot",
axis.title = eb,
axis.line.y = eb,
axis.ticks.y = eb,
axis.text.y = eb,
legend.position = "right",
legend.title = eb,
legend.text = element_text(size = 8)
)
# Connect to OSCAR database via ODBC
oscar_con <- DBI::dbConnect(
odbc::odbc(),
Driver = "Oracle in OraClient12Home1",
Dbq = "SCPRFLVE",
UID = if (Sys.getenv("oscar_userid") == "") {
rstudioapi::askForPassword("OSCAR User ID")
} else {
Sys.getenv("oscar_userid")
},
PWD = if (Sys.getenv("oscar_pwd") == "") {
rstudioapi::askForPassword("OSCAR Password")
} else {
Sys.getenv("oscar_pwd")
},
timeout = 10
)
# summarising attendance function
# this is copied from the attendance & exclusion data model.
# any changes made there should be reflected here & vice versa
# note that the groupings appear TWICE in this function, once for grouped data and once for the "no grouping" scenario (grouping_vars = "none"). Any changes must be consistent across both.
summarise_attendance <- function(input_data, grouping_vars) {
ifelse (grouping_vars == "none", {
# Aggregate without grouping
result <- input_data |>
mutate(zero_attendance = if_else(present == 0, 1, 0)) |>
summarise(child_count = n_distinct(stud_id, na.rm = TRUE),
row_count = n(),
possible_sessions = sum(possible_sessions, na.rm = TRUE),
present = sum(present, na.rm = TRUE),
authorised = sum(authorised, na.rm = TRUE),
unauthorised = sum(unauthorised, na.rm = TRUE),
missing = sum(missing, na.rm = TRUE),
excluded = sum(excluded, na.rm = TRUE),
family_holiday_agreed = sum(family_holiday_agreed, na.rm = TRUE),
family_holiday_not_agreed = sum(family_holiday_not_agreed, na.rm = TRUE),
family_holiday_total = sum(family_holiday_total, na.rm = TRUE),
illness = sum(illness, na.rm = TRUE),
med_appt = sum(med_appt, na.rm = TRUE),
no_reason = sum(no_reason, na.rm = TRUE),
late_absent = sum(late_absent, na.rm = TRUE),
late_pres = sum(late_pres, na.rm = TRUE),
late_total = sum(late_absent, na.rm = TRUE) + sum(late_pres, na.rm = TRUE),
study_leave = sum(study_leave, na.rm = TRUE),
approved_offsite = sum(approved_offsite, na.rm = TRUE),
fixed_exclusions = sum(fixed_exclusions, na.rm = TRUE),
perm_exclusions = sum(perm_exclusions, na.rm = TRUE),
total_exclusions = sum(total_exclusions, na.rm = TRUE),
persistent_absent_count = sum(persistent_absence, na.rm = TRUE),
severe_absent_count = sum(severe_absence, na.rm = TRUE),
zero_attendance_count = sum(zero_attendance, na.rm = TRUE)
) |>
mutate(percent_present = present / possible_sessions,
percent_auth_absence = authorised / possible_sessions,
percent_unauth_absence = unauthorised / possible_sessions,
percent_missing = missing / possible_sessions,
percent_family_holiday_agreed = family_holiday_agreed / possible_sessions,
percent_family_holiday_not_agreed = family_holiday_not_agreed / possible_sessions,
percent_family_holiday = family_holiday_total / possible_sessions,
percent_excluded = excluded / possible_sessions,
percent_illness = illness / possible_sessions,
percent_med_appt = med_appt / possible_sessions,
percent_no_reason = no_reason / possible_sessions,
percent_late_absent = late_absent / possible_sessions,
percent_late_pres = late_pres / possible_sessions,
percent_late_total = late_total / possible_sessions,
percent_study_leave = study_leave / possible_sessions,
percent_approved_offsite = approved_offsite / possible_sessions,
pc_of_pupils_persistent_absent = persistent_absent_count / row_count,
pc_of_pupils_severely_absent = severe_absent_count / row_count,
pc_of_pupils_zero_attendance = zero_attendance_count / row_count
) |>
mutate(percent_absent = 1 - percent_present)
},
{
# Group by specified variables and then summarize
result <- input_data |>
mutate(zero_attendance = if_else(present == 0, 1, 0)) |>
group_by(across(all_of(grouping_vars))) |>
summarise(child_count = n_distinct(stud_id, na.rm = TRUE),
row_count = n(),
possible_sessions = sum(possible_sessions, na.rm = TRUE),
present = sum(present, na.rm = TRUE),
authorised = sum(authorised, na.rm = TRUE),
unauthorised = sum(unauthorised, na.rm = TRUE),
missing = sum(missing, na.rm = TRUE),
excluded = sum(excluded, na.rm = TRUE),
family_holiday_agreed = sum(family_holiday_agreed, na.rm = TRUE),
family_holiday_not_agreed = sum(family_holiday_not_agreed, na.rm = TRUE),
family_holiday_total = sum(family_holiday_total, na.rm = TRUE),
illness = sum(illness, na.rm = TRUE),
med_appt = sum(med_appt, na.rm = TRUE),
no_reason = sum(no_reason, na.rm = TRUE),
late_absent = sum(late_absent, na.rm = TRUE),
late_pres = sum(late_pres, na.rm = TRUE),
late_total = sum(late_absent, na.rm = TRUE) + sum(late_pres, na.rm = TRUE),
study_leave = sum(study_leave, na.rm = TRUE),
approved_offsite = sum(approved_offsite, na.rm = TRUE),
fixed_exclusions = sum(fixed_exclusions, na.rm = TRUE),
perm_exclusions = sum(perm_exclusions, na.rm = TRUE),
total_exclusions = sum(total_exclusions, na.rm = TRUE),
persistent_absent_count = sum(persistent_absence, na.rm = TRUE),
severe_absent_count = sum(severe_absence, na.rm = TRUE),
zero_attendance_count = sum(zero_attendance, na.rm = TRUE)
) |>
mutate(percent_of_pupils = child_count / sum(child_count, na.rm = TRUE),
percent_present = present / possible_sessions,
percent_auth_absence = authorised / possible_sessions,
percent_unauth_absence = unauthorised / possible_sessions,
percent_missing = missing / possible_sessions,
percent_family_holiday_agreed = family_holiday_agreed / possible_sessions,
percent_family_holiday_not_agreed = family_holiday_not_agreed / possible_sessions,
percent_family_holiday = family_holiday_total / possible_sessions,
percent_excluded = excluded / possible_sessions,
percent_illness = illness / possible_sessions,
percent_med_appt = med_appt / possible_sessions,
percent_no_reason = no_reason / possible_sessions,
percent_late_absent = late_absent / possible_sessions,
percent_late_pres = late_pres / possible_sessions,
percent_late_total = late_total / possible_sessions,
percent_study_leave = study_leave / possible_sessions,
percent_approved_offsite = approved_offsite / possible_sessions,
pc_of_pupils_persistent_absent = persistent_absent_count / row_count,
pc_of_pupils_severely_absent = severe_absent_count / row_count,
pc_of_pupils_zero_attendance = zero_attendance_count / row_count
)|>
mutate(percent_absent = 1 - percent_present)
}
)
return(result)
}
summarise_avg <- function(input_data)
{ summarise (input_data,
mean.percent_present = mean(percent_present, na.rm = TRUE),
sd.percent_present = sd(percent_present, na.rm = TRUE),
n.percent_present = n() ) |>
mutate(se.percent_present = sd.percent_present / sqrt(n.percent_present),
lower.ci.percent_present = mean.percent_present - qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present,
upper.ci.percent_present = mean.percent_present + qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present
) }
percent_calc <- function(input_data)
{input_data |> tally() |> mutate(freq = n / sum(n)) |>
mutate(
l_ci = freq - (1.96 * sqrt((freq * (1 - freq)) / n)),
u_ci = freq + (1.96 * sqrt((freq * (1 - freq)) / n))
)}
presence_mean_calc <- function(input_data)
{input_data |>
summarise(mean.percent_present = mean(percent_present, na.rm = TRUE),
sd.percent_present = sd(percent_present, na.rm = TRUE),
n.percent_present = n() ) |>
mutate(se.percent_present = sd.percent_present / sqrt(n.percent_present),
lower.ci.percent_present = mean.percent_present - qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present,
upper.ci.percent_present = mean.percent_present + qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present)
}
#presence_mean_calc_eyfs <- function(input_data = attend_eyfs, grouping_vars)
#{input_data |> ifelse (grouping_vars == "none", {
# Aggregate without grouping
# summarise(presence_y1 = mean(y1), na.rm = TRUE)
# )
#,
#sd.percent_present = sd(percent_present, na.rm = TRUE),
#n.percent_present = n() ) |>
#mutate(se.percent_present = sd.percent_present / sqrt(n.percent_present),
#lower.ci.percent_present = mean.percent_present - qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present,
#upper.ci.percent_present = mean.percent_present + qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present)
#}
```
# Data processing
The data is loaded from the *attendance* data model, which contains pre-aggregated & cleansed attendance data, as well as other descriptive characteristics. EYFS scores are loaded from the SCC OSCAR database, and the attendance data is joined on for each national curriculum year (NCY). Finally the data is aggregated, with COVID years filtered out, to calculate average attendance rates for each NCY, first by the EYFS 'good' indicator, and then by other characteristics of interest.
```{r}
#| label: load data
# get the attendance data model
load (str_c (data_folder,"attendance_inclusion_data_model.RData" ))
# MOVE THIS TO DATA MODEL ON NEXT FULL REFRESH
# get annual cohort
# cohort <- attend |>
# group_by(stud_id, dob) |>
# summarise(first_year = min(year),
# first_ncy = min(ncy)) |>
# select(stud_id, dob, first_year, first_ncy) |>
# ungroup() |>
# mutate(cohort = first_year - first_ncy + 1,
# class_of = str_c("class of ",(first_year - first_ncy + 11))) |>
# select(-first_year, -first_ncy, -dob)
#
# attend <- attend |>
# left_join(cohort, by = "stud_id")
# create a simple version of attend_stud_year for joining
asyj <- attend_stud_year |>
select (stud_id,
year,
percent_present)
```
```{r}
#| label: join eyfs to attendance data
attend_eyfs <- eyfs |>
left_join (asyj |> rename (y1 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y1_year" == "year" )) |>
left_join (asyj |> rename (y2 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y2_year" == "year" )) |>
left_join (asyj |> rename (y3 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y3_year" == "year" )) |>
left_join (asyj |> rename (y4 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y4_year" == "year" )) |>
left_join (asyj |> rename (y5 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y5_year" == "year" )) |>
left_join (asyj |> rename (y6 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y6_year" == "year" )) |>
left_join (asyj |> rename (y7 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y7_year" == "year" )) |>
left_join (asyj |> rename (y8 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y8_year" == "year" )) |>
left_join (asyj |> rename (y9 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y9_year" == "year" )) |>
left_join (asyj |> rename (y10 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y10_year" == "year" )) |>
left_join (asyj |> rename (y11 = percent_present),
join_by ("stud_id" == "stud_id" ,
"y11_year" == "year" )) |>
left_join (stud_details_joined) |> # join on the student details
mutate (sen_level = replace_na (sen_level,"No SEN" )) |>
mutate (primary_specific_need = ifelse (primary_specific_need == "Behaviour, Emotional And Social Difficulty" ,
"Social, Emotional and Mental Health" ,
primary_specific_need))
# get school
sch_data <- attend_stud_year_school |>
ungroup () |>
select (stud_id, year, base_id) |>
group_by (stud_id, year) |>
slice_head (n = 1 ) |>
left_join (school_details_short) |>
select (stud_id, year, school = school_short_name)
attend_eyfs <- attend_eyfs |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y1_year" == "year" )) |>
rename (y1_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y2_year" == "year" )) |>
rename (y2_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y3_year" == "year" )) |>
rename (y3_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y4_year" == "year" )) |>
rename (y4_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y5_year" == "year" )) |>
rename (y5_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y6_year" == "year" )) |>
rename (y6_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y7_year" == "year" )) |>
rename (y7_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y8_year" == "year" )) |>
rename (y8_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y9_year" == "year" )) |>
rename (y9_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y10_year" == "year" )) |>
rename (y10_school = school) |>
left_join (sch_data,
join_by ("stud_id" == "stud_id" ,
"y11_year" == "year" )) |>
rename (y11_school = school)
```
```{r}
#| label: aggregate the joined eyfs data
# we have to do this one NCY at a time, because the covid years need filtering out
# average attendance function - we'll apply this to each year & then stack the results
avg_attend_by_good_fun <- function (input_data = attend_eyfs, ncy, ncy_year, grouping_vars = "none" )
{
ifelse (grouping_vars == "none" ,
{
# Aggregate without grouping
result <- input_data |>
filter (! get (ncy_year) %in% c (2020 ,2021 ),
! is.na (get (ncy))) |>
group_by (good) |>
summarise (mean_pc_present = mean (get (ncy), na.rm = TRUE ),
sd = sd (get (ncy), na.rm = TRUE ),
n = n () ) |>
mutate (se = sd / sqrt (n),
lower_ci_pc_present = mean_pc_present - qt (1 - (0.05 / 2 ), n - 1 ) * se,
upper_ci_pc_present = mean_pc_present + qt (1 - (0.05 / 2 ), n - 1 ) * se
) |>
select (- se, - sd) |>
mutate (ncy = ncy)
}
,
# aggregate with grouping
{
result <- input_data |>
filter (! get (ncy_year) %in% c (2020 ,2021 ),
! is.na (get (ncy))) |>
group_by (good,across (all_of (grouping_vars))) |>
summarise (mean_pc_present = mean (get (ncy), na.rm = TRUE ),
sd = sd (get (ncy), na.rm = TRUE ),
n = n () ) |>
mutate (se = sd / sqrt (n),
lower_ci_pc_present = mean_pc_present - qt (1 - (0.05 / 2 ), n - 1 ) * se,
upper_ci_pc_present = mean_pc_present + qt (1 - (0.05 / 2 ), n - 1 ) * se
) |>
select (- se, - sd) |>
mutate (ncy = ncy)
}
)
return (result)
}
# first for all children
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" )
# combine
avg_attend_by_good <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" )))
# by gender
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "gender" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "gender" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "gender" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "gender" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "gender" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "gender" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "gender" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "gender" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "gender" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "gender" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "gender" )
# combine
avg_attend_by_good_gender <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" )))
# by ethnicity category
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "ethnicity_category" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "ethnicity_category" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "ethnicity_category" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "ethnicity_category" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "ethnicity_category" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "ethnicity_category" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "ethnicity_category" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "ethnicity_category" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "ethnicity_category" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "ethnicity_category" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "ethnicity_category" )
# combine
avg_attend_by_good_eth_cat <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" ))) |>
filter (! is.na (ethnicity_category),
! ethnicity_category %in% c ("Refused " ,"Information Not Yet Obtained" ))
# by ethnicity description
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "ethnicity_description" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "ethnicity_description" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "ethnicity_description" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "ethnicity_description" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "ethnicity_description" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "ethnicity_description" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "ethnicity_description" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "ethnicity_description" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "ethnicity_description" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "ethnicity_description" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "ethnicity_description" )
# combine
avg_attend_by_good_eth_des <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" ))) |>
filter (! is.na (ethnicity_description),
! ethnicity_description %in% c ("Irish" ,"Not Known" ,"not known" ))
# by deprivation quartile
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "imd_quartile" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "imd_quartile" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "imd_quartile" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "imd_quartile" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "imd_quartile" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "imd_quartile" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "imd_quartile" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "imd_quartile" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "imd_quartile" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "imd_quartile" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "imd_quartile" )
# combine
avg_attend_by_good_imd_quartile <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" ))) |>
filter (! is.na (imd_quartile))
# by sen_level
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "sen_level" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "sen_level" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "sen_level" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "sen_level" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "sen_level" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "sen_level" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "sen_level" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "sen_level" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "sen_level" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "sen_level" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "sen_level" )
# combine
avg_attend_by_good_sen_level <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" ))) |>
filter (! is.na (sen_level))
# by sen primary specific need
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "primary_specific_need" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "primary_specific_need" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "primary_specific_need" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "primary_specific_need" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "primary_specific_need" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "primary_specific_need" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "primary_specific_need" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "primary_specific_need" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "primary_specific_need" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "primary_specific_need" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "primary_specific_need" )
# combine
avg_attend_by_good_primary_specific_need <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" ))) |>
filter (! is.na (primary_specific_need))#,
#!primary_specific_need %in% c("Irish","Not Known","not known"))
# by ward
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "ward" )
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "ward" )
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "ward" )
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "ward" )
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "ward" )
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "ward" )
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "ward" )
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "ward" )
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "ward" )
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "ward" )
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "ward" )
# combine
avg_attend_by_good_ward <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" ))) |>
filter (! is.na (ward))#,
#!ward %in% c("Irish","Not Known","not known"))
# primary schools
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun (ncy = "y1" ,ncy_year = "y1_year" , grouping_vars = "y1_school" ) |> rename (school = y1_school)
y2_avg <- avg_attend_by_good_fun (ncy = "y2" ,ncy_year = "y2_year" , grouping_vars = "y2_school" ) |> rename (school = y2_school)
y3_avg <- avg_attend_by_good_fun (ncy = "y3" ,ncy_year = "y3_year" , grouping_vars = "y3_school" ) |> rename (school = y3_school)
y4_avg <- avg_attend_by_good_fun (ncy = "y4" ,ncy_year = "y4_year" , grouping_vars = "y4_school" ) |> rename (school = y4_school)
y5_avg <- avg_attend_by_good_fun (ncy = "y5" ,ncy_year = "y5_year" , grouping_vars = "y5_school" ) |> rename (school = y5_school)
y6_avg <- avg_attend_by_good_fun (ncy = "y6" ,ncy_year = "y6_year" , grouping_vars = "y6_school" ) |> rename (school = y6_school)
y7_avg <- avg_attend_by_good_fun (ncy = "y7" ,ncy_year = "y7_year" , grouping_vars = "y7_school" ) |> rename (school = y7_school)
y8_avg <- avg_attend_by_good_fun (ncy = "y8" ,ncy_year = "y8_year" , grouping_vars = "y8_school" ) |> rename (school = y8_school)
y9_avg <- avg_attend_by_good_fun (ncy = "y9" ,ncy_year = "y9_year" , grouping_vars = "y9_school" ) |> rename (school = y9_school)
y10_avg <- avg_attend_by_good_fun (ncy = "y10" ,ncy_year = "y10_year" , grouping_vars = "y10_school" ) |> rename (school = y10_school)
y11_avg <- avg_attend_by_good_fun (ncy = "y11" ,ncy_year = "y11_year" , grouping_vars = "y11_school" ) |> rename (school = y11_school)
# combine
avg_attend_by_good_school <-
rbind (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
mutate (ncy = factor (ncy, levels = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" )))
# tidy up
remove (y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg)
# annual cohorts TO DO
# create averages for each ncy
# y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "y1_school") |> rename(school = y1_school)
# y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "y2_school") |> rename(school = y2_school)
# y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "y3_school") |> rename(school = y3_school)
# y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "y4_school") |> rename(school = y4_school)
# y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "y5_school") |> rename(school = y5_school)
# y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "y6_school") |> rename(school = y6_school)
# y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "y7_school") |> rename(school = y7_school)
# y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "y8_school") |> rename(school = y8_school)
# y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "y9_school") |> rename(school = y9_school)
# y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "y10_school") |> rename(school = y10_school)
# y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "y11_school") |> rename(school = y11_school)
#
# # combine
# avg_attend_by_good_school <-
# rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |>
# mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11")))
#
# # tidy up
# remove(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg)
```
## Analysis
Attainment over time
```{r}
eyfs_year <- eyfs |>
group_by (year, good) |>
summarise (child_count = n_distinct (stud_id)) |>
mutate (percent = child_count / sum (child_count))
ggplot (eyfs_year,
aes (x = year,
y = child_count,
fill = good,
label = str_c (child_count," \n (" ,scales:: percent (percent, accuracy = 0.1 ),")" ))
)+
geom_col (position = "stack" ) +
geom_text (data = eyfs_year |> filter (good == 1 ), aes (y = 2000 ), colour = "white" , fontface = "bold" , size = 3.5 ) +
geom_text (data = eyfs_year |> filter (good == 0 ), aes (y = 5500 ), colour = "white" , fontface = "bold" , size = 3.5 ) +
labs (title = "Sheffield children <b><span style='color:#208cc0'>meeting </span></b>and <b><span style='color:#a82203'>not meeting </span></b>expected standards at early years foundation stage" ) +
scale_x_continuous (breaks = seq (2013 ,2019 ,by = 1 )) +
#scale_x_reverse() +
#coord_flip() +
theme (legend.position = "none" ,
axis.title = eb,
axis.line = eb,
axis.text.y = eb,
axis.ticks = eb) +
scale_fill_met_d ("Juarez" )
```
Plotting the overall attendance rates for each year, by the EYFS *good* score shows a clear gap in attendance between children meeting expected level and those not. This gap is present in all years and grows during secondary school.
```{r}
ggplot (avg_attend_by_good |>
mutate (label = if_else (ncy == "y11" ,
if_else (good == 1 ,"expected level" ,"not meeting expected level" ),NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
geom_label_repel (size = 2.8 ,
nudge_y = - 0.01 ,
min.segment.length = Inf ) +
scale_y_continuous (labels = scales:: percent) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "none" , plot.subtitle = element_markdown ())
```
Next we'll explore how this difference varies by different characteristics, in each case recreating the plot above.
### Gender
```{r}
#| label: plot attendance by ncy, good and gender
ggplot (avg_attend_by_good_gender |>
mutate (category = if_else (good == 1 ,if_else (gender == "M" ,"boys - expected level" ,"girls - expected level" ),
if_else (gender == "M" ,"boys - not meeting expected level" ,"girls - not meeting expected level" ),NA_character_ )) |>
mutate (label = if_else (ncy == "y11" ,category,NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = category,
group = category,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
geom_label_repel (size = 2.8 ,
nudge_y = - 0.01 ,
min.segment.length = Inf ,
alpha = 0.7 ) +
scale_y_continuous (labels = scales:: percent) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure, and gender" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "none" , plot.subtitle = element_markdown ()) +
scale_colour_brewer (palette = "Set1" )
```
### Ethnicity
This table provides the numbers for the following plot
```{r}
avg_attend_by_good_eth_cat |>
select (ethnicity_category, n, ncy) |>
pivot_wider (c (ethnicity_category, good),
names_from = "ncy" ,
values_from = "n" ) |>
group_by (ethnicity_category) |>
gt (
rowname_col = "good"
)
```
```{r}
#| label: plot pc meeting eyfs good by ethnicity category
avg_attend_by_good_eth_cat |>
select (ethnicity_category, n) |>
group_by (ethnicity_category, good) |>
summarise (n = sum (n)) |>
pivot_wider (values_from = n,
names_from = good) |>
rename (meeting = "1" ,
not_meeting = "0" ) |>
mutate (total = meeting + not_meeting,
pc_good = meeting / (meeting + not_meeting)) |>
ggplot (aes (x = fct_reorder (ethnicity_category, pc_good),
y = pc_good,
label = scales:: percent (pc_good)) ) +
geom_col (fill = "steel blue" ) +
geom_text (hjust = 1 , colour = "white" ) +
coord_flip () +
labs (title = "% achieving EYFS good measure, by ethnicity category" ) +
barplottheme_minimal +
theme (axis.text.x = eb, axis.title = eb)
```
```{r}
#| label: plot attendance by ncy, good and ethnicity category
#| fig-height: 10
ggplot (avg_attend_by_good_eth_cat |>
mutate (label = if_else (ncy == "y11" ,
if_else (good == 1 ,"expected level" ,"not meeting expected level" ),NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
#geom_label_repel(size = 2,
# nudge_y = -0.01,
# min.segment.length = Inf,
# alpha = 0.6) +
scale_colour_hue (labels = c ("1" = "meeting expected level" ,"0" = "not meeting expected level" )) +
scale_y_continuous (labels = scales:: percent) +
geom_line (data = avg_attend_by_good,
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good),
linetype = "dashed" , alpha = 0.6 , inherit.aes = FALSE ) +
facet_wrap (vars (ethnicity_category)) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ethnicity category" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.title = eb,
plot.subtitle = element_markdown (),
legend.position = "top" )
```
```{r}
#| label: plot attendance by ncy, good and ethnicity description
#| fig-height: 10
ggplot (avg_attend_by_good_eth_des |>
mutate (label = if_else (ncy == "y11" ,
if_else (good == 1 ,"expected level" ,"not meeting expected level" ),NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
#geom_label_repel(size = 2,
# nudge_y = -0.01,
# min.segment.length = Inf,
# alpha = 0.6) +
scale_y_continuous (labels = scales:: percent) +
geom_line (data = avg_attend_by_good,
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good),
linetype = "dashed" , alpha = 0.6 , inherit.aes = FALSE ) +
facet_wrap (vars (ethnicity_description)) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ethnicity description" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "none" , plot.subtitle = element_markdown ())
```
### SEN level
```{r}
#| label: plot pc meeting eyfs good by sen level
#| fig-height: 3
avg_attend_by_good_sen_level |>
select (sen_level, n) |>
group_by (sen_level, good) |>
summarise (n = sum (n)) |>
pivot_wider (values_from = n,
names_from = good) |>
rename (meeting = "1" ,
not_meeting = "0" ) |>
#arrange(desc(pc_good)) |>
mutate (total = meeting + not_meeting,
pc_good = meeting / (meeting + not_meeting)) |>
ggplot (aes (x = fct_reorder (sen_level, pc_good),
y = pc_good,
label = scales:: percent (pc_good, accuracy = 0.1 )) ) +
geom_col (fill = "steel blue" ) +
geom_text (aes (hjust = ifelse (pc_good > 0.05 ,1.5 ,- 0.5 ), colour = pc_good > 0.05 )) +
scale_color_manual (values = c ("steel blue" , "white" ), guide = "none" ) +
coord_flip () +
labs (title = "% achieving EYFS good measure, by SEN level" ) +
barplottheme_minimal +
theme (axis.text.x = eb, axis.title = eb)
```
Children with an EHCP plan are generally unlikely to meet the expected level at EYFS, and so the numbers do not allow a useful comparison here, and they are removed from this chart.
Children with no special educational needs make up the majority, and so closely follow the overall averages (shown here as dotted lines).
Children requiring SEN support attend worse than those with no SEN *regardless of their EYFS score* - those who do meet the expected standard still attend worse than average throughout their school career.
```{r}
#| label: plot attendance by ncy, good and sen_level
#| fig-height: 4
ggplot (avg_attend_by_good_sen_level |>
filter (sen_level != "EHCP" ) |>
mutate (label = if_else (ncy == "y11" ,
if_else (good == 1 ,"expected level" ,"not meeting expected level" ),NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
geom_label_repel (size = 2 ,
nudge_y = - 0.01 ,
min.segment.length = Inf ,
alpha = 0.6 ) +
scale_y_continuous (labels = scales:: percent) +
geom_line (data = avg_attend_by_good,
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good),
linetype = "dashed" , alpha = 0.6 , inherit.aes = FALSE ) +
facet_wrap (vars (sen_level)) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and SEN level" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "none" , plot.subtitle = element_markdown ())
```
### SEN primary specific need
```{r}
#| label: plot pc meeting eyfs good by primary specific need
#| fig-height: 3
avg_attend_by_good_primary_specific_need |>
select (primary_specific_need, n) |>
group_by (primary_specific_need, good) |>
summarise (n = sum (n)) |>
mutate (n = replace_na (n)) |>
pivot_wider (values_from = n,
names_from = good,
values_fill = 0 ) |>
rename (meeting = "1" ,
not_meeting = "0" ) |>
mutate (total = meeting + not_meeting,
pc_good = meeting / (meeting + not_meeting)) |>
ggplot (aes (x = fct_reorder (primary_specific_need, pc_good),
y = pc_good,
label = scales:: percent (pc_good, accuracy = 0.1 )) ) +
geom_col (fill = "steel blue" ) +
geom_text (aes (hjust = ifelse (pc_good > 0.05 ,1 ,- 0.5 ), colour = pc_good > 0.05 ), size = 2.5 ) +
scale_color_manual (values = c ("steel blue" , "white" ), guide = "none" ) +
coord_flip () +
labs (title = "% achieving EYFS good measure, by primary specific special educational need" ) +
barplottheme_minimal +
theme (axis.text.x = eb, axis.title = eb)
```
```{r}
#| label: plot attendance by ncy, good and sen specific need
#| fig-height: 10
ggplot (avg_attend_by_good_primary_specific_need |>
filter (! primary_specific_need %in% c ("No Specialist Assessment" ,"Profound And Multiple Learning Difficulty" ,
"Severe Learning Difficulty" )) |>
mutate (label = if_else (ncy == "y11" ,
if_else (good == 1 ,"expected level" ,"not meeting expected level" ),NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
geom_label_repel (size = 2 ,
nudge_y = - 0.01 ,
min.segment.length = Inf ,
alpha = 0.6 ) +
scale_y_continuous (labels = scales:: percent, limits = c (0.8 ,1 )) +
facet_wrap (vars (primary_specific_need)) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and primary SEN specific need" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "none" , plot.subtitle = element_markdown ())
```
### Geography and deprivation
As in other areas of analysis within the Inclusion & Attendance project, we'll use the 2019 Indices of Multiple Deprivation (IMD) scores of the child's ward of residence. Those scores are divided into four quartiles, with 1 represents the most affluent 25% of the city, and quartile 4 the most deprived 25%.
There is a clear relationship between deprivation score and EYFS attainment:
```{r}
#| label: plot pc meeting eyfs good by imd quartile
#| fig-height: 3
avg_attend_by_good_imd_quartile |>
mutate (imd_quartile = factor (imd_quartile)) |>
select (imd_quartile, n) |>
group_by (imd_quartile, good) |>
summarise (n = sum (n)) |>
mutate (n = replace_na (n)) |>
pivot_wider (values_from = n,
names_from = good,
values_fill = 0 ) |>
rename (meeting = "1" ,
not_meeting = "0" ) |>
mutate (total = meeting + not_meeting,
pc_good = meeting / (meeting + not_meeting)) |>
ggplot (aes (x = fct_reorder (imd_quartile, pc_good),
y = pc_good,
label = scales:: percent (pc_good, accuracy = 0.1 )) ) +
geom_col (fill = "steel blue" ) +
geom_text (hjust = 1.5 , size = 3 , colour = "white" ) +
scale_color_manual (values = c ("steel blue" , "white" ), guide = "none" ) +
coord_flip () +
labs (title = "% achieving EYFS good measure, by IMD quartile" ) +
barplottheme_minimal +
theme (axis.text.x = eb, axis.title = eb)
```
To look at attendance, here the middle two are removed, so the pairs of lines below represent the most deprived 25% and least deprived 25% of the city, each split by the EYFS good measure.
Two things are worth noting in this chart: 1. The gap between those meeting expected EYFS level and not is greater in poorer areas of the city, and grows faster through secondary school. 2. Children in the poorest areas who do not meet the expected level shows the steepest drop off in attendance between Y6 and Y7, as they transition to secondary school. 2. The blue line (children *not* meeting the expected level in the least deprived wards) is higher than the green line (children who *do* meet the expected level in the most deprived wards). So deprivation makes more of a difference to attendance than the EYFS measure.
```{r}
#| label: plot attendance by ncy, good and imd quartile
#| fig-height: 5
ggplot (avg_attend_by_good_imd_quartile |>
filter (imd_quartile %in% c (1 ,4 )) |>
mutate (category = if_else (good == 1 ,if_else (imd_quartile == 4 ,"most deprived wards - expected level" ,"least deprived wards - expected level" ),
if_else (imd_quartile == 4 ,"most deprived wards - not meeting expected level" ,"least deprived wards - not meeting expected level" ),NA_character_ )) |>
mutate (label = if_else (ncy == "y11" ,category,NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = category,
group = category,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
geom_label_repel (aes (x = "y18" ),
size = 3 ,
#nudge_x = -1,
min.segment.length = Inf ,
alpha = 0.7 ,
hjust = 1 ) +
scale_y_continuous (labels = scales:: percent) +
scale_x_discrete (breaks = c ("y1" ,"y2" ,"y3" ,"y4" ,"y5" ,"y6" ,"y7" ,"y8" ,"y9" ,"y10" ,"y11" )) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure, and IMD quartile" ,
subtitle = "% of available sessions attended, by national curriculum year +- 95 CI, top and bottom IMD quartiles shown only" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "none" , plot.subtitle = element_markdown ()) +
scale_colour_brewer (palette = "Set1" )
```
Plot by ward
```{r}
#| label: plot attendance by ncy, good and ward
#| fig-height: 10
ggplot (avg_attend_by_good_ward |>
mutate (label = if_else (ncy == "y11" ,
if_else (good == 1 ,"expected level" ,"not meeting expected level" ),NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
geom_line (data = avg_attend_by_good,
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good),
linetype = "dashed" , alpha = 0.4 , inherit.aes = FALSE ) +
#geom_label_repel(size = 2,
# nudge_y = -0.01,
# min.segment.length = Inf,
# alpha = 0.6) +
scale_y_continuous (labels = scales:: percent, limits = c (0.8 ,1 )) +
facet_wrap (vars (ward)) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ward of residence" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "none" , plot.subtitle = element_markdown ())
```
## School
```{r}
avg_attend_by_good_school |> group_by (school) |> rename (child_count = n) |> summarise (n = n (), total = sum (child_count)) |> arrange (desc (total)) |> copy_excel () #gt()
```
```{r}
#| label: plot attendance by ncy, good and school
#| fig-height: 10
ggplot (avg_attend_by_good_school |>
filter (school %in% c (
"All Saints" ,
"Birley Academy" ,
"Bradfield" ,
"Chaucer" ,
"Ecclesfield" ,
"Fir Vale" ,
"Firth Park Academy" ,
"Forge Valley" ,
"Handsworth Grange Sports College" ,
"High Storrs" ,
"King Ecgbert" ,
"King Edward VII" ,
"Meadowhead" ,
"Mercia" ,
"Newfield" ,
"Notre Dame High" ,
"Oasis Academy" ,
"Outwood Academy City" ,
"Parkwood E-Act Academy" ,
"Sheffield Park Academy" ,
"Sheffield Springs Academy" ,
"Silverdale" ,
"Stocksbridge High" ,
"Tapton" ,
"Westfield" ,
"Yewlands Academy"
) ,
ncy %in% c ("y7" ,"y8" ,"y9" ,"y10" ,"y11" ))|>
mutate (label = if_else (ncy == "y11" ,
if_else (good == 1 ,"expected level" ,"not meeting expected level" ),NA_character_ )),
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good,
label = label
)) +
geom_point () +
geom_line () +
geom_errorbar (aes (ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2 , alpha = 0.5 ) +
geom_line (data = avg_attend_by_good,
aes (x = ncy,
y = mean_pc_present,
colour = good,
group = good),
linetype = "dashed" , alpha = 0.4 , inherit.aes = FALSE ) +
scale_y_continuous (labels = scales:: percent, limits = c (0.7 ,1 )) +
xlim ("y7" ,"y8" ,"y9" ,"y10" ,"y11" ) +
facet_wrap (vars (school)) +
labs (title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ward of residence" ,
subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI" ,
caption = "data from Capita One" ) +
theme (axis.title = eb, legend.position = "top" , plot.subtitle = element_markdown ())
```
## Annual cohort analysis
```{r}
#| label: plot attendance by deprivation, annual cohort & ncy - primary
##| fig-height: 8
#| warning: false
plot_data <- attend |>
left_join (eyfs |> select (stud_id, good) |> distinct ()) |>
mutate (covid_year_flag = case_when (year < 2020 ~ "pre-COVID" , year == 2020 ~ "lockdown years" , year == 2021 ~ "lockdown years" , year > 2021 ~ "post-pandemic" )) |>
mutate (covid_year_flag = fct_relevel (covid_year_flag, c ("pre-COVID" ,"lockdown years" ,"post-pandemic" ))) |>
filter (cohort >= 2016 , phase == "Primary" ) |>
ungroup () |>
summarise_attendance (grouping_vars = c ("ncy" ,"class_of" ,"year" ,"covid_year_flag" ,"phase" ,"good"
)) |>
filter (ncy <= 11 & ncy >= 1 ,
child_count > 100
) |>
group_by (class_of) |>
filter (! is.na (good))
ggplot (plot_data,
aes (x = ncy,
y = percent_present,
colour = good,
group = good,
label = year,
shape = covid_year_flag)) +
geom_point (size = 3 ) +
scale_shape_manual (values = c ("pre-COVID" = 1 , "lockdown years" = 8 , "post-pandemic" = 4 )) +
geom_line () +
#geom_text(data = plot_data |> filter(imd_quartile == 4),
# aes(label = year), size = 2.5, nudge_y = -0.01, colour = "darkgrey") +
#geom_text(aes(label = scales::percent(percent_present, accuracy = 0.1L)), size = 2.5, nudge_y = 0.01, alpha = 0.7) +
scale_y_continuous (labels = scales:: percent) +
scale_x_continuous (breaks = seq (1 ,6 ))+
facet_wrap (vars (class_of))+
theme (legend.position = "top" ,
axis.title = eb, strip.background = eb) +
labs (title = "Primary school attendance over time by national curriculum year, and deprivation quartile" ,
subtitle = "% of available sessions attended, top & bottom 25% by 2019 Indices of Multiple Deprivation (IMD) score of ward of residence" ,
caption = "data from Capita One" ,
shape = "COVID time period" )
```
```{r}
#| label: plot attendance by deprivation, annual cohort & ncy - secondary
##| fig-height: 8
#| warning: false
plot_data <- attend |>
left_join (eyfs |> select (stud_id, good) |> distinct ()) |>
mutate (covid_year_flag = case_when (year < 2020 ~ "pre-COVID" , year == 2020 ~ "lockdown years" , year == 2021 ~ "lockdown years" , year > 2021 ~ "post-pandemic" )) |>
mutate (covid_year_flag = fct_relevel (covid_year_flag, c ("pre-COVID" ,"lockdown years" ,"post-pandemic" ))) |>
filter (cohort >= 2016 , phase == "Secondary" ) |>
ungroup () |>
summarise_attendance (grouping_vars = c ("ncy" ,"class_of" ,"year" ,"covid_year_flag" ,"phase" ,"good"
)) |>
filter (ncy <= 11 & ncy >= 1 ,
child_count > 100
) |>
group_by (class_of) |>
filter (! is.na (good))
ggplot (plot_data,
aes (x = ncy,
y = percent_present,
colour = good,
group = good,
label = year,
shape = covid_year_flag)) +
geom_point (size = 3 ) +
scale_shape_manual (values = c ("pre-COVID" = 1 , "lockdown years" = 8 , "post-pandemic" = 4 )) +
geom_line () +
#geom_text(data = plot_data |> filter(imd_quartile == 4),
# aes(label = year), size = 2.5, nudge_y = -0.01, colour = "darkgrey") +
#geom_text(aes(label = scales::percent(percent_present, accuracy = 0.1L)), size = 2.5, nudge_y = 0.01, alpha = 0.7) +
geom_text (aes (label = year), size = 2.5 , nudge_y = - 0.01 , colour = "darkgrey" ) +
scale_y_continuous (labels = scales:: percent) +
scale_x_continuous (breaks = seq (7 ,11 ))+
facet_wrap (vars (class_of))+
theme (legend.position = "top" ,
axis.title = eb, strip.background = eb) +
labs (title = "Secondary school attendance over time by national curriculum year, and deprivation quartile" ,
subtitle = "% of available sessions attended, top & bottom 25% by 2019 Indices of Multiple Deprivation (IMD) score of ward of residence" ,
caption = "data from Capita One" ,
shape = "COVID time period" )
```