attendance by early years foundation stage attainment

Author

Giles Robinson

Published

July 9, 2024

1 Introduction

This report details the findings of analysis of how school attendance varies by the ‘school readiness’ scores which are reported by teachers at Early Years Foundation Stage (EYFS), and how that relationship varies with other characteristics. This is a replication of a research study (Wood et al, 2024). The study took data on 62,000 children in Bradford to establish a link between school readiness (as assessed in the EYFS scores) and persistent school absence later in life.

EYFS measures school readiness across a number of domains ….

This article gives an overview of the Bradford study: https://www.nurseryworld.co.uk/news/article/link-between-meeting-early-learning-goals-in-eyfs-and-later-school-absence-research

And the full research paper can be found in the PDF https://royalsocietypublishing.org/doi/pdf/10.1098/rsos.240272

The Bradford study found ….

This document shows the findings of a similar approach using local Sheffield data. We have around 44,000 results for EYFS in Sheffield between ***** and *****

Code
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# clear the environment
remove(list = ls())

# load packages
library(tidyverse)
library(janitor)
library(lubridate)
library(ggtext)
library(ggrepel)
library(gghighlight)
library(kableExtra)
library(MetBrewer)
library(corrplot) 
library(ggcorrplot)
#library(shadowtext)
library(readxl)
library(ggstatsplot)
library(geosphere)
library(ggridges)
library(forecast)
library(tsibble)
library(gt)

# specify data folder
data_folder <- str_c("S:/Public Health/Policy Performance Communications/Business Intelligence/Projects/EIP/data/inclusion/")

# copy to excel function
copy_excel <- function(input)
  {write.table(input, file = "clipboard-20000", sep = "\t", row.names = F)}

# ggplot themes
eb <- element_blank()

# Set default ggplot theme
theme_set(
  theme_classic() +
    theme(
      #plot.title = element_text(),
      plot.subtitle = element_text(size = 9, face = "italic"),
      plot.caption = element_text(size = 8, face = "italic"),
      plot.title.position = "plot",
      plot.title = element_markdown(size = 12),
      strip.background = eb
    )
)

# theme for minimal bar charts
barplottheme_minimal <- theme(
  axis.title.y = eb,
  axis.line.y = eb,
  axis.ticks.y = eb,
  axis.line.x = eb,
  axis.ticks.x = eb
)

gannt_theme <-  theme_classic() +
  theme(
      plot.title = element_text(size = 12),
      plot.subtitle = element_text(size = 8, face = "italic"),
      plot.caption = element_text(size = 8, face = "italic"),
      plot.title.position = "plot",
      axis.title = eb, 
      axis.line.y = eb,
      axis.ticks.y = eb,
      axis.text.y = eb, 
      legend.position = "right",
      legend.title = eb,
      legend.text = element_text(size = 8)
      )

# Connect to OSCAR database via ODBC
oscar_con <- DBI::dbConnect(
  odbc::odbc(),
  Driver = "Oracle in OraClient12Home1",
  Dbq = "SCPRFLVE",
  UID = if (Sys.getenv("oscar_userid") == "") {
    rstudioapi::askForPassword("OSCAR User ID")
  } else {
    Sys.getenv("oscar_userid")
  },
  PWD = if (Sys.getenv("oscar_pwd") == "") {
    rstudioapi::askForPassword("OSCAR Password")
  } else {
    Sys.getenv("oscar_pwd")
  },
  timeout = 10
)

# summarising attendance function
# this is copied from the attendance & exclusion data model.
# any changes made there should be reflected here & vice versa
# note that the groupings appear TWICE in this function, once for grouped data and once for the "no grouping" scenario (grouping_vars = "none"). Any changes must be consistent across both.
summarise_attendance <- function(input_data, grouping_vars) {
  ifelse (grouping_vars == "none", {
    # Aggregate without grouping
    result <- input_data |> 
      mutate(zero_attendance = if_else(present == 0, 1, 0)) |> 
      summarise(child_count = n_distinct(stud_id, na.rm = TRUE),
                row_count = n(),
                possible_sessions = sum(possible_sessions, na.rm = TRUE),
                present = sum(present, na.rm = TRUE),
                authorised = sum(authorised, na.rm = TRUE),
                unauthorised = sum(unauthorised, na.rm = TRUE),
                missing = sum(missing, na.rm = TRUE),
                excluded = sum(excluded, na.rm = TRUE),
                family_holiday_agreed = sum(family_holiday_agreed, na.rm = TRUE),
                family_holiday_not_agreed = sum(family_holiday_not_agreed, na.rm = TRUE),
                family_holiday_total = sum(family_holiday_total, na.rm = TRUE),
                illness = sum(illness, na.rm = TRUE),
                med_appt = sum(med_appt, na.rm = TRUE),
                no_reason = sum(no_reason, na.rm = TRUE),
                late_absent = sum(late_absent, na.rm = TRUE),
                late_pres = sum(late_pres, na.rm = TRUE),
                late_total = sum(late_absent, na.rm = TRUE) + sum(late_pres, na.rm = TRUE),
                study_leave = sum(study_leave, na.rm = TRUE),
                approved_offsite = sum(approved_offsite, na.rm = TRUE),
                fixed_exclusions = sum(fixed_exclusions, na.rm = TRUE),
                perm_exclusions = sum(perm_exclusions, na.rm = TRUE),
                total_exclusions = sum(total_exclusions, na.rm = TRUE),
                persistent_absent_count = sum(persistent_absence, na.rm = TRUE),
                severe_absent_count = sum(severe_absence, na.rm = TRUE),
                zero_attendance_count = sum(zero_attendance, na.rm = TRUE)
      ) |> 
      mutate(percent_present = present / possible_sessions,
             percent_auth_absence = authorised / possible_sessions,
             percent_unauth_absence = unauthorised / possible_sessions,
             percent_missing = missing / possible_sessions,
             percent_family_holiday_agreed = family_holiday_agreed / possible_sessions,
             percent_family_holiday_not_agreed = family_holiday_not_agreed / possible_sessions,
             percent_family_holiday = family_holiday_total / possible_sessions,
             percent_excluded = excluded / possible_sessions,
             percent_illness = illness / possible_sessions,
             percent_med_appt = med_appt / possible_sessions,
             percent_no_reason = no_reason / possible_sessions,
             percent_late_absent = late_absent / possible_sessions,
             percent_late_pres = late_pres / possible_sessions,
             percent_late_total = late_total / possible_sessions,
             percent_study_leave = study_leave / possible_sessions,
             percent_approved_offsite = approved_offsite / possible_sessions,
             pc_of_pupils_persistent_absent = persistent_absent_count / row_count,
             pc_of_pupils_severely_absent = severe_absent_count / row_count,
             pc_of_pupils_zero_attendance = zero_attendance_count / row_count
      ) |> 
      mutate(percent_absent = 1 - percent_present)
  },
  {
    # Group by specified variables and then summarize
    result <- input_data |> 
      mutate(zero_attendance = if_else(present == 0, 1, 0)) |> 
      group_by(across(all_of(grouping_vars))) |> 
      summarise(child_count = n_distinct(stud_id, na.rm = TRUE),
                row_count = n(),
                possible_sessions = sum(possible_sessions, na.rm = TRUE),
                present = sum(present, na.rm = TRUE),
                authorised = sum(authorised, na.rm = TRUE),
                unauthorised = sum(unauthorised, na.rm = TRUE),
                missing = sum(missing, na.rm = TRUE),
                excluded = sum(excluded, na.rm = TRUE),
                family_holiday_agreed = sum(family_holiday_agreed, na.rm = TRUE),
                family_holiday_not_agreed = sum(family_holiday_not_agreed, na.rm = TRUE),
                family_holiday_total = sum(family_holiday_total, na.rm = TRUE),
                illness = sum(illness, na.rm = TRUE),
                med_appt = sum(med_appt, na.rm = TRUE),
                no_reason = sum(no_reason, na.rm = TRUE),
                late_absent = sum(late_absent, na.rm = TRUE),
                late_pres = sum(late_pres, na.rm = TRUE),
                late_total = sum(late_absent, na.rm = TRUE) + sum(late_pres, na.rm = TRUE),
                study_leave = sum(study_leave, na.rm = TRUE),
                approved_offsite = sum(approved_offsite, na.rm = TRUE),
                fixed_exclusions = sum(fixed_exclusions, na.rm = TRUE),
                perm_exclusions = sum(perm_exclusions, na.rm = TRUE),
                total_exclusions = sum(total_exclusions, na.rm = TRUE),
                persistent_absent_count = sum(persistent_absence, na.rm = TRUE),
                severe_absent_count = sum(severe_absence, na.rm = TRUE),
                zero_attendance_count = sum(zero_attendance, na.rm = TRUE)
      ) |> 
      mutate(percent_of_pupils = child_count / sum(child_count, na.rm = TRUE),
             percent_present = present / possible_sessions,
             percent_auth_absence = authorised / possible_sessions,
             percent_unauth_absence = unauthorised / possible_sessions,
             percent_missing = missing / possible_sessions,
             percent_family_holiday_agreed = family_holiday_agreed / possible_sessions,
             percent_family_holiday_not_agreed = family_holiday_not_agreed / possible_sessions,
             percent_family_holiday = family_holiday_total / possible_sessions,
             percent_excluded = excluded / possible_sessions,
             percent_illness = illness / possible_sessions,
             percent_med_appt = med_appt / possible_sessions,
             percent_no_reason = no_reason / possible_sessions,
             percent_late_absent = late_absent / possible_sessions,
             percent_late_pres = late_pres / possible_sessions,
             percent_late_total = late_total / possible_sessions,
             percent_study_leave = study_leave / possible_sessions,
             percent_approved_offsite = approved_offsite / possible_sessions,
             pc_of_pupils_persistent_absent = persistent_absent_count / row_count,
             pc_of_pupils_severely_absent = severe_absent_count / row_count,
             pc_of_pupils_zero_attendance = zero_attendance_count / row_count
      )|> 
      mutate(percent_absent = 1 - percent_present)
  }
  )
  
  return(result)
}

summarise_avg <- function(input_data)
{ summarise (input_data, 
             mean.percent_present = mean(percent_present, na.rm = TRUE),
             sd.percent_present = sd(percent_present, na.rm = TRUE),
             n.percent_present = n() )  |> 
  mutate(se.percent_present = sd.percent_present / sqrt(n.percent_present),
  lower.ci.percent_present = mean.percent_present - qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present,
  upper.ci.percent_present = mean.percent_present + qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present
  ) }

percent_calc <- function(input_data)
{input_data |> tally() |> mutate(freq = n / sum(n)) |>
  mutate(
    l_ci = freq - (1.96 * sqrt((freq * (1 - freq)) / n)),
    u_ci = freq + (1.96 * sqrt((freq * (1 - freq)) / n))
  )}

presence_mean_calc <- function(input_data)
{input_data |> 
    summarise(mean.percent_present = mean(percent_present, na.rm = TRUE),
             sd.percent_present = sd(percent_present, na.rm = TRUE),
             n.percent_present = n() )  |> 
  mutate(se.percent_present = sd.percent_present / sqrt(n.percent_present),
  lower.ci.percent_present = mean.percent_present - qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present,
  upper.ci.percent_present = mean.percent_present + qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present)
}

#presence_mean_calc_eyfs <- function(input_data = attend_eyfs, grouping_vars)
#{input_data |> ifelse (grouping_vars == "none", {
    # Aggregate without grouping
#    summarise(presence_y1 = mean(y1), na.rm = TRUE)
#    )
              #,
             #sd.percent_present = sd(percent_present, na.rm = TRUE),
             #n.percent_present = n() )  |> 
  #mutate(se.percent_present = sd.percent_present / sqrt(n.percent_present),
  #lower.ci.percent_present = mean.percent_present - qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present,
  #upper.ci.percent_present = mean.percent_present + qt(1 - (0.05 / 2), n.percent_present - 1) * se.percent_present)
#}

2 Data processing

The data is loaded from the attendance data model, which contains pre-aggregated & cleansed attendance data, as well as other descriptive characteristics. EYFS scores are loaded from the SCC OSCAR database, and the attendance data is joined on for each national curriculum year (NCY). Finally the data is aggregated, with COVID years filtered out, to calculate average attendance rates for each NCY, first by the EYFS ‘good’ indicator, and then by other characteristics of interest.

Code
# get the attendance data model
load(str_c(data_folder,"attendance_inclusion_data_model.RData"))


# MOVE THIS TO DATA MODEL ON NEXT FULL REFRESH

# get annual cohort
# cohort <- attend |>
#   group_by(stud_id, dob) |> 
#   summarise(first_year = min(year),
#             first_ncy = min(ncy)) |> 
#   select(stud_id, dob, first_year, first_ncy) |> 
#   ungroup() |> 
#   mutate(cohort = first_year - first_ncy + 1,
#          class_of = str_c("class of ",(first_year - first_ncy + 11))) |> 
#   select(-first_year, -first_ncy, -dob)
# 
# attend <- attend |> 
#   left_join(cohort, by = "stud_id")

# create a simple version of attend_stud_year for joining
asyj <- attend_stud_year |> 
  select(stud_id, 
         year, 
         percent_present)
Code
attend_eyfs <- eyfs |> 
  left_join(asyj |> rename(y1 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y1_year" == "year")) |> 
  left_join(asyj |> rename(y2 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y2_year" == "year")) |> 
  left_join(asyj |> rename(y3 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y3_year" == "year")) |> 
  left_join(asyj |> rename(y4 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y4_year" == "year")) |> 
  left_join(asyj |> rename(y5 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y5_year" == "year")) |> 
  left_join(asyj |> rename(y6 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y6_year" == "year")) |> 
  left_join(asyj |> rename(y7 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y7_year" == "year")) |> 
  left_join(asyj |> rename(y8 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y8_year" == "year")) |> 
  left_join(asyj |> rename(y9 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y9_year" == "year")) |> 
  left_join(asyj |> rename(y10 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y10_year" == "year")) |> 
  left_join(asyj |> rename(y11 = percent_present),
            join_by("stud_id" == "stud_id",
                    "y11_year" == "year")) |> 
  left_join(stud_details_joined) |> # join on the student details
  mutate(sen_level = replace_na(sen_level,"No SEN")) |> 
  mutate(primary_specific_need = ifelse(primary_specific_need == "Behaviour, Emotional And Social Difficulty", 
                                        "Social, Emotional and Mental Health",
                                        primary_specific_need))

# get school
sch_data <- attend_stud_year_school |> 
              ungroup() |> 
              select(stud_id, year, base_id) |> 
              group_by(stud_id, year) |> 
              slice_head(n = 1) |> 
  left_join(school_details_short) |> 
  select(stud_id, year, school = school_short_name)

attend_eyfs <- attend_eyfs |> 
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y1_year" == "year")) |> 
  rename(y1_school = school) |>
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y2_year" == "year")) |> 
  rename(y2_school = school) |>
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y3_year" == "year")) |> 
  rename(y3_school = school) |> 
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y4_year" == "year")) |> 
  rename(y4_school = school) |>
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y5_year" == "year")) |> 
  rename(y5_school = school) |>
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y6_year" == "year")) |> 
  rename(y6_school = school) |>
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y7_year" == "year")) |> 
  rename(y7_school = school) |>
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y8_year" == "year")) |> 
  rename(y8_school = school) |> 
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y9_year" == "year")) |> 
  rename(y9_school = school) |>
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y10_year" == "year")) |> 
  rename(y10_school = school) |> 
  left_join(sch_data,
            join_by("stud_id" == "stud_id",
                    "y11_year" == "year")) |> 
  rename(y11_school = school)
Code
# we have to do this one NCY at a time, because the covid years need filtering out

# average attendance function - we'll apply this to each year & then stack the results
avg_attend_by_good_fun <- function(input_data = attend_eyfs, ncy, ncy_year, grouping_vars = "none")
  {
  ifelse (grouping_vars == "none", 
          {
    # Aggregate without grouping
  result <- input_data |> 
  filter(!get(ncy_year) %in% c(2020,2021),
         !is.na(get(ncy))) |> 
  group_by(good) |> 
  summarise (mean_pc_present = mean(get(ncy), na.rm = TRUE),
             sd = sd(get(ncy), na.rm = TRUE),
             n = n() )  |> 
  mutate(se = sd / sqrt(n),
  lower_ci_pc_present = mean_pc_present - qt(1 - (0.05 / 2), n - 1) * se,
  upper_ci_pc_present = mean_pc_present + qt(1 - (0.05 / 2), n - 1) * se
  ) |> 
  select(-se, -sd) |> 
  mutate(ncy = ncy)
  }
  ,
  # aggregate with grouping
  {
  result <- input_data |> 
  filter(!get(ncy_year) %in% c(2020,2021),
         !is.na(get(ncy))) |> 
  group_by(good,across(all_of(grouping_vars))) |> 
  summarise (mean_pc_present = mean(get(ncy), na.rm = TRUE),
             sd = sd(get(ncy), na.rm = TRUE),
             n = n() )  |> 
  mutate(se = sd / sqrt(n),
  lower_ci_pc_present = mean_pc_present - qt(1 - (0.05 / 2), n - 1) * se,
  upper_ci_pc_present = mean_pc_present + qt(1 - (0.05 / 2), n - 1) * se
  ) |> 
  select(-se, -sd) |> 
  mutate(ncy = ncy)
  }
  )
  return(result)
  }

# first for all children
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year")

# combine
avg_attend_by_good <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11")))

# by gender
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "gender")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "gender")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "gender")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "gender")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "gender")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "gender")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "gender")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "gender")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "gender")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "gender")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "gender")

# combine
avg_attend_by_good_gender <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11")))

# by ethnicity category
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "ethnicity_category")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "ethnicity_category")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "ethnicity_category")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "ethnicity_category")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "ethnicity_category")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "ethnicity_category")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "ethnicity_category")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "ethnicity_category")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "ethnicity_category")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "ethnicity_category")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "ethnicity_category")

# combine
avg_attend_by_good_eth_cat <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11"))) |> 
  filter(!is.na(ethnicity_category),
         !ethnicity_category %in% c("Refused ","Information Not Yet Obtained"))

# by ethnicity description
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "ethnicity_description")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "ethnicity_description")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "ethnicity_description")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "ethnicity_description")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "ethnicity_description")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "ethnicity_description")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "ethnicity_description")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "ethnicity_description")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "ethnicity_description")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "ethnicity_description")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "ethnicity_description")

# combine
avg_attend_by_good_eth_des <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11"))) |> 
  filter(!is.na(ethnicity_description),
         !ethnicity_description %in% c("Irish","Not Known","not known"))

# by deprivation quartile
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "imd_quartile")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "imd_quartile")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "imd_quartile")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "imd_quartile")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "imd_quartile")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "imd_quartile")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "imd_quartile")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "imd_quartile")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "imd_quartile")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "imd_quartile")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "imd_quartile")

# combine
avg_attend_by_good_imd_quartile <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11"))) |> 
  filter(!is.na(imd_quartile))

# by sen_level
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "sen_level")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "sen_level")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "sen_level")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "sen_level")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "sen_level")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "sen_level")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "sen_level")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "sen_level")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "sen_level")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "sen_level")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "sen_level")

# combine
avg_attend_by_good_sen_level <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11"))) |> 
  filter(!is.na(sen_level))

# by sen primary specific need
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "primary_specific_need")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "primary_specific_need")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "primary_specific_need")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "primary_specific_need")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "primary_specific_need")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "primary_specific_need")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "primary_specific_need")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "primary_specific_need")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "primary_specific_need")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "primary_specific_need")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "primary_specific_need")

# combine
avg_attend_by_good_primary_specific_need <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11"))) |> 
  filter(!is.na(primary_specific_need))#,
         #!primary_specific_need %in% c("Irish","Not Known","not known"))

# by ward
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "ward")
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "ward")
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "ward")
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "ward")
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "ward")
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "ward")
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "ward")
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "ward")
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "ward")
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "ward")
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "ward")

# combine
avg_attend_by_good_ward <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11"))) |> 
  filter(!is.na(ward))#,
         #!ward %in% c("Irish","Not Known","not known"))

# primary schools
# create averages for each ncy
y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "y1_school") |> rename(school = y1_school)
y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "y2_school") |> rename(school = y2_school)
y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "y3_school") |> rename(school = y3_school)
y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "y4_school") |> rename(school = y4_school)
y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "y5_school") |> rename(school = y5_school)
y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "y6_school") |> rename(school = y6_school)
y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "y7_school") |> rename(school = y7_school)
y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "y8_school") |> rename(school = y8_school)
y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "y9_school") |> rename(school = y9_school)
y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "y10_school") |> rename(school = y10_school)
y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "y11_school") |> rename(school = y11_school)

# combine
avg_attend_by_good_school <- 
  rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
  mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11")))

# tidy up
remove(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg)


# annual cohorts TO DO
# create averages for each ncy
# y1_avg <- avg_attend_by_good_fun(ncy = "y1",ncy_year = "y1_year", grouping_vars = "y1_school") |> rename(school = y1_school)
# y2_avg <- avg_attend_by_good_fun(ncy = "y2",ncy_year = "y2_year", grouping_vars = "y2_school") |> rename(school = y2_school)
# y3_avg <- avg_attend_by_good_fun(ncy = "y3",ncy_year = "y3_year", grouping_vars = "y3_school") |> rename(school = y3_school)
# y4_avg <- avg_attend_by_good_fun(ncy = "y4",ncy_year = "y4_year", grouping_vars = "y4_school") |> rename(school = y4_school)
# y5_avg <- avg_attend_by_good_fun(ncy = "y5",ncy_year = "y5_year", grouping_vars = "y5_school") |> rename(school = y5_school)
# y6_avg <- avg_attend_by_good_fun(ncy = "y6",ncy_year = "y6_year", grouping_vars = "y6_school") |> rename(school = y6_school)
# y7_avg <- avg_attend_by_good_fun(ncy = "y7",ncy_year = "y7_year", grouping_vars = "y7_school") |> rename(school = y7_school)
# y8_avg <- avg_attend_by_good_fun(ncy = "y8",ncy_year = "y8_year", grouping_vars = "y8_school") |> rename(school = y8_school)
# y9_avg <- avg_attend_by_good_fun(ncy = "y9",ncy_year = "y9_year", grouping_vars = "y9_school") |> rename(school = y9_school)
# y10_avg <- avg_attend_by_good_fun(ncy = "y10",ncy_year = "y10_year", grouping_vars = "y10_school") |> rename(school = y10_school)
# y11_avg <- avg_attend_by_good_fun(ncy = "y11",ncy_year = "y11_year", grouping_vars = "y11_school") |> rename(school = y11_school)
# 
# # combine
# avg_attend_by_good_school <- 
#   rbind(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg) |> 
#   mutate(ncy = factor(ncy, levels = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11")))
# 
# # tidy up
# remove(y1_avg,y2_avg,y3_avg,y4_avg,y5_avg,y6_avg,y7_avg,y8_avg,y9_avg,y10_avg,y11_avg)

2.1 Analysis

Attainment over time

Code
eyfs_year <- eyfs |> 
  group_by(year, good) |> 
  summarise(child_count = n_distinct(stud_id)) |> 
  mutate(percent = child_count / sum(child_count))
 
ggplot(eyfs_year,
       aes(x = year, 
           y = child_count, 
           fill = good,
           label = str_c(child_count,"\n  (",scales::percent(percent, accuracy = 0.1),")"))
       )+
  geom_col(position ="stack") +
  geom_text(data = eyfs_year |> filter(good == 1), aes(y = 2000), colour = "white", fontface = "bold", size = 3.5) +
  geom_text(data = eyfs_year |> filter(good == 0), aes(y = 5500), colour = "white", fontface = "bold", size = 3.5) +
  labs(title = "Sheffield children <b><span style='color:#208cc0'>meeting </span></b>and <b><span style='color:#a82203'>not meeting </span></b>expected standards at early years foundation stage") +
  scale_x_continuous(breaks = seq(2013,2019,by = 1)) +
  #scale_x_reverse() +
  #coord_flip() +
  theme(legend.position = "none", 
        axis.title = eb, 
        axis.line = eb, 
        axis.text.y = eb, 
        axis.ticks = eb) +
  scale_fill_met_d("Juarez")

Plotting the overall attendance rates for each year, by the EYFS good score shows a clear gap in attendance between children meeting expected level and those not. This gap is present in all years and grows during secondary school.

Code
ggplot(avg_attend_by_good |> 
         mutate(label = if_else(ncy == "y11",
                                if_else(good == 1,"expected level","not meeting expected level"),NA_character_)),
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good,
           label = label
       )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
  geom_label_repel(size = 2.8,
                   nudge_y = -0.01,
                   min.segment.length = Inf) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "none", plot.subtitle = element_markdown())

Next we’ll explore how this difference varies by different characteristics, in each case recreating the plot above.

2.1.1 Gender

Code
ggplot(avg_attend_by_good_gender |> 
         mutate(category = if_else(good == 1,if_else(gender == "M","boys - expected level","girls - expected level"),
                                              if_else(gender == "M","boys - not meeting expected level","girls - not meeting expected level"),NA_character_)) |> 
         mutate(label = if_else(ncy == "y11",category,NA_character_)),
         aes(x = ncy,
           y = mean_pc_present,
           colour = category,
           group = category,
           label = label
           )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
  geom_label_repel(size = 2.8,
                   nudge_y = -0.01,
                   min.segment.length = Inf,
                   alpha = 0.7) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure, and gender",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "none", plot.subtitle = element_markdown()) +
  scale_colour_brewer(palette = "Set1")

2.1.2 Ethnicity

This table provides the numbers for the following plot

Code
avg_attend_by_good_eth_cat |> 
  select(ethnicity_category, n, ncy) |> 
  pivot_wider(c(ethnicity_category, good),
              names_from = "ncy",
              values_from = "n") |> 
  group_by(ethnicity_category) |> 
  gt(
    rowname_col = "good"
  ) 
y1 y2 y3 y4 y5 y6 y7 y8 y9 y10 y11
Any Other Ethnic Group
0 557 437 423 427 411 382 287 293 308 244 162
1 672 509 497 521 544 541 408 361 326 238 152
Asian or Asian British
0 1768 1475 1399 1356 1337 1250 865 921 1013 799 580
1 2666 2103 2061 2014 2031 2064 1514 1453 1338 951 598
Black or Black British
0 785 661 647 590 565 558 420 435 468 347 240
1 1305 1041 1011 953 963 1023 822 761 655 436 254
Chinese
0 88 72 63 52 45 40 34 41 40 34 19
1 175 132 131 128 121 118 78 74 67 51 31
Mixed / Dual Background
0 1141 948 946 909 889 865 624 645 722 553 395
1 1995 1587 1514 1513 1555 1578 1284 1220 1048 750 443
White
0 8825 7407 7144 6990 6742 6268 4534 4840 5342 4208 2986
1 16951 13724 13321 13172 13280 13472 10772 10322 9530 6809 4294
not known
0 308 257 247 249 223 182 114 142 212 182 144
1 465 338 324 366 332 314 250 214 262 207 141
Code
avg_attend_by_good_eth_cat |> 
  select(ethnicity_category, n) |> 
  group_by(ethnicity_category, good) |> 
  summarise(n = sum(n)) |> 
  pivot_wider(values_from = n,
              names_from = good) |>
  rename(meeting = "1",
         not_meeting = "0") |> 
  mutate(total = meeting + not_meeting,
         pc_good = meeting / (meeting + not_meeting)) |> 
  ggplot(aes(x = fct_reorder(ethnicity_category, pc_good),
             y = pc_good,
             label = scales::percent(pc_good)) ) +
  geom_col(fill = "steel blue") +
  geom_text(hjust = 1, colour = "white") +
  coord_flip() +
  labs(title = "% achieving EYFS good measure, by ethnicity category") +
  barplottheme_minimal +
  theme(axis.text.x = eb, axis.title = eb)

Code
ggplot(avg_attend_by_good_eth_cat |> 
         mutate(label = if_else(ncy == "y11",
                                if_else(good == 1,"expected level","not meeting expected level"),NA_character_)),
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good,
           label = label
       )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
  #geom_label_repel(size = 2,
  #                 nudge_y = -0.01,
  #                 min.segment.length = Inf,
  #                 alpha = 0.6) +
  scale_colour_hue(labels = c("1" = "meeting expected level","0" = "not meeting expected level")) +
  scale_y_continuous(labels = scales::percent) +
  geom_line(data = avg_attend_by_good,
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good), 
       linetype = "dashed", alpha = 0.6, inherit.aes = FALSE) +
  facet_wrap(vars(ethnicity_category)) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ethnicity category",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.title = eb, 
        plot.subtitle = element_markdown(),
        legend.position = "top")

Code
ggplot(avg_attend_by_good_eth_des |> 
         mutate(label = if_else(ncy == "y11",
                                if_else(good == 1,"expected level","not meeting expected level"),NA_character_)),
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good,
           label = label
       )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
  #geom_label_repel(size = 2,
  #                 nudge_y = -0.01,
  #                 min.segment.length = Inf,
   #                alpha = 0.6) +
  scale_y_continuous(labels = scales::percent) +
   geom_line(data = avg_attend_by_good,
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good), 
       linetype = "dashed", alpha = 0.6, inherit.aes = FALSE) +
  facet_wrap(vars(ethnicity_description)) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ethnicity description",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "none", plot.subtitle = element_markdown())

2.1.3 SEN level

Code
avg_attend_by_good_sen_level |> 
  select(sen_level, n) |> 
  group_by(sen_level, good) |> 
  summarise(n = sum(n)) |>
  pivot_wider(values_from = n,
              names_from = good) |>
  rename(meeting = "1",
         not_meeting = "0") |> 
  #arrange(desc(pc_good)) |> 
  mutate(total = meeting + not_meeting,
         pc_good = meeting / (meeting + not_meeting)) |> 
  ggplot(aes(x = fct_reorder(sen_level, pc_good),
             y = pc_good,
             label = scales::percent(pc_good, accuracy = 0.1)) ) +
  geom_col(fill = "steel blue") +
  geom_text(aes(hjust = ifelse(pc_good > 0.05,1.5,-0.5), colour = pc_good > 0.05)) +
  scale_color_manual(values = c("steel blue", "white"), guide = "none") +
  coord_flip() +
  labs(title = "% achieving EYFS good measure, by SEN level") +
  barplottheme_minimal +
  theme(axis.text.x = eb, axis.title = eb)

Children with an EHCP plan are generally unlikely to meet the expected level at EYFS, and so the numbers do not allow a useful comparison here, and they are removed from this chart.

Children with no special educational needs make up the majority, and so closely follow the overall averages (shown here as dotted lines).

Children requiring SEN support attend worse than those with no SEN regardless of their EYFS score - those who do meet the expected standard still attend worse than average throughout their school career.

Code
ggplot(avg_attend_by_good_sen_level |> 
         filter(sen_level != "EHCP") |> 
         mutate(label = if_else(ncy == "y11",
                                if_else(good == 1,"expected level","not meeting expected level"),NA_character_)),
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good,
           label = label
       )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
  geom_label_repel(size = 2,
                   nudge_y = -0.01,
                   min.segment.length = Inf,
                   alpha = 0.6) +
  scale_y_continuous(labels = scales::percent) +
   geom_line(data = avg_attend_by_good,
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good), 
       linetype = "dashed", alpha = 0.6, inherit.aes = FALSE) +
  facet_wrap(vars(sen_level)) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and SEN level",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "none", plot.subtitle = element_markdown())

2.1.4 SEN primary specific need

Code
avg_attend_by_good_primary_specific_need |> 
  select(primary_specific_need, n) |> 
  group_by(primary_specific_need, good) |> 
  summarise(n = sum(n)) |>
  mutate(n = replace_na(n)) |> 
  pivot_wider(values_from = n,
              names_from = good,
              values_fill = 0) |>
  rename(meeting = "1",
         not_meeting = "0") |> 
  mutate(total = meeting + not_meeting,
         pc_good = meeting / (meeting + not_meeting)) |> 
  ggplot(aes(x = fct_reorder(primary_specific_need, pc_good),
             y = pc_good,
             label = scales::percent(pc_good, accuracy = 0.1)) ) +
  geom_col(fill = "steel blue") +
  geom_text(aes(hjust = ifelse(pc_good > 0.05,1,-0.5), colour = pc_good > 0.05), size = 2.5) +
  scale_color_manual(values = c("steel blue", "white"), guide = "none") +
  coord_flip() +
  labs(title = "% achieving EYFS good measure, by primary specific special educational need") +
  barplottheme_minimal +
  theme(axis.text.x = eb, axis.title = eb)

Code
ggplot(avg_attend_by_good_primary_specific_need |> 
         filter(!primary_specific_need %in% c("No Specialist Assessment","Profound And Multiple Learning Difficulty",
                                              "Severe Learning Difficulty")) |> 
         mutate(label = if_else(ncy == "y11",
                                if_else(good == 1,"expected level","not meeting expected level"),NA_character_)),
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good,
           label = label
       )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
  geom_label_repel(size = 2,
                   nudge_y = -0.01,
                   min.segment.length = Inf,
                   alpha = 0.6) +
  scale_y_continuous(labels = scales::percent, limits = c(0.8,1)) +
  facet_wrap(vars(primary_specific_need)) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and primary SEN specific need",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "none", plot.subtitle = element_markdown())

2.1.5 Geography and deprivation

As in other areas of analysis within the Inclusion & Attendance project, we’ll use the 2019 Indices of Multiple Deprivation (IMD) scores of the child’s ward of residence. Those scores are divided into four quartiles, with 1 represents the most affluent 25% of the city, and quartile 4 the most deprived 25%.

There is a clear relationship between deprivation score and EYFS attainment:

Code
avg_attend_by_good_imd_quartile |> 
  mutate(imd_quartile = factor(imd_quartile)) |> 
  select(imd_quartile, n) |> 
  group_by(imd_quartile, good) |> 
  summarise(n = sum(n)) |>
  mutate(n = replace_na(n)) |> 
  pivot_wider(values_from = n,
              names_from = good,
              values_fill = 0) |>
  rename(meeting = "1",
         not_meeting = "0") |> 
  mutate(total = meeting + not_meeting,
         pc_good = meeting / (meeting + not_meeting)) |> 
  ggplot(aes(x = fct_reorder(imd_quartile, pc_good),
             y = pc_good,
             label = scales::percent(pc_good, accuracy = 0.1)) ) +
  geom_col(fill = "steel blue") +
  geom_text(hjust = 1.5, size = 3, colour = "white") +
  scale_color_manual(values = c("steel blue", "white"), guide = "none") +
  coord_flip() +
  labs(title = "% achieving EYFS good measure, by IMD quartile") +
  barplottheme_minimal +
  theme(axis.text.x = eb, axis.title = eb)

To look at attendance, here the middle two are removed, so the pairs of lines below represent the most deprived 25% and least deprived 25% of the city, each split by the EYFS good measure.

Two things are worth noting in this chart: 1. The gap between those meeting expected EYFS level and not is greater in poorer areas of the city, and grows faster through secondary school. 2. Children in the poorest areas who do not meet the expected level shows the steepest drop off in attendance between Y6 and Y7, as they transition to secondary school. 2. The blue line (children not meeting the expected level in the least deprived wards) is higher than the green line (children who do meet the expected level in the most deprived wards). So deprivation makes more of a difference to attendance than the EYFS measure.

Code
ggplot(avg_attend_by_good_imd_quartile |> 
         filter(imd_quartile %in% c(1,4)) |> 
         mutate(category = if_else(good == 1,if_else(imd_quartile == 4,"most deprived wards - expected level","least deprived wards - expected level"),
                                              if_else(imd_quartile == 4,"most deprived wards - not meeting expected level","least deprived wards - not meeting expected level"),NA_character_)) |> 
         mutate(label = if_else(ncy == "y11",category,NA_character_)),
         aes(x = ncy,
           y = mean_pc_present,
           colour = category,
           group = category,
           label = label
           )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
  geom_label_repel(aes(x = "y18"),
                       size = 3,
                   #nudge_x = -1,
                   min.segment.length = Inf,
                   alpha = 0.7,
                   hjust = 1) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(breaks = c("y1","y2","y3","y4","y5","y6","y7","y8","y9","y10","y11")) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure, and IMD quartile",
       subtitle = "% of available sessions attended, by national curriculum year +- 95 CI, top and bottom IMD quartiles shown only",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "none", plot.subtitle = element_markdown()) +
  scale_colour_brewer(palette = "Set1")

Plot by ward

Code
ggplot(avg_attend_by_good_ward |> 
         mutate(label = if_else(ncy == "y11",
                                if_else(good == 1,"expected level","not meeting expected level"),NA_character_)),
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good,
           label = label
       )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
   geom_line(data = avg_attend_by_good,
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good), 
       linetype = "dashed", alpha = 0.4, inherit.aes = FALSE) +
  #geom_label_repel(size = 2,
  #                 nudge_y = -0.01,
  #                 min.segment.length = Inf,
  #                 alpha = 0.6) +
  scale_y_continuous(labels = scales::percent, limits = c(0.8,1)) +
  facet_wrap(vars(ward)) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ward of residence",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "none", plot.subtitle = element_markdown())

2.2 School

Code
avg_attend_by_good_school |> group_by(school) |> rename(child_count = n) |> summarise(n = n(), total = sum(child_count)) |> arrange(desc(total)) |> copy_excel() #gt()
Code
ggplot(avg_attend_by_good_school |> 
         filter(school %in% c(
           "All Saints",
"Birley Academy",
"Bradfield",
"Chaucer",
"Ecclesfield",
"Fir Vale",
"Firth Park Academy",
"Forge Valley",
"Handsworth Grange Sports College",
"High Storrs",
"King Ecgbert",
"King Edward VII",
"Meadowhead",
"Mercia",
"Newfield",
"Notre Dame High",
"Oasis Academy",
"Outwood Academy City",
"Parkwood E-Act Academy",
"Sheffield Park Academy",
"Sheffield Springs Academy",
"Silverdale",
"Stocksbridge High",
"Tapton",
"Westfield",
"Yewlands Academy"
         ) ,
ncy %in% c("y7","y8","y9","y10","y11"))|> 
         mutate(label = if_else(ncy == "y11",
                                if_else(good == 1,"expected level","not meeting expected level"),NA_character_)),
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good,
           label = label
       )) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax = upper_ci_pc_present,ymin = lower_ci_pc_present), width = 0.2, alpha = 0.5) +
   geom_line(data = avg_attend_by_good,
       aes(x = ncy,
           y = mean_pc_present,
           colour = good,
           group = good), 
       linetype = "dashed", alpha = 0.4, inherit.aes = FALSE) +
  scale_y_continuous(labels = scales::percent, limits = c(0.7,1)) +
  xlim("y7","y8","y9","y10","y11") +
  facet_wrap(vars(school)) +
  labs(title = "Attendance in Sheffield Schools by EYFS <i>good</span></i> measure and ward of residence",
       subtitle = "Early Years Foundation Stage measure of school readiness; mean % of available sessions attended, by national curriculum year +- 95 CI",
       caption = "data from Capita One") +
  theme(axis.title = eb, legend.position = "top", plot.subtitle = element_markdown())

2.3 Annual cohort analysis

Code
##| fig-height: 8
#| warning: false
plot_data <- attend |> 
  left_join(eyfs |> select(stud_id, good) |> distinct()) |> 
  mutate(covid_year_flag = case_when(year < 2020 ~ "pre-COVID", year == 2020 ~ "lockdown years", year == 2021 ~ "lockdown years", year > 2021 ~ "post-pandemic")) |> 
  mutate(covid_year_flag = fct_relevel(covid_year_flag, c("pre-COVID","lockdown years","post-pandemic"))) |> 
  filter(cohort >= 2016, phase == "Primary") |> 
  ungroup() |> 
  summarise_attendance(grouping_vars = c("ncy","class_of","year","covid_year_flag","phase","good"
                                         )) |> 
  filter(ncy <= 11 & ncy >= 1,
         child_count > 100
         ) |> 
  group_by(class_of) |> 
  filter(!is.na(good))

ggplot(plot_data,
         aes(x = ncy,
             y = percent_present,
             colour = good,
             group = good,
             label = year,
             shape = covid_year_flag)) +
  geom_point(size = 3) +
  scale_shape_manual(values = c("pre-COVID" = 1, "lockdown years" = 8, "post-pandemic" = 4)) +
  geom_line() +
  #geom_text(data = plot_data |> filter(imd_quartile == 4), 
  #          aes(label = year), size = 2.5, nudge_y = -0.01, colour = "darkgrey") +
  #geom_text(aes(label = scales::percent(percent_present, accuracy = 0.1L)), size = 2.5, nudge_y = 0.01, alpha = 0.7) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(1,6))+
  facet_wrap(vars(class_of))+
  theme(legend.position = "top", 
        axis.title = eb, strip.background = eb) +
  labs(title = "Primary school attendance over time by national curriculum year, and deprivation quartile",
           subtitle = "% of available sessions attended, top & bottom 25% by 2019 Indices of Multiple Deprivation (IMD) score of ward of residence",
           caption = "data from Capita One",
           shape = "COVID time period")

Code
##| fig-height: 8
#| warning: false
plot_data <- attend |> 
  left_join(eyfs |> select(stud_id, good) |> distinct()) |> 
  mutate(covid_year_flag = case_when(year < 2020 ~ "pre-COVID", year == 2020 ~ "lockdown years", year == 2021 ~ "lockdown years", year > 2021 ~ "post-pandemic")) |> 
  mutate(covid_year_flag = fct_relevel(covid_year_flag, c("pre-COVID","lockdown years","post-pandemic"))) |> 
  filter(cohort >= 2016, phase == "Secondary") |> 
  ungroup() |> 
  summarise_attendance(grouping_vars = c("ncy","class_of","year","covid_year_flag","phase","good"
                                         )) |> 
  filter(ncy <= 11 & ncy >= 1,
         child_count > 100
         ) |> 
  group_by(class_of) |> 
  filter(!is.na(good))

ggplot(plot_data,
         aes(x = ncy,
             y = percent_present,
             colour = good,
             group = good,
             label = year,
             shape = covid_year_flag)) +
  geom_point(size = 3) +
  scale_shape_manual(values = c("pre-COVID" = 1, "lockdown years" = 8, "post-pandemic" = 4)) +
  geom_line() +
  #geom_text(data = plot_data |> filter(imd_quartile == 4), 
  #          aes(label = year), size = 2.5, nudge_y = -0.01, colour = "darkgrey") +
  #geom_text(aes(label = scales::percent(percent_present, accuracy = 0.1L)), size = 2.5, nudge_y = 0.01, alpha = 0.7) +
  geom_text(aes(label = year), size = 2.5, nudge_y = -0.01, colour = "darkgrey") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(7,11))+
  facet_wrap(vars(class_of))+
  theme(legend.position = "top", 
        axis.title = eb, strip.background = eb) +
  labs(title = "Secondary school attendance over time by national curriculum year, and deprivation quartile",
           subtitle = "% of available sessions attended, top & bottom 25% by 2019 Indices of Multiple Deprivation (IMD) score of ward of residence",
           caption = "data from Capita One",
           shape = "COVID time period")