Homelessness Risk

Author

Laurie Platt

Published

Last updated 17 June, 2025

Code
import os
import datetime
import pandas as pd
import numpy as np
import hl_risk as hl

# --- Homelessness cases and their household membership ---
hl_case = hl.load_hl_case()
hl_case_hh = hl.load_hl_case_hh()

# --- Homelessness clients, high level support clients, rough sleepers ---
hl_client = hl.load_hl_client()
hl_high = hl.load_hl_high()
hl_rough = hl.load_hl_rough()

# --- Homeless households and people ---
hl_hh = hl.load_hl_hh()
hl_person = hl.load_hl_person()

# --- Council Tax arrears ---
ctax_arrears = hl.load_ctax_arrears()

# Row counts for the case, case-household and Council Tax arrears dataframes
n_hl_case, n_hl_case_hh, n_ctax_arrears = (
    len(frame) for frame in (hl_case, hl_case_hh, ctax_arrears)
)

# Range of application dates covered by the case data
case_earliest_date, case_last_date = (
    hl_case["application_date"].min(),
    hl_case["application_date"].max(),
)

1 Examples

1.1 Linking two tables of persons

Linking without deduplication:
https://moj-analytical-services.github.io/splink/demos/examples/duckdb/link_only.html

Code
from splink import splink_datasets

# Start from the `fake_1000` example dataset provided by Splink
df = splink_datasets.fake_1000

# Randomly sample half of the rows as the left-hand dataset; the rows whose
# index labels were not sampled form the right-hand dataset, giving two
# separate datasets which can be linked together.
df_l = df.sample(frac=0.5)
df_r = df.drop(index=df_l.index)

# Preview the first two rows of the left-hand dataset
df_l.head(2)
downloading: https://raw.githubusercontent.com/moj-analytical-services/splink_datasets/master/data/fake_1000.csv
Sample dataset
unique_id first_name surname dob city email cluster
86 86 Charlotte Johnson 2012-01-06 fTelford charlottej68@lee-taylor@.org 25
937 937 Isabelle Hall 1984-11-20 Swansae isabelleh97m@lewis-gregory.com 235