france-crime-datasette/createdb.sh
2023-04-18 20:36:51 -06:00

46 lines
1.6 KiB
Bash

#!/bin/sh
set -e
DB=francecrime.db
DEPT=department
REG=region
DEPT_CRIMES=department_crimes
rm -f "$DB"
sqlite-utils create-database "$DB"
# Region lookup table
curl -L https://www.insee.fr/fr/statistiques/fichier/6800675/v_region_2023.csv | \
sqlite-utils insert "$DB" "$REG" - --csv --convert 'return {"code": row["REG"], "name": row["NCCENR"]}'
sqlite-utils transform "$DB" "$REG" --pk code
# Department lookup table
curl -L https://www.insee.fr/fr/statistiques/fichier/6800675/v_departement_2023.csv | \
sqlite-utils insert "$DB" "$DEPT" - --csv --convert 'return {"code": row["DEP"], "name": row["NCCENR"]}'
sqlite-utils transform "$DB" "$DEPT" --pk code
# TODO Add a way to filter out island possessions
# Crimes by department
curl -L https://www.data.gouv.fr/fr/datasets/r/acc332f6-92be-42af-9721-f3609bea8cfc | gzip -d | \
sqlite-utils insert "$DB" "$DEPT_CRIMES" - --csv --delimiter=";" --convert '
def full_year(year):
return "20" + year
def comma_float(s):
return float(s.replace(",", "."))
return {
"class": row["classe"],
"year": full_year(row["annee"]),
"department": row["Code.département"],
"region": row["Code.région"],
"cases": int(row["faits"]),
"population": int(row["POP"]),
"rate_per_thousand": comma_float(row["tauxpourmille"])}'
sqlite-utils extract "$DB" "$DEPT_CRIMES" class
sqlite-utils transform "$DB" "$DEPT_CRIMES" --pk class_id --pk year --pk department
sqlite-utils add-foreign-keys "$DB" \
"$DEPT_CRIMES" department "$DEPT" code \
"$DEPT_CRIMES" region "$REG" code
# TODO Rename class_id to class and show English text using crime_classes.csv
datasette "$DB" -o