Justfile for importing schedule data.

This commit is contained in:
projectmoon 2024-04-11 11:35:52 +02:00
parent b8d43b42fa
commit 8d5033a8ee
3 changed files with 67 additions and 2 deletions

11
.gitignore vendored
View File

@ -1,8 +1,17 @@
# tools
gtfs-sql-importer/
# certificates, local tool directory, and environment files
*.cert
*.cer
*.crt
.tools
.env
# supabase
supabase/.temp
supabase/.branches
node_modules/
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules

46
Justfile Normal file
View File

@ -0,0 +1,46 @@
# Placeholders: both are empty and not referenced by any recipe visible in this file.
cert_path := ""
prod_db_url := ""
# Repo settings: third-party tooling is kept in a .tools directory next to this Justfile.
tools_dir := justfile_directory() / ".tools"
gtfs_importer_dir := tools_dir / "gtfs-sql-importer"
gtfs_importer_repo := "https://github.com/fitnr/gtfs-sql-importer"
# Data URLs: the remote GTFS schedule archive and the local path it is downloaded to.
straeto_schedule_data := "https://opendata.straeto.is/data/gtfs/gtfs.zip"
straeto_dl_file := tools_dir / "straeto-gtfs.zip"
# Prod Postgres settings. These are exported, so every recipe (and the `make`
# processes the recipes start) receives them as environment variables.
# NOTE(review): these look like the standard libpq connection variables,
# presumably picked up by the importer's Makefile through psql; confirm.
# The certificate file is expected next to this Justfile.
export PGSSLROOTCERT := justfile_directory() / "prod-supabase.cer"
export PGUSER := "postgres.yexfstvjfxhursmqcqcu"
# Read from the caller's environment; env_var() aborts when PGPASSWORD is unset.
export PGPASSWORD := env_var("PGPASSWORD")
export PGHOST := "aws-0-eu-central-1.pooler.supabase.com"
export PGSSLMODE := "verify-full"
export PGDATABASE := "postgres"
# Create the local tools directory if it does not already exist.
# The path is quoted so a checkout location containing spaces still works.
@ensure_tool_dir:
    mkdir -p "{{ tools_dir }}"
# Clone gtfs-sql-importer into the tools directory on first use, then force the
# checkout to match origin/master (local changes in that checkout are discarded).
# Paths are quoted so directories containing spaces work. git's own --quiet
# flags replace the blanket redirects: progress output stays silent, but error
# messages are still shown when the fetch or reset fails.
@clone_gtfs_sql_importer: ensure_tool_dir
    test -d "{{ gtfs_importer_dir }}" || git clone "{{ gtfs_importer_repo }}" "{{ gtfs_importer_dir }}"
    cd "{{ gtfs_importer_dir }}" && \
    git fetch --quiet origin master && \
    git reset --hard --quiet origin/master
# Remove the downloaded schedule archive (no error if it is absent).
# The path is quoted so a checkout location containing spaces still works.
clean:
    rm -f "{{ straeto_dl_file }}"
# Run the importer's `add_indices` make target from inside its checkout, with
# the exported PG* settings in the environment. The checkout is cloned/updated
# first. The path is quoted so directories containing spaces work.
add_indices_prod: clone_gtfs_sql_importer
    cd "{{ gtfs_importer_dir }}" && make add_indices
# Download the Straeto GTFS archive (unless a cached copy exists) and load it
# through the importer's make targets, in order: init, drop_notnull,
# drop_constraints, load, add_constraints.
#
# - `make init` is best-effort: its output is discarded and its failure is
#   ignored (presumably because it fails once the schema already exists; confirm).
# - The archive is downloaded to a `.part` file and renamed only after wget
#   succeeds. `wget -O` creates its output file even when the transfer fails, so
#   writing straight to the final path could leave an empty or truncated archive
#   that later runs would treat as a valid cache. `test -s` additionally rejects
#   an empty file left behind by an earlier failed run.
# - All paths are quoted so a checkout location containing spaces still works.
import_schedules_prod: clone_gtfs_sql_importer
    cd "{{ gtfs_importer_dir }}" && make init > /dev/null 2>&1 || true
    test -s "{{ straeto_dl_file }}" || \
    ( wget "{{ straeto_schedule_data }}" -O "{{ straeto_dl_file }}.part" && \
    mv -f "{{ straeto_dl_file }}.part" "{{ straeto_dl_file }}" )
    cd "{{ gtfs_importer_dir }}" && make drop_notnull
    cd "{{ gtfs_importer_dir }}" && make drop_constraints
    cd "{{ gtfs_importer_dir }}" && make -j load GTFS="{{ straeto_dl_file }}"
    cd "{{ gtfs_importer_dir }}" && make add_constraints

View File

@ -19,14 +19,24 @@ How to track how late buses are:
for bus positions.
- [ ] Analyze actual arrival vs what the stop endpoint said about the
arrival.
- Use PostGIS to compute "bus arrived" event: When bus is within X
- [x] Use PostGIS to compute "bus arrived" event: When bus is within X
meters of a stop on its route, mark that as an arrival event.
- [ ] Sanity check potential arrivals by removing values that are weird:
- [ ] Bus does not belong to the route on the stop.
- [ ] Bus not lingering at stop long enough. (e.g. driver goes right past).
- [ ] ???
- Once we have bus arrival events, we can compare them to arrivals
throughout the day.
- We can then discard the raw bus position data, since it no longer
needs to be stored: delete every raw data point between the last
arrival and the newly computed one.
Downloading route data:
- Don't use GraphQL for this; use the open data feeds linked below instead.
- https://straeto.is/en/about-straeto/open-data
- https://straeto.is/en/about-straeto/open-data/real-time-data
- https://github.com/public-transport/gtfs-via-postgres
Arrival computation:
- Can use edge functions for this?
- Function takes a bus route number, start time, end time.