Exploring San Francisco Bikeshare Public Data Using BigQuery

Khoirun Nisa Alfaini
2 min readDec 11, 2022

--

  1. Each bike station’s capacity (dockcount), ordered by its capacity
select
name, dockcount
from
`bigquery-public-data.san_francisco.bikeshare_stations`
order by
dockcount

2. Landmark with the most bike stations

select
landmark, count(name) as stations_num
from
`bigquery-public-data.san_francisco.bikeshare_stations`
group by
landmark
order by
stations_num desc
limit 1

3. Start-End station names with the most trips

select
start_station_name,
end_station_name,
count(trip_id) as trips_num
from
`bigquery-public-data.san_francisco.bikeshare_trips`
group by
start_station_name,
end_station_name
order by
trips_num desc

4. Average trip duration in minutes for each subcription type

select
subscriber_type, avg(duration_sec / 60) as avg_duration_min
from
`bigquery-public-data.san_francisco.bikeshare_trips`
group by
subscriber_type

5. Total duration and number of trips from every station cointains “Redwood” in its name

select
start_station_name,
sum(trip_id) as trips_num,
sum(duration_sec / 60 / 60) as duration_hr
from
`bigquery-public-data.san_francisco.bikeshare_trips`
where
start_station_name like '%Redwood%'
group by
start_station_name

6. Most trips’ destination landmark

select
end_station_name,
landmark,
sum(trip_id) as trips_num
from
`bigquery-public-data.san_francisco.bikeshare_trips` as trips
left join
`bigquery-public-data.san_francisco.bikeshare_stations` as stations
on
trips.end_station_id = stations.station_id
group by
end_station_name, landmark
order by
trips_num desc
limit 1

7. Destination’s latitude-longitude with the most trips

select
end_station_name,
latitude,
longitude,
sum(trip_id) as trips_num
from
`bigquery-public-data.san_francisco.bikeshare_trips` as trips
left join
`bigquery-public-data.san_francisco.bikeshare_stations` as stations
on
trips.end_station_id = stations.station_id
group by
end_station_name, latitude, longitude
order by
trips_num desc
limit 1

8. Average and total distance of each bike numbers

select
bike_number,
avg(dist_in_m) as avg_dist_m,
sum(dist_in_m) as total_dist_m
from
(select
st_distance(
st_geogpoint(start_lon, start_lat),
st_geogpoint(end_lon, end_lat)
) as dist_in_m,
starts.bike_number
from
(select
start_station_name,
latitude as start_lat,
longitude as start_lon,
bike_number,
trip_id
from
`bigquery-public-data.san_francisco.bikeshare_trips` as trips
left join
`bigquery-public-data.san_francisco.bikeshare_stations` as stations
on
trips.start_station_id = stations.station_id) as starts
left join
(select
end_station_name,
latitude as end_lat,
longitude as end_lon,
bike_number,
trip_id
from
`bigquery-public-data.san_francisco.bikeshare_trips` as trips
left join
`bigquery-public-data.san_francisco.bikeshare_stations` as stations
on
trips.end_station_id = stations.station_id) as ends
on ends.trip_id = starts.trip_id)
group by
bike_number
order by
total_dist_m desc

--

--