-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcollect.py
147 lines (125 loc) · 6.3 KB
/
collect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import xmltodict
import requests
import time
import os
import log
import traceback
import sqlite3
import sql
import predict
import sys
from georgiatech import GeorgiaTech
from sklearn.externals import joblib
# Absolute path of the directory containing this script; run() joins it with
# "model.pkl" to locate the saved regression model.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# ``basestring`` exists only on Python 2; fall back to ``str`` elsewhere so the
# SQL quoting below recognises text values on either interpreter.
try:
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)

# NOTE(review): the OpenWeatherMap API key is committed in source; consider
# moving it to configuration.
_WEATHER_URL = "https://api.openweathermap.org/data/2.5/weather?q=atlanta&APPID=00c4c655fa601a48dc5bf4f34c4ce86a"
_ROUTES_URL = "https://gtbuses.herokuapp.com/agencies/georgia-tech/routes/"

# Seconds to wait on a single HTTP request. Without a timeout a stalled
# connection would block the scraping loop forever; with one, the resulting
# exception is logged by run() and the next cycle proceeds.
_REQUEST_TIMEOUT = 30


def _as_list(node):
    """Normalise an xmltodict node: None -> [], lone element -> [element]."""
    if node is None:
        return []
    if isinstance(node, list):
        return node
    return [node]


def _xml_children(xml_text, tag):
    """Return every ``<tag>`` child of the document's ``<body>`` as a list."""
    body = xmltodict.parse(xml_text).get("body") or {}
    return _as_list(body.get(tag))


def _first_prediction(stop):
    """Return the first prediction of the stop's first direction, or None."""
    directions = _as_list(stop.get("direction"))
    if not directions:
        return None
    # NOTE(review): when a stop lists several directions only the first is
    # used -- confirm this is the one the data set should track.
    predictions = _as_list(directions[0].get("prediction"))
    if not predictions:
        return None
    return predictions[0]


def _locate_bus(vehicles, bus_number):
    """Return (speed, lat, lon) of ``bus_number``; -1 for each when not found."""
    speed, lat, lon = -1, -1, -1
    for vehicle in vehicles:
        if vehicle["@id"] == bus_number:
            speed = vehicle["@speedKmHr"]
            lat = vehicle["@lat"]
            lon = vehicle["@lon"]
    return speed, lat, lon


def _sql_literal(value):
    """Render one row value as a SQL literal.

    None becomes NULL, text is single-quoted with embedded quotes doubled,
    and anything else is emitted via ``str``.
    """
    if value is None:
        return "NULL"
    if isinstance(value, _STRING_TYPES):
        return "'" + value.replace("'", "''") + "'"
    return str(value)


def run(make_prediction=True):
    """Scrape weather and bus predictions forever, inserting one row per stop.

    Each cycle fetches the current Atlanta weather, then for every route in
    ``GeorgiaTech().all_routes`` fetches that route's predictions and
    vehicles, builds a feature row from the first prediction at each stop and
    writes it to the NEXTBUS table through ``sql.query_write``. The function
    never returns; any exception raised inside a cycle is logged and the loop
    moves on to the next cycle.

    Args:
        make_prediction: When true, load ``model.pkl`` from ``BASE_DIR`` and
            append ``predict.predict(model, row)[0]`` to every row before it
            is inserted.
    """
    print(make_prediction)
    print("v4")  # Print current iteration/version for sanity

    # Construct a NextBus API compliant requester.
    # NOTE(review): the contact address in the User-Agent looks redacted in
    # this copy of the file -- confirm the real value.
    session = requests.Session()
    session.headers.update({"User-Agent": "NextBuzz ([email protected])"})

    model = None
    if make_prediction:
        model_path = os.path.join(BASE_DIR, "model.pkl")
        model = joblib.load(model_path)  # Load in the regression model

    sql.create_table()  # Create database infra
    gt = GeorgiaTech()  # Instantiate context object

    while True:  # Big loop for scraping bus data.
        try:
            time.sleep(5)  # Pause between requests

            # Collect weather data; restart the loop if it is unavailable.
            weather = session.get(_WEATHER_URL, timeout=_REQUEST_TIMEOUT)
            if weather.status_code != 200:
                continue
            weather_json = weather.json()

            # The weather is identical for every stop in this cycle, so its
            # columns are extracted once here instead of once per stop.
            weather_name = None
            conditions = weather_json["weather"]
            if isinstance(conditions, list) and conditions:
                weather_name = conditions[0]["main"]
            weather_features = [
                weather_json["main"]["temp"],      # Temp in kelvin
                weather_json["main"]["pressure"],  # Air pressure
                weather_json["main"]["humidity"],  # Air humidity
                weather_json["visibility"],        # Air visibility
                weather_name,                      # cloudy, rainy, sunny...
                weather_json["wind"]["speed"],     # Wind speed
                weather_json["clouds"]["all"],     # Cloud coverage
            ]

            # Collect and parse NextBus data
            for route in gt.all_routes:
                time.sleep(2)  # Pause between queries
                route_url = _ROUTES_URL + route
                r = session.get(route_url + "/predictions", timeout=_REQUEST_TIMEOUT)
                r2 = session.get(route_url + "/vehicles", timeout=_REQUEST_TIMEOUT)
                if r.status_code != 200 or r2.status_code != 200:
                    continue

                # xmltodict yields a dict for a single child and a list for
                # repeated children; both documents are normalised to lists.
                stops = _xml_children(r.text, "predictions")
                vehicles = _xml_children(r2.text, "vehicle")
                numbuses = len(vehicles)

                # All stops for this route
                for stop in stops:
                    stop_name = stop["@stopTag"]
                    route_name = stop["@routeTag"]

                    # First determine if there are any predictions
                    prediction = _first_prediction(stop)
                    if prediction is None:
                        log.log("No predictions for stop " + stop_name + " for route " + route_name)
                        continue

                    # Next extract prediction data
                    layover = "@affectedByLayover" in prediction
                    is_departure = prediction["@isDeparture"] == "true"
                    arrival_epoch = int(prediction["@epochTime"]) // 1000  # ms -> s
                    seconds_arrival = int(prediction["@seconds"])
                    current_epoch = arrival_epoch - seconds_arrival
                    bus_number = prediction["@vehicle"]

                    # Next extract vehicle data
                    kmperhr, buslat, buslong = _locate_bus(vehicles, bus_number)

                    # Build the row
                    row = [
                        current_epoch,      # Timestamp
                        stop_name,          # Stop being approached
                        route_name,         # Red, blue...
                        kmperhr,            # Speed of bus
                        bus_number,         # Bus ID
                        numbuses,           # Number of buses
                        buslat,             # Latitude of bus
                        buslong,            # Longitude of bus
                        str(layover),       # Is this bus' prediction inaccurate?
                        str(is_departure),  # Is the bus waiting?
                        arrival_epoch,      # Predicted timestamp of arrival
                        seconds_arrival,    # Seconds to arrival prediction
                    ] + weather_features

                    if make_prediction:
                        # Use these features to predict actualSecondsToArrival
                        my_prediction = predict.predict(model, row)[0]
                        row.append(my_prediction)
                        print(str(my_prediction) + " from " + str(seconds_arrival))

                    values = ",".join(_sql_literal(item) for item in row)
                    query = "INSERT INTO NEXTBUS VALUES (" + values + ")"
                    print(query)
                    sql.query_write(query)
                    log.log("Inserted for " + route_name + " at " + stop_name)
        except Exception:
            # Best-effort scraper: record the failure and try again next cycle.
            log.log("Exception:")
            log.log(traceback.format_exc())
if __name__ == "__main__":
    # Usage: collect.py [make_prediction]
    # With no argument predictions are enabled; with one argument they are
    # enabled only when it is exactly the string "True".
    print(sys.argv)
    if len(sys.argv) == 1:
        run()
    elif len(sys.argv) == 2:
        run(make_prediction=sys.argv[1] == "True")
    else:
        print("collect.py <make_prediction>")