Using SQL in Python, Citi Bike Analysis, as a Case Study
import pandas as pd
import sqlite3 as sql
read csv from url
pd.read_csv('https://s3.amazonaws.com/tripdata/JC-201705-citibike-tripdata.csv.zip',nrows=10)
tripduration | starttime | stoptime | start station id | start station name | start station latitude | start station longitude | end station id | end station name | end station latitude | end station longitude | bikeid | usertype | birth year | gender | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 347 | 2017-05-01 00:03:19 | 2017-05-01 00:09:06 | 3276 | Marin Light Rail | 40.714584 | -74.042817 | 3214 | Essex Light Rail | 40.712774 | -74.036486 | 26177 | Subscriber | 1958 | 1 |
1 | 108 | 2017-05-01 00:16:04 | 2017-05-01 00:17:53 | 3275 | Columbus Drive | 40.718355 | -74.038914 | 3187 | Warren St | 40.721124 | -74.038051 | 26266 | Subscriber | 1963 | 1 |
2 | 165 | 2017-05-01 00:29:59 | 2017-05-01 00:32:44 | 3267 | Morris Canal | 40.712419 | -74.038526 | 3275 | Columbus Drive | 40.718355 | -74.038914 | 26168 | Subscriber | 1988 | 1 |
3 | 289 | 2017-05-01 00:30:47 | 2017-05-01 00:35:36 | 3209 | Brunswick St | 40.724176 | -74.050656 | 3205 | JC Medical Center | 40.716540 | -74.049638 | 26273 | Subscriber | 1986 | 2 |
4 | 115 | 2017-05-01 00:41:14 | 2017-05-01 00:43:09 | 3185 | City Hall | 40.717733 | -74.043845 | 3213 | Van Vorst Park | 40.718489 | -74.047727 | 26293 | Subscriber | 1983 | 1 |
5 | 285 | 2017-05-01 00:47:59 | 2017-05-01 00:52:44 | 3205 | JC Medical Center | 40.716540 | -74.049638 | 3209 | Brunswick St | 40.724176 | -74.050656 | 26273 | Subscriber | 1986 | 2 |
6 | 163 | 2017-05-01 04:46:48 | 2017-05-01 04:49:32 | 3267 | Morris Canal | 40.712419 | -74.038526 | 3183 | Exchange Place | 40.716247 | -74.033459 | 29645 | Subscriber | 1980 | 1 |
7 | 876 | 2017-05-01 05:02:30 | 2017-05-01 05:17:06 | 3207 | Oakland Ave | 40.737604 | -74.052478 | 3185 | City Hall | 40.717733 | -74.043845 | 29272 | Subscriber | 1984 | 2 |
8 | 394 | 2017-05-01 05:17:09 | 2017-05-01 05:23:43 | 3202 | Newport PATH | 40.727224 | -74.033759 | 3272 | Jersey & 3rd | 40.723332 | -74.045953 | 26211 | Subscriber | 1976 | 1 |
9 | 164 | 2017-05-01 05:31:28 | 2017-05-01 05:34:12 | 3214 | Essex Light Rail | 40.712774 | -74.036486 | 3183 | Exchange Place | 40.716247 | -74.033459 | 19318 | Subscriber | 1979 | 1 |
read csv into sqlite
db = sql.connect('citibike.db')
pd.DataFrame.to_sql?
url = 'https://s3.amazonaws.com/tripdata/20170%s-citibike-tripdata.csv.zip'
for i in [5,6,7,8,9]:
print(url % i)
chunks = pd.read_csv(url %i, chunksize=100_000)
for chunk in chunks:
chunk.columns = [column.replace(' ','_') for column in chunk.columns]
chunk.to_sql('tripdata', db,if_exists='append')
https://s3.amazonaws.com/tripdata/201705-citibike-tripdata.csv.zip
https://s3.amazonaws.com/tripdata/201706-citibike-tripdata.csv.zip
https://s3.amazonaws.com/tripdata/201707-citibike-tripdata.csv.zip
https://s3.amazonaws.com/tripdata/201708-citibike-tripdata.csv.zip
https://s3.amazonaws.com/tripdata/201709-citibike-tripdata.csv.zip
pd.read_sql_query('SELECT count(*) FROM tripdata',db)
count(*) | |
---|---|
0 | 8685057 |
pd.read_sql_query('SELECT * FROM tripdata LIMIT 10',db)
index | tripduration | starttime | stoptime | start_station_id | start_station_name | start_station_latitude | start_station_longitude | end_station_id | end_station_name | end_station_latitude | end_station_longitude | bikeid | usertype | birth_year | gender | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 254 | 2017-05-01 00:00:13 | 2017-05-01 00:04:27 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 394 | E 9 St & Avenue C | 40.725213 | -73.977688 | 27695 | Subscriber | 1996.0 | 2 |
1 | 1 | 248 | 2017-05-01 00:00:19 | 2017-05-01 00:04:28 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 394 | E 9 St & Avenue C | 40.725213 | -73.977688 | 15869 | Subscriber | 1996.0 | 1 |
2 | 2 | 1120 | 2017-05-01 00:00:19 | 2017-05-01 00:19:00 | 242 | Carlton Ave & Flushing Ave | 40.697787 | -73.973736 | 3083 | Bushwick Ave & Powers St | 40.712477 | -73.941000 | 18700 | Subscriber | 1985.0 | 2 |
3 | 3 | 212 | 2017-05-01 00:00:24 | 2017-05-01 00:03:56 | 168 | W 18 St & 6 Ave | 40.739713 | -73.994564 | 116 | W 17 St & 8 Ave | 40.741776 | -74.001497 | 24981 | Subscriber | 1993.0 | 1 |
4 | 4 | 686 | 2017-05-01 00:00:29 | 2017-05-01 00:11:55 | 494 | W 26 St & 8 Ave | 40.747348 | -73.997236 | 527 | E 33 St & 2 Ave | 40.744023 | -73.976056 | 25407 | Subscriber | 1964.0 | 1 |
5 | 5 | 577 | 2017-05-01 00:00:35 | 2017-05-01 00:10:12 | 334 | W 20 St & 7 Ave | 40.742388 | -73.997262 | 504 | 1 Ave & E 16 St | 40.732219 | -73.981656 | 28713 | Subscriber | 1956.0 | 2 |
6 | 6 | 523 | 2017-05-01 00:00:41 | 2017-05-01 00:09:24 | 335 | Washington Pl & Broadway | 40.729039 | -73.994046 | 487 | E 20 St & FDR Drive | 40.733143 | -73.975739 | 15385 | Subscriber | 1994.0 | 1 |
7 | 7 | 419 | 2017-05-01 00:00:45 | 2017-05-01 00:07:44 | 336 | Sullivan St & Washington Sq | 40.730477 | -73.999061 | 369 | Washington Pl & 6 Ave | 40.732241 | -74.000264 | 18295 | Customer | NaN | 0 |
8 | 8 | 518 | 2017-05-01 00:00:47 | 2017-05-01 00:09:25 | 335 | Washington Pl & Broadway | 40.729039 | -73.994046 | 487 | E 20 St & FDR Drive | 40.733143 | -73.975739 | 15608 | Subscriber | 1995.0 | 1 |
9 | 9 | 1296 | 2017-05-01 00:00:48 | 2017-05-01 00:22:25 | 291 | Madison St & Montgomery St | 40.713126 | -73.984844 | 291 | Madison St & Montgomery St | 40.713126 | -73.984844 | 17351 | Subscriber | 1994.0 | 1 |
pd.read_sql_query('SELECT min(birth_year), max(birth_year) FROM tripdata',db)
min(birth_year) | max(birth_year) | |
---|---|---|
0 | 1874.0 | 2001.0 |
pd.read_sql_query('SELECT min(tripduration),max(tripduration) FROM tripdata',db)
min(tripduration) | max(tripduration) | |
---|---|---|
0 | 61 | 5807661 |
pd.read_sql_query('SELECT avg(tripduration) FROM tripdata',db)
avg(tripduration) | |
---|---|
0 | 1055.770104 |
res =pd.read_sql_query('SELECT * FROM tripdata',db,chunksize=100_000)
next(res)
index | tripduration | starttime | stoptime | start_station_id | start_station_name | start_station_latitude | start_station_longitude | end_station_id | end_station_name | end_station_latitude | end_station_longitude | bikeid | usertype | birth_year | gender | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 254 | 2017-05-01 00:00:13 | 2017-05-01 00:04:27 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 394 | E 9 St & Avenue C | 40.725213 | -73.977688 | 27695 | Subscriber | 1996.0 | 2 |
1 | 1 | 248 | 2017-05-01 00:00:19 | 2017-05-01 00:04:28 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 394 | E 9 St & Avenue C | 40.725213 | -73.977688 | 15869 | Subscriber | 1996.0 | 1 |
2 | 2 | 1120 | 2017-05-01 00:00:19 | 2017-05-01 00:19:00 | 242 | Carlton Ave & Flushing Ave | 40.697787 | -73.973736 | 3083 | Bushwick Ave & Powers St | 40.712477 | -73.941000 | 18700 | Subscriber | 1985.0 | 2 |
3 | 3 | 212 | 2017-05-01 00:00:24 | 2017-05-01 00:03:56 | 168 | W 18 St & 6 Ave | 40.739713 | -73.994564 | 116 | W 17 St & 8 Ave | 40.741776 | -74.001497 | 24981 | Subscriber | 1993.0 | 1 |
4 | 4 | 686 | 2017-05-01 00:00:29 | 2017-05-01 00:11:55 | 494 | W 26 St & 8 Ave | 40.747348 | -73.997236 | 527 | E 33 St & 2 Ave | 40.744023 | -73.976056 | 25407 | Subscriber | 1964.0 | 1 |
5 | 5 | 577 | 2017-05-01 00:00:35 | 2017-05-01 00:10:12 | 334 | W 20 St & 7 Ave | 40.742388 | -73.997262 | 504 | 1 Ave & E 16 St | 40.732219 | -73.981656 | 28713 | Subscriber | 1956.0 | 2 |
6 | 6 | 523 | 2017-05-01 00:00:41 | 2017-05-01 00:09:24 | 335 | Washington Pl & Broadway | 40.729039 | -73.994046 | 487 | E 20 St & FDR Drive | 40.733143 | -73.975739 | 15385 | Subscriber | 1994.0 | 1 |
7 | 7 | 419 | 2017-05-01 00:00:45 | 2017-05-01 00:07:44 | 336 | Sullivan St & Washington Sq | 40.730477 | -73.999061 | 369 | Washington Pl & 6 Ave | 40.732241 | -74.000264 | 18295 | Customer | NaN | 0 |
8 | 8 | 518 | 2017-05-01 00:00:47 | 2017-05-01 00:09:25 | 335 | Washington Pl & Broadway | 40.729039 | -73.994046 | 487 | E 20 St & FDR Drive | 40.733143 | -73.975739 | 15608 | Subscriber | 1995.0 | 1 |
9 | 9 | 1296 | 2017-05-01 00:00:48 | 2017-05-01 00:22:25 | 291 | Madison St & Montgomery St | 40.713126 | -73.984844 | 291 | Madison St & Montgomery St | 40.713126 | -73.984844 | 17351 | Subscriber | 1994.0 | 1 |
10 | 10 | 418 | 2017-05-01 00:00:59 | 2017-05-01 00:07:58 | 336 | Sullivan St & Washington Sq | 40.730477 | -73.999061 | 369 | Washington Pl & 6 Ave | 40.732241 | -74.000264 | 28237 | Customer | NaN | 0 |
11 | 11 | 589 | 2017-05-01 00:01:00 | 2017-05-01 00:10:50 | 415 | Pearl St & Hanover Square | 40.704718 | -74.009260 | 340 | Madison St & Clinton St | 40.712690 | -73.987763 | 15289 | Subscriber | 1952.0 | 1 |
12 | 12 | 735 | 2017-05-01 00:01:03 | 2017-05-01 00:13:18 | 330 | Reade St & Broadway | 40.714505 | -74.005628 | 312 | Allen St & Stanton St | 40.722055 | -73.989111 | 27270 | Subscriber | 1993.0 | 1 |
13 | 13 | 950 | 2017-05-01 00:01:00 | 2017-05-01 00:16:50 | 439 | E 4 St & 2 Ave | 40.726281 | -73.989780 | 293 | Lafayette St & E 8 St | 40.730207 | -73.991026 | 18740 | Customer | NaN | 0 |
14 | 14 | 2205 | 2017-05-01 00:01:03 | 2017-05-01 00:37:49 | 306 | Cliff St & Fulton St | 40.708235 | -74.005301 | 306 | Cliff St & Fulton St | 40.708235 | -74.005301 | 17746 | Subscriber | 1986.0 | 1 |
15 | 15 | 391 | 2017-05-01 00:01:10 | 2017-05-01 00:07:41 | 3429 | Hanson Pl & Ashland Pl | 40.685068 | -73.977908 | 419 | Carlton Ave & Park Ave | 40.695807 | -73.973556 | 26118 | Subscriber | 1984.0 | 1 |
16 | 16 | 320 | 2017-05-01 00:01:11 | 2017-05-01 00:06:32 | 387 | Centre St & Chambers St | 40.712733 | -74.004607 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 26691 | Subscriber | 1992.0 | 1 |
17 | 17 | 273 | 2017-05-01 00:01:11 | 2017-05-01 00:05:45 | 3090 | N 8 St & Driggs Ave | 40.717746 | -73.956001 | 3111 | Norman Ave & Leonard St - 2 | 40.725848 | -73.950649 | 28209 | Subscriber | 1988.0 | 1 |
18 | 18 | 461 | 2017-05-01 00:01:40 | 2017-05-01 00:09:21 | 357 | E 11 St & Broadway | 40.732618 | -73.991580 | 301 | E 2 St & Avenue B | 40.722174 | -73.983688 | 27536 | Subscriber | 1994.0 | 1 |
19 | 19 | 345 | 2017-05-01 00:01:46 | 2017-05-01 00:07:32 | 350 | Clinton St & Grand St | 40.715595 | -73.987030 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 29347 | Subscriber | 1974.0 | 1 |
20 | 20 | 616 | 2017-05-01 00:02:01 | 2017-05-01 00:12:17 | 3074 | Montrose Ave & Bushwick Ave | 40.707678 | -73.940162 | 3107 | Bedford Ave & Nassau Ave | 40.723117 | -73.952123 | 27667 | Subscriber | 1980.0 | 2 |
21 | 21 | 552 | 2017-05-01 00:02:04 | 2017-05-01 00:11:16 | 446 | W 24 St & 7 Ave | 40.744876 | -73.995299 | 368 | Carmine St & 6 Ave | 40.730386 | -74.002150 | 28913 | Subscriber | 1985.0 | 1 |
22 | 22 | 628 | 2017-05-01 00:02:14 | 2017-05-01 00:12:42 | 434 | 9 Ave & W 18 St | 40.743174 | -74.003664 | 236 | St Marks Pl & 2 Ave | 40.728419 | -73.987140 | 29346 | Subscriber | 1996.0 | 1 |
23 | 23 | 156 | 2017-05-01 00:02:18 | 2017-05-01 00:04:55 | 3428 | 8 Ave & W 16 St | 40.740983 | -74.001702 | 509 | 9 Ave & W 22 St | 40.745497 | -74.001971 | 27170 | Subscriber | 1981.0 | 1 |
24 | 24 | 305 | 2017-05-01 00:02:27 | 2017-05-01 00:07:33 | 504 | 1 Ave & E 16 St | 40.732219 | -73.981656 | 403 | E 2 St & 2 Ave | 40.725029 | -73.990697 | 21322 | Subscriber | 1997.0 | 1 |
25 | 25 | 372 | 2017-05-01 00:02:47 | 2017-05-01 00:08:59 | 387 | Centre St & Chambers St | 40.712733 | -74.004607 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 25564 | Subscriber | 1982.0 | 1 |
26 | 26 | 161 | 2017-05-01 00:02:54 | 2017-05-01 00:05:35 | 399 | Lafayette Ave & St James Pl | 40.688515 | -73.964763 | 384 | Fulton St & Washington Ave | 40.683048 | -73.964915 | 20545 | Subscriber | 1987.0 | 1 |
27 | 27 | 227 | 2017-05-01 00:02:55 | 2017-05-01 00:06:42 | 508 | W 46 St & 11 Ave | 40.763414 | -73.996674 | 450 | W 49 St & 8 Ave | 40.762272 | -73.987882 | 26622 | Subscriber | 1981.0 | 1 |
28 | 28 | 1636 | 2017-05-01 00:03:12 | 2017-05-01 00:30:29 | 477 | W 41 St & 8 Ave | 40.756405 | -73.990026 | 410 | Suffolk St & Stanton St | 40.720664 | -73.985180 | 24814 | Subscriber | 1962.0 | 2 |
29 | 29 | 1667 | 2017-05-01 00:03:16 | 2017-05-01 00:31:03 | 173 | Broadway & W 49 St | 40.760683 | -73.984527 | 358 | Christopher St & Greenwich St | 40.732916 | -74.007114 | 26887 | Customer | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
99970 | 99970 | 1606 | 2017-05-02 18:04:12 | 2017-05-02 18:30:59 | 168 | W 18 St & 6 Ave | 40.739713 | -73.994564 | 360 | William St & Pine St | 40.707179 | -74.008873 | 25697 | Subscriber | 1988.0 | 2 |
99971 | 99971 | 325 | 2017-05-02 18:04:15 | 2017-05-02 18:09:41 | 533 | Broadway & W 39 St | 40.752996 | -73.987216 | 533 | Broadway & W 39 St | 40.752996 | -73.987216 | 26634 | Subscriber | 1980.0 | 1 |
99972 | 99972 | 468 | 2017-05-02 18:04:23 | 2017-05-02 18:12:11 | 3440 | Fulton St & Adams St | 40.692418 | -73.989495 | 467 | Dean St & 4 Ave | 40.683125 | -73.978951 | 25152 | Subscriber | 1963.0 | 1 |
99973 | 99973 | 974 | 2017-05-02 18:04:16 | 2017-05-02 18:20:31 | 3165 | Central Park West & W 72 St | 40.775794 | -73.976206 | 3137 | 5 Ave & E 73 St | 40.772828 | -73.966853 | 20869 | Customer | NaN | 0 |
99974 | 99974 | 243 | 2017-05-02 18:04:16 | 2017-05-02 18:08:20 | 3093 | N 6 St & Bedford Ave | 40.717452 | -73.958509 | 3102 | Driggs Ave & Lorimer St | 40.721791 | -73.950415 | 28112 | Subscriber | 1981.0 | 1 |
99975 | 99975 | 486 | 2017-05-02 18:04:16 | 2017-05-02 18:12:22 | 3443 | W 52 St & 6 Ave | 40.761330 | -73.979820 | 305 | E 58 St & 3 Ave | 40.760958 | -73.967245 | 20010 | Subscriber | 1988.0 | 1 |
99976 | 99976 | 125 | 2017-05-02 18:04:17 | 2017-05-02 18:06:23 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 29367 | Customer | NaN | 0 |
99977 | 99977 | 432 | 2017-05-02 18:04:17 | 2017-05-02 18:11:30 | 3390 | E 109 St & 3 Ave | 40.793297 | -73.943208 | 3312 | 1 Ave & E 94 St | 40.781721 | -73.945940 | 27005 | Subscriber | 1985.0 | 1 |
99978 | 99978 | 1980 | 2017-05-02 18:04:17 | 2017-05-02 18:37:17 | 440 | E 45 St & 3 Ave | 40.752554 | -73.972826 | 426 | West St & Chambers St | 40.717548 | -74.013221 | 27077 | Subscriber | 1981.0 | 2 |
99979 | 99979 | 1013 | 2017-05-02 18:04:19 | 2017-05-02 18:21:13 | 3057 | Kosciuszko St & Tompkins Ave | 40.691283 | -73.945242 | 3102 | Driggs Ave & Lorimer St | 40.721791 | -73.950415 | 27805 | Subscriber | 1992.0 | 2 |
99980 | 99980 | 2273 | 2017-05-02 18:04:19 | 2017-05-02 18:42:13 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 530 | 11 Ave & W 59 St | 40.771522 | -73.990541 | 17279 | Subscriber | 1955.0 | 2 |
99981 | 99981 | 353 | 2017-05-02 18:04:21 | 2017-05-02 18:10:14 | 252 | MacDougal St & Washington Sq | 40.732264 | -73.998522 | 445 | E 10 St & Avenue A | 40.727408 | -73.981420 | 27765 | Subscriber | 1992.0 | 1 |
99982 | 99982 | 1642 | 2017-05-02 18:04:22 | 2017-05-02 18:31:44 | 359 | E 47 St & Park Ave | 40.755103 | -73.974987 | 3147 | E 85 St & 3 Ave | 40.778012 | -73.954071 | 20809 | Subscriber | 1961.0 | 2 |
99983 | 99983 | 1402 | 2017-05-02 18:04:21 | 2017-05-02 18:27:44 | 3158 | W 63 St & Broadway | 40.771639 | -73.982614 | 402 | Broadway & E 22 St | 40.740343 | -73.989551 | 27577 | Subscriber | 1978.0 | 1 |
99984 | 99984 | 515 | 2017-05-02 18:04:22 | 2017-05-02 18:12:57 | 382 | University Pl & E 14 St | 40.734927 | -73.992005 | 446 | W 24 St & 7 Ave | 40.744876 | -73.995299 | 25648 | Subscriber | 1986.0 | 1 |
99985 | 99985 | 424 | 2017-05-02 18:04:23 | 2017-05-02 18:11:28 | 478 | 11 Ave & W 41 St | 40.760301 | -73.998842 | 529 | W 42 St & 8 Ave | 40.757570 | -73.990985 | 27017 | Subscriber | 1968.0 | 1 |
99986 | 99986 | 816 | 2017-05-02 18:04:24 | 2017-05-02 18:18:01 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 358 | Christopher St & Greenwich St | 40.732916 | -74.007114 | 17504 | Subscriber | 1971.0 | 2 |
99987 | 99987 | 1712 | 2017-05-02 18:04:33 | 2017-05-02 18:33:05 | 3137 | 5 Ave & E 73 St | 40.772828 | -73.966853 | 3282 | 5 Ave & E 88 St | 40.783070 | -73.959390 | 28558 | Subscriber | 1994.0 | 2 |
99988 | 99988 | 1194 | 2017-05-02 18:04:26 | 2017-05-02 18:24:21 | 253 | W 13 St & 5 Ave | 40.735439 | -73.994539 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 25335 | Subscriber | 1974.0 | 1 |
99989 | 99989 | 598 | 2017-05-02 18:04:27 | 2017-05-02 18:14:25 | 268 | Howard St & Centre St | 40.719105 | -73.999733 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 28662 | Subscriber | 1984.0 | 1 |
99990 | 99990 | 408 | 2017-05-02 18:04:27 | 2017-05-02 18:11:16 | 501 | FDR Drive & E 35 St | 40.744219 | -73.971212 | 498 | Broadway & W 32 St | 40.748549 | -73.988084 | 27307 | Subscriber | 1983.0 | 1 |
99991 | 99991 | 972 | 2017-05-02 18:04:26 | 2017-05-02 18:20:39 | 3158 | W 63 St & Broadway | 40.771639 | -73.982614 | 490 | 8 Ave & W 33 St | 40.751551 | -73.993934 | 18888 | Subscriber | 1963.0 | 1 |
99992 | 99992 | 1345 | 2017-05-02 18:04:28 | 2017-05-02 18:26:54 | 494 | W 26 St & 8 Ave | 40.747348 | -73.997236 | 457 | Broadway & W 58 St | 40.766953 | -73.981693 | 28880 | Subscriber | 1980.0 | 2 |
99993 | 99993 | 1178 | 2017-05-02 18:04:29 | 2017-05-02 18:24:08 | 3178 | Riverside Dr & W 78 St | 40.784145 | -73.983625 | 388 | W 26 St & 10 Ave | 40.749718 | -74.002950 | 18646 | Subscriber | 1988.0 | 1 |
99994 | 99994 | 1582 | 2017-05-02 18:04:28 | 2017-05-02 18:30:51 | 3117 | Franklin St & Dupont St | 40.735640 | -73.958660 | 414 | Pearl St & Anchorage Pl | 40.702819 | -73.987658 | 28792 | Subscriber | 1976.0 | 1 |
99995 | 99995 | 2530 | 2017-05-02 18:04:29 | 2017-05-02 18:46:39 | 3169 | Riverside Dr & W 82 St | 40.787209 | -73.981281 | 515 | W 43 St & 10 Ave | 40.760094 | -73.994618 | 26463 | Customer | NaN | 0 |
99996 | 99996 | 1760 | 2017-05-02 18:04:27 | 2017-05-02 18:33:48 | 382 | University Pl & E 14 St | 40.734927 | -73.992005 | 497 | E 17 St & Broadway | 40.737050 | -73.990093 | 29094 | Subscriber | 1987.0 | 1 |
99997 | 99997 | 944 | 2017-05-02 18:04:30 | 2017-05-02 18:20:15 | 402 | Broadway & E 22 St | 40.740343 | -73.989551 | 524 | W 43 St & 6 Ave | 40.755273 | -73.983169 | 28542 | Subscriber | 1987.0 | 2 |
99998 | 99998 | 352 | 2017-05-02 18:04:30 | 2017-05-02 18:10:22 | 498 | Broadway & W 32 St | 40.748549 | -73.988084 | 442 | W 27 St & 7 Ave | 40.746647 | -73.993915 | 16621 | Subscriber | 1989.0 | 1 |
99999 | 99999 | 227 | 2017-05-02 18:04:31 | 2017-05-02 18:08:18 | 3150 | E 85 St & York Ave | 40.775369 | -73.948034 | 3305 | E 91 St & 2 Ave | 40.781122 | -73.949656 | 28116 | Subscriber | 1969.0 | 1 |
100000 rows × 16 columns
def Q(sql):
res= pd.read_sql_query(sql,db, chunksize=100_000)
return next(res)
Q('SELECT * FROM tripdata')
index | tripduration | starttime | stoptime | start_station_id | start_station_name | start_station_latitude | start_station_longitude | end_station_id | end_station_name | end_station_latitude | end_station_longitude | bikeid | usertype | birth_year | gender | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 254 | 2017-05-01 00:00:13 | 2017-05-01 00:04:27 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 394 | E 9 St & Avenue C | 40.725213 | -73.977688 | 27695 | Subscriber | 1996.0 | 2 |
1 | 1 | 248 | 2017-05-01 00:00:19 | 2017-05-01 00:04:28 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 394 | E 9 St & Avenue C | 40.725213 | -73.977688 | 15869 | Subscriber | 1996.0 | 1 |
2 | 2 | 1120 | 2017-05-01 00:00:19 | 2017-05-01 00:19:00 | 242 | Carlton Ave & Flushing Ave | 40.697787 | -73.973736 | 3083 | Bushwick Ave & Powers St | 40.712477 | -73.941000 | 18700 | Subscriber | 1985.0 | 2 |
3 | 3 | 212 | 2017-05-01 00:00:24 | 2017-05-01 00:03:56 | 168 | W 18 St & 6 Ave | 40.739713 | -73.994564 | 116 | W 17 St & 8 Ave | 40.741776 | -74.001497 | 24981 | Subscriber | 1993.0 | 1 |
4 | 4 | 686 | 2017-05-01 00:00:29 | 2017-05-01 00:11:55 | 494 | W 26 St & 8 Ave | 40.747348 | -73.997236 | 527 | E 33 St & 2 Ave | 40.744023 | -73.976056 | 25407 | Subscriber | 1964.0 | 1 |
5 | 5 | 577 | 2017-05-01 00:00:35 | 2017-05-01 00:10:12 | 334 | W 20 St & 7 Ave | 40.742388 | -73.997262 | 504 | 1 Ave & E 16 St | 40.732219 | -73.981656 | 28713 | Subscriber | 1956.0 | 2 |
6 | 6 | 523 | 2017-05-01 00:00:41 | 2017-05-01 00:09:24 | 335 | Washington Pl & Broadway | 40.729039 | -73.994046 | 487 | E 20 St & FDR Drive | 40.733143 | -73.975739 | 15385 | Subscriber | 1994.0 | 1 |
7 | 7 | 419 | 2017-05-01 00:00:45 | 2017-05-01 00:07:44 | 336 | Sullivan St & Washington Sq | 40.730477 | -73.999061 | 369 | Washington Pl & 6 Ave | 40.732241 | -74.000264 | 18295 | Customer | NaN | 0 |
8 | 8 | 518 | 2017-05-01 00:00:47 | 2017-05-01 00:09:25 | 335 | Washington Pl & Broadway | 40.729039 | -73.994046 | 487 | E 20 St & FDR Drive | 40.733143 | -73.975739 | 15608 | Subscriber | 1995.0 | 1 |
9 | 9 | 1296 | 2017-05-01 00:00:48 | 2017-05-01 00:22:25 | 291 | Madison St & Montgomery St | 40.713126 | -73.984844 | 291 | Madison St & Montgomery St | 40.713126 | -73.984844 | 17351 | Subscriber | 1994.0 | 1 |
10 | 10 | 418 | 2017-05-01 00:00:59 | 2017-05-01 00:07:58 | 336 | Sullivan St & Washington Sq | 40.730477 | -73.999061 | 369 | Washington Pl & 6 Ave | 40.732241 | -74.000264 | 28237 | Customer | NaN | 0 |
11 | 11 | 589 | 2017-05-01 00:01:00 | 2017-05-01 00:10:50 | 415 | Pearl St & Hanover Square | 40.704718 | -74.009260 | 340 | Madison St & Clinton St | 40.712690 | -73.987763 | 15289 | Subscriber | 1952.0 | 1 |
12 | 12 | 735 | 2017-05-01 00:01:03 | 2017-05-01 00:13:18 | 330 | Reade St & Broadway | 40.714505 | -74.005628 | 312 | Allen St & Stanton St | 40.722055 | -73.989111 | 27270 | Subscriber | 1993.0 | 1 |
13 | 13 | 950 | 2017-05-01 00:01:00 | 2017-05-01 00:16:50 | 439 | E 4 St & 2 Ave | 40.726281 | -73.989780 | 293 | Lafayette St & E 8 St | 40.730207 | -73.991026 | 18740 | Customer | NaN | 0 |
14 | 14 | 2205 | 2017-05-01 00:01:03 | 2017-05-01 00:37:49 | 306 | Cliff St & Fulton St | 40.708235 | -74.005301 | 306 | Cliff St & Fulton St | 40.708235 | -74.005301 | 17746 | Subscriber | 1986.0 | 1 |
15 | 15 | 391 | 2017-05-01 00:01:10 | 2017-05-01 00:07:41 | 3429 | Hanson Pl & Ashland Pl | 40.685068 | -73.977908 | 419 | Carlton Ave & Park Ave | 40.695807 | -73.973556 | 26118 | Subscriber | 1984.0 | 1 |
16 | 16 | 320 | 2017-05-01 00:01:11 | 2017-05-01 00:06:32 | 387 | Centre St & Chambers St | 40.712733 | -74.004607 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 26691 | Subscriber | 1992.0 | 1 |
17 | 17 | 273 | 2017-05-01 00:01:11 | 2017-05-01 00:05:45 | 3090 | N 8 St & Driggs Ave | 40.717746 | -73.956001 | 3111 | Norman Ave & Leonard St - 2 | 40.725848 | -73.950649 | 28209 | Subscriber | 1988.0 | 1 |
18 | 18 | 461 | 2017-05-01 00:01:40 | 2017-05-01 00:09:21 | 357 | E 11 St & Broadway | 40.732618 | -73.991580 | 301 | E 2 St & Avenue B | 40.722174 | -73.983688 | 27536 | Subscriber | 1994.0 | 1 |
19 | 19 | 345 | 2017-05-01 00:01:46 | 2017-05-01 00:07:32 | 350 | Clinton St & Grand St | 40.715595 | -73.987030 | 511 | E 14 St & Avenue B | 40.729387 | -73.977724 | 29347 | Subscriber | 1974.0 | 1 |
20 | 20 | 616 | 2017-05-01 00:02:01 | 2017-05-01 00:12:17 | 3074 | Montrose Ave & Bushwick Ave | 40.707678 | -73.940162 | 3107 | Bedford Ave & Nassau Ave | 40.723117 | -73.952123 | 27667 | Subscriber | 1980.0 | 2 |
21 | 21 | 552 | 2017-05-01 00:02:04 | 2017-05-01 00:11:16 | 446 | W 24 St & 7 Ave | 40.744876 | -73.995299 | 368 | Carmine St & 6 Ave | 40.730386 | -74.002150 | 28913 | Subscriber | 1985.0 | 1 |
22 | 22 | 628 | 2017-05-01 00:02:14 | 2017-05-01 00:12:42 | 434 | 9 Ave & W 18 St | 40.743174 | -74.003664 | 236 | St Marks Pl & 2 Ave | 40.728419 | -73.987140 | 29346 | Subscriber | 1996.0 | 1 |
23 | 23 | 156 | 2017-05-01 00:02:18 | 2017-05-01 00:04:55 | 3428 | 8 Ave & W 16 St | 40.740983 | -74.001702 | 509 | 9 Ave & W 22 St | 40.745497 | -74.001971 | 27170 | Subscriber | 1981.0 | 1 |
24 | 24 | 305 | 2017-05-01 00:02:27 | 2017-05-01 00:07:33 | 504 | 1 Ave & E 16 St | 40.732219 | -73.981656 | 403 | E 2 St & 2 Ave | 40.725029 | -73.990697 | 21322 | Subscriber | 1997.0 | 1 |
25 | 25 | 372 | 2017-05-01 00:02:47 | 2017-05-01 00:08:59 | 387 | Centre St & Chambers St | 40.712733 | -74.004607 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 25564 | Subscriber | 1982.0 | 1 |
26 | 26 | 161 | 2017-05-01 00:02:54 | 2017-05-01 00:05:35 | 399 | Lafayette Ave & St James Pl | 40.688515 | -73.964763 | 384 | Fulton St & Washington Ave | 40.683048 | -73.964915 | 20545 | Subscriber | 1987.0 | 1 |
27 | 27 | 227 | 2017-05-01 00:02:55 | 2017-05-01 00:06:42 | 508 | W 46 St & 11 Ave | 40.763414 | -73.996674 | 450 | W 49 St & 8 Ave | 40.762272 | -73.987882 | 26622 | Subscriber | 1981.0 | 1 |
28 | 28 | 1636 | 2017-05-01 00:03:12 | 2017-05-01 00:30:29 | 477 | W 41 St & 8 Ave | 40.756405 | -73.990026 | 410 | Suffolk St & Stanton St | 40.720664 | -73.985180 | 24814 | Subscriber | 1962.0 | 2 |
29 | 29 | 1667 | 2017-05-01 00:03:16 | 2017-05-01 00:31:03 | 173 | Broadway & W 49 St | 40.760683 | -73.984527 | 358 | Christopher St & Greenwich St | 40.732916 | -74.007114 | 26887 | Customer | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
99970 | 99970 | 1606 | 2017-05-02 18:04:12 | 2017-05-02 18:30:59 | 168 | W 18 St & 6 Ave | 40.739713 | -73.994564 | 360 | William St & Pine St | 40.707179 | -74.008873 | 25697 | Subscriber | 1988.0 | 2 |
99971 | 99971 | 325 | 2017-05-02 18:04:15 | 2017-05-02 18:09:41 | 533 | Broadway & W 39 St | 40.752996 | -73.987216 | 533 | Broadway & W 39 St | 40.752996 | -73.987216 | 26634 | Subscriber | 1980.0 | 1 |
99972 | 99972 | 468 | 2017-05-02 18:04:23 | 2017-05-02 18:12:11 | 3440 | Fulton St & Adams St | 40.692418 | -73.989495 | 467 | Dean St & 4 Ave | 40.683125 | -73.978951 | 25152 | Subscriber | 1963.0 | 1 |
99973 | 99973 | 974 | 2017-05-02 18:04:16 | 2017-05-02 18:20:31 | 3165 | Central Park West & W 72 St | 40.775794 | -73.976206 | 3137 | 5 Ave & E 73 St | 40.772828 | -73.966853 | 20869 | Customer | NaN | 0 |
99974 | 99974 | 243 | 2017-05-02 18:04:16 | 2017-05-02 18:08:20 | 3093 | N 6 St & Bedford Ave | 40.717452 | -73.958509 | 3102 | Driggs Ave & Lorimer St | 40.721791 | -73.950415 | 28112 | Subscriber | 1981.0 | 1 |
99975 | 99975 | 486 | 2017-05-02 18:04:16 | 2017-05-02 18:12:22 | 3443 | W 52 St & 6 Ave | 40.761330 | -73.979820 | 305 | E 58 St & 3 Ave | 40.760958 | -73.967245 | 20010 | Subscriber | 1988.0 | 1 |
99976 | 99976 | 125 | 2017-05-02 18:04:17 | 2017-05-02 18:06:23 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 2008 | Little West St & 1 Pl | 40.705693 | -74.016777 | 29367 | Customer | NaN | 0 |
99977 | 99977 | 432 | 2017-05-02 18:04:17 | 2017-05-02 18:11:30 | 3390 | E 109 St & 3 Ave | 40.793297 | -73.943208 | 3312 | 1 Ave & E 94 St | 40.781721 | -73.945940 | 27005 | Subscriber | 1985.0 | 1 |
99978 | 99978 | 1980 | 2017-05-02 18:04:17 | 2017-05-02 18:37:17 | 440 | E 45 St & 3 Ave | 40.752554 | -73.972826 | 426 | West St & Chambers St | 40.717548 | -74.013221 | 27077 | Subscriber | 1981.0 | 2 |
99979 | 99979 | 1013 | 2017-05-02 18:04:19 | 2017-05-02 18:21:13 | 3057 | Kosciuszko St & Tompkins Ave | 40.691283 | -73.945242 | 3102 | Driggs Ave & Lorimer St | 40.721791 | -73.950415 | 27805 | Subscriber | 1992.0 | 2 |
99980 | 99980 | 2273 | 2017-05-02 18:04:19 | 2017-05-02 18:42:13 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 530 | 11 Ave & W 59 St | 40.771522 | -73.990541 | 17279 | Subscriber | 1955.0 | 2 |
99981 | 99981 | 353 | 2017-05-02 18:04:21 | 2017-05-02 18:10:14 | 252 | MacDougal St & Washington Sq | 40.732264 | -73.998522 | 445 | E 10 St & Avenue A | 40.727408 | -73.981420 | 27765 | Subscriber | 1992.0 | 1 |
99982 | 99982 | 1642 | 2017-05-02 18:04:22 | 2017-05-02 18:31:44 | 359 | E 47 St & Park Ave | 40.755103 | -73.974987 | 3147 | E 85 St & 3 Ave | 40.778012 | -73.954071 | 20809 | Subscriber | 1961.0 | 2 |
99983 | 99983 | 1402 | 2017-05-02 18:04:21 | 2017-05-02 18:27:44 | 3158 | W 63 St & Broadway | 40.771639 | -73.982614 | 402 | Broadway & E 22 St | 40.740343 | -73.989551 | 27577 | Subscriber | 1978.0 | 1 |
99984 | 99984 | 515 | 2017-05-02 18:04:22 | 2017-05-02 18:12:57 | 382 | University Pl & E 14 St | 40.734927 | -73.992005 | 446 | W 24 St & 7 Ave | 40.744876 | -73.995299 | 25648 | Subscriber | 1986.0 | 1 |
99985 | 99985 | 424 | 2017-05-02 18:04:23 | 2017-05-02 18:11:28 | 478 | 11 Ave & W 41 St | 40.760301 | -73.998842 | 529 | W 42 St & 8 Ave | 40.757570 | -73.990985 | 27017 | Subscriber | 1968.0 | 1 |
99986 | 99986 | 816 | 2017-05-02 18:04:24 | 2017-05-02 18:18:01 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 358 | Christopher St & Greenwich St | 40.732916 | -74.007114 | 17504 | Subscriber | 1971.0 | 2 |
99987 | 99987 | 1712 | 2017-05-02 18:04:33 | 2017-05-02 18:33:05 | 3137 | 5 Ave & E 73 St | 40.772828 | -73.966853 | 3282 | 5 Ave & E 88 St | 40.783070 | -73.959390 | 28558 | Subscriber | 1994.0 | 2 |
99988 | 99988 | 1194 | 2017-05-02 18:04:26 | 2017-05-02 18:24:21 | 253 | W 13 St & 5 Ave | 40.735439 | -73.994539 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 25335 | Subscriber | 1974.0 | 1 |
99989 | 99989 | 598 | 2017-05-02 18:04:27 | 2017-05-02 18:14:25 | 268 | Howard St & Centre St | 40.719105 | -73.999733 | 3002 | South End Ave & Liberty St | 40.711512 | -74.015756 | 28662 | Subscriber | 1984.0 | 1 |
99990 | 99990 | 408 | 2017-05-02 18:04:27 | 2017-05-02 18:11:16 | 501 | FDR Drive & E 35 St | 40.744219 | -73.971212 | 498 | Broadway & W 32 St | 40.748549 | -73.988084 | 27307 | Subscriber | 1983.0 | 1 |
99991 | 99991 | 972 | 2017-05-02 18:04:26 | 2017-05-02 18:20:39 | 3158 | W 63 St & Broadway | 40.771639 | -73.982614 | 490 | 8 Ave & W 33 St | 40.751551 | -73.993934 | 18888 | Subscriber | 1963.0 | 1 |
99992 | 99992 | 1345 | 2017-05-02 18:04:28 | 2017-05-02 18:26:54 | 494 | W 26 St & 8 Ave | 40.747348 | -73.997236 | 457 | Broadway & W 58 St | 40.766953 | -73.981693 | 28880 | Subscriber | 1980.0 | 2 |
99993 | 99993 | 1178 | 2017-05-02 18:04:29 | 2017-05-02 18:24:08 | 3178 | Riverside Dr & W 78 St | 40.784145 | -73.983625 | 388 | W 26 St & 10 Ave | 40.749718 | -74.002950 | 18646 | Subscriber | 1988.0 | 1 |
99994 | 99994 | 1582 | 2017-05-02 18:04:28 | 2017-05-02 18:30:51 | 3117 | Franklin St & Dupont St | 40.735640 | -73.958660 | 414 | Pearl St & Anchorage Pl | 40.702819 | -73.987658 | 28792 | Subscriber | 1976.0 | 1 |
99995 | 99995 | 2530 | 2017-05-02 18:04:29 | 2017-05-02 18:46:39 | 3169 | Riverside Dr & W 82 St | 40.787209 | -73.981281 | 515 | W 43 St & 10 Ave | 40.760094 | -73.994618 | 26463 | Customer | NaN | 0 |
99996 | 99996 | 1760 | 2017-05-02 18:04:27 | 2017-05-02 18:33:48 | 382 | University Pl & E 14 St | 40.734927 | -73.992005 | 497 | E 17 St & Broadway | 40.737050 | -73.990093 | 29094 | Subscriber | 1987.0 | 1 |
99997 | 99997 | 944 | 2017-05-02 18:04:30 | 2017-05-02 18:20:15 | 402 | Broadway & E 22 St | 40.740343 | -73.989551 | 524 | W 43 St & 6 Ave | 40.755273 | -73.983169 | 28542 | Subscriber | 1987.0 | 2 |
99998 | 99998 | 352 | 2017-05-02 18:04:30 | 2017-05-02 18:10:22 | 498 | Broadway & W 32 St | 40.748549 | -73.988084 | 442 | W 27 St & 7 Ave | 40.746647 | -73.993915 | 16621 | Subscriber | 1989.0 | 1 |
99999 | 99999 | 227 | 2017-05-02 18:04:31 | 2017-05-02 18:08:18 | 3150 | E 85 St & York Ave | 40.775369 | -73.948034 | 3305 | E 91 St & 2 Ave | 40.781122 | -73.949656 | 28116 | Subscriber | 1969.0 | 1 |
100000 rows × 16 columns
geneder query
res=Q('SELECT gender, count(*) from tripdata group by gender')
res
gender | count(*) | |
---|---|---|
0 | 0 | 1058729 |
1 | 1 | 5618129 |
2 | 2 | 2008199 |
res=Q( 'select usertype, gender,count(*) from tripdata group by gender, usertype' )
res
usertype | gender | count(*) | |
---|---|---|---|
0 | Customer | 0 | 946624 |
1 | Subscriber | 0 | 112105 |
2 | Customer | 1 | 133632 |
3 | Subscriber | 1 | 5484497 |
4 | Customer | 2 | 78906 |
5 | Subscriber | 2 | 1929293 |
res = Q('''
SELECT
(2020 - birth_year) as age,
count(*)
from tripdata
group by age
''')
res
age | count(*) | |
---|---|---|
0 | NaN | 1026035 |
1 | 19.0 | 3111 |
2 | 20.0 | 13105 |
3 | 21.0 | 22640 |
4 | 22.0 | 35205 |
5 | 23.0 | 43961 |
6 | 24.0 | 61248 |
7 | 25.0 | 82996 |
8 | 26.0 | 145162 |
9 | 27.0 | 225350 |
10 | 28.0 | 275820 |
11 | 29.0 | 303805 |
12 | 30.0 | 329242 |
13 | 31.0 | 333745 |
14 | 32.0 | 336104 |
15 | 33.0 | 324443 |
16 | 34.0 | 320144 |
17 | 35.0 | 315356 |
18 | 36.0 | 290491 |
19 | 37.0 | 272767 |
20 | 38.0 | 254227 |
21 | 39.0 | 235317 |
22 | 40.0 | 213739 |
23 | 41.0 | 195974 |
24 | 42.0 | 178737 |
25 | 43.0 | 167309 |
26 | 44.0 | 164558 |
27 | 45.0 | 151740 |
28 | 46.0 | 153644 |
29 | 47.0 | 140617 |
... | ... | ... |
73 | 91.0 | 99 |
74 | 92.0 | 69 |
75 | 93.0 | 114 |
76 | 94.0 | 95 |
77 | 97.0 | 532 |
78 | 99.0 | 56 |
79 | 100.0 | 8 |
80 | 102.0 | 100 |
81 | 103.0 | 133 |
82 | 104.0 | 7 |
83 | 105.0 | 17 |
84 | 107.0 | 34 |
85 | 108.0 | 137 |
86 | 110.0 | 193 |
87 | 113.0 | 17 |
88 | 115.0 | 3 |
89 | 119.0 | 224 |
90 | 120.0 | 921 |
91 | 121.0 | 165 |
92 | 124.0 | 21 |
93 | 125.0 | 76 |
94 | 126.0 | 32 |
95 | 127.0 | 105 |
96 | 130.0 | 5 |
97 | 131.0 | 3 |
98 | 132.0 | 21 |
99 | 133.0 | 48 |
100 | 134.0 | 65 |
101 | 135.0 | 245 |
102 | 146.0 | 6 |
103 rows × 2 columns
res = Q('''
SELECT
(2020 - birth_year) as age,
count(*)
from tripdata
group by age
having age >1
''')
res
age | count(*) | |
---|---|---|
0 | 19.0 | 3111 |
1 | 20.0 | 13105 |
2 | 21.0 | 22640 |
3 | 22.0 | 35205 |
4 | 23.0 | 43961 |
5 | 24.0 | 61248 |
6 | 25.0 | 82996 |
7 | 26.0 | 145162 |
8 | 27.0 | 225350 |
9 | 28.0 | 275820 |
10 | 29.0 | 303805 |
11 | 30.0 | 329242 |
12 | 31.0 | 333745 |
13 | 32.0 | 336104 |
14 | 33.0 | 324443 |
15 | 34.0 | 320144 |
16 | 35.0 | 315356 |
17 | 36.0 | 290491 |
18 | 37.0 | 272767 |
19 | 38.0 | 254227 |
20 | 39.0 | 235317 |
21 | 40.0 | 213739 |
22 | 41.0 | 195974 |
23 | 42.0 | 178737 |
24 | 43.0 | 167309 |
25 | 44.0 | 164558 |
26 | 45.0 | 151740 |
27 | 46.0 | 153644 |
28 | 47.0 | 140617 |
29 | 48.0 | 138148 |
... | ... | ... |
72 | 91.0 | 99 |
73 | 92.0 | 69 |
74 | 93.0 | 114 |
75 | 94.0 | 95 |
76 | 97.0 | 532 |
77 | 99.0 | 56 |
78 | 100.0 | 8 |
79 | 102.0 | 100 |
80 | 103.0 | 133 |
81 | 104.0 | 7 |
82 | 105.0 | 17 |
83 | 107.0 | 34 |
84 | 108.0 | 137 |
85 | 110.0 | 193 |
86 | 113.0 | 17 |
87 | 115.0 | 3 |
88 | 119.0 | 224 |
89 | 120.0 | 921 |
90 | 121.0 | 165 |
91 | 124.0 | 21 |
92 | 125.0 | 76 |
93 | 126.0 | 32 |
94 | 127.0 | 105 |
95 | 130.0 | 5 |
96 | 131.0 | 3 |
97 | 132.0 | 21 |
98 | 133.0 | 48 |
99 | 134.0 | 65 |
100 | 135.0 | 245 |
101 | 146.0 | 6 |
102 rows × 2 columns
res = Q('''
SELECT bikeid, sum(tripduration)
FROM tripdata
GROUP BY bikeid
ORDER BY 2
''')
res
bikeid | sum(tripduration) | |
---|---|---|
0 | 31358 | 93 |
1 | 31789 | 157 |
2 | 29838 | 209 |
3 | 26258 | 274 |
4 | 31941 | 304 |
5 | 29301 | 325 |
6 | 30405 | 353 |
7 | 30829 | 451 |
8 | 30402 | 467 |
9 | 29843 | 593 |
10 | 30294 | 642 |
11 | 29610 | 667 |
12 | 25231 | 716 |
13 | 31107 | 741 |
14 | 24357 | 773 |
15 | 30339 | 773 |
16 | 30050 | 877 |
17 | 29644 | 939 |
18 | 31807 | 956 |
19 | 29637 | 1086 |
20 | 30157 | 1265 |
21 | 29836 | 1273 |
22 | 29253 | 1315 |
23 | 29501 | 1326 |
24 | 29845 | 1457 |
25 | 29824 | 1460 |
26 | 29217 | 1498 |
27 | 31784 | 1530 |
28 | 31782 | 1563 |
29 | 31827 | 1593 |
... | ... | ... |
12148 | 16666 | 3874348 |
12149 | 18811 | 3875045 |
12150 | 21308 | 3910883 |
12151 | 20541 | 4071344 |
12152 | 25707 | 4115980 |
12153 | 28014 | 4191835 |
12154 | 20596 | 4225521 |
12155 | 17515 | 4236450 |
12156 | 26484 | 4237832 |
12157 | 29755 | 4330469 |
12158 | 15462 | 4374386 |
12159 | 24843 | 4382533 |
12160 | 25360 | 4500421 |
12161 | 25945 | 4507709 |
12162 | 27892 | 4517035 |
12163 | 26591 | 4517352 |
12164 | 17428 | 4531047 |
12165 | 26970 | 4610802 |
12166 | 28873 | 4644748 |
12167 | 26975 | 4682062 |
12168 | 17799 | 4709650 |
12169 | 28653 | 4781705 |
12170 | 20145 | 4811001 |
12171 | 17928 | 5115114 |
12172 | 15062 | 5189426 |
12173 | 25505 | 5218382 |
12174 | 21683 | 5386738 |
12175 | 26502 | 5867954 |
12176 | 21173 | 6164703 |
12177 | 16811 | 6223765 |
12178 rows × 2 columns
res = Q('''
SELECT bikeid, sum(tripduration)/3600 as trip_duration_hours
FROM tripdata
GROUP BY bikeid
ORDER BY 2
''')
res
bikeid | trip_duration_hours | |
---|---|---|
0 | 19312 | 0 |
1 | 22380 | 0 |
2 | 24357 | 0 |
3 | 24747 | 0 |
4 | 25231 | 0 |
5 | 26152 | 0 |
6 | 26216 | 0 |
7 | 26258 | 0 |
8 | 27275 | 0 |
9 | 29217 | 0 |
10 | 29253 | 0 |
11 | 29273 | 0 |
12 | 29301 | 0 |
13 | 29484 | 0 |
14 | 29501 | 0 |
15 | 29513 | 0 |
16 | 29594 | 0 |
17 | 29610 | 0 |
18 | 29619 | 0 |
19 | 29637 | 0 |
20 | 29638 | 0 |
21 | 29644 | 0 |
22 | 29650 | 0 |
23 | 29673 | 0 |
24 | 29705 | 0 |
25 | 29824 | 0 |
26 | 29831 | 0 |
27 | 29836 | 0 |
28 | 29838 | 0 |
29 | 29843 | 0 |
... | ... | ... |
12148 | 16666 | 1076 |
12149 | 18811 | 1076 |
12150 | 21308 | 1086 |
12151 | 20541 | 1130 |
12152 | 25707 | 1143 |
12153 | 28014 | 1164 |
12154 | 20596 | 1173 |
12155 | 17515 | 1176 |
12156 | 26484 | 1177 |
12157 | 29755 | 1202 |
12158 | 15462 | 1215 |
12159 | 24843 | 1217 |
12160 | 25360 | 1250 |
12161 | 25945 | 1252 |
12162 | 26591 | 1254 |
12163 | 27892 | 1254 |
12164 | 17428 | 1258 |
12165 | 26970 | 1280 |
12166 | 28873 | 1290 |
12167 | 26975 | 1300 |
12168 | 17799 | 1308 |
12169 | 28653 | 1328 |
12170 | 20145 | 1336 |
12171 | 17928 | 1420 |
12172 | 15062 | 1441 |
12173 | 25505 | 1449 |
12174 | 21683 | 1496 |
12175 | 26502 | 1629 |
12176 | 21173 | 1712 |
12177 | 16811 | 1728 |
12178 rows × 2 columns
Q('''
SELECT gender, avg(tripduration)
FROM tripdata
Group by gender
''')
gender | avg(tripduration) | |
---|---|---|
0 | 0 | 1879.283770 |
1 | 1 | 906.374779 |
2 | 2 | 1039.558765 |
Q('''
SELECT bikeid, sum(tripduration) as total
FROM tripdata
group by bikeid
''')
bikeid | total | |
---|---|---|
0 | 14529 | 893894 |
1 | 14530 | 632416 |
2 | 14531 | 694481 |
3 | 14532 | 511588 |
4 | 14533 | 734408 |
5 | 14534 | 657774 |
6 | 14535 | 829025 |
7 | 14536 | 736906 |
8 | 14537 | 643763 |
9 | 14539 | 765586 |
10 | 14540 | 437563 |
11 | 14541 | 800923 |
12 | 14544 | 785139 |
13 | 14545 | 959844 |
14 | 14547 | 624715 |
15 | 14548 | 603874 |
16 | 14549 | 631430 |
17 | 14550 | 520644 |
18 | 14551 | 1608483 |
19 | 14552 | 608622 |
20 | 14553 | 757367 |
21 | 14555 | 638516 |
22 | 14556 | 575367 |
23 | 14558 | 549563 |
24 | 14559 | 796470 |
25 | 14561 | 661922 |
26 | 14562 | 512684 |
27 | 14563 | 588310 |
28 | 14565 | 533523 |
29 | 14566 | 738766 |
... | ... | ... |
12148 | 31941 | 304 |
12149 | 31942 | 11269 |
12150 | 31944 | 23005 |
12151 | 31947 | 11386 |
12152 | 31950 | 20951 |
12153 | 31951 | 3361 |
12154 | 31953 | 9665 |
12155 | 31955 | 19491 |
12156 | 31958 | 18938 |
12157 | 31959 | 14588 |
12158 | 31960 | 12513 |
12159 | 31961 | 4597 |
12160 | 31962 | 14790 |
12161 | 31963 | 16857 |
12162 | 31964 | 18890 |
12163 | 31965 | 4726 |
12164 | 31966 | 11343 |
12165 | 31967 | 7416 |
12166 | 31968 | 15542 |
12167 | 31969 | 9512 |
12168 | 31970 | 17205 |
12169 | 31971 | 15581 |
12170 | 31972 | 7674 |
12171 | 31973 | 7402 |
12172 | 31974 | 9893 |
12173 | 31975 | 18640 |
12174 | 31976 | 14094 |
12175 | 31977 | 13542 |
12176 | 31978 | 2474 |
12177 | 31979 | 18413 |
12178 rows × 2 columns
Q('''
select hour_bucket, count(*) from (
SELECT bikeid, sum(tripduration)/3600 as hours, round(sum(tripduration)/3600 /100)*100 hour_bucket
FROM tripdata
group by bikeid
)
group by hour_bucket
''')
hour_bucket | count(*) | |
---|---|---|
0 | 0.0 | 1725 |
1 | 100.0 | 4336 |
2 | 200.0 | 4686 |
3 | 300.0 | 935 |
4 | 400.0 | 221 |
5 | 500.0 | 102 |
6 | 600.0 | 65 |
7 | 700.0 | 30 |
8 | 800.0 | 29 |
9 | 900.0 | 14 |
10 | 1000.0 | 8 |
11 | 1100.0 | 6 |
12 | 1200.0 | 10 |
13 | 1300.0 | 4 |
14 | 1400.0 | 4 |
15 | 1600.0 | 1 |
16 | 1700.0 | 2 |
Q('''
SELECT starttime
from tripdata
limit 3
'''
)
starttime | |
---|---|
0 | 2017-05-01 00:00:13 |
1 | 2017-05-01 00:00:19 |
2 | 2017-05-01 00:00:19 |
Q('''
SELECT starttime,
substr(starttime,1,10)
from tripdata
limit 3
'''
)
starttime | substr(starttime,1,10) | |
---|---|---|
0 | 2017-05-01 00:00:13 | 2017-05-01 |
1 | 2017-05-01 00:00:19 | 2017-05-01 |
2 | 2017-05-01 00:00:19 | 2017-05-01 |
Q('''
SELECT starttime,
substr(starttime,1,10) as d,
substr(starttime,11,10) as t
from tripdata
limit 3
'''
)
starttime | d | t | |
---|---|---|---|
0 | 2017-05-01 00:00:13 | 2017-05-01 | 00:00:13 |
1 | 2017-05-01 00:00:19 | 2017-05-01 | 00:00:19 |
2 | 2017-05-01 00:00:19 | 2017-05-01 | 00:00:19 |
Q('''
SELECT starttime,
substr(starttime,1,10) as d,
substr(starttime,1,4) as y,
substr(starttime,6,2) as m,
substr(starttime,9,2) as d,
substr(starttime,11,10) as t
from tripdata
limit 3
'''
)
starttime | d | y | m | d | t | |
---|---|---|---|---|---|---|
0 | 2017-05-01 00:00:13 | 2017-05-01 | 2017 | 05 | 01 | 00:00:13 |
1 | 2017-05-01 00:00:19 | 2017-05-01 | 2017 | 05 | 01 | 00:00:19 |
2 | 2017-05-01 00:00:19 | 2017-05-01 | 2017 | 05 | 01 | 00:00:19 |
Q('SELECT bikeid, substr(starttime, 6 , 2) as m, sum(tripduration)/3600 as total from tripdata group by bikeid,m')
bikeid | m | total | |
---|---|---|---|
0 | 14529 | 05 | 34 |
1 | 14529 | 06 | 21 |
2 | 14529 | 07 | 128 |
3 | 14529 | 08 | 31 |
4 | 14529 | 09 | 31 |
5 | 14530 | 05 | 34 |
6 | 14530 | 06 | 39 |
7 | 14530 | 07 | 30 |
8 | 14530 | 08 | 31 |
9 | 14530 | 09 | 39 |
10 | 14531 | 05 | 35 |
11 | 14531 | 06 | 34 |
12 | 14531 | 07 | 45 |
13 | 14531 | 08 | 32 |
14 | 14531 | 09 | 45 |
15 | 14532 | 05 | 16 |
16 | 14532 | 06 | 40 |
17 | 14532 | 07 | 20 |
18 | 14532 | 08 | 33 |
19 | 14532 | 09 | 31 |
20 | 14533 | 05 | 12 |
21 | 14533 | 06 | 47 |
22 | 14533 | 07 | 48 |
23 | 14533 | 08 | 47 |
24 | 14533 | 09 | 49 |
25 | 14534 | 05 | 30 |
26 | 14534 | 06 | 51 |
27 | 14534 | 07 | 44 |
28 | 14534 | 08 | 12 |
29 | 14534 | 09 | 43 |
... | ... | ... | ... |
52849 | 31941 | 09 | 0 |
52850 | 31942 | 09 | 3 |
52851 | 31944 | 09 | 6 |
52852 | 31947 | 09 | 3 |
52853 | 31950 | 09 | 5 |
52854 | 31951 | 09 | 0 |
52855 | 31953 | 09 | 2 |
52856 | 31955 | 09 | 5 |
52857 | 31958 | 09 | 5 |
52858 | 31959 | 09 | 4 |
52859 | 31960 | 09 | 3 |
52860 | 31961 | 09 | 1 |
52861 | 31962 | 09 | 4 |
52862 | 31963 | 09 | 4 |
52863 | 31964 | 09 | 5 |
52864 | 31965 | 09 | 1 |
52865 | 31966 | 09 | 3 |
52866 | 31967 | 09 | 2 |
52867 | 31968 | 09 | 4 |
52868 | 31969 | 09 | 2 |
52869 | 31970 | 09 | 4 |
52870 | 31971 | 09 | 4 |
52871 | 31972 | 09 | 2 |
52872 | 31973 | 09 | 2 |
52873 | 31974 | 09 | 2 |
52874 | 31975 | 09 | 5 |
52875 | 31976 | 09 | 3 |
52876 | 31977 | 09 | 3 |
52877 | 31978 | 09 | 0 |
52878 | 31979 | 09 | 5 |
52879 rows × 3 columns
Q('''
SELECT
bikeid,
sum(case when m = '05' then total end)as may,
sum(case when m = '06' then total end)as jun,
sum(case when m = '07' then total end)as jul,
sum(case when m = '08' then total end)as aug,
sum(case when m = '09' then total end)as sep
FROM
(
SELECT
bikeid, substr(starttime, 6 , 2) as m,
sum(tripduration)/3600 as total
from
tripdata
group by
bikeid,
m
)
group by bikeid
''')
bikeid | may | jun | jul | aug | sep | |
---|---|---|---|---|---|---|
0 | 14529 | 34.0 | 21.0 | 128.0 | 31.0 | 31.0 |
1 | 14530 | 34.0 | 39.0 | 30.0 | 31.0 | 39.0 |
2 | 14531 | 35.0 | 34.0 | 45.0 | 32.0 | 45.0 |
3 | 14532 | 16.0 | 40.0 | 20.0 | 33.0 | 31.0 |
4 | 14533 | 12.0 | 47.0 | 48.0 | 47.0 | 49.0 |
5 | 14534 | 30.0 | 51.0 | 44.0 | 12.0 | 43.0 |
6 | 14535 | 42.0 | 57.0 | 43.0 | 43.0 | 42.0 |
7 | 14536 | 21.0 | 68.0 | 53.0 | 35.0 | 25.0 |
8 | 14537 | 19.0 | 40.0 | 42.0 | 38.0 | 38.0 |
9 | 14539 | 38.0 | 35.0 | 54.0 | 51.0 | 32.0 |
10 | 14540 | 20.0 | NaN | 12.0 | 50.0 | 37.0 |
11 | 14541 | 38.0 | 44.0 | 40.0 | 43.0 | 55.0 |
12 | 14544 | 49.0 | 59.0 | 49.0 | 27.0 | 31.0 |
13 | 14545 | 70.0 | 50.0 | 40.0 | 56.0 | 48.0 |
14 | 14547 | 30.0 | 4.0 | 33.0 | 57.0 | 48.0 |
15 | 14548 | 34.0 | 56.0 | 23.0 | 13.0 | 39.0 |
16 | 14549 | 21.0 | 27.0 | 56.0 | 28.0 | 40.0 |
17 | 14550 | 41.0 | 23.0 | 36.0 | 14.0 | 29.0 |
18 | 14551 | 22.0 | 40.0 | 311.0 | 31.0 | 40.0 |
19 | 14552 | 34.0 | 23.0 | 32.0 | 40.0 | 37.0 |
20 | 14553 | 33.0 | 37.0 | 44.0 | 52.0 | 41.0 |
21 | 14555 | 52.0 | 49.0 | 47.0 | 6.0 | 21.0 |
22 | 14556 | 13.0 | 24.0 | 56.0 | 36.0 | 29.0 |
23 | 14558 | 20.0 | 46.0 | 11.0 | 33.0 | 41.0 |
24 | 14559 | 43.0 | 48.0 | 54.0 | 40.0 | 33.0 |
25 | 14561 | 53.0 | 28.0 | 38.0 | 44.0 | 18.0 |
26 | 14562 | 21.0 | 37.0 | 40.0 | 29.0 | 13.0 |
27 | 14563 | 39.0 | 26.0 | 18.0 | 25.0 | 53.0 |
28 | 14565 | 22.0 | 27.0 | 22.0 | 35.0 | 40.0 |
29 | 14566 | 30.0 | 47.0 | 44.0 | 45.0 | 37.0 |
... | ... | ... | ... | ... | ... | ... |
12148 | 31941 | NaN | NaN | NaN | NaN | 0.0 |
12149 | 31942 | NaN | NaN | NaN | NaN | 3.0 |
12150 | 31944 | NaN | NaN | NaN | NaN | 6.0 |
12151 | 31947 | NaN | NaN | NaN | NaN | 3.0 |
12152 | 31950 | NaN | NaN | NaN | NaN | 5.0 |
12153 | 31951 | NaN | NaN | NaN | NaN | 0.0 |
12154 | 31953 | NaN | NaN | NaN | NaN | 2.0 |
12155 | 31955 | NaN | NaN | NaN | NaN | 5.0 |
12156 | 31958 | NaN | NaN | NaN | NaN | 5.0 |
12157 | 31959 | NaN | NaN | NaN | NaN | 4.0 |
12158 | 31960 | NaN | NaN | NaN | NaN | 3.0 |
12159 | 31961 | NaN | NaN | NaN | NaN | 1.0 |
12160 | 31962 | NaN | NaN | NaN | NaN | 4.0 |
12161 | 31963 | NaN | NaN | NaN | NaN | 4.0 |
12162 | 31964 | NaN | NaN | NaN | NaN | 5.0 |
12163 | 31965 | NaN | NaN | NaN | NaN | 1.0 |
12164 | 31966 | NaN | NaN | NaN | NaN | 3.0 |
12165 | 31967 | NaN | NaN | NaN | NaN | 2.0 |
12166 | 31968 | NaN | NaN | NaN | NaN | 4.0 |
12167 | 31969 | NaN | NaN | NaN | NaN | 2.0 |
12168 | 31970 | NaN | NaN | NaN | NaN | 4.0 |
12169 | 31971 | NaN | NaN | NaN | NaN | 4.0 |
12170 | 31972 | NaN | NaN | NaN | NaN | 2.0 |
12171 | 31973 | NaN | NaN | NaN | NaN | 2.0 |
12172 | 31974 | NaN | NaN | NaN | NaN | 2.0 |
12173 | 31975 | NaN | NaN | NaN | NaN | 5.0 |
12174 | 31976 | NaN | NaN | NaN | NaN | 3.0 |
12175 | 31977 | NaN | NaN | NaN | NaN | 3.0 |
12176 | 31978 | NaN | NaN | NaN | NaN | 0.0 |
12177 | 31979 | NaN | NaN | NaN | NaN | 5.0 |
12178 rows × 6 columns
Q('''
select
start_station_name,
sum(case when gender = 1 then 1 else 0 end)as male,
sum(case when gender = 2 then 2 else 0 end)as female
from tripdata
group by 1
''')
start_station_name | male | female | |
---|---|---|---|
0 | 1 Ave & E 110 St | 193 | 200 |
1 | 1 Ave & E 16 St | 22764 | 17910 |
2 | 1 Ave & E 18 St | 16518 | 12152 |
3 | 1 Ave & E 30 St | 15321 | 10304 |
4 | 1 Ave & E 44 St | 8654 | 6836 |
5 | 1 Ave & E 62 St | 13905 | 9716 |
6 | 1 Ave & E 68 St | 19418 | 14474 |
7 | 1 Ave & E 78 St | 12751 | 9924 |
8 | 1 Ave & E 94 St | 4070 | 3406 |
9 | 1 Pl & Clinton St | 2385 | 2538 |
10 | 10 Hudson Yards | 1569 | 1040 |
11 | 10 St & 5 Ave | 2088 | 1952 |
12 | 10 St & 7 Ave | 3064 | 2856 |
13 | 11 Ave & W 27 St | 20088 | 14972 |
14 | 11 Ave & W 41 St | 17370 | 9942 |
15 | 11 Ave & W 59 St | 16723 | 11244 |
16 | 11 St & 35 Ave | 7 | 8 |
17 | 12 Ave & W 40 St | 26897 | 23924 |
18 | 12 St & 4 Ave | 1490 | 1090 |
19 | 14 St & 5 Ave | 2916 | 3016 |
20 | 14 St & 7 Ave | 3351 | 2734 |
21 | 2 Ave & E 104 St | 3003 | 1946 |
22 | 2 Ave & 36 St - Citi Bike HQ at Industry City | 993 | 930 |
23 | 2 Ave & 9 St | 1344 | 806 |
24 | 2 Ave & E 122 St | 51 | 26 |
25 | 2 Ave & E 31 St | 14612 | 9932 |
26 | 2 Ave & E 96 St | 6948 | 5100 |
27 | 2 Ave & E 99 St | 1904 | 1092 |
28 | 21 St & 31 Dr | 3 | 4 |
29 | 21 St & 36 Ave | 10 | 4 |
... | ... | ... | ... |
680 | W 90 St & Amsterdam Ave | 3919 | 3398 |
681 | W 92 St & Broadway | 5987 | 5006 |
682 | W 95 St & Broadway | 7164 | 5982 |
683 | W Broadway & Spring St | 11765 | 10224 |
684 | W Broadway & Spring Street | 3381 | 3032 |
685 | Warren St & Church St | 9341 | 6404 |
686 | Warren St & Court St | 4011 | 3854 |
687 | Washington Ave & Greene Ave | 2932 | 2790 |
688 | Washington Ave & Park Ave | 4085 | 3430 |
689 | Washington Park | 3616 | 3730 |
690 | Washington Pl & 6 Ave | 10689 | 7972 |
691 | Washington Pl & Broadway | 13054 | 8016 |
692 | Washington St & Gansevoort St | 16815 | 15268 |
693 | Water - Whitehall Plaza | 6126 | 4066 |
694 | Watts St & Greenwich St | 13572 | 9586 |
695 | West Drive & Prospect Park West | 5570 | 6770 |
696 | West End Ave & W 107 St | 3090 | 3104 |
697 | West End Ave & W 94 St | 5133 | 5016 |
698 | West St & Chambers St | 34146 | 35194 |
699 | West Thames St | 13502 | 12856 |
700 | William St & Pine St | 7941 | 5060 |
701 | Willoughby Ave & Hall St | 4714 | 5074 |
702 | Willoughby Ave & Tompkins Ave | 1423 | 1582 |
703 | Willoughby Ave & Walworth St | 1854 | 1234 |
704 | Willoughby St & Fleet St | 7210 | 5786 |
705 | Wolcott St & Dwight St | 807 | 760 |
706 | Wyckoff St & 3 Ave | 2285 | 2512 |
707 | Wythe Ave & Metropolitan Ave | 10631 | 10188 |
708 | Yankee Ferry Terminal | 2203 | 2818 |
709 | York St & Jay St | 9932 | 9494 |
710 rows × 3 columns
Q('''
select
start_station_name,
100.0 * male /(male+female) as pct_male
from
(
select
start_station_name,
sum(case when gender = 1 then 1 else 0 end)as male,
sum(case when gender = 2 then 2 else 0 end)as female
from tripdata
group by 1
)
''')
start_station_name | pct_male | |
---|---|---|
0 | 1 Ave & E 110 St | 49.109415 |
1 | 1 Ave & E 16 St | 55.966957 |
2 | 1 Ave & E 18 St | 57.614231 |
3 | 1 Ave & E 30 St | 59.789268 |
4 | 1 Ave & E 44 St | 55.868302 |
5 | 1 Ave & E 62 St | 58.867110 |
6 | 1 Ave & E 68 St | 57.293757 |
7 | 1 Ave & E 78 St | 56.233738 |
8 | 1 Ave & E 94 St | 54.440877 |
9 | 1 Pl & Clinton St | 48.446069 |
10 | 10 Hudson Yards | 60.137984 |
11 | 10 St & 5 Ave | 51.683168 |
12 | 10 St & 7 Ave | 51.756757 |
13 | 11 Ave & W 27 St | 57.296064 |
14 | 11 Ave & W 41 St | 63.598418 |
15 | 11 Ave & W 59 St | 59.795473 |
16 | 11 St & 35 Ave | 46.666667 |
17 | 12 Ave & W 40 St | 52.924972 |
18 | 12 St & 4 Ave | 57.751938 |
19 | 14 St & 5 Ave | 49.157114 |
20 | 14 St & 7 Ave | 55.069844 |
21 | 2 Ave & E 104 St | 60.678925 |
22 | 2 Ave & 36 St - Citi Bike HQ at Industry City | 51.638066 |
23 | 2 Ave & 9 St | 62.511628 |
24 | 2 Ave & E 122 St | 66.233766 |
25 | 2 Ave & E 31 St | 59.533898 |
26 | 2 Ave & E 96 St | 57.669323 |
27 | 2 Ave & E 99 St | 63.551402 |
28 | 21 St & 31 Dr | 42.857143 |
29 | 21 St & 36 Ave | 71.428571 |
... | ... | ... |
680 | W 90 St & Amsterdam Ave | 53.560202 |
681 | W 92 St & Broadway | 54.461930 |
682 | W 95 St & Broadway | 54.495664 |
683 | W Broadway & Spring St | 53.504025 |
684 | W Broadway & Spring Street | 52.721035 |
685 | Warren St & Church St | 59.326770 |
686 | Warren St & Court St | 50.998093 |
687 | Washington Ave & Greene Ave | 51.240825 |
688 | Washington Ave & Park Ave | 54.357951 |
689 | Washington Park | 49.224068 |
690 | Washington Pl & 6 Ave | 57.279889 |
691 | Washington Pl & Broadway | 61.955387 |
692 | Washington St & Gansevoort St | 52.410934 |
693 | Water - Whitehall Plaza | 60.105965 |
694 | Watts St & Greenwich St | 58.606097 |
695 | West Drive & Prospect Park West | 45.137763 |
696 | West End Ave & W 107 St | 49.886987 |
697 | West End Ave & W 94 St | 50.576411 |
698 | West St & Chambers St | 49.244303 |
699 | West Thames St | 51.225434 |
700 | William St & Pine St | 61.079917 |
701 | Willoughby Ave & Hall St | 48.161013 |
702 | Willoughby Ave & Tompkins Ave | 47.354409 |
703 | Willoughby Ave & Walworth St | 60.038860 |
704 | Willoughby St & Fleet St | 55.478609 |
705 | Wolcott St & Dwight St | 51.499681 |
706 | Wyckoff St & 3 Ave | 47.633938 |
707 | Wythe Ave & Metropolitan Ave | 51.063932 |
708 | Yankee Ferry Terminal | 43.875722 |
709 | York St & Jay St | 51.127355 |
710 rows × 2 columns
Q('''
select
start_station_name,
round((2020 - birth_year)/5)*5 as age,
substr(starttime,11,2) as h,
sum(case when gender = 1 then 1 else 0 end)as male,
sum(case when gender = 2 then 2 else 0 end)as female
from tripdata
group by 1,2,3
''')
start_station_name | age | h | male | female | |
---|---|---|---|---|---|
0 | 1 Ave & E 110 St | NaN | 0 | 0 | 0 |
1 | 1 Ave & E 110 St | NaN | 1 | 0 | 0 |
2 | 1 Ave & E 110 St | NaN | 2 | 0 | 0 |
3 | 1 Ave & E 110 St | 25.0 | 0 | 6 | 2 |
4 | 1 Ave & E 110 St | 25.0 | 1 | 6 | 6 |
5 | 1 Ave & E 110 St | 25.0 | 2 | 6 | 0 |
6 | 1 Ave & E 110 St | 30.0 | 0 | 2 | 6 |
7 | 1 Ave & E 110 St | 30.0 | 1 | 10 | 40 |
8 | 1 Ave & E 110 St | 30.0 | 2 | 4 | 2 |
9 | 1 Ave & E 110 St | 35.0 | 0 | 22 | 6 |
10 | 1 Ave & E 110 St | 35.0 | 1 | 24 | 28 |
11 | 1 Ave & E 110 St | 35.0 | 2 | 3 | 0 |
12 | 1 Ave & E 110 St | 40.0 | 0 | 28 | 0 |
13 | 1 Ave & E 110 St | 40.0 | 1 | 14 | 36 |
14 | 1 Ave & E 110 St | 40.0 | 2 | 8 | 2 |
15 | 1 Ave & E 110 St | 45.0 | 0 | 13 | 6 |
16 | 1 Ave & E 110 St | 45.0 | 1 | 17 | 8 |
17 | 1 Ave & E 110 St | 45.0 | 2 | 3 | 0 |
18 | 1 Ave & E 110 St | 50.0 | 0 | 2 | 0 |
19 | 1 Ave & E 110 St | 50.0 | 1 | 2 | 0 |
20 | 1 Ave & E 110 St | 50.0 | 2 | 0 | 2 |
21 | 1 Ave & E 110 St | 55.0 | 0 | 1 | 32 |
22 | 1 Ave & E 110 St | 55.0 | 1 | 9 | 16 |
23 | 1 Ave & E 110 St | 60.0 | 1 | 6 | 2 |
24 | 1 Ave & E 110 St | 65.0 | 0 | 0 | 4 |
25 | 1 Ave & E 110 St | 65.0 | 1 | 5 | 0 |
26 | 1 Ave & E 110 St | 65.0 | 2 | 1 | 0 |
27 | 1 Ave & E 110 St | 75.0 | 0 | 0 | 2 |
28 | 1 Ave & E 110 St | 75.0 | 1 | 1 | 0 |
29 | 1 Ave & E 16 St | NaN | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... |
27659 | York St & Jay St | 30.0 | 1 | 1388 | 1592 |
27660 | York St & Jay St | 30.0 | 2 | 254 | 228 |
27661 | York St & Jay St | 35.0 | 0 | 389 | 322 |
27662 | York St & Jay St | 35.0 | 1 | 1755 | 1970 |
27663 | York St & Jay St | 35.0 | 2 | 244 | 244 |
27664 | York St & Jay St | 40.0 | 0 | 294 | 252 |
27665 | York St & Jay St | 40.0 | 1 | 1219 | 1190 |
27666 | York St & Jay St | 40.0 | 2 | 199 | 126 |
27667 | York St & Jay St | 45.0 | 0 | 220 | 192 |
27668 | York St & Jay St | 45.0 | 1 | 805 | 552 |
27669 | York St & Jay St | 45.0 | 2 | 119 | 72 |
27670 | York St & Jay St | 50.0 | 0 | 105 | 162 |
27671 | York St & Jay St | 50.0 | 1 | 846 | 342 |
27672 | York St & Jay St | 50.0 | 2 | 115 | 36 |
27673 | York St & Jay St | 55.0 | 0 | 94 | 60 |
27674 | York St & Jay St | 55.0 | 1 | 437 | 254 |
27675 | York St & Jay St | 55.0 | 2 | 66 | 56 |
27676 | York St & Jay St | 60.0 | 0 | 21 | 36 |
27677 | York St & Jay St | 60.0 | 1 | 176 | 262 |
27678 | York St & Jay St | 60.0 | 2 | 32 | 20 |
27679 | York St & Jay St | 65.0 | 0 | 32 | 22 |
27680 | York St & Jay St | 65.0 | 1 | 99 | 114 |
27681 | York St & Jay St | 65.0 | 2 | 11 | 10 |
27682 | York St & Jay St | 70.0 | 0 | 6 | 6 |
27683 | York St & Jay St | 70.0 | 1 | 56 | 46 |
27684 | York St & Jay St | 70.0 | 2 | 4 | 6 |
27685 | York St & Jay St | 75.0 | 0 | 3 | 0 |
27686 | York St & Jay St | 75.0 | 1 | 4 | 4 |
27687 | York St & Jay St | 80.0 | 1 | 4 | 0 |
27688 | York St & Jay St | 80.0 | 2 | 2 | 0 |
27689 rows × 5 columns
res = Q('select count(*) from tripdata')
res
count(*) | |
---|---|
0 | 8685057 |
Creating a clean dataset
db.execute('''
CREATE TABLE
tripdata_clean as
SELECT
(2020- birth_year) as age,
CASE WHEN gender = 0 then 'X'
WHEN gender = 1 then 'Male'
WHEN gender = 2 then 'Female' end as sex,
*
FROM tripdata
WHERE age > 0
AND age < 80
AND tripduration <6000
''')
<sqlite3.Cursor at 0x29822a0af10>
Q('select count(*) from tripdata_clean')
count(*) | |
---|---|
0 | 7630014 |
Plotting:
%pylab inline
Populating the interactive namespace from numpy and matplotlib
import seaborn as sb
res = Q('''
select age, count(*) as c from tripdata_clean group by age
''')
hist(res.age, weights=res.c, bins= len(res) )
(array([ 3091., 13052., 22426., 34788., 43686., 60885., 82500.,
144783., 224798., 275293., 303208., 328422., 333107., 335369.,
321919., 319496., 314728., 289934., 272027., 253770., 234885.,
213225., 195550., 178354., 166852., 164220., 151432., 153265.,
140317., 137885., 145172., 153601., 142248., 126760., 116005.,
109145., 112818., 107669., 102578., 100664., 86778., 87151.,
76582., 68865., 63716., 53088., 44981., 43182., 38850.,
26466., 23917., 17193., 14196., 12097., 12677., 9279.,
5376., 4472., 3260., 4688., 3273.]),
array([19. , 19.98360656, 20.96721311, 21.95081967, 22.93442623,
23.91803279, 24.90163934, 25.8852459 , 26.86885246, 27.85245902,
28.83606557, 29.81967213, 30.80327869, 31.78688525, 32.7704918 ,
33.75409836, 34.73770492, 35.72131148, 36.70491803, 37.68852459,
38.67213115, 39.6557377 , 40.63934426, 41.62295082, 42.60655738,
43.59016393, 44.57377049, 45.55737705, 46.54098361, 47.52459016,
48.50819672, 49.49180328, 50.47540984, 51.45901639, 52.44262295,
53.42622951, 54.40983607, 55.39344262, 56.37704918, 57.36065574,
58.3442623 , 59.32786885, 60.31147541, 61.29508197, 62.27868852,
63.26229508, 64.24590164, 65.2295082 , 66.21311475, 67.19672131,
68.18032787, 69.16393443, 70.14754098, 71.13114754, 72.1147541 ,
73.09836066, 74.08196721, 75.06557377, 76.04918033, 77.03278689,
78.01639344, 79. ]),
<a list of 61 Patch objects>)
res = Q('''
select
age,
sum(case when sex = 'Female' then 1 end) as 'Female',
sum(case when sex = 'Male' then 1 end) as 'Male'
from
tripdata_clean
group by 1
''')
res.head()
age | Female | Male | |
---|---|---|---|
0 | 19.0 | 488 | 2520 |
1 | 20.0 | 2245 | 10693 |
2 | 21.0 | 5102 | 17286 |
3 | 22.0 | 7509 | 26574 |
4 | 23.0 | 11465 | 32083 |
figure(figsize = (5,12))
barh(res.age, res.Female/res.Female.sum(), color ='r', label = 'Female')
barh(res.age,-res.Male/res.Male.sum(), color='b', label = 'Male')
title('age distribution')
ylabel('age')
grid()
legend()
<matplotlib.legend.Legend at 0x29800abec48>
res = Q('''
select
age,
sum(case when usertype = 'Subscriber' then 1 end ) as Subscriber,
sum(case when usertype != 'Subscriber' then 1 end ) as Customer
from
tripdata_clean
group by
1
''')
res.head()
age | Subscriber | Customer | |
---|---|---|---|
0 | 19.0 | 3089 | 2 |
1 | 20.0 | 13051 | 1 |
2 | 21.0 | 20178 | 2248 |
3 | 22.0 | 30122 | 4666 |
4 | 23.0 | 39579 | 4107 |
figure(figsize = (5,12))
barh(res.age, res.Subscriber/res.Subscriber.sum(), color ='y', label = 'Subscriber')
barh(res.age,-res.Customer/res.Customer.sum(), color='g', label = 'Customer')
title('age distribution')
ylabel('age')
grid()
legend()
<matplotlib.legend.Legend at 0x2980f0ca8c8>
res = Q('''
select
tripduration / 60 as 'duration',
sum(case when sex = 'Female' then 1 end) as 'Female',
sum(case when sex = 'Male' then 1 end) as 'Male'
from
tripdata_clean
group by
1
''')
res.head()
duration | Female | Male | |
---|---|---|---|
0 | 1 | 17006 | 85222 |
1 | 2 | 45796 | 213363 |
2 | 3 | 78325 | 325656 |
3 | 4 | 104389 | 393120 |
4 | 5 | 120663 | 414420 |
figure(figsize = (5,12))
barh(res.duration, res.Female/res.Female.sum(), color ='r', label = 'Female')
barh(res.duration,-res.Male/res.Male.sum(), color='b', label = 'Male')
title('duration distribution')
ylabel('duration[in minutes]')
grid()
legend()
ylim(0,50)
(0, 50)
res = Q('''
select
start_station_name,
start_station_latitude as lat,
start_station_longitude as long,
count(*) as c
from
tripdata_clean
group by
1,2,3
''')
res.head()
start_station_name | lat | long | c | |
---|---|---|---|---|
0 | 1 Ave & E 110 St | 40.792327 | -73.938300 | 291 |
1 | 1 Ave & E 16 St | 40.732219 | -73.981656 | 31789 |
2 | 1 Ave & E 18 St | 40.733812 | -73.980544 | 22571 |
3 | 1 Ave & E 30 St | 40.741444 | -73.975361 | 20550 |
4 | 1 Ave & E 44 St | 40.750020 | -73.969053 | 12099 |
hist(res.lat, range=(40.6, 40.9), bins= 100);
hist(res.long, bins= 100, range = (-74.1,-73.8));
plot(res.long, res.lat, '.')
xlim(-80,-70)
ylim(40,43)
(40, 43)
plot(res.long, res.lat, '.')
xlim(-74.5,-73.5)
ylim(40.5,41)
(40.5, 41)
plot(res.long, res.lat, '.')
xlim(-74.1,-73.8)
ylim(40.6,40.85)
(40.6, 40.85)
figure(figsize =(10,10))
xstart=-74.1
ystart=40.6
extent=.25
plot(res.long, res.lat, '.')
xlim(xstart,xstart+extent)
ylim(ystart,ystart+extent)
(40.6, 40.85)
figure(figsize =(10,10))
xstart=-74.05
ystart=40.645
extent=.18
plot(res.long, res.lat, 'o', ms=5)
xlim(xstart,xstart+extent)
ylim(ystart,ystart+extent)
(40.645, 40.825)
figure(figsize =(10,10))
xstart=-74.05
ystart=40.645
extent=.18
scatter(res.long, res.lat, 35*res.c/res.c.max())
xlim(xstart,xstart+extent)
ylim(ystart,ystart+extent)
(40.645, 40.825)
figure(figsize =(10,10))
xstart=-74.05
ystart=40.645
extent=.18
scatter(res.long, res.lat, c= 35*res.c/res.c.max(), cmap = 'rainbow')
xlim(xstart,xstart+extent)
ylim(ystart,ystart+extent)
colorbar()
<matplotlib.colorbar.Colorbar at 0x298307f1b88>
def my_plot(x,y,s,c,max_size=50):
figure(figsize =(10,10))
xstart=-74.05
ystart=40.645
extent=.18
scatter(x, y, s= max_size*s/max(s), c=c, cmap = 'rainbow')
xlim(xstart,xstart+extent)
ylim(ystart,ystart+extent)
colorbar()
my_plot(res.long , res.lat,res.c, res.c)
res = Q('''
select
start_station_name,
start_station_latitude as lat,
start_station_longitude as long,
avg(tripduration) as d,
sum(case when sex = 'Female' then 1 end) as 'Female',
sum(case when sex = 'Male' then 1 end) as 'Male'
from
tripdata_clean
group by
1,2,3
''')
res.head()
start_station_name | lat | long | d | Female | Male | |
---|---|---|---|---|---|---|
0 | 1 Ave & E 110 St | 40.792327 | -73.938300 | 868.518900 | 99.0 | 192 |
1 | 1 Ave & E 16 St | 40.732219 | -73.981656 | 665.284658 | 8934.0 | 22702 |
2 | 1 Ave & E 18 St | 40.733812 | -73.980544 | 678.556112 | 6063.0 | 16447 |
3 | 1 Ave & E 30 St | 40.741444 | -73.975361 | 734.345791 | 5146.0 | 15300 |
4 | 1 Ave & E 44 St | 40.750020 | -73.969053 | 857.510621 | 3412.0 | 8644 |
my_plot(res.long, res.lat, res.d,100*res.Male/(res.Male+res.Female))
res = Q('''
select
start_station_name,
start_station_latitude as lat,
start_station_longitude as long,
avg(case when sex = 'Female' then age end) as 'Female',
avg(case when sex = 'Male' then age end) as 'Male'
from
tripdata_clean
group by
1,2,3
''')
my_plot(res.long, res.lat, res.Female*0+1, res.Female)