top of page

CODE

---
title: "DP Final"
output: html_notebook
---

```{r}
# Packages used by the chunks below.
# english: as.english() spells out numbers; used to build per-year variable names.
library(english)
# readr: read_csv() for the trip and restaurant CSV files.
library(readr)
# dplyr / tidyverse / magrittr: filter(), group_by(), summarise(), arrange() and %>%.
library(dplyr)
library(tidyverse)
library(magrittr)
# leaflet: interactive maps (leaflet(), addTiles(), addCircleMarkers(), ...).
library(leaflet) 

# Seed the random number generator once up front; kmeans() is called in later
# chunks, so results are only repeatable when the notebook is run top to bottom.
set.seed(1998)
```


```{r}
# Load one CSV of Halloween taxi trips per year into its own variable:
#   2009_halloween.csv -> nine_halloween, ..., 2018_halloween.csv -> eighteen_halloween
# 2010 is skipped.
#
# The data directory is spelled out instead of calling setwd(), so the chunk
# does not change the session's working directory.
data_dir <- path.expand("~/Desktop/College/Senior/OIDD 245/DP 2/Data")

# Kept for backward compatibility; nothing visible in this notebook reads it.
halloween <- NULL

for (year in setdiff(2009:2018, 2010)) {
  # as.english(9) -> "nine", giving variable names such as nine_halloween.
  trips_name <- paste(as.english(year - 2000), "halloween", sep = "_")
  trips_path <- file.path(data_dir, paste0(year, "_halloween.csv"))

  # The original wrapped this in as.data.frame() but discarded the result,
  # so the stored object is (and always was) whatever read_csv() returns.
  assign(trips_name, read_csv(trips_path))
}

```

```{r}
# Bounding boxes for the two campuses.
# Each campus is described by four street corners; the box used for filtering
# is the min/max latitude and longitude over those corners.

# ---- NYU corners ----
# TL: 6th Ave & 8th St         (40.733852, -73.999583)
# BL: 6th Ave & W Houston St   (40.727922, -74.003160)
# BR: Broadway & W Houston St  (40.725564, -73.996783)
# TR: Broadway & 8th St        (40.725306, -73.996712)
# NOTE(review): the "top-right" latitude is lower than the "bottom-right" one
# and the two points nearly coincide; the Broadway & 8th St corner may have
# been mis-transcribed -- verify against a map before relying on the NYU box.
NYU_TL_lat <- 40.733852
NYU_TL_lon <- -73.999583
NYU_BL_lat <- 40.727922
NYU_BL_lon <- -74.003160
NYU_BR_lat <- 40.725564
NYU_BR_lon <- -73.996783
NYU_TR_lat <- 40.725306
NYU_TR_lon <- -73.996712

NYU_lat_span <- range(c(NYU_TR_lat, NYU_BR_lat, NYU_BL_lat, NYU_TL_lat))
NYU_lon_span <- range(c(NYU_TR_lon, NYU_BR_lon, NYU_BL_lon, NYU_TL_lon))

range_NYU_min_lat <- NYU_lat_span[1]
range_NYU_max_lat <- NYU_lat_span[2]
range_NYU_min_lon <- NYU_lon_span[1]
range_NYU_max_lon <- NYU_lon_span[2]

# ---- Columbia corners ----
# TL: Riverside Dr & 122nd St    (40.812765, -73.963072)
# TR: Morningside Dr & 122nd St  (40.810018, -73.956806)
# BR: Morningside Dr & 110th St  (40.801566, -73.961005)
# BL: Riverside Dr & 110th St    (40.804859, -73.968842)
Col_TL_lat <- 40.812765
Col_TL_lon <- -73.963072
Col_BL_lat <- 40.804859
Col_BL_lon <- -73.968842
Col_BR_lat <- 40.801566
Col_BR_lon <- -73.961005
Col_TR_lat <- 40.810018
Col_TR_lon <- -73.956806

Col_lat_span <- range(c(Col_TR_lat, Col_BR_lat, Col_BL_lat, Col_TL_lat))
Col_lon_span <- range(c(Col_TR_lon, Col_BR_lon, Col_BL_lon, Col_TL_lon))

range_Col_min_lat <- Col_lat_span[1]
range_Col_max_lat <- Col_lat_span[2]
range_Col_min_lon <- Col_lon_span[1]
range_Col_max_lon <- Col_lon_span[2]


```


```{r}
# Trips picked up inside a latitude/longitude box, tagged with a label.
#
# trips:            data frame with pickup_latitude, pickup_longitude,
#                   dropoff_latitude and dropoff_longitude columns
# lat_min, lat_max: latitude limits of the box (inclusive, via between())
# lon_min, lon_max: longitude limits of the box (inclusive, via between())
# label:            value written to the appended `reference` column
#
# Returns the matching rows with `reference` added as the last column.
pickups_in_box <- function(trips, lat_min, lat_max, lon_min, lon_max, label) {
  trips %>%
    filter(
      between(pickup_latitude, lat_min, lat_max),
      between(pickup_longitude, lon_min, lon_max),
      # NOTE(review): this drops a trip when EITHER coordinate is unchanged.
      # If the goal is only to drop trips whose pickup and drop-off points are
      # identical, the condition should use `|` instead of `&` -- confirm.
      pickup_latitude != dropoff_latitude & pickup_longitude != dropoff_longitude
    ) %>%
    # A further filter(passenger_count > 1) step was left disabled by the author.
    cbind(reference = label)
}

# Pickups inside the NYU bounding box (range_NYU_* limits).
nyu_pickups <- function(trips, label) {
  pickups_in_box(
    trips,
    range_NYU_min_lat, range_NYU_max_lat,
    range_NYU_min_lon, range_NYU_max_lon,
    label
  )
}

# Pickups inside the Columbia bounding box (range_Col_* limits).
col_pickups <- function(trips, label) {
  pickups_in_box(
    trips,
    range_Col_min_lat, range_Col_max_lat,
    range_Col_min_lon, range_Col_max_lon,
    label
  )
}

nine_NYU <- nyu_pickups(nine_halloween, "2009 NYU")

nine_NYU


nine_Col <- col_pickups(nine_halloween, "2009 Col")

nine_Col

twelve_NYU <- nyu_pickups(twelve_halloween, "2012 NYU")

twelve_NYU

twelve_Col <- col_pickups(twelve_halloween, "2012 Col")

twelve_Col

fifteen_NYU <- nyu_pickups(fifteen_halloween, "2015 NYU")

fifteen_NYU

fifteen_Col <- col_pickups(fifteen_halloween, "2015 Col")

fifteen_Col

# The 2018 data is filtered on PULocationID instead of coordinates.
# NOTE(review): 114 and 166 are presumably the taxi-zone IDs covering the NYU
# and Columbia areas -- confirm against the zone lookup table.
eighteen_NYU <- eighteen_halloween %>%
  filter(PULocationID == 114) %>%
  cbind(reference = "2018 NYU")
  # filter(passenger_count > 1) left disabled by the author

eighteen_NYU

eighteen_Col <- eighteen_halloween %>%
  filter(PULocationID == 166) %>%
  cbind(reference = "2018 Col")
  # filter(passenger_count > 1) left disabled by the author

eighteen_Col
```

```{r}
# Elbow plot: total within-cluster sum of squares for k = 1..k_max,
# computed on columns 6 and 7 of the 2009 NYU trips.
# NOTE(review): columns 6 and 7 are presumably the pickup coordinates -- confirm.
k_max <- 25

check_df <- nine_NYU[, c(6, 7)]

# Total within-cluster sum of squares of a k-means fit with k centres.
total_withinss_for <- function(k) {
  fit <- kmeans(check_df, centers = k, iter.max = 15, nstart = 50)
  fit$tot.withinss
}

wss <- vapply(seq_len(k_max), total_withinss_for, numeric(1))
wss

plot(
  seq_len(k_max), wss,
  type = "b",
  pch = 19,
  frame = FALSE,
  xlab = "Number of Clusters K",
  ylab = "Total Within-Clusters Sum of Squares"
)


```

```{r}
# Create Clusters
number_clusters <- 100

# Run k-means on two columns of `trips` and count the trips per cluster.
#
# trips:      data frame of trips
# coord_cols: positions of the two columns to cluster on
#             NOTE(review): c(6, 7) / c(3, 4) are presumably the pickup
#             coordinate columns of the respective files -- confirm.
# k:          number of clusters passed to kmeans()
#
# Returns a list with
#   values: the kmeans() fit
#   trips:  `trips` with a `cluster` column appended
#   sizes:  one row per cluster with its trip count `Freq`, largest first
cluster_trips <- function(trips, coord_cols, k) {
  fit <- kmeans(trips[, coord_cols], k)
  labelled <- cbind(trips, cluster = fit$cluster)
  sizes <- labelled %>%
    group_by(cluster) %>%
    summarise(Freq = n()) %>%
    arrange(desc(Freq))
  list(values = fit, trips = labelled, sizes = sizes)
}

# Count trips per drop-off zone, most frequent zone first.
dropoff_zone_counts <- function(trips) {
  trips %>%
    group_by(DOLocationID) %>%
    summarise(Freq = n()) %>%
    arrange(desc(Freq))
}

# The kmeans() calls run in the same order as before so that the random
# number stream is consumed identically.
nine_NYU_fit <- cluster_trips(nine_NYU, c(6, 7), number_clusters)
nine_NYU_cluster_values <- nine_NYU_fit$values
nine_NYU_cluster <- nine_NYU_fit$trips
nine_NYU_cluster_group <- nine_NYU_fit$sizes

nine_NYU_cluster_group

nine_Col_fit <- cluster_trips(nine_Col, c(6, 7), number_clusters)
nine_Col_cluster_values <- nine_Col_fit$values
nine_Col_cluster <- nine_Col_fit$trips
nine_Col_cluster_group <- nine_Col_fit$sizes

nine_Col_cluster_group

twelve_NYU_fit <- cluster_trips(twelve_NYU, c(6, 7), number_clusters)
twelve_NYU_cluster_values <- twelve_NYU_fit$values
twelve_NYU_cluster <- twelve_NYU_fit$trips
twelve_NYU_cluster_group <- twelve_NYU_fit$sizes

twelve_NYU_cluster_group

twelve_Col_fit <- cluster_trips(twelve_Col, c(6, 7), number_clusters)
twelve_Col_cluster_values <- twelve_Col_fit$values
twelve_Col_cluster <- twelve_Col_fit$trips
twelve_Col_cluster_group <- twelve_Col_fit$sizes

twelve_Col_cluster_group

fifteen_NYU_fit <- cluster_trips(fifteen_NYU, c(3, 4), number_clusters)
fifteen_NYU_cluster_values <- fifteen_NYU_fit$values
fifteen_NYU_cluster <- fifteen_NYU_fit$trips
fifteen_NYU_cluster_group <- fifteen_NYU_fit$sizes

fifteen_NYU_cluster_group

fifteen_Col_fit <- cluster_trips(fifteen_Col, c(3, 4), number_clusters)
fifteen_Col_cluster_values <- fifteen_Col_fit$values
fifteen_Col_cluster <- fifteen_Col_fit$trips
fifteen_Col_cluster_group <- fifteen_Col_fit$sizes

fifteen_Col_cluster_group

# 2018 trips are grouped by drop-off zone instead of being clustered.
# The original (misspelled) `eighten_*` names are kept for compatibility;
# correctly spelled aliases are provided alongside them.
eighten_NYU_cluster_group <- dropoff_zone_counts(eighteen_NYU)
eighteen_NYU_cluster_group <- eighten_NYU_cluster_group

eighten_NYU_cluster_group

eighten_Col_cluster_group <- dropoff_zone_counts(eighteen_Col)
eighteen_Col_cluster_group <- eighten_Col_cluster_group

eighten_Col_cluster_group


```

```{r}
# Drop-off points of the trips that belong to the busiest clusters.
#
# clustered_trips: trips with `cluster`, `reference`, `dropoff_longitude` and
#                  `dropoff_latitude` columns
# cluster_sizes:   per-cluster counts, already sorted largest first
# n_top:           how many of the leading clusters to keep (default 3)
#
# Returns the columns dropoff_longitude, dropoff_latitude, reference, cluster.
# Columns are picked by name rather than by position (previously
# c(10, 11, 21, 22) or c(7, 8, 21, 22) depending on the year's file layout),
# so the selection no longer depends on each file's column order.
top_cluster_dropoffs <- function(clustered_trips, cluster_sizes, n_top = 3) {
  busiest <- head(cluster_sizes$cluster, n_top)
  clustered_trips %>%
    filter(cluster %in% busiest) %>%
    dplyr::select(dropoff_longitude, dropoff_latitude, reference, cluster)
}

nine_NYU_top <- top_cluster_dropoffs(nine_NYU_cluster, nine_NYU_cluster_group)

nine_NYU_top

nine_Col_top <- top_cluster_dropoffs(nine_Col_cluster, nine_Col_cluster_group)

nine_Col_top

twelve_NYU_top <- top_cluster_dropoffs(twelve_NYU_cluster, twelve_NYU_cluster_group)

twelve_NYU_top

twelve_Col_top <- top_cluster_dropoffs(twelve_Col_cluster, twelve_Col_cluster_group)

twelve_Col_top

fifteen_NYU_top <- top_cluster_dropoffs(fifteen_NYU_cluster, fifteen_NYU_cluster_group)

fifteen_NYU_top

fifteen_Col_top <- top_cluster_dropoffs(fifteen_Col_cluster, fifteen_Col_cluster_group)

fifteen_Col_top


```

```{r}
# Stack the top-cluster drop-offs of every year/campus pair into one table.
top_tables <- list(
  nine_NYU_top, nine_Col_top,
  twelve_NYU_top, twelve_Col_top,
  fifteen_NYU_top, fifteen_Col_top
)
consolidated_top <- do.call(rbind, top_tables)

consolidated_top

```

```{r}
# my_pal = colorRampPalette(colors = c("red", "blue", "orange", "purple", "yellow", "green"), space = "Lab")(6)

# pal_map = colorFactor(palette = my_pal, domain = consolidated_top$reference)
# pal_map
```


```{r}
# Colour mapping for the `reference` groups. The map below calls pal_map(),
# but the only definition in the notebook sits in a commented-out chunk, so
# the chunk failed with "could not find function". Define it here instead.
my_pal <- colorRampPalette(
  colors = c("red", "blue", "orange", "purple", "yellow", "green"),
  space = "Lab"
)(6)
pal_map <- colorFactor(palette = my_pal, domain = consolidated_top$reference)

# All top-cluster drop-offs on one map, one colour per year/campus group;
# clicking a marker shows its group label.
consolidated_top_map <- leaflet(consolidated_top) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~dropoff_longitude,
    lat = ~dropoff_latitude,
    color = ~pal_map(reference),
    popup = ~as.character(reference)
  ) %>%
  addProviderTiles(providers$CartoDB.Positron)

consolidated_top_map
```

```{r}
# Drop-offs of the top clusters as toggleable map layers, one layer and one
# colour per year/campus group.
marker_layers <- list(
  list(trips = nine_NYU_top,    group = "2009 NYU", color = "red"),
  list(trips = nine_Col_top,    group = "2009 Col", color = "blue"),
  list(trips = twelve_NYU_top,  group = "2012 NYU", color = "green"),
  list(trips = twelve_Col_top,  group = "2012 Col", color = "orange"),
  list(trips = fifteen_NYU_top, group = "2015 NYU", color = "purple"),
  list(trips = fifteen_Col_top, group = "2015 Col", color = "yellow")
)

top_map <- leaflet() %>%
  addTiles()

# Add the layers in list order so the markers stack as before.
for (layer in marker_layers) {
  top_map <- addCircleMarkers(
    top_map,
    lng = layer$trips$dropoff_longitude,
    lat = layer$trips$dropoff_latitude,
    popup = as.character(layer$trips$reference),
    group = layer$group,
    color = layer$color
  )
}

top_map <- top_map %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addLayersControl(
    overlayGroups = vapply(marker_layers, function(layer) layer$group, character(1)),
    # TRUE/FALSE spelled out: `F` is an ordinary variable that can be reassigned.
    options = layersControlOptions(collapsed = FALSE)
  )

top_map
```


```{r}
# Same drop-offs as plain markers that leaflet groups into clusters when
# zoomed out; the popup shows each trip's group label.
# Note: this reassigns consolidated_top_map.
consolidated_top_map <- addTiles(leaflet())
consolidated_top_map <- addMarkers(
  consolidated_top_map,
  lng = consolidated_top$dropoff_longitude,
  lat = consolidated_top$dropoff_latitude,
  popup = paste(as.character(consolidated_top$reference)),
  clusterOptions = markerClusterOptions()
)
consolidated_top_map <- addProviderTiles(
  consolidated_top_map,
  providers$CartoDB.Positron
)

consolidated_top_map
```


```{r}
# Read the geocoded restaurant list. The file has no header row, so the
# column names are supplied directly to read_csv().
# The full path is built explicitly instead of calling setwd(), so the chunk
# does not change the session's working directory.
restaurants_path <- file.path(
  path.expand("~/Desktop/College/Senior/OIDD 245/DP 2/Data"),
  "NYC Restaurants Geocoded.csv"
)

NYC_restaurants <- read_csv(
  restaurants_path,
  col_names = c("name", "latitude", "longitude", "raw")
)

```

```{r}
# Restaurants as clustered markers; the popup shows the restaurant name.
NYC_restaurants_map <- addTiles(leaflet())
NYC_restaurants_map <- addMarkers(
  NYC_restaurants_map,
  lng = NYC_restaurants$longitude,
  lat = NYC_restaurants$latitude,
  popup = paste(as.character(NYC_restaurants$name)),
  clusterOptions = markerClusterOptions()
)
NYC_restaurants_map <- addProviderTiles(
  NYC_restaurants_map,
  providers$CartoDB.Positron
)

NYC_restaurants_map
```


```{r}
# Build the per-campus tables used for the summary statistics and t-tests.
# Inputs: nine_/twelve_/fifteen_/eighteen_ NYU and Col trip tables.
#
# The four measures are selected by name. The original picked them by
# position (c(4,5,13,16), c(1,2,9,12) or c(4,5,11,14) depending on the year),
# which silently depends on every file's column order; the statistics chunk
# below already refers to these columns by these names.
stat_cols <- c("passenger_count", "trip_distance", "fare_amount", "tip_amount")

stats_nine_NYU <- nine_NYU[, stat_cols]
stats_nine_Col <- nine_Col[, stat_cols]
stats_nine_NYU

stats_twelve_NYU <- twelve_NYU[, stat_cols]
stats_twelve_Col <- twelve_Col[, stat_cols]
stats_twelve_NYU

stats_fifteen_NYU <- fifteen_NYU[, stat_cols]
stats_fifteen_Col <- fifteen_Col[, stat_cols]
stats_fifteen_NYU

stats_eighteen_NYU <- eighteen_NYU[, stat_cols]
stats_eighteen_Col <- eighteen_Col[, stat_cols]
stats_eighteen_NYU

# indicator: 1 = picked up at NYU, 0 = picked up at Columbia.
stats_NYU <- rbind(stats_nine_NYU, stats_twelve_NYU, stats_fifteen_NYU, stats_eighteen_NYU)
stats_NYU$indicator <- 1

stats_Col <- rbind(stats_nine_Col, stats_twelve_Col, stats_fifteen_Col, stats_eighteen_Col)
stats_Col$indicator <- 0

stats_NYU
stats_Col

stats_consolidated <- rbind(stats_NYU, stats_Col)

stats_consolidated

```

```{r}

# Compare NYU and Columbia pickups on four measures.
metrics <- c("tip_amount", "fare_amount", "trip_distance", "passenger_count")

# For each measure print, in order: mean NYU, mean Columbia, max NYU, max Columbia.
for (metric in metrics) {
  for (summary_fn in list(mean, max)) {
    print(summary_fn(stats_NYU[[metric]]))
    print(summary_fn(stats_Col[[metric]]))
  }
}

# Two-sample t-test of each measure against the campus indicator
# (formula `<measure> ~ indicator` on the combined table).
for (metric in metrics) {
  print(t.test(reformulate("indicator", response = metric), data = stats_consolidated))
}

```




```{r}

# list_data2 = c('nine_hal', 'eleven_hal', 'twelve_hal','thirteen_hal', 'fourteen_hal', 'fifteen_hal', 'sixteen_hal', 'seventeen_hal', 'eighteen_hal')

# datasets = list(nine_hal, eleven_hal, twelve_hal, thirteen_hal, fourteen_hal, fifteen_hal, sixteen_hal, seventeen_hal, eighteen_hal)

# lapply(datasets, function(x) {
#   filter(x, between(pickup_latitude, range_NYU_min_lat, range_NYU_max_lat)) %>%
#   filter(between(pickup_longitude, range_NYU_min_lon, range_NYU_max_lon))

# })
```

Contact
Code: About

©2019 by My Site. Proudly created with Wix.com

bottom of page