<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>segmentation | Jane Li</title>
    <link>/tag/segmentation/</link>
      <atom:link href="/tag/segmentation/index.xml" rel="self" type="application/rss+xml" />
    <description>segmentation</description>
    <generator>Source Themes Academic (https://sourcethemes.com/academic/)</generator><language>en-us</language><lastBuildDate>Sat, 23 Nov 2019 00:00:00 +0000</lastBuildDate>
    <image>
      <url>/images/icon_hu0b7a4cb9992c9ac0e91bd28ffd38dd00_9727_512x512_fill_lanczos_center_2.png</url>
      <title>segmentation</title>
      <link>/tag/segmentation/</link>
    </image>
    
    <item>
      <title>Visualizing Chicago Public Transit Ridership</title>
      <link>/post/2019-11-23-r-rmarkdown/</link>
      <pubDate>Sat, 23 Nov 2019 00:00:00 +0000</pubDate>
      <guid>/post/2019-11-23-r-rmarkdown/</guid>
      <description>
&lt;script src=&#34;/rmarkdown-libs/kePrint/kePrint.js&#34;&gt;&lt;/script&gt;
&lt;script src=&#34;/rmarkdown-libs/htmlwidgets/htmlwidgets.js&#34;&gt;&lt;/script&gt;
&lt;script src=&#34;/rmarkdown-libs/pymjs/pym.v1.js&#34;&gt;&lt;/script&gt;
&lt;script src=&#34;/rmarkdown-libs/widgetframe-binding/widgetframe.js&#34;&gt;&lt;/script&gt;


&lt;p&gt;In this post, I used Open data from Chicago Transit Authority (CTA) to segment routes based on ridership, and developed a visualization dashboard to identify stops with unbalanced boardings and alightings and stops with the most ridership. Get the raw data &lt;a href=&#34;https://raw.githubusercontent.com/enjieli/CAT-data-demo/master/app/CTA_-_Ridership_-_Avg._Weekday_Bus_Stop_Boardings_in_October_2012.csv&#34;&gt;here&lt;/a&gt;.&lt;/p&gt;
&lt;div id=&#34;load-package&#34; class=&#34;section level3&#34;&gt;
&lt;h3&gt;load package&lt;/h3&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;library(knitr)
library(tidyverse)
library(stringr)
library(sf)
library(kableExtra)
library(RColorBrewer)
library(viridis)
library(leaflet)
library(ggmap)
library(widgetframe)&lt;/code&gt;&lt;/pre&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;raw_df&amp;lt;- read_csv(&amp;quot;CTA_-_Ridership_-_Avg._Weekday_Bus_Stop_Boardings_in_October_2012.csv&amp;quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#clean raw_df 
raw_df&amp;lt;- 
  raw_df %&amp;gt;%
  mutate( location= str_replace_all(location, &amp;quot;[()]&amp;quot;, &amp;quot;&amp;quot;)) %&amp;gt;%
  separate(location, c(&amp;quot;lat&amp;quot;, &amp;quot;lon&amp;quot;), sep = &amp;quot;,&amp;quot;) %&amp;gt;%
  mutate( lat =as.numeric(lat)) %&amp;gt;%
  mutate( lon= as.numeric(lon))&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;div style=&#34;margin-top:50px;&#34;&gt;
&lt;div id=&#34;table-1-routes-with-most-stops&#34; class=&#34;section level5&#34;&gt;
&lt;h5&gt;Table 1 Routes with most stops&lt;/h5&gt;
&lt;div style=&#34;margin-bottom:20px;&#34;&gt;

&lt;/div&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#What is the route with the most stops?
routes &amp;lt;- 
  raw_df %&amp;gt;%
  separate_rows(routes) %&amp;gt;%
  group_by(routes) %&amp;gt;%
  summarise(Number_of_stops=n_distinct(stop_id)) %&amp;gt;%
  arrange(desc(Number_of_stops))

kable(routes[1:10, ],  booktabs = T) %&amp;gt;%
  kable_styling(bootstrap_options = &amp;quot;striped&amp;quot;, full_width = F, position = &amp;quot;left&amp;quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;table class=&#34;table table-striped&#34; style=&#34;width: auto !important; &#34;&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th style=&#34;text-align:left;&#34;&gt;
routes
&lt;/th&gt;
&lt;th style=&#34;text-align:right;&#34;&gt;
Number_of_stops
&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
9
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
273
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
49
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
242
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
151
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
221
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
8
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
220
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
3
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
213
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
82
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
209
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
62
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
206
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
79
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
206
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
N5
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
206
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
4
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
202
&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;div style=&#34;margin-top:100px;&#34;&gt;
&lt;/div&gt;
&lt;div id=&#34;table-2-stops-with-the-most-routes&#34; class=&#34;section level5&#34;&gt;
&lt;h5&gt;Table 2 Stops with the most routes&lt;/h5&gt;
&lt;div style=&#34;margin-bottom:20px;&#34;&gt;

&lt;/div&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#What is the stop with the most routes?
stops &amp;lt;- 
  raw_df %&amp;gt;%
  mutate(number_of_routes = str_count(routes, &amp;quot;\\d+&amp;quot;)) %&amp;gt;%
  arrange(desc(number_of_routes)) %&amp;gt;%
  select(stop_id,on_street, cross_street, routes, number_of_routes)

kable(stops[1:10, ], booktabs = T) %&amp;gt;%
  kable_styling(bootstrap_options = &amp;quot;striped&amp;quot;, full_width = F, position = &amp;quot;left&amp;quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;table class=&#34;table table-striped&#34; style=&#34;width: auto !important; &#34;&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th style=&#34;text-align:right;&#34;&gt;
stop_id
&lt;/th&gt;
&lt;th style=&#34;text-align:left;&#34;&gt;
on_street
&lt;/th&gt;
&lt;th style=&#34;text-align:left;&#34;&gt;
cross_street
&lt;/th&gt;
&lt;th style=&#34;text-align:left;&#34;&gt;
routes
&lt;/th&gt;
&lt;th style=&#34;text-align:right;&#34;&gt;
number_of_routes
&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1106
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
WASHINGTON
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
3,4,19,20,26,60,N66,124,143,145,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
14
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
73
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
betw. VAN BUREN &amp;amp; CONGRES
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
1,3,4,7,J14,26,X28,126,129,132,143,147,148
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
13
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1103
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
HUBBARD (WRIGLEY BLDG.)
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
2,3,10,26,N66,143,144,145,146,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
13
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1120
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
SOUTH WATER
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
3,6,20,26,N66,143,144,145,146,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
13
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1122
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
HUBBARD (TRIBUNE BLDG.)
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
2,3,10,26,N66,143,144,145,146,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
13
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1100
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
ERIE
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
3,10,26,33,125,143,144,145,146,147,148,151
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
12
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1101
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
OHIO
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
2,3,10,26,143,144,145,146,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
12
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1102
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
ILLINOIS
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
2,3,10,26,143,144,145,146,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
12
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1105
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
betw. LAKE &amp;amp; RANDOLPH
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
3,4,19,26,N66,124,143,145,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
12
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
1123
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
MICHIGAN
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
GRAND
&lt;/td&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
2,3,10,26,143,144,145,146,147,148,151,157
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
12
&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;div style=&#34;margin-top:100px;&#34;&gt;
&lt;h5 id=&#34;figure-1-stop-density-map&#34;&gt;Figure 1 Stop Density Map&lt;/h5&gt;
&lt;div style=&#34;margin-bottom:20px;&#34;&gt;

&lt;/div&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#get chicago basemap 
chi_bb &amp;lt;- c(left = -87.88430,
            bottom = 41.64416,
            right = -87.52570,
            top = 42.06470)

chicago_stamen &amp;lt;- get_stamenmap(bbox = chi_bb, zoom = 11, maptype = &amp;#39;toner-lite&amp;#39;)

# calculate stop density
df_sp &amp;lt;- 
  st_as_sf(raw_df, coords = c(&amp;quot;lon&amp;quot;, &amp;quot;lat&amp;quot;), crs = 4326) %&amp;gt;%
  select(-c(&amp;#39;month_beginning&amp;#39;,&amp;#39;daytype&amp;#39;))

#plot them together
stops_density_map &amp;lt;- 
  ggmap(chicago_stamen)+ 
  stat_density_2d(data = raw_df,
                  aes(x = lon, y = lat, fill = stat(level)),
                  alpha = .4,
                  bins = 5,
                  geom = &amp;quot;polygon&amp;quot;) +
  scale_fill_gradientn(&amp;#39;number of stops&amp;#39;,colors = brewer.pal(5, &amp;quot;YlOrRd&amp;quot;)) +
  geom_sf(data = df_sp, inherit.aes = FALSE,color=&amp;#39;black&amp;#39;,
          size = 0.2, alpha=0.1,show.legend = &amp;#39;point&amp;#39;) 

stops_density_map&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;img src=&#34;/post/2019-11-23-r-rmarkdown_files/figure-html/chicago_basemap-1.png&#34; width=&#34;672&#34; /&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;div style=&#34;margin-top:100px;&#34;&gt;
&lt;h5 id=&#34;figure-2-map-of-stops-with-no-ridership&#34;&gt;Figure 2 Map of stops with no ridership&lt;/h5&gt;
&lt;div style=&#34;margin-bottom:20px;&#34;&gt;

&lt;/div&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#tranform to long dataframe
stops_route &amp;lt;- 
  raw_df %&amp;gt;%
  mutate(number_of_routes = str_count(routes, &amp;quot;\\d+&amp;quot;)) %&amp;gt;%
  separate_rows(routes) %&amp;gt;%
  mutate( boardings = boardings/number_of_routes) %&amp;gt;%
  mutate(alightings =  alightings/ number_of_routes)  %&amp;gt;%
  select( -c(number_of_routes,daytype,month_beginning)) %&amp;gt;%
  gather(&amp;quot;boardings&amp;quot;, &amp;quot;alightings&amp;quot;, key = type, value = ridership)

#make it sf object 
stops_route_sp &amp;lt;- 
  st_as_sf(stops_route, coords = c(&amp;quot;lon&amp;quot;, &amp;quot;lat&amp;quot;), crs = 4326) 

#stops with no boardings and alightings
#transform stops_no_ridership to spatial dataframe

stops_no_ridership_sp &amp;lt;-
  stops_route_sp %&amp;gt;%
  filter (ridership==0) %&amp;gt;%
  group_by(stop_id, routes) %&amp;gt;%
  filter(n()&amp;gt;1)

#n_distinct(stops_no_ridership_sp$stop_id)
#n_distinct(stops_no_ridership_sp$routes)

#set color scheme
pal &amp;lt;- colorFactor(palette = &amp;#39;Spectral&amp;#39;, domain =stops_no_ridership_sp$routes)

#plot
p1&amp;lt;-leaflet(stops_no_ridership_sp) %&amp;gt;%
  setView(lng=-87.705, lat=41.85443, zoom = 10) %&amp;gt;% 
  addProviderTiles(providers$Stamen.Toner,group=&amp;quot;Stamen Toner&amp;quot;) %&amp;gt;%
  addCircleMarkers( color = ~pal(routes),
                        radius = ~ ifelse(ridership &amp;lt;=100, 6,
                                           ifelse((ridership &amp;lt;=1000),10,15)),
                        stroke = FALSE, 
                        fillOpacity = 0.8,
                        popup = ~paste(&amp;quot;Stop_id:&amp;quot;, stop_id, 
                          &amp;quot;&amp;lt;br&amp;gt;&amp;quot;, &amp;quot;Route:&amp;quot;, routes, 
                          &amp;quot;&amp;lt;br&amp;gt;&amp;quot;, &amp;quot;Ridership:&amp;quot;, ridership)) %&amp;gt;%
      addLegend(&amp;quot;topright&amp;quot;, pal = pal, opacity = 1,
                values = ~routes, 
                title = &amp;quot;Routes&amp;quot;)
frameWidget(p1, width=&amp;#39;100%&amp;#39;)&lt;/code&gt;&lt;/pre&gt;
&lt;div id=&#34;htmlwidget-1&#34; style=&#34;width:100%;height:480px;&#34; class=&#34;widgetframe html-widget&#34;&gt;&lt;/div&gt;
&lt;script type=&#34;application/json&#34; data-for=&#34;htmlwidget-1&#34;&gt;{&#34;x&#34;:{&#34;url&#34;:&#34;/post/2019-11-23-r-rmarkdown_files/figure-html//widgets/widget_stop_route_long.html&#34;,&#34;options&#34;:{&#34;xdomain&#34;:&#34;*&#34;,&#34;allowfullscreen&#34;:false,&#34;lazyload&#34;:false}},&#34;evals&#34;:[],&#34;jsHooks&#34;:[]}&lt;/script&gt;
&lt;/div&gt;
&lt;div style=&#34;margin-top:100px;&#34;&gt;
&lt;/div&gt;
&lt;div id=&#34;table-3-top-10-most-trafficked-routes&#34; class=&#34;section level5&#34;&gt;
&lt;h5&gt;Table 3 Top 10 most trafficked routes&lt;/h5&gt;
&lt;div style=&#34;margin-bottom:20px;&#34;&gt;

&lt;/div&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#routes by boarding and alighting ridership 
ridership_by_routes&amp;lt;- 
  stops_route %&amp;gt;%
  filter(!is.na(routes)) %&amp;gt;%
  group_by(routes, type) %&amp;gt;%
  summarise(n=sum(ridership)) %&amp;gt;%
  spread(type, n) %&amp;gt;%
  mutate(total =boardings+ alightings) %&amp;gt;%
  arrange(desc(total))

kable(ridership_by_routes[1:10, ], booktabs = T) %&amp;gt;%
  kable_styling(bootstrap_options = &amp;quot;striped&amp;quot;, full_width = F, position = &amp;quot;left&amp;quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;table class=&#34;table table-striped&#34; style=&#34;width: auto !important; &#34;&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th style=&#34;text-align:left;&#34;&gt;
routes
&lt;/th&gt;
&lt;th style=&#34;text-align:right;&#34;&gt;
alightings
&lt;/th&gt;
&lt;th style=&#34;text-align:right;&#34;&gt;
boardings
&lt;/th&gt;
&lt;th style=&#34;text-align:right;&#34;&gt;
total
&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
9
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
33119.68
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
33704.75
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
66824.43
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
79
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
31520.93
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
31741.53
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
63262.47
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
49
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
29305.87
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
29500.68
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
58806.55
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
66
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
26494.62
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
26566.83
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
53061.45
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
77
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
24193.93
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
22996.52
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
47190.45
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
8
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
23579.65
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
23555.37
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
47135.02
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
53
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
23540.77
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
23586.90
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
47127.67
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
4
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
23855.90
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
22107.08
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
45962.98
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
82
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
22379.65
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
22005.20
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
44384.85
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&#34;text-align:left;&#34;&gt;
63
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
21785.53
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
21732.61
&lt;/td&gt;
&lt;td style=&#34;text-align:right;&#34;&gt;
43518.14
&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;div style=&#34;margin-top:100px;&#34;&gt;
&lt;/div&gt;
&lt;div id=&#34;figure-3-stops-segmentation-based-on-ridership&#34; class=&#34;section level5&#34;&gt;
&lt;h5&gt;Figure 3 Stops segmentation based on Ridership&lt;/h5&gt;
&lt;div style=&#34;margin-bottom:20px;&#34;&gt;

&lt;/div&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#reshape df with only lat, lon, boardings, and alights columns
#scale the dataset
km_df_scale&amp;lt;- 
  raw_df%&amp;gt;%
  select(stop_id,boardings,alightings) %&amp;gt;%
  column_to_rownames(var=&amp;quot;stop_id&amp;quot;) %&amp;gt;%
  scale()&lt;/code&gt;&lt;/pre&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;library(factoextra)
set.seed(123)
# function to compute total within-cluster sum of square 
wss &amp;lt;- function(k) {
  kmeans(km_df_scale, k,iter.max = 100, nstart = 25 )$tot.withinss
}

# Compute and plot wss for k = 1 to k = 15
k.values &amp;lt;- 1:15

# extract wss for 2-15 clusters
wss_values &amp;lt;- map_dbl(k.values, wss)

#plot optimal K
plot(k.values, wss_values,
       type=&amp;quot;b&amp;quot;, pch = 19, frame = FALSE, 
       xlab=&amp;quot;Number of clusters K&amp;quot;,
       ylab=&amp;quot;Total within-clusters sum of squares&amp;quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;img src=&#34;/post/2019-11-23-r-rmarkdown_files/figure-html/determin_k-1.png&#34; width=&#34;672&#34; /&gt;&lt;/p&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;set.seed(123)

#kmeans with 3 clusters
km.res &amp;lt;- kmeans(km_df_scale, 3, iter.max = 100, nstart = 25)

#join with raw df
dd &amp;lt;- cbind(raw_df, cluster = km.res$cluster)

#plot
dd %&amp;gt;%
  ggplot(aes(x=boardings, y= alightings, color= factor(cluster))) +
  geom_point() +
  geom_abline(aes(intercept = 0 , slope= 1 ), linetype= &amp;quot;dashed&amp;quot;) + 
  scale_x_continuous(trans = &amp;#39;log&amp;#39;) +
  scale_y_continuous(trans = &amp;#39;log&amp;#39;) +
  scale_size(range = c(0.5, 5))  +
  scale_color_brewer(&amp;quot;&amp;quot;, palette = &amp;quot;Set1&amp;quot;, 
                     labels = c(&amp;quot;Stops with little ridership&amp;quot;, 
                                &amp;quot;Stops with most ridership&amp;quot;, 
                                &amp;quot;Stops with medium ridership&amp;quot;)) +
  theme_bw() +
  xlab(&amp;quot;Boardings&amp;quot;) +
  ylab(&amp;quot;Alightings&amp;quot;) &lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;img src=&#34;/post/2019-11-23-r-rmarkdown_files/figure-html/kmeans-1.png&#34; width=&#34;672&#34; /&gt;&lt;/p&gt;
&lt;pre class=&#34;r&#34;&gt;&lt;code&gt;#visualize on a map
m = leaflet() %&amp;gt;%
    setView(lng=-87.705, lat=41.85443, zoom = 10) %&amp;gt;% 
    addProviderTiles(providers$Stamen.Toner,group=&amp;quot;Stamen Toner&amp;quot;) 

# subset into clusters 
data_filterlist = list(cluster_1 = subset(dd, cluster == 1),
                cluster_2 = subset(dd, cluster == 2),
                cluster_3 = subset(dd, cluster == 3))

# Remember we also had these groups associated with each variable? Let&amp;#39;s put them in a list too:
layerlist = c(&amp;quot;Stops with little ridership&amp;quot;, &amp;quot;Stops with most ridership&amp;quot;, &amp;quot;Stops with medium ridership&amp;quot;)

# We can keep that same color variable:
colorFactors = colorFactor(c(&amp;#39;red&amp;#39;, &amp;#39;blue&amp;#39;, &amp;#39;seagreen&amp;#39;), domain=dd$cluster)

# Now we have our loop - each time through the loop, it is adding our markers to the map object:

for (i in 1:length(data_filterlist)){
  m = addCircleMarkers(m, 
                       lng=data_filterlist[[i]]$lon, 
                       lat=data_filterlist[[i]]$lat, 
                       radius=1,
                       popup= paste( &amp;quot;Stop_id:&amp;quot;,data_filterlist[[i]]$stop_id,  
                                     &amp;quot;&amp;lt;br&amp;gt;&amp;quot;, &amp;quot;Routes:&amp;quot;, data_filterlist[[i]]$routes,
                                     &amp;quot;&amp;lt;br&amp;gt;&amp;quot;, &amp;quot;Boardings:&amp;quot;,data_filterlist[[i]]$boardings,
                                     &amp;quot;&amp;lt;br&amp;gt;&amp;quot;, &amp;quot;Alightings:&amp;quot;,data_filterlist[[i]]$alightings),
                       stroke = FALSE, fillOpacity = 0.75,
                       color = colorFactors(data_filterlist[[i]]$cluster),
                       group = layerlist[i]
  )

}


p2&amp;lt;- addLayersControl(m, overlayGroups = layerlist, options = layersControlOptions(collapsed = FALSE))

frameWidget(p2, width=&amp;#39;100%&amp;#39;)&lt;/code&gt;&lt;/pre&gt;
&lt;div id=&#34;htmlwidget-2&#34; style=&#34;width:100%;height:480px;&#34; class=&#34;widgetframe html-widget&#34;&gt;&lt;/div&gt;
&lt;script type=&#34;application/json&#34; data-for=&#34;htmlwidget-2&#34;&gt;{&#34;x&#34;:{&#34;url&#34;:&#34;/post/2019-11-23-r-rmarkdown_files/figure-html//widgets/widget_unnamed-chunk-6.html&#34;,&#34;options&#34;:{&#34;xdomain&#34;:&#34;*&#34;,&#34;allowfullscreen&#34;:false,&#34;lazyload&#34;:false}},&#34;evals&#34;:[],&#34;jsHooks&#34;:[]}&lt;/script&gt;
&lt;/div&gt;
&lt;/div&gt;
</description>
    </item>
    
  </channel>
</rss>
