Skip to content

Commit

Permalink
Cambios hechos en clase
Browse files Browse the repository at this point in the history
  • Loading branch information
joanby committed Nov 22, 2018
1 parent 571bbff commit 37eeb4b
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 24 deletions.
23 changes: 23 additions & 0 deletions scripts/Hola
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9"
"1" 1.06 9.2 151 54.4 1.6 9077 0 0.628 "Arizona"
"2" 0.89 10.3 202 57.9 2.2 5088 25.3 1.555 "Boston"
"3" 1.43 15.4 113 53 3.4 9212 0 1.058 "Central"
"4" 1.02 11.2 168 56 0.3 6423 34.3 0.7 "Common"
"5" 1.49 8.8 192 51.2 1 3300 15.6 2.044 "Consolid"
"6" 1.32 13.5 111 60 -2.2 11127 22.5 1.241 "Florida"
"7" 1.22 12.2 175 67.6 2.2 7642 0 1.652 "Hawaiian"
"8" 1.1 9.2 245 57 3.3 13082 0 0.309 "Idaho"
"9" 1.34 13 168 60.4 7.2 8406 0 0.862 "Kentucky"
"10" 1.12 12.4 197 53 2.7 6455 39.2 0.623 "Madison"
"11" 0.75 7.5 173 51.5 6.5 17441 0 0.768 "Nevada"
"12" 1.13 10.9 178 62 3.7 6154 0 1.897 "NewEngla"
"13" 1.15 12.7 199 53.7 6.4 7179 50.2 0.527 "Northern"
"14" 1.09 12 96 49.8 1.4 9673 0 0.588 "Oklahoma"
"15" 0.96 7.6 164 62.2 -0.1 6468 0.9 1.4 "Pacific"
"16" 1.16 9.9 252 56 9.2 15991 0 0.62 "Puget"
"17" 0.76 6.4 136 61.9 9 5714 8.3 1.92 "SanDiego"
"18" 1.05 12.6 150 56.7 2.7 10140 0 1.108 "Southern"
"19" 1.16 11.7 104 54 -2.1 13507 0 0.636 "Texas"
"20" 1.2 11.8 148 59.9 3.5 7287 41.1 0.702 "Wisconsi"
"21" 1.04 8.6 204 61 3.5 6650 0 2.116 "United"
"22" 1.07 9.3 174 54.3 5.9 10093 26.6 1.306 "Antonio"
18 changes: 16 additions & 2 deletions scripts/tema1/01-data_visualization.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#Data Visualization - 11 de Mayo de 2018
library(tidyverse)

#tidyverse 1.2.1 ──
#✔ ggplot2 2.2.1 ✔ purrr 0.2.4
#✔ tibble 1.4.2 ✔ dplyr 0.7.4
Expand All @@ -16,13 +17,23 @@ View(mpg)
# displ: tamaño del motor del coche en litros
# hwy: número de millas recorridas en autopista por galón de combustible (3.785411784 litros)

ggplot(data = mpg)

mpg %>% ggplot()


ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy))

#PLANTILLA PARA HACER UNA REPRESENTACIÓN GRÁFICA CON GGPLOT
#ggplot(data = <DATA_FRAME>) +
# <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))

ggplot(data = mpg) +
geom_point(mapping = aes(x = class, y = drv))



#Color de los puntos
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, color = class))
Expand Down Expand Up @@ -54,15 +65,18 @@ ggplot() +
scale_y_continuous(name="") +
scale_x_continuous(name="") +
scale_shape_identity() +
geom_point(data=d, mapping=aes(x=p%%16, y=p%/%16, shape=p), size=5, fill="red") +
geom_point(data=d, mapping=aes(x=p%%16, y=p%/%16, shape=p), size=5, fill="yellow") +
geom_text(data=d, mapping=aes(x=p%%16, y=p%/%16+0.25, label=p), size=3)



ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy),
shape = 23, size = 10, color = "red",
fill = "yellow")
fill = 'yellow')

ggplot(data = mpg) +
geom_point(mapping = aes(x=displ, y = hwy, color = displ<5))


##FACETS
Expand Down
62 changes: 50 additions & 12 deletions scripts/tema1/02-data_transform.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@ library(tidyverse)
library(nycflights13)


nycflights13::flights
tt<-nycflights13::flights
?flights
View(flights)
?tibble

head(flights)
tail(flights)

#tibble es un data frame mejorado para tidyverse
## * int -> números enteros
## * dbl -> números reales (double)
Expand All @@ -28,6 +33,9 @@ View(flights)

### FILTER
jan1 <- filter(flights, month == 1, day == 1)
flights %>%
filter(month == 1, day == 1) %>%
filter(dep_delay>0)

may19 <- filter(flights, month == 5, day == 19)

Expand All @@ -39,7 +47,9 @@ filter(flights, month == 5)
2 == 2

sqrt(2)^2 == 2
sqrt(2)^2 - 2
near(sqrt(2)^2, 2)
?near
1/pi * pi == 1
1/49 * 49 == 1
near(1/49*49, 1)
Expand All @@ -49,7 +59,7 @@ filter(flights, month == 5 | month == 6)
filter(flights, month == 5 | 6)# NO FUNCIONA...

may_june <- filter(flights, month %in% c(5,6))

#LEYES DE MORGAN
#!(x&y) == (!x)|(!y)
#!(x|y) == (!x)&(!y)

Expand All @@ -73,6 +83,7 @@ age.mery == age.john
is.na(age.mery)

df <- tibble(x = c(1,2,NA,4,5))
df
filter(df, x>2)
filter(df, is.na(x)|x>2)

Expand Down Expand Up @@ -110,6 +121,8 @@ tail(flights, 10)

### ARRANGE
sorted_date <- arrange(flights, year, month, day)
flights %>% arrange(year, month, day)
tail(flights)
tail(sorted_date)

head(arrange(flights, desc(arr_delay)))
Expand All @@ -126,7 +139,7 @@ View(arrange(flights, desc(distance)))

### SELECT

View(sorted_date[1024:1068,])
View(sorted_date[1024:1068,TRUE])

View(select(sorted_date[1024:1068,], dep_delay, arr_delay))

Expand All @@ -149,14 +162,13 @@ select(flights, num_range("x",1:5))# x1, x2, x3, x4, x5
?select

rename(flights, deptime = dep_time,
año = year, mes = month, dia = day)
anio = year, mes = month, dia = day)

select(flights, deptime = dep_time)

select(flights, time_hour, distance, air_time, everything())



sorted_date


Expand Down Expand Up @@ -199,13 +211,23 @@ flights_new <- select(flights,
distance,
air_time)

flights_new

mutate(flights_new,
time_gain = arr_delay - dep_delay, #diff_t (min)
air_time_hour = air_time/60,
flight_speed = distance/air_time_hour, #v = s/t (millas/h, distance viene en millas)
time_gain_per_hour = time_gain / air_time_hour
) -> flights_new

View(flights_new)

flights_new %>%
filter(!is.na(time_gain_per_hour)) %>%
ggplot() +
geom_histogram(mapping = aes(x=time_gain_per_hour),
bins = 300)


transmute(flights_new,
time_gain = arr_delay - dep_delay,
Expand All @@ -229,7 +251,7 @@ transmute(flights,
# * Offsets: lead()->mueve hacia la izquierda, lag()->mueve hacia la derecha
df <- 1:12
df
lag(df)
lag(df,4)
lead(df)
# * Funciones acumulativas: cumsum(), cumprod(), cummin(), cummax(), cummean()
df
Expand Down Expand Up @@ -302,16 +324,25 @@ arrange(mutate(flights,

summarise(flights, delay = mean(dep_delay, na.rm = T))

by_month_group <- group_by(flights, year, month)
summarise(by_month_group, delay = mean(dep_delay, na.rm = T))
flights %>%
group_by(year, month) %>%
summarise(delay = mean(dep_delay, na.rm = T))

by_day_group <- group_by(flights, year, month, day)
summarise(by_day_group,
flights %>%
group_by(year, month, day) %>%
summarise(
delay = mean(dep_delay, na.rm = T),
median = median(dep_delay, na.rm = T),
min = min(dep_delay, na.rm = T)
)

flights %>%
group_by(carrier) %>%
summarise(
delay = mean(dep_delay, na.rm = T),
num = n()
)

mutate(summarise(group_by(flights, carrier),
delay = mean(dep_delay, na.rm = T)),
sorted = min_rank(delay)
Expand Down Expand Up @@ -364,8 +395,10 @@ flights %>%
median = median(dep_delay, na.rm = T),
sd = sd(dep_delay, na.rm = T),
count = n()
)
)

not_cancelled <- flights %>%
filter(!is.na(dep_delay), !is.na(arr_delay))

delay_numtail <- not_cancelled %>%
group_by(tailnum) %>%
Expand Down Expand Up @@ -454,6 +487,7 @@ not_cancelled %>%
# Medida de posición
not_cancelled %>%
group_by(carrier) %>%
arrange(dep_time) %>%
summarise(
first_dep = first(dep_time),
second_dep = nth(dep_time, 2),
Expand All @@ -467,14 +501,16 @@ not_cancelled %>%
mutate(rank = min_rank(dep_time)) %>%
filter(rank %in% range(rank)) -> temp

View(temp)

# Funciones de conteo
flights %>%
group_by(dest) %>%
summarise(
count = n(),
carriers = n_distinct(carrier),
arrivals = sum(!is.na(arr_delay))
arrivals = sum(!is.na(arr_delay)),
cancelled = count - arrivals
) %>%
arrange(desc(carriers))

Expand Down Expand Up @@ -505,6 +541,8 @@ summarise(business, n_fl = n()) %>%
summarise(n_fl = sum(n_fl)) %>%
summarise(n_fl = sum(n_fl))

business

business %>%
ungroup() %>%
summarise(n_fl = n())
Expand Down
36 changes: 28 additions & 8 deletions scripts/tema1/03-eda.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,22 @@ diamonds %>%
count(cut_width(carat, 0.5))


ggplot(diamonds, mapping = aes(x = "Kilates", y = carat)) +
geom_boxplot()

diamonds %>%
ggplot() +
geom_boxplot(mapping = aes(x = cut, y = carat, color = cut))

diamonds_filter <- diamonds %>%
filter(carat<3)

ggplot(data = diamonds_filter) +
geom_histogram(mapping = aes(x = carat), binwidth = 0.01)

ggplot(data = diamonds_filter, mapping = aes(x = carat, color = cut))+
geom_freqpoly(binwidth = 0.1)
ggplot(data = diamonds_filter,
mapping = aes(x = carat, color = cut))+
geom_freqpoly(binwidth = 0.01)

# * ¿Cuáles son los valores más comunes? ¿Por qué?
# * ¿Cuáles son los valores más raros? ¿Por qué? ¿Cumplen con lo que esperábamos?
Expand All @@ -57,10 +65,20 @@ ggplot(data = faithful, mapping = aes(x = eruptions)) +
geom_histogram(binwidth = 0.2)

# outliers
ggplot(diamonds) +
geom_histogram(mapping = aes(x = y), binwidth = 0.5) +
ggplot(diamonds) +
geom_histogram(mapping = aes(x = y), binwidth = 0.5) +
coord_cartesian(ylim = c(0,100))

diamonds %>%
ggplot(mapping=aes(x = price)) +
geom_histogram(binwidth = 100)

diamonds %>%
filter(price > 18000) %>%
ggplot(mapping = aes(x = y))+
geom_histogram()


unusual_diamonds <- diamonds %>%
filter(y<2 | y >30) %>%
select(price, x,y,z) %>%
Expand All @@ -77,7 +95,8 @@ good_diamonds <- diamonds %>%

?ifelse

ggplot(data = good_diamonds, mapping = aes(x = x, y = y)) +
ggplot(data = good_diamonds,
mapping = aes(x = x, y = y)) +
geom_point(na.rm = T)

nycflights13::flights %>%
Expand Down Expand Up @@ -139,13 +158,13 @@ ggplot(good_diamonds) +
# Categoría vs Continua

ggplot(data = diamonds, mapping = aes(x = price)) +
geom_freqpoly(mapping = aes(color = cut), binwidth = 500)
geom_freqpoly(mapping = aes(color = cut), binwidth = 50)

ggplot(diamonds) +
geom_bar(mapping = aes(x = cut))

ggplot(data = diamonds, mapping = aes(x = price, y = ..density..)) +
geom_freqpoly(mapping = aes(color = cut), binwidth = 500)
geom_freqpoly(mapping = aes(color = cut), binwidth = 100)

ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
geom_boxplot()
Expand Down Expand Up @@ -182,7 +201,8 @@ diamonds %>%
# Continua vs Continua

ggplot(data = diamonds) +
geom_point(mapping = aes(x = carat, y = price), alpha = 0.01)
geom_point(mapping = aes(x = carat, y = price),
alpha = 0.01)

install.packages("hexbin")
library(hexbin)
Expand Down
3 changes: 2 additions & 1 deletion scripts/tema2/01-tibbles.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ t <- tibble(
z = y * x ^ 2
)

View(t)
t[2,3]

t2 <- tibble(
Expand All @@ -21,7 +22,7 @@ t2 <- tibble(
`1988` = "number"
)

t2
t2$`:)`

tribble(
~x, ~y, ~z,
Expand Down
1 change: 1 addition & 0 deletions scripts/tema2/02-data-import.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ read_csv("Este fichero fue generado por Juan Gabriel
4,5,6", skip = 3)

read_csv("#Esto es un comentario
#Me aburro en clase del master...
x,y,z
1,2,3
4,5,6", comment = "#")
Expand Down
2 changes: 1 addition & 1 deletion scripts/tema4/04-gapminder.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ by_country <- by_country %>%
mutate(model = map(data, country_model))

by_country %>%
filter(continent == "Europe")
filter(continent == "Europe")

by_country %>%
arrange(continent, country)
Expand Down

0 comments on commit 37eeb4b

Please sign in to comment.