22 Sin Ming Lane #06-76 Midview City Singapore 573969
+6593361596
jsypconsulting888@gmail.com

Forums

Deep Tech, Financial Markets, Fintech, Blockchain ICOs, ITSec, and High Value IT Projects

Install dslabs pack...
 
Notifications
Clear all

Install dslabs package in R


(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# install the dslabs package (it provides the datasets used in this thread)
# from CRAN over https; change "dslabs" to install a different package
install.packages("dslabs", repos = "https://cloud.r-project.org")


   
Quote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

Factors in R

# library() with no arguments lists the installed packages
library()
# attach dslabs before touching its datasets (movielens, murders)
library(dslabs)

# number of levels of movielens$genres
nlevels(movielens$genres)

# compact overview of the murders object
str(murders)


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# murder rate per 100,000 residents, computed as before
murder_rate <- with(murders, total / population * 1e5)

# flag the states whose murder rate is at most 0.71
index <- murder_rate <= 0.71
# names of the flagged states
murders[["state"]][index]
# number of flagged states (each TRUE counts as 1)
sum(index)

# one logical vector per condition: located in the West, rate at most 1
west <- murders[["region"]] == "West"
safe <- murder_rate <= 1
# keep only the states for which both conditions hold
index <- west & safe
murders[["state"]][index]


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# find the position of Massachusetts, then read off its murder rate
ind <- which(murders[["state"]] == "Massachusetts")
murder_rate[ind]

# match() gives the positions of New York, Florida and Texas (in that order);
# those positions then index the murder rates
ind <- match(c("New York", "Florida", "Texas"), murders[["state"]])
ind
murder_rate[ind]

# %in% reports, name by name, whether Boston, Dakota and Washington
# appear among the state names
c("Boston", "Dakota", "Washington") %in% murders[["state"]]


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# install dplyr only when it is missing, so re-running the script does not
# download and reinstall the package every time, then load it
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# load the murders data and add a rate column (per 100,000) with mutate
library(dslabs)
data("murders")
murders <- mutate(murders, rate = total / population * 100000)

# subsetting with filter: keep the rows with a rate of at most 0.71
filter(murders, rate <= 0.71)

# selecting columns with select
new_table <- select(murders, state, region, rate)

# using the pipe: the same select and filter chained, one step per line
murders %>%
  select(state, region, rate) %>%
  filter(rate <= 0.71)


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# build a small data frame of exam grades; stringsAsFactors = FALSE keeps the
# names column as character strings instead of converting it to a factor
grades <- data.frame(
  names = c("John", "Juan", "Jean", "Yao"),
  exam_1 = c(95, 80, 90, 85),
  exam_2 = c(90, 85, 85, 90),
  stringsAsFactors = FALSE
)


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# scatterplot: population in millions (x) against total murders (y)
y <- murders$total
x <- murders$population / 1e6
plot(x, y)

# histogram of the rate column of murders
hist(murders$rate)

# one boxplot of rate for each region
boxplot(rate ~ region, data = murders)


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# general shape of an if-else statement: print the reciprocal of a,
# unless a is 0
a <- 0
if (a == 0) {
  print("No reciprocal for 0.")
} else {
  print(1 / a)
}

# report the state with the lowest murder rate, provided that rate is below 0.5
library(dslabs)
data(murders)
murder_rate <- with(murders, total / population * 100000)
# position of the smallest murder rate
ind <- which.min(murder_rate)
if (murder_rate[ind] >= 0.5) {
  print("No state has murder rate that low")
} else {
  print(murders$state[ind])
}

# with the stricter threshold 0.25 the outcome changes
if (murder_rate[ind] >= 0.25) {
  print("No state has a murder rate that low.")
} else {
  print(murders$state[ind])
}

# ifelse(test, yes, no) behaves like an if-else conditional;
# here a is not positive, so the result is NA
a <- 0
ifelse(a > 0, 1 / a, NA)

# applied to a vector it decides element by element:
# the reciprocal for positive entries, NA for the rest
a <- c(0, 1, 2, -4, 5)
result <- ifelse(a > 0, 1 / a, NA)

# ifelse() can also fill in missing values: every NA in na_example becomes 0,
# all other entries are kept
data("na_example")
no_nas <- ifelse(is.na(na_example), 0, na_example)
# count the NAs that remain after the replacement
sum(is.na(no_nas))

# any() is TRUE when at least one element is TRUE,
# all() is TRUE only when every element is TRUE
z <- c(TRUE, TRUE, FALSE)
any(z)
all(z)


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# avg(): average of the vector x, i.e. the sum of its elements divided by
# their count; s and n exist only inside the function
avg <- function(x) {
  n <- length(x)
  s <- sum(x)
  s / n
}

# avg() and the built-in mean() return the same value for 1, 2, ..., 100
x <- seq_len(100)
identical(avg(x), mean(x))

# variables created inside a function stay out of the workspace:
# s is set to 3 here and still prints 3 after avg() has been called
s <- 3
avg(seq_len(10))
s

# the general form of a function (a template, not meant to be called as is):
# VARIABLE_NAME stands for the argument and VALUE for the result, which is
# returned because it is the last expression of the body
my_function <- function(VARIABLE_NAME) {
  # perform operations on VARIABLE_NAME and calculate VALUE
  VALUE
}

# functions can have multiple arguments as well as default values
#
# x:          numeric vector to average
# arithmetic: a single TRUE/FALSE flag; TRUE (the default) returns the
#             arithmetic mean sum(x) / n, FALSE returns prod(x)^(1 / n)
avg <- function(x, arithmetic = TRUE) {
  # the flag selects one branch, so it must be exactly one non-missing value
  stopifnot(length(arithmetic) == 1L, !is.na(arithmetic))
  n <- length(x)
  # plain if/else for a scalar condition; ifelse() is meant for vectors
  if (arithmetic) {
    sum(x) / n
  } else {
    prod(x)^(1 / n)
  }
}
 


   
ReplyQuote
(@admin)
Noble Member Admin
Joined: 5 years ago
Posts: 620
Topic starter  

# compute_s_n(): sum of the integers 1 through n
#
# n: a non-negative whole number; n = 0 gives the empty sum 0
compute_s_n <- function(n) {
  # seq_len(0) is empty, whereas 1:0 would count down and yield c(1, 0)
  x <- seq_len(n)
  sum(x)
}

# a very simple for-loop: print the numbers 1 to 5, one per iteration
for (i in 1:5) {
  print(i)
}

# a for-loop that stores compute_s_n(n) for n = 1, ..., m in s_n
m <- 25
s_n <- integer(m) # preallocated result vector, one slot per n
for (n in seq_len(m)) {
  s_n[n] <- compute_s_n(n)
}

# plot the computed sums against n
n <- seq_len(m)
plot(n, s_n)

# first rows of a table that puts our sums next to the closed form n(n+1)/2
head(data.frame(s_n = s_n, formula = n * (n + 1) / 2))

# draw the closed form as a line on top of the plotted sums
plot(n, s_n)
lines(n, n * (n + 1) / 2)


   
ReplyQuote
Share: