Eine Shiny App
Zum Ausprobieren einfach den Code in ein neues Skript kopieren, erforderliche Pakete installieren und in RStudio auf Run App
klicken – mehr Infos gibts hier.
library(shiny)
library(shinythemes)
library(psych)
library(ggplot2)
library(ggridges) # geom_density_ridges
library(ggthemes)
library(ggpubr) # theme_pubr(margin = F) + labs_pubr()
library(plotly) # for plots with zoom functions and much more
library(reshape2) # for melting and casting data frames
library(DT) # for better tables
# plot theme variations
economist = theme_economist() # requires library(ggthemes)
grey = theme_grey() # requires library(ggthemes)
pubr = theme_pubr(margin = F) + labs_pubr() # requires library(pubr)
clean = theme_tufte() + labs_pubr() + clean_theme() # requires library(pubr) and library(ggthemes)
## %######################################################%##
#### Data Set for Central Limit Theorem ####
## %######################################################%##
# generate 5000 values from an exponential distribution
# with rate parameter=0.2
exp_data = rexp(n = 5000, rate = 0.2)
# find mean of 5 values from an exponential distribution (repeat 5000 times)
exp_5 = array(NA, dim = 5000)
for (i in 1:5000) {
exp_5[i] = mean(rexp(n = 5, rate = 0.2))
}
# find mean of 50 values from an exponential distribution (repeat 5000 times)
exp_50 = array(NA, dim = 5000)
for (i in 1:5000) {
exp_50[i] = mean(rexp(n = 50, rate = 0.2))
}
# generate data frame
d <- data.frame(exp_data, exp_5, exp_50)
# rename columns to make them nicer in the plot's legend
names(d) <- c("N = 1", "N = 5", "N = 50")
# melt data so just one variable can be called to represent the three columns
d <- melt(d)
## %######################################################%##
#### Shiny App ####
## %######################################################%##
# Define UI for random distribution app ----
ui <- fluidPage(
# change the theme
# to quickly check which one you like, add shinythemes::themeSelector() instead of shinytheme("paper") # others: "superhero"
theme = shinytheme("lumen"),
# App title ----
titlePanel("Normal? Distributions"),
# Sidebar layout with input and output definitions ----
sidebarLayout(
# Sidebar panel for inputs ----
sidebarPanel(
# br() element to introduce extra vertical spacing ----
# conditional panel to enable/disable certain features only when they are (not) applicable depending on the numerical value of the tabs
conditionalPanel(condition = "input.tabs<=2",
# Input: slider for the number of observations to generate ----
numericInput("n",
"Number of Observations:",
value = 500,
min = 1,
max = 12000),
# Input: radio button for categorical selection,
# alternative: selectInput
radioButtons("dist",
"Distribution",
c("Normal" = "rnorm",
"Exponential" = "rexp",
"Beta" = "rbeta"
# "LogNormal" = "rlnorm"
), inline = T),
# checkbox to indicate whether a
# transformation is to be applied
selectInput("z",
"Transformation:",
c("None" = "none",
"z (mean = 0, sd = 1)" = "scale",
"Logarithm" = "log",
"Square Root" = "sqrt")
),
# conditional sidebar elements depending
# on the choice of distribution
uiOutput("slidermean"),
uiOutput("slidersd"),
uiOutput("sliderrate"),
uiOutput("slidershape1"),
uiOutput("slidershape2")
),
# conditional panel to enable/disable certain features only when they are (not) applicable depending on the numerical value of the tabs
conditionalPanel(condition = "input.tabs<=3&input.tabs!=1",
sliderInput("bin",
"Bar Width/Point Size:",
value = 1.55,
min = .01,
max = 10),
sliderInput("alpha",
"Opacity",
value = .6,
min = .1,
max = 1),
# Input: radio
# button for
# categorical
# selection,
# alternative:
# selectInput
# ----
radioButtons("col",
"Color:",
c("Pink" = "deeppink1",
"Black" = "black",
"Blue" = "navyblue",
"Green" = "darkolivegreen",
"Magenta" = "darkmagenta"),
inline = T
),
# drop down list
selectInput("theme",
"Theme:",
c("Publication" = "pubr",
"Economist" = "economist",
"Grey" = "grey",
"Clean" = "clean")
)
),
# adjust width of side panel
width = 3
),
# Main panel for displaying outputs ----
mainPanel(
# Output: Tabset w/ plots, summary, and table ----
# id is used to refer back to the individual panels for the
# condiditional side bar elements depending on the value
tabsetPanel(id = "tabs", type = "tabs",
# histogram with mean, uses ncount
tabPanel("nCount Plot", value = 2,
plotlyOutput("plot1", height = "550px"),
fluidRow(
column(9, offset = 0,
h6("Test")
)
)
),
# histogram with mean, +/- sd and t-significant areas
# uses density statistic
tabPanel("Density Plot", value = 2,
plotlyOutput("plot2", height = "550px")),
# qq plot
tabPanel("QQ Plot", value = 2,
plotlyOutput("plot3", height = "550px")),
# boxplot with underlayed individual data points
tabPanel("Boxplot", value = 2,
plotlyOutput("plot4", height = "550px")),
# descriptive stats of generated data set
tabPanel("Descriptive Stats", value = 1,
fluidRow(
h3("Describe"),
dataTableOutput("table"),
h3("Head"),
dataTableOutput("table4")
)
),
# plot illustrating the cental limit
# theorem with exponential distribution
tabPanel("CLT1", value = 3,
plotOutput("plot5", height = "550px")),
# classical plots with
# means, sd error and
# lines conntecting the means
tabPanel("CLT2", value = 3,
plotOutput("plot6", height = "550px")),
# descriptive stats of exponential data
tabPanel("CLT Data", value = 4,
# multiple outputs in one panel via fluidRow
fluidRow(
# Title element
h3("DescribeBy"),
dataTableOutput("table2"),
h3("Head"),
dataTableOutput("table3")
))
),
# adjust width of main panel
width = 9)
)
)
# Define server logic for random distribution app ----
server <- function(input, output) {
# Reactive expression to generate the requested data set according to n, mean and sd ----
# This is called whenever the inputs change. The output functions
# defined below then use the value computed from this expression
# generates the same dataset for all the manipulatable plots and tables
# this allows the user to jump from numeric representations to
# different types of visualization
x <- reactive({
# react to the checkbox for z-transformation:
# if FALSE, do the usual
if (input$z == "none") {
if (input$dist == "rnorm")
{x <- rnorm(input$n, mean = input$mean, sd = input$sd)}
# else if(input$dist == "rlnorm")
# {x <- rlnorm(input$n, meanlog = input$mean, sdlog=input$sd)}
else if (input$dist == "rexp") {
x <- rexp(input$n, rate = input$rate)
}
else {
x <- rbeta(input$n, input$shape1, input$shape2)
}
}
# if TRUE, apply the transformation specified by the input
# either scale, log or sqrt
else {
if (input$dist == "rnorm")
{x <- get(input$z)(rnorm(input$n, mean = input$mean,
sd = input$sd))}
# else if(input$dist == "rlnorm")
# {x <- rlnorm(input$n, meanlog = input$mean, sdlog=input$sd)}
else if (input$dist == "rexp") {
x <- get(input$z)(rexp(input$n, rate = input$rate))
}
else {
x <- get(input$z)(rbeta(input$n, input$shape1, input$shape2))
}
}
})
# Generate a plot of the data ----
# Also uses the inputs to build the plot label. Note that the
# dependencies on the inputs and the data reactive expression are
# both tracked, and all expressions are called in the sequence
# implied by the dependency graph.
# histogram with mean
# uses ncount statistic
output$plot1 <- renderPlotly({
# dist <- input$dist
# compute descriptive stats, will use skew and kurtosis later
s <- describe(x())
# extract mean and sd
mean <- round(s[3], digits = 4)
sd <- round(s[4], digits = 4)
# extract skew
skew <- round(s[11], digits = 4)
# extract kurtosis
kurtosis <- round(s[12], digits = 4)
# add kolmogorov-smirnov normality test
# mean and sd have the same values as in the sampling distribution
ks <- ks.test(x(), "pnorm", mean = mean(x()), sd = sd(x()))
ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
text <- paste(" Mean: ", mean, "\n Std. Dev: ",
sd, "\n Skew: ", skew, "\n Kurtosis: ", kurtosis,
"\n Kolmogorov-Smirnov p: ", ksp)
ggplot() + aes(x()) +
geom_histogram(aes(y = ..ncount..), binwidth = input$bin,
fill = input$col, alpha = input$alpha) +
labs(x = paste("Number of Observations: ", input$n), y = "") +
# annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1) +
get(input$theme) +
# scale_x_continuous(limits = c(input$mean-(5*input$sd), input$mean+(5*input$sd))) +
geom_vline(aes(xintercept = mean(x(), na.rm = T)),
color = "black", linetype = "dotted", size = 1)
})
# histogram with mean, +/- sd and t-significant areas
# uses density statistic
output$plot2 <- renderPlotly({
# dist <- input$dist
# compute descriptive stats, will use skew and kurtosis later
s <- describe(x())
# extract mean and sd
mean <- round(s[3], digits = 4)
sd <- round(s[4], digits = 4)
# extract skew
skew <- round(s[11], digits = 4)
# extract kurtosis
kurtosis <- round(s[12], digits = 4)
# add kolmogorov-smirnov normality test
# mean and sd have the same values as in the sampling distribution
ks <- ks.test(x(), "pnorm", mean = mean(x()),
sd = sd(x()))
ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
text <- paste(" Mean: ", mean, "\n Std. Dev: ",
sd, "\n Skew: ", skew, "\n Kurtosis: ", kurtosis,
"\n Kolmogorov-Smirnov p: ", ksp)
# thresholds for shaded areas;
# modelled after two-tailed
# t-distribution
thresholdup <- quantile(x(), prob = 0.975)[[1]]
thresholddown <- quantile(x(), prob = 0.025)[[1]]
ggplot() + aes(x()) +
geom_histogram(aes(y = ..density..), binwidth = input$bin, fill = input$col, alpha = input$alpha) +
stat_function(fun = dnorm,
args = list(mean = mean(x()), sd = sd(x())),
xlim = c(mean(x()), mean(x()) + sd(x())),
geom = "area", color = "black", fill = "transparent",
size = .7, linetype = "dotted") +
stat_function(fun = dnorm, args = list(mean = mean(x()),
sd = sd(x())),
xlim = c(mean(x()) - sd(x()), mean(x())), geom = "area",
color = "black", fill = "transparent", size = .7,
linetype = "dotted") +
stat_function(fun = dnorm, args = list(mean = mean(x()),
sd = sd(x())),
xlim = c(thresholdup, max(x())), geom = "area",
fill = "black", alpha = .4) +
stat_function(fun = dnorm, args = list(mean = mean(x()),
sd = sd(x())),
xlim = c(min(x()), thresholddown), geom = "area",
fill = "black", alpha = .4) +
stat_function(fun = dnorm, args = list(mean = mean(x()),
sd = sd(x())),
color = "black", size = .7, alpha = .5) +
labs(x = paste("Number of Observations: ", input$n), y = "") +
# annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1) +
# scale_x_continuous(limits = c(input$mean-(5*input$sd), input$mean+(5*input$sd))) +
get(input$theme)
})
# qq plot
output$plot3 <- renderPlotly({
# compute descriptive stats, will use skew and kurtosis later
s <- describe(x())
# extract mean and sd
mean <- round(s[3], digits = 4)
sd <- round(s[4], digits = 4)
# extract skew
skew <- round(s[11], digits = 4)
# extract kurtosis
kurtosis <- round(s[12], digits = 4)
# add kolmogorov-smirnov normality test
# mean and sd have the same values as in the sampling distribution
ks <- ks.test(x(), "pnorm", mean = mean(x()), sd = sd(x()))
ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
text <- paste(" Mean: ", mean, "\n Std. Dev: ", sd, "\n Skew: ",
skew, "\n Kurtosis: ", kurtosis,
"\n Kolmogorov-Smirnov p: ", ksp)
ggplot() + aes(sample = x()) + geom_qq(size = input$bin,
color = input$col,
alpha = input$alpha) +
get(input$theme) + geom_abline(intercept = mean(x()),
slope = sd(x()), color = "black",
size = 1, alpha = 0.8) +
labs(x = "", y = "") # + annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1)
})
# boxplot with underlayed individual data points
output$plot4 <- renderPlotly({
# compute descriptive stats, will use skew and kurtosis later
s <- psych::describe(x())
# extract mean and sd
mean <- round(s[3], digits = 4)
sd <- round(s[4], digits = 4)
# extract skew
skew <- round(s[11], digits = 4)
# extract kurtosis
kurtosis <- round(s[12], digits = 4)
# add kolmogorov-smirnov normality test
# mean and sd have the same values as in the sampling distribution
ks <- ks.test(x(), "pnorm", mean = mean(x()), sd = sd(x()))
ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
text <- paste(" Mean: ", mean, "\n Std. Dev: ", sd,
"\n Skew: ", skew, "\n Kurtosis: ", kurtosis,
"\n Kolmogorov-Smirnov p: ", ksp)
ggplot() + aes(y = x()) +
geom_jitter(aes(x = 0), color = input$col, alpha = input$alpha / 3,
size = input$bin, width = 0.37) +
geom_boxplot(outlier.shape = NA, size = .7,
outlier.color = NA) +
get(input$theme) + labs(x = "", y = "") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) # + annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1)
})
# describe function including IQR and quantiles
output$table <- renderDataTable({
# describe the data, including IQR and Quantiles, requires library(psych)
statrandom <- describe(x(), IQR = T, quant = c(.25, .75))
# rename the columns
names(statrandom) <- c("Row", "Observations", "Mean",
"Standard Deviation", "Median", "Trimmed",
"MAD", "Min", "Max", "Range", "Skew", "Kurtosis",
"Standard Error", "IQR", "Q 0.25", "Q 0.75")
# to reduce the number of digits, embedd in print command
print(statrandom[2:length(statrandom)], digits = 3)
# do not display the uninformative row names
}, rownames = FALSE)
# plots detailing the central limit
# theorem by drawing from exponential
# distribution with increasing number
# of means, see above
output$plot5 <- renderPlot({
# newer version, requires library(ggridges)
ggplot(d, aes(x = value, y = variable)) +
geom_density_ridges(aes(fill = variable, color = variable),
scale = input$bin, alpha = input$alpha) +
get(input$theme) + scale_x_continuous(limits = c(-1, 15)) +
labs(x = "Mean of n = 1, 5 and 50 Data Points from Exponential Distribution taken 5000 times",
y = "", fill = "Number of Observations for Mean",
color = "Number of Observations for Mean") +
scale_fill_manual(values = c("deeppink1", "darkmagenta", "navyblue")) +
scale_color_manual(values = c("deeppink1", "darkmagenta", "navyblue"))
})
# classical plots with means, sd error and lines conntecting them
output$plot6 <- renderPlot({
ggplot(d, aes(x = variable, y = value, color = variable,
shape = variable, group = variable)) +
stat_summary(fun.y = mean, geom = "line", group = 1,
color = input$col) +
stat_summary(fun.y = mean, geom = "point", size = input$bin) +
get(input$theme) + scale_fill_manual(values = c("deeppink1",
"darkmagenta",
"navyblue")) +
scale_color_manual(values = c("deeppink1", "darkmagenta", "navyblue")) +
stat_summary(fun.data = mean_se, geom = "errorbar",
width = input$bin / 10, color = input$col) +
labs(x = "Mean of n = 1, 5 and 50 Data Points from Exponential Distribution taken 5000 times",
y = "",
fill = "Number of Observations for Mean",
color = "Number of Observations for Mean",
shape = "Number of Observations for Mean",
group = "Number of Observations for Mean")
})
# describe CLT data by factor levels
output$table2 <- renderDataTable({
# describe the data, requires library(psych)
stat <- describeBy(d$value, d$variable, mat = T, digits = 3)
# rename the columns
names(stat) <- c("Row", "Group", "Vars", "Observations", "Mean",
"Standard Deviation", "Median", "Trimmed", "MAD",
"Min", "Max", "Range", "Skew", "Kurtosis",
"Standard Error")
stat[2:length(stat)]
# do not display the uninformative row names
}, rownames = FALSE)
# show CLT data
output$table3 <- renderDataTable({
# format function to display less decimal numbers
format(head(d, n = length(d$variable)), digits = 2)
# do not display the uninformative row names
}, rownames = FALSE)
# show dynamic data
output$table4 <- renderDataTable({
# format function to display less decimal numbers
data <- as.data.frame(x())
names(data) <- "Data"
format(head(data, n = input$n), digits = 2)
# do not display the uninformative row names
}, rownames = FALSE)
# CONditionally apparent UI elements ----
output$slidermean <- renderUI({
if (input$dist %in% c("rnorm", "rlnorm")) {
sliderInput("mean",
"Mean",
value = 150,
min = 125,
max = 175)
}
})
output$slidersd <- renderUI({
if (input$dist %in% c("rnorm", "rlnorm")) {
sliderInput("sd",
"SD",
value = 15,
min = 0.1,
max = 30)
}
})
output$sliderrate <- renderUI({
if (input$dist == "rexp") {
sliderInput("rate",
"Rate",
value = 0.2,
min = 0.01,
max = 0.9)
}
})
output$slidershape1 <- renderUI({
if (input$dist == "rbeta") {
sliderInput("shape1",
"Shape 1",
value = 5,
min = 0.01,
max = 10)
}
})
output$slidershape2 <- renderUI({
if (input$dist == "rbeta") {
sliderInput("shape2",
"Shape 2",
value = 2,
min = 0.01,
max = 10)
}
})
# END OF: server <- function(input, output) {
}
# Create Shiny app ----
shinyApp(ui, server)