Eine Shiny App

Zum Ausprobieren einfach den Code in ein neues Skript kopieren, erforderliche Pakete installieren und in RStudio auf Run App klicken – mehr Infos gibts hier.

library(shiny)
library(shinythemes)
library(psych)
library(ggplot2)
library(ggridges) # geom_density_ridges
library(ggthemes)
library(ggpubr) # theme_pubr(margin = F) + labs_pubr()
library(plotly) # for plots with zoom functions and much more
library(reshape2) # for melting and casting data frames
library(DT) # for better tables

# plot theme variations
economist = theme_economist() # requires library(ggthemes)
grey = theme_grey() # requires library(ggthemes)
pubr = theme_pubr(margin = F) + labs_pubr() # requires library(pubr)
clean = theme_tufte() + labs_pubr() + clean_theme() # requires library(pubr) and library(ggthemes)

## %######################################################%##
####         Data Set for Central Limit Theorem         ####
## %######################################################%##

# generate 5000 values from an exponential distribution
# with rate parameter=0.2
exp_data = rexp(n = 5000, rate = 0.2)
# find mean of 5 values from an exponential distribution (repeat 5000 times)
exp_5 = array(NA, dim = 5000)
for (i in 1:5000) {
  exp_5[i] = mean(rexp(n = 5, rate = 0.2))
}
# find mean of 50 values from an exponential distribution (repeat 5000 times)
exp_50 = array(NA, dim = 5000)
for (i in 1:5000) {
  exp_50[i] = mean(rexp(n = 50, rate = 0.2))
}

# generate data frame
d <- data.frame(exp_data, exp_5, exp_50)
# rename columns to make them nicer in the plot's legend
names(d) <- c("N = 1", "N = 5", "N = 50")
# melt data so just one variable can be called to represent the three columns
d <- melt(d)


## %######################################################%##
####                     Shiny App                      ####
## %######################################################%##

# Define UI for random distribution app ----
ui <- fluidPage(
  # change the theme
  # to quickly check which one you like, add shinythemes::themeSelector() instead of shinytheme("paper") # others: "superhero"
  theme = shinytheme("lumen"),

  # App title ----
  titlePanel("Normal? Distributions"),

  # Sidebar layout with input and output definitions ----
  sidebarLayout(

    # Sidebar panel for inputs ----
    sidebarPanel(
      # br() element to introduce extra vertical spacing ----

      # conditional panel to enable/disable certain features only when they are (not) applicable depending on the numerical value of the tabs
      conditionalPanel(condition = "input.tabs<=2",
        # Input: slider for the number of observations to generate ----
        numericInput("n",
          "Number of Observations:",
          value = 500,
          min = 1,
          max = 12000),
        # Input: radio button for categorical selection,
        # alternative: selectInput
        radioButtons("dist",
          "Distribution",
          c("Normal" = "rnorm",
            "Exponential" = "rexp",
            "Beta" = "rbeta"
            # "LogNormal" = "rlnorm"
        ), inline = T),
        # checkbox to indicate whether a
        # transformation is to be applied
        selectInput("z",
          "Transformation:",
          c("None" = "none",
            "z (mean = 0, sd = 1)" = "scale",
            "Logarithm" = "log",
            "Square Root" = "sqrt")
        ),

        # conditional sidebar elements depending
        # on the choice of distribution
        uiOutput("slidermean"),
        uiOutput("slidersd"),
        uiOutput("sliderrate"),
        uiOutput("slidershape1"),
        uiOutput("slidershape2")
      ),
      # conditional panel to enable/disable certain features only when they are (not) applicable depending on the numerical value of the tabs
      conditionalPanel(condition = "input.tabs<=3&input.tabs!=1",
        sliderInput("bin",
          "Bar Width/Point Size:",
          value = 1.55,
          min = .01,
          max = 10),

        sliderInput("alpha",
          "Opacity",
          value = .6,
          min = .1,
          max = 1),
        # Input: radio
        # button for
        # categorical
        # selection,
        # alternative:
        # selectInput
        # ----
        radioButtons("col",
          "Color:",
          c("Pink" = "deeppink1",
            "Black" = "black",
            "Blue" = "navyblue",
            "Green" = "darkolivegreen",
            "Magenta" = "darkmagenta"),
          inline = T
        ),
        # drop down list
        selectInput("theme",
          "Theme:",
          c("Publication" = "pubr",
            "Economist" = "economist",
            "Grey" = "grey",
            "Clean" = "clean")
        )
      ),

      # adjust width of side panel
      width = 3

    ),

    # Main panel for displaying outputs ----
    mainPanel(

      # Output: Tabset w/ plots, summary, and table ----
      # id is used to refer back to the individual panels for the
      # condiditional side bar elements depending on the value
      tabsetPanel(id = "tabs", type = "tabs",
        # histogram with mean, uses ncount
        tabPanel("nCount Plot", value = 2,
          plotlyOutput("plot1", height = "550px"),
          fluidRow(
            column(9, offset = 0,
              h6("Test")
            )
          )
        ),
        # histogram with mean, +/- sd and t-significant areas
        # uses density statistic
        tabPanel("Density Plot", value = 2,
          plotlyOutput("plot2", height = "550px")),
        # qq plot
        tabPanel("QQ Plot", value = 2,
          plotlyOutput("plot3", height = "550px")),
        # boxplot with underlayed individual data points
        tabPanel("Boxplot", value = 2,
          plotlyOutput("plot4", height = "550px")),
        # descriptive stats of generated data set
        tabPanel("Descriptive Stats", value = 1,
          fluidRow(
            h3("Describe"),
            dataTableOutput("table"),
            h3("Head"),
            dataTableOutput("table4")
          )
        ),
        # plot illustrating the cental limit
        # theorem with exponential distribution
        tabPanel("CLT1", value = 3,
          plotOutput("plot5", height = "550px")),
        # classical plots with
        # means, sd error and
        # lines conntecting the means
        tabPanel("CLT2", value = 3,
          plotOutput("plot6", height = "550px")),
        # descriptive stats of exponential data
        tabPanel("CLT Data", value = 4,
          # multiple outputs in one panel via fluidRow
          fluidRow(
            # Title element
            h3("DescribeBy"),
            dataTableOutput("table2"),
            h3("Head"),
            dataTableOutput("table3")
        ))

      ),
      # adjust width of main panel
      width = 9)
  )
)

# Define server logic for random distribution app ----
server <- function(input, output) {

  # Reactive expression to generate the requested data set according to n, mean and sd ----
  # This is called whenever the inputs change. The output functions
  # defined below then use the value computed from this expression

  # generates the same dataset for all the manipulatable plots and tables
  # this allows the user to jump from numeric representations to
  # different types of visualization
  x <- reactive({
    # react to the checkbox for z-transformation:
    # if FALSE, do the usual
    if (input$z == "none") {
      if (input$dist == "rnorm")
        {x <- rnorm(input$n, mean = input$mean, sd = input$sd)}
      # else if(input$dist == "rlnorm")
      # {x <- rlnorm(input$n, meanlog = input$mean, sdlog=input$sd)}
      else if (input$dist == "rexp") {
        x <- rexp(input$n, rate = input$rate)
      }
      else {
        x <- rbeta(input$n, input$shape1, input$shape2)
      }
    }
    # if TRUE, apply the transformation specified by the input
    # either scale, log or sqrt
    else {
      if (input$dist == "rnorm")
        {x <- get(input$z)(rnorm(input$n, mean = input$mean,
          sd = input$sd))}
      # else if(input$dist == "rlnorm")
      # {x <- rlnorm(input$n, meanlog = input$mean, sdlog=input$sd)}
      else if (input$dist == "rexp") {
        x <- get(input$z)(rexp(input$n, rate = input$rate))
      }
      else {
        x <- get(input$z)(rbeta(input$n, input$shape1, input$shape2))
      }
    }

  })

  # Generate a plot of the data ----
  # Also uses the inputs to build the plot label. Note that the
  # dependencies on the inputs and the data reactive expression are
  # both tracked, and all expressions are called in the sequence
  # implied by the dependency graph.

  # histogram with mean
  # uses ncount statistic
  output$plot1 <- renderPlotly({
    #  dist <- input$dist
    # compute descriptive stats, will use skew and kurtosis later
    s <- describe(x())
    # extract mean and sd
    mean <- round(s[3], digits = 4)
    sd <- round(s[4], digits = 4)
    # extract skew
    skew <- round(s[11], digits = 4)
    # extract kurtosis
    kurtosis <- round(s[12], digits = 4)
    # add kolmogorov-smirnov normality test
    # mean and sd have the same values as in the sampling distribution
    ks <- ks.test(x(), "pnorm", mean = mean(x()), sd = sd(x()))
    ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
    text <- paste(" Mean: ", mean, "\n Std. Dev: ",
      sd, "\n Skew: ", skew, "\n Kurtosis: ", kurtosis,
      "\n Kolmogorov-Smirnov p: ", ksp)
    ggplot() + aes(x()) +
      geom_histogram(aes(y = ..ncount..), binwidth = input$bin,
        fill = input$col, alpha = input$alpha) +
      labs(x = paste("Number of Observations: ", input$n), y = "") +
      #  annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1) +
      get(input$theme) +
      # scale_x_continuous(limits =  c(input$mean-(5*input$sd), input$mean+(5*input$sd))) +
      geom_vline(aes(xintercept = mean(x(), na.rm = T)),
        color = "black", linetype = "dotted", size = 1)
  })

  # histogram with mean, +/- sd and t-significant areas
  # uses density statistic
  output$plot2 <- renderPlotly({
    #  dist <- input$dist
    # compute descriptive stats, will use skew and kurtosis later
    s <- describe(x())
    # extract mean and sd
    mean <- round(s[3], digits = 4)
    sd <- round(s[4], digits = 4)
    # extract skew
    skew <- round(s[11], digits = 4)
    # extract kurtosis
    kurtosis <- round(s[12], digits = 4)
    # add kolmogorov-smirnov normality test
    # mean and sd have the same values as in the sampling distribution
    ks <- ks.test(x(), "pnorm", mean = mean(x()),
      sd = sd(x()))
    ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
    text <- paste(" Mean: ", mean, "\n Std. Dev: ",
      sd, "\n Skew: ", skew, "\n Kurtosis: ", kurtosis,
      "\n Kolmogorov-Smirnov p: ", ksp)

    # thresholds for shaded areas;
    # modelled after two-tailed
    # t-distribution
    thresholdup <- quantile(x(), prob = 0.975)[[1]]
    thresholddown <- quantile(x(), prob = 0.025)[[1]]

    ggplot() + aes(x()) +
      geom_histogram(aes(y = ..density..), binwidth = input$bin, fill = input$col, alpha = input$alpha) +
      stat_function(fun = dnorm,
        args = list(mean = mean(x()), sd = sd(x())),
        xlim = c(mean(x()), mean(x()) + sd(x())),
        geom = "area", color = "black", fill = "transparent",
        size = .7, linetype = "dotted") +
      stat_function(fun = dnorm, args = list(mean = mean(x()),
        sd = sd(x())),
      xlim = c(mean(x()) - sd(x()), mean(x())), geom = "area",
      color = "black", fill = "transparent", size = .7,
      linetype = "dotted") +
      stat_function(fun = dnorm, args = list(mean = mean(x()),
        sd = sd(x())),
      xlim = c(thresholdup, max(x())), geom = "area",
      fill = "black", alpha = .4) +
      stat_function(fun = dnorm, args = list(mean = mean(x()),
        sd = sd(x())),
      xlim = c(min(x()), thresholddown), geom = "area",
      fill = "black", alpha = .4) +
      stat_function(fun = dnorm, args = list(mean = mean(x()),
        sd = sd(x())),
      color = "black", size = .7, alpha = .5) +
      labs(x = paste("Number of Observations: ", input$n), y = "") +
      # annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1) +
      # scale_x_continuous(limits = c(input$mean-(5*input$sd), input$mean+(5*input$sd))) +
      get(input$theme)
  })


  # qq plot
  output$plot3 <- renderPlotly({
    # compute descriptive stats, will use skew and kurtosis later
    s <- describe(x())
    # extract mean and sd
    mean <- round(s[3], digits = 4)
    sd <- round(s[4], digits = 4)
    # extract skew
    skew <- round(s[11], digits = 4)
    # extract kurtosis
    kurtosis <- round(s[12], digits = 4)
    # add kolmogorov-smirnov normality test
    # mean and sd have the same values as in the sampling distribution
    ks <- ks.test(x(), "pnorm", mean = mean(x()), sd = sd(x()))
    ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
    text <- paste(" Mean: ", mean, "\n Std. Dev: ", sd, "\n Skew: ",
      skew, "\n Kurtosis: ", kurtosis,
      "\n Kolmogorov-Smirnov p: ", ksp)
    ggplot() + aes(sample = x()) + geom_qq(size = input$bin,
      color = input$col,
      alpha = input$alpha) +
      get(input$theme) + geom_abline(intercept = mean(x()),
        slope = sd(x()), color = "black",
        size = 1, alpha = 0.8) +
      labs(x = "", y = "") # + annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1)
  })


  # boxplot with underlayed individual data points
  output$plot4 <- renderPlotly({
    # compute descriptive stats, will use skew and kurtosis later
    s <- psych::describe(x())
    # extract mean and sd
    mean <- round(s[3], digits = 4)
    sd <- round(s[4], digits = 4)
    # extract skew
    skew <- round(s[11], digits = 4)
    # extract kurtosis
    kurtosis <- round(s[12], digits = 4)
    # add kolmogorov-smirnov normality test
    # mean and sd have the same values as in the sampling distribution
    ks <- ks.test(x(), "pnorm", mean = mean(x()), sd = sd(x()))
    ksp <- round(as.data.frame(ks[2])[1, 1], digits = 4)
    text <- paste(" Mean: ", mean, "\n Std. Dev: ", sd,
      "\n Skew: ", skew, "\n Kurtosis: ", kurtosis,
      "\n Kolmogorov-Smirnov p: ", ksp)
    ggplot() + aes(y = x()) +
      geom_jitter(aes(x = 0), color = input$col, alpha = input$alpha / 3,
        size = input$bin, width = 0.37) +
      geom_boxplot(outlier.shape = NA, size = .7,
        outlier.color = NA) +
      get(input$theme) + labs(x = "", y = "") +
      theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) # + annotate("text", -Inf, Inf, label = text, size=4.5, hjust = 0, vjust=1)
  })


  # describe function including IQR and quantiles
  output$table <- renderDataTable({
    # describe the data, including IQR and Quantiles, requires library(psych)
    statrandom <- describe(x(), IQR = T, quant = c(.25, .75))
    # rename the columns
    names(statrandom) <- c("Row", "Observations", "Mean",
      "Standard Deviation", "Median", "Trimmed",
      "MAD", "Min", "Max", "Range", "Skew", "Kurtosis",
      "Standard Error", "IQR", "Q 0.25", "Q 0.75")
    # to reduce the number of digits, embedd in print command
    print(statrandom[2:length(statrandom)], digits = 3)
    # do not display the uninformative row names
  }, rownames = FALSE)

  # plots detailing the central limit
  # theorem by drawing from exponential
  # distribution with increasing number
  # of means, see above
  output$plot5 <- renderPlot({
    # newer version, requires library(ggridges)
    ggplot(d, aes(x = value, y = variable)) +
      geom_density_ridges(aes(fill = variable, color = variable),
        scale = input$bin, alpha = input$alpha) +
      get(input$theme) + scale_x_continuous(limits = c(-1, 15)) +
      labs(x = "Mean of n = 1, 5 and 50 Data Points from Exponential Distribution taken 5000 times",
        y = "", fill = "Number of Observations for Mean",
        color = "Number of Observations for Mean") +
      scale_fill_manual(values = c("deeppink1", "darkmagenta", "navyblue")) +
      scale_color_manual(values = c("deeppink1", "darkmagenta", "navyblue"))
  })


  # classical plots with means, sd error and lines conntecting them
  output$plot6 <- renderPlot({
    ggplot(d, aes(x = variable, y = value, color = variable,
      shape = variable, group = variable)) +
      stat_summary(fun.y = mean, geom = "line", group = 1,
        color = input$col) +
      stat_summary(fun.y = mean, geom = "point", size = input$bin) +
      get(input$theme) + scale_fill_manual(values = c("deeppink1",
        "darkmagenta",
        "navyblue")) +
      scale_color_manual(values = c("deeppink1", "darkmagenta", "navyblue")) +
      stat_summary(fun.data = mean_se, geom = "errorbar",
        width = input$bin / 10, color = input$col) +
      labs(x = "Mean of n = 1, 5 and 50 Data Points from Exponential Distribution taken 5000 times",
        y = "",
        fill = "Number of Observations for Mean",
        color = "Number of Observations for Mean",
        shape = "Number of Observations for Mean",
        group = "Number of Observations for Mean")
  })


  # describe CLT data by factor levels
  output$table2 <- renderDataTable({
    # describe the data, requires library(psych)
    stat <- describeBy(d$value, d$variable, mat = T, digits = 3)
    # rename the columns
    names(stat) <- c("Row", "Group", "Vars", "Observations", "Mean",
      "Standard Deviation", "Median", "Trimmed", "MAD",
      "Min", "Max", "Range", "Skew", "Kurtosis",
      "Standard Error")
    stat[2:length(stat)]
    # do not display the uninformative row names
  }, rownames = FALSE)

  # show CLT data
  output$table3 <- renderDataTable({
    # format function to display less decimal numbers
    format(head(d, n = length(d$variable)), digits = 2)
    # do not display the uninformative row names
  }, rownames = FALSE)

  # show dynamic data
  output$table4 <- renderDataTable({
    # format function to display less decimal numbers
    data <- as.data.frame(x())
    names(data) <- "Data"
    format(head(data, n = input$n), digits = 2)
    # do not display the uninformative row names
  }, rownames = FALSE)

  # CONditionally apparent UI elements ----
  output$slidermean <- renderUI({
    if (input$dist %in% c("rnorm", "rlnorm")) {
      sliderInput("mean",
        "Mean",
        value = 150,
        min = 125,
        max = 175)
    }
  })

  output$slidersd <- renderUI({
    if (input$dist %in% c("rnorm", "rlnorm")) {
      sliderInput("sd",
        "SD",
        value = 15,
        min = 0.1,
        max = 30)
    }
  })

  output$sliderrate <- renderUI({
    if (input$dist == "rexp") {
      sliderInput("rate",
        "Rate",
        value = 0.2,
        min = 0.01,
        max = 0.9)
    }
  })

  output$slidershape1 <- renderUI({
    if (input$dist == "rbeta") {
      sliderInput("shape1",
        "Shape 1",
        value = 5,
        min = 0.01,
        max = 10)
    }
  })

  output$slidershape2 <- renderUI({
    if (input$dist == "rbeta") {
      sliderInput("shape2",
        "Shape 2",
        value = 2,
        min = 0.01,
        max = 10)
    }
  })

  # END OF: server <- function(input, output) {
}

# Create Shiny app ----
shinyApp(ui, server)