Skip to contents

Create violin plots with optional grouping, faceting, and statistical comparisons. A violin plot draws a mirrored kernel density estimate for each group to show the distribution shape of continuous data; a box plot can optionally be overlaid inside each violin (see `add_box`).

Usage

ViolinPlot(
  data,
  x,
  x_sep = "_",
  y = NULL,
  in_form = c("long", "wide"),
  split_by = NULL,
  split_by_sep = "_",
  symnum_args = NULL,
  sort_x = c("none", "mean_asc", "mean_desc", "mean", "median_asc", "median_desc",
    "median"),
  flip = FALSE,
  keep_empty = FALSE,
  group_by = NULL,
  group_by_sep = "_",
  group_name = NULL,
  paired_by = NULL,
  x_text_angle = NULL,
  step_increase = 0.1,
  fill_mode = ifelse(!is.null(group_by), "dodge", "x"),
  fill_reverse = FALSE,
  theme = "theme_ggforge",
  theme_args = list(),
  palette = "Paired",
  palcolor = NULL,
  alpha = 1,
  aspect.ratio = NULL,
  legend.position = "right",
  legend.direction = "vertical",
  add_point = FALSE,
  pt_color = "grey30",
  pt_size = NULL,
  pt_alpha = 1,
  jitter_width = NULL,
  jitter_height = 0,
  stack = FALSE,
  y_max = NULL,
  y_min = NULL,
  add_box = FALSE,
  box_color = "black",
  box_width = 0.1,
  box_ptsize = 2.5,
  add_trend = FALSE,
  trend_color = NULL,
  trend_linewidth = 1,
  trend_ptsize = 2,
  add_stat = NULL,
  stat_name = NULL,
  stat_color = "black",
  stat_size = 1,
  stat_stroke = 1,
  stat_shape = 25,
  add_bg = FALSE,
  bg_palette = "stripe",
  bg_palcolor = NULL,
  bg_alpha = 0.2,
  add_line = NULL,
  line_color = "red2",
  line_width = 0.6,
  line_type = 2,
  highlight = NULL,
  highlight_color = "red2",
  highlight_size = 1,
  highlight_alpha = 1,
  comparisons = NULL,
  ref_group = NULL,
  pairwise_method = "wilcox.test",
  multiplegroup_comparisons = FALSE,
  multiple_method = "kruskal.test",
  sig_label = "p.format",
  sig_labelsize = 3.5,
  hide_ns = FALSE,
  facet_by = NULL,
  facet_scales = "fixed",
  facet_ncol = NULL,
  facet_nrow = NULL,
  facet_byrow = TRUE,
  title = NULL,
  subtitle = NULL,
  xlab = NULL,
  ylab = NULL,
  seed = 8525,
  combine = TRUE,
  nrow = NULL,
  ncol = NULL,
  byrow = TRUE,
  axes = NULL,
  axis_titles = axes,
  guides = NULL,
  ...
)

Arguments

data

A data frame containing the data to plot

x

Column for x-axis (discrete). Can be a single column name or multiple columns that will be concatenated.

x_sep

Separator for concatenating multiple x columns.

y

Column for y-axis (numeric). The response variable. May be left as NULL when in_form = "wide", where the values are taken from the columns given in x.

in_form

Input data form: "long" (default) or "wide"

split_by

Column name(s) to split data into multiple plots

split_by_sep

Separator when concatenating multiple split_by columns

symnum_args

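List of arguments controlling how p-values are coded as significance symbols (cutpoints and their symbols, in the style of stats::symnum()); relevant when sig_label = "p.signif"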

sort_x

Sort x-axis values: "none", "mean_asc", "mean_desc", "mean", "median_asc", "median_desc", "median"

flip

Logical; flip coordinates to create horizontal plots

keep_empty

Logical; keep empty factor levels on x-axis

group_by

Column for grouping (creates dodged/side-by-side plots)

group_by_sep

Separator when concatenating multiple group_by columns

group_name

Legend name for groups

paired_by

Column identifying paired observations (for paired tests). When provided, add_point is forced to TRUE (with a warning).

x_text_angle

Angle for x-axis text labels

step_increase

Step increase for comparison brackets

fill_mode

Fill coloring mode: "dodge", "x", "mean", or "median". Defaults to "dodge" when group_by is provided, otherwise "x".

fill_reverse

Logical; reverse gradient fills

theme

Theme name (string) or theme function

theme_args

List of arguments passed to theme function

palette

Color palette name

palcolor

Custom colors for palette

alpha

Transparency level (0-1)

aspect.ratio

Aspect ratio of plot panel

legend.position

Legend position: "none", "left", "right", "bottom", "top"

legend.direction

Legend direction: "horizontal" or "vertical"

add_point

Logical; add jittered data points

pt_color

Point color (default: "grey30")

pt_size

Point size (auto-calculated if NULL)

pt_alpha

Point transparency (0-1)

jitter_width

Jitter width for points

jitter_height

Jitter height for points

stack

Logical; stack facets vertically/horizontally

y_max

Y-axis maximum (numeric, or a string of the form "qXX" for a quantile, e.g. "q95" for the 95th percentile)

y_min

Y-axis minimum (numeric, or a string of the form "qXX" for a quantile, e.g. "q5" for the 5th percentile)

add_box

Logical; add box overlay (violin only)

box_color

Box overlay color

box_width

Box overlay width

box_ptsize

Box median point size

add_trend

Logical; add trend line connecting medians

trend_color

Trend line color

trend_linewidth

Trend line width

trend_ptsize

Trend point size

add_stat

Function to add stat summary (e.g., mean)

stat_name

Stat legend name

stat_color

Stat point color

stat_size

Stat point size

stat_stroke

Stat point stroke width

stat_shape

Stat point shape

add_bg

Logical; add alternating background

bg_palette

Background color palette

bg_palcolor

Background custom colors

bg_alpha

Background transparency

add_line

Numeric; add horizontal reference line at this value

line_color

Reference line color

line_width

Reference line width

line_type

Reference line type

highlight

Points to highlight (logical, indices, or an expression given as a string, e.g. "value > 80 | value < 10")

highlight_color

Highlight color

highlight_size

Highlight size

highlight_alpha

Highlight transparency

comparisons

Pairwise comparisons (list of pairs or TRUE for all)

ref_group

Reference group for comparisons

pairwise_method

Statistical method for pairwise comparisons

multiplegroup_comparisons

Logical; perform multiple group comparisons

multiple_method

Statistical method for multiple comparisons

sig_label

Significance label format: "p.format" or "p.signif"

sig_labelsize

Significance label font size

hide_ns

Logical; hide non-significant comparisons

facet_by

Column name(s) for faceting the plot

facet_scales

Scales for facets: "fixed", "free", "free_x", "free_y"

facet_ncol

Number of columns in facet layout

facet_nrow

Number of rows in facet layout

facet_byrow

Fill facets by row (TRUE) or column (FALSE)

title

Plot title

subtitle

Plot subtitle

xlab

X-axis label

ylab

Y-axis label

seed

Random seed for reproducibility

combine

Whether to combine split plots into one

nrow

Number of rows when combining plots

ncol

Number of columns when combining plots

byrow

Fill combined plots by row

axes

How to handle axes in combined plots ("keep", "collect", "collect_x", "collect_y")

axis_titles

How to handle axis titles in combined plots. Defaults to the value of axes.

guides

How to handle guides in combined plots ("collect", "keep", "auto")

...

Additional arguments passed to atomic plotting functions.

Value

A ggplot object or combined plots (patchwork)

Examples

# \donttest{
# ============================================================
# Basic Examples
# ============================================================

# Create sample data with different distributions
set.seed(456)
data <- data.frame(
  category = rep(c("Normal", "Bimodal", "Skewed", "Uniform"), each = 100),
  value = c(
    rnorm(100, 50, 10), # Normal
    c(rnorm(50, 30, 5), rnorm(50, 70, 5)), # Bimodal
    rexp(100, 0.1), # Skewed
    runif(100, 20, 80) # Uniform
  ),
  group = rep(c("A", "B"), 200)
)

# Simple violin plot
ViolinPlot(data, x = "category", y = "value")


# With title and labels
ViolinPlot(data,
  x = "category", y = "value",
  title = "Distribution Shapes Comparison",
  xlab = "Distribution Type",
  ylab = "Value"
)


# ============================================================
# Violin with Box Plot Overlay
# ============================================================

# Add box plot inside violin (shows quartiles)
ViolinPlot(data,
  x = "category", y = "value",
  add_box = TRUE
)


# Customize box overlay
ViolinPlot(data,
  x = "category", y = "value",
  add_box = TRUE,
  box_color = "white",
  box_width = 0.15,
  box_ptsize = 3
)


# ============================================================
# Adding Data Points
# ============================================================

# Add jittered points to show individual observations
ViolinPlot(data,
  x = "category", y = "value",
  add_point = TRUE
)


# Combine box overlay with points
ViolinPlot(data,
  x = "category", y = "value",
  add_box = TRUE,
  add_point = TRUE,
  pt_alpha = 0.3
)


# Customize point appearance
ViolinPlot(data,
  x = "category", y = "value",
  add_point = TRUE,
  pt_color = "navy",
  pt_size = 0.8,
  pt_alpha = 0.5,
  jitter_width = 0.3
)


# ============================================================
# Grouped Violin Plots
# ============================================================

# Side-by-side violins by group
ViolinPlot(data,
  x = "category", y = "value",
  group_by = "group"
)


# With box overlay
ViolinPlot(data,
  x = "category", y = "value",
  group_by = "group",
  add_box = TRUE
)


# Custom palette
ViolinPlot(data,
  x = "category", y = "value",
  group_by = "group",
  palette = "Set2",
  add_box = TRUE
)


# ============================================================
# Statistical Comparisons
# ============================================================

# Compare specific distributions
ViolinPlot(data,
  x = "category", y = "value",
  comparisons = list(
    c("Normal", "Bimodal"),
    c("Normal", "Skewed")
  )
)


# All pairwise comparisons
ViolinPlot(data,
  x = "category", y = "value",
  comparisons = TRUE,
  sig_label = "p.signif"
)


# Grouped comparisons
ViolinPlot(data,
  x = "category", y = "value",
  group_by = "group",
  comparisons = TRUE
)


# ============================================================
# Paired Data Analysis
# ============================================================

# Create paired data
paired_data <- data.frame(
  condition = factor(rep(c("Baseline", "Treatment"), each = 30)),
  patient = factor(rep(1:30, 2)),
  response = c(rnorm(30, 100, 20), rnorm(30, 120, 20))
)

# Paired violin with connecting lines
ViolinPlot(paired_data,
  x = "condition", y = "response",
  paired_by = "patient",
  add_box = TRUE
)
#> Warning: Forcing 'add_point' = TRUE when 'paired_by' is provided.


# With paired t-test
ViolinPlot(paired_data,
  x = "condition", y = "response",
  paired_by = "patient",
  comparisons = list(c("Baseline", "Treatment")),
  pairwise_method = "t.test"
)
#> Warning: Forcing 'add_point' = TRUE when 'paired_by' is provided.


# ============================================================
# Highlighting and Visual Enhancements
# ============================================================

# Highlight extreme values
ViolinPlot(data,
  x = "category", y = "value",
  add_point = TRUE,
  highlight = "value > 80 | value < 10",
  highlight_color = "red",
  highlight_size = 2
)


# Add trend line
ViolinPlot(data,
  x = "category", y = "value",
  add_trend = TRUE,
  trend_linewidth = 1.5
)


# Add reference line
ViolinPlot(data,
  x = "category", y = "value",
  add_line = 50,
  line_color = "darkgreen",
  line_type = 1
)


# Add mean indicator
ViolinPlot(data,
  x = "category", y = "value",
  add_stat = mean,
  stat_name = "Mean",
  stat_color = "red",
  stat_shape = 18,
  stat_size = 3
)


# ============================================================
# Fill Modes
# ============================================================

# Fill by x category
ViolinPlot(data,
  x = "category", y = "value",
  fill_mode = "x",
  palette = "Pastel1"
)


# Fill by mean (gradient coloring)
ViolinPlot(data,
  x = "category", y = "value",
  fill_mode = "mean",
  palette = "RdYlGn"
)


# Fill by median with reversed gradient
ViolinPlot(data,
  x = "category", y = "value",
  fill_mode = "median",
  palette = "Blues",
  fill_reverse = TRUE
)


# ============================================================
# Sorting and Orientation
# ============================================================

# Sort categories by descending mean value
ViolinPlot(data,
  x = "category", y = "value",
  sort_x = "mean_desc",
  add_box = TRUE
)


# Horizontal violin plot
ViolinPlot(data,
  x = "category", y = "value",
  flip = TRUE,
  add_box = TRUE
)


# ============================================================
# Faceting
# ============================================================

# Add faceting variable
data$experiment <- sample(c("Exp1", "Exp2"), nrow(data), replace = TRUE)

# Facet by experiment
ViolinPlot(data,
  x = "category", y = "value",
  facet_by = "experiment",
  add_box = TRUE
)


# Free scales
ViolinPlot(data,
  x = "category", y = "value",
  facet_by = "experiment",
  facet_scales = "free_y"
)


# ============================================================
# Wide Format Data
# ============================================================

# Wide format input
wide_data <- data.frame(
  Control = rnorm(50, 100, 15),
  LowDose = rnorm(50, 110, 15),
  HighDose = rnorm(50, 130, 20)
)

ViolinPlot(wide_data,
  x = c("Control", "LowDose", "HighDose"),
  in_form = "wide",
  add_box = TRUE,
  xlab = "Treatment",
  ylab = "Response"
)


# ============================================================
# Complex Example: Publication-Ready Plot
# ============================================================

# Gene expression data example
expr_data <- data.frame(
  gene = rep(c("BRCA1", "TP53", "EGFR", "MYC"), each = 40),
  expression = c(
    rnorm(40, 8, 1.5),
    rnorm(40, 6, 2),
    rnorm(40, 10, 1),
    rnorm(40, 12, 2.5)
  ),
  tissue = rep(rep(c("Normal", "Tumor"), each = 20), 4)
)

ViolinPlot(expr_data,
  x = "gene",
  y = "expression",
  group_by = "tissue",
  add_box = TRUE,
  add_point = TRUE,
  pt_alpha = 0.4,
  comparisons = TRUE,
  sig_label = "p.signif",
  hide_ns = TRUE,
  palette = c("Normal" = "#4DAF4A", "Tumor" = "#E41A1C"),
  title = "Gene Expression by Tissue Type",
  xlab = "Gene",
  ylab = "Expression (log2)",
  legend.position = "bottom"
)

# }