Content
- Basic Statistical Plots:
- Clustering Analysis:
- Multiple Variable Plots:
- Mapping:
- Geospatial analysis:
1 Basic Statistical Plots
Single Variable Statistic Plots
# Load necessary libraries
library(ggplot2)
library(gridExtra)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(corrplot)
## corrplot 0.92 loaded
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:plyr':
##
## ozone
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(dbscan)
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(gstat)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
# Read the OH climate CSV into `data`.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
OH_Climate_dataset_path <- "/Users/zhangshiyu/Desktop/R Course/Final/OH_Climate_dataset.csv"
data <- read_csv(file = OH_Climate_dataset_path)
## Rows: 2122 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Event, Area
## dbl (11): Sample ID, Location, δ18O H2O [‰ SMOW], δD H2O [‰ SMOW], δ18O H2O...
## date (1): DATE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only rows whose coordinates fall inside the latitude/longitude box
# below (both bounds inclusive).
data <- filter(
  data,
  LATITUDE >= 24.396308 & LATITUDE <= 49.384358,
  LONGITUDE >= -125.001651 & LONGITUDE <= -66.93457
)

# Coerce DATE to class Date.
data[["DATE"]] <- as.Date(data[["DATE"]])

# Interpolate missing TAVG values along row order; rule = 2 carries the
# nearest observed value over leading/trailing gaps.
data[["TAVG"]] <- na.approx(data[["TAVG"]], rule = 2)
## Single Variable Statistic Plots
# p1: count histogram of δ18O H2O [‰ SMOW], bin width 0.5
p1 <- ggplot(data = data, mapping = aes(x = `δ18O H2O [‰ SMOW]`)) +
  geom_histogram(binwidth = 0.5, fill = "cornflowerblue", colour = "black") +
  labs(
    title = "Histogram of δ18O H2O [‰ SMOW]",
    x = "δ18O H2O [‰ SMOW]",
    y = "Frequency"
  ) +
  theme_minimal()
# Histogram of δ18O H2O [‰ SMOW] with KDE
# The bars are scaled to density (not counts) so they share a y-axis with the
# kernel density curve drawn on top. after_stat(density) replaces the
# `..density..` notation, which is deprecated since ggplot2 3.4.0.
p2 <- ggplot(data, aes(x = `δ18O H2O [‰ SMOW]`)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 20, fill = "skyblue", color = "black"
  ) +
  geom_density(alpha = .2, fill = "skyblue") +
  ggtitle("Histogram of δ18O H2O [‰ SMOW]") +
  theme_minimal()
# p3: count histogram of δD H2O [‰ SMOW], bin width 5
p3 <- ggplot(data = data, mapping = aes(x = `δD H2O [‰ SMOW]`)) +
  geom_histogram(binwidth = 5, fill = "lightcoral", colour = "black") +
  labs(
    title = "Histogram of δD H2O [‰ SMOW]",
    x = "δD H2O [‰ SMOW]",
    y = "Frequency"
  ) +
  theme_minimal()
# Histogram of δD H2O [‰ SMOW] with KDE
# The bars are scaled to density (not counts) so they share a y-axis with the
# kernel density curve drawn on top. after_stat(density) replaces the
# `..density..` notation, which is deprecated since ggplot2 3.4.0.
p4 <- ggplot(data, aes(x = `δD H2O [‰ SMOW]`)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 20, fill = "lightgreen", color = "black"
  ) +
  geom_density(alpha = .2, fill = "lightgreen") +
  ggtitle("Histogram of δD H2O [‰ SMOW]") +
  theme_minimal()
# p5: PRCP plotted against DATE as a single line
p5 <- ggplot(data = data, mapping = aes(x = DATE, y = PRCP)) +
  geom_line(colour = "tomato") +
  labs(
    title = "Precipitation (PRCP) Over Time",
    x = "Date",
    y = "Precipitation"
  ) +
  theme_minimal()
# p6: TAVG plotted against DATE as a single line
p6 <- ggplot(data = data, mapping = aes(x = DATE, y = TAVG)) +
  geom_line(colour = "tomato") +
  labs(
    title = "Average Temperature (TAVG) Over Time",
    x = "Date",
    y = "Average Temperature (°C)"
  ) +
  theme_minimal()
# Draw the six single-variable plots together, two per row (three rows).
grid.arrange(grobs = list(p1, p2, p3, p4, p5, p6), ncol = 2)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The single-variable statistical plots provide a foundational understanding of the distribution and central tendency of each variable in my dataset. The histograms and density plots let me identify skewness, outliers, and the general shape of each distribution.
Bivariate Plots
## Bivariate plots
# p7: δD against δ18O, one point per row, coloured by Area
p7 <- ggplot(
  data = data,
  mapping = aes(
    x = `δ18O H2O [‰ SMOW]`,
    y = `δD H2O [‰ SMOW]`,
    colour = Area
  )
) +
  geom_point() +
  labs(
    title = "δ18O vs δD by Area",
    x = "δ18O H2O [‰ SMOW]",
    y = "δD H2O [‰ SMOW]"
  ) +
  theme_minimal() +
  scale_colour_viridis_d() # Optional: swap in a different discrete palette

# Render the scatter plot on the current graphics device.
print(p7)