[R] cut-off point to separate overlapping distributions

Luigi Marongiu marongiu.luigi at gmail.com
Mon Feb 15 01:13:36 CET 2016


dear all,
I have a set of data that can be defined as bimodal; would be possible
to define a cut-off to split it? my mathematical/statistical knowledge
is limited, my apologies, thus i am not sure what kind of area this
problem belongs to; at the moment I can fit -- thanks to previous help
from the community -- the modes of the distribution, but the problem i
am facing is to split the distribution in two. based on the example
below, i recknon the cut-off would be where i placed the green arrow
-- the local minimum -- but would be even better if the cut-off could
be be placed as close as possible to the first peak (blue arrow).
thank you
luigi

>>>
xval <- c(42.39, 44.53, 5.05, 6.9, 45, 2.35, 20.73, 3.31, 45, 4.76,
2.47, 2.37, 2.8, 3.26, 3.21, 45, 6.41, 4.77, 4.72, 4.89, 44.71, 2.8,
4.08, 4.07, 4.81, 2.93, 2.73, 44.75, 2.61, 2.56, 2.75, 2.75, 44.82,
36.59, 2.8, 43.82, 2.53, 2.75, 2.73, 3.05, 2.66, 5.61, 2.28, 4.83,
38.63, 44.23, 2.35, 2.47, 44.03, 6.33, 2.7, 2.96, 42.85, 2.47, 2,
12.76, 2.99, 2, 35.11, 2.63, 44.69, 2.96, 45, 42.13, 41.04, 3.22, 45,
45, 2.55, 4.58, 3.09, 39.98, 2, 2.97, 2.87, 2, 44.82, 45, 2.95, 45, 2,
2.82, 2.47, 2.98, 4.81, 44.53, 44.38, 2.87, 44.45, 2.9, 2.48, 44.14,
3.05, 2.76, 45, 45, 44.54, 42.85, 3.17, 2.46, 39.95, 36.96, 2.59,
2.75, 5.38, 2.8, 44.53, 45, 38.84, 4.64, 3.04, 2.59, 2.64, 45, 2.66,
44.37, 45, 26.32, 3.29, 40.44, 2, 41.51, 2, 45, 2, 5, 2.78, 2.11,
3.31, 2.61, 2.83, 2.6, 2.66, 2.95, 2.46, 2.58, 2.94, 45, 45, 2.71,
2.63, 2.81, 2, 3.29, 5.48, 45, 3.02, 2.82, 3.07, 2.65, 2.61, 2.67,
36.6, 2.08, 40.2, 45, 2.5, 45, 41.46, 45, 2.62, 2.77, 4.14, 2.63,
3.21, 4.79, 42.63, 2.66, 45, 4.69, 3.05, 45, 45, 2.97, 42.07, 2.73,
3.26, 5.17, 2.47, 44.66, 2.42, 5.14, 5.03, 2.65, 2.88, 2.69, 44.1,
3.15, 4.92, 42.02, 6.97, 2.46, 35.98, 2.95, 32.98, 2.79, 44.82, 2.84,
2.15, 44.42, 2.96, 45, 2.42, 2.75, 2.44, 4.58, 2, 45, 41.04, 4.04,
3.08, 2.46, 44.54, 3.21, 39.16, 2, 35.36, 3.08, 5.77, 2.71, 4.41,
2.46, 44.43, 2.62, 45, 2.7, 45, 41.43, 4.65, 3.05, 4.76, 40.66, 32.88,
45, 44.94, 44.67, 3.07, 2.92, 2.75, 2.63, 2.68, 34.15, 3.27, 2.47, 2,
2.63, 45, 3.06, 42.53, 35.25, 2.82, 42.62, 5.83, 4.69, 38.04, 2.47,
38.14, 3.73, 10, 4.93, 4.93, 4.65, 40.8, 2.32, 5.53, 3.01, 41.13, 4.5,
2.65, 44.85, 5.02, 2, 39.99, 2.89, 3.09, 2, 43.77, 44.53, 4.09, 6.22,
3.31, 44.64, 4.65, 45, 6.68, 39.93, 45, 2.77, 2.51, 2, 45, 4.08, 4.61,
6.11, 3.02, 44.8, 45, 44.54, 2.95, 2.77)
yval <- c(-0.002, 0.001, 0.002, 0.001, -0.001, 0.003, 0.003, 0.005, 0,
0.011, 0.003, 0.011, 0.004, 0.012, 0.004, 0.005, 0.001, 0.007, 0.006,
0.007, -0.001, 0.011, 0.005, 0.002, 0.007, 0.028, 0.01, 0.002, 0.003,
0.007, 0.033, 0.006, 0.003, 0, 0.01, 0.018, 0.01, 0.008, 0.002, 0.022,
0.02, 0.002, 0.006, 0.008, 0, -0.002, -0.001, 0.001, 0.001, 0.007,
0.005, 0.011, 0.004, 0.001, 0.005, 0.001, 0.019, 0.002, 0.001, 0.002,
-0.001, 0.003, 0.001, 0, -0.001, 0.002, 0.005, 0.001, 0, 0.007, 0.011,
-0.001, 0.002, 0.01, 0.004, 0.003, 0.001, 0, 0.015, 0.004, 0.001,
0.003, 0.003, 0.027, 0.005, 0, 0.003, 0.003, 0, 0.017, 0.004, -0.001,
0.043, 0.003, -0.001, 0.001, 0, 0, 0.019, 0.003, -0.001, 0.001, 0.009,
0.013, 0.001, 0.021, 0.001, -0.001, -0.001, 0.002, 0.008, 0.004,
0.007, 0.001, 0.007, 0.001, 0, 0.001, 0.004, -0.001, 0.001, 0, 0.007,
0.003, 0.002, 0.001, 0.028, 0.002, 0.005, 0.013, 0.017, 0.013, 0.009,
0.021, 0.01, 0.007, 0.015, 0, 0.002, 0.002, 0.013, 0.012, 0, 0.034,
0.005, 0, 0.041, 0.02, 0.036, 0.004, 0.002, 0.004, 0.001, 0.001,
0.001, 0.003, 0.016, 0.002, 0, 0, 0.002, 0.009, 0.006, 0.022, 0.002,
0.01, 0, 0.012, 0.006, 0.01, 0.005, 0.001, 0.001, 0.027, 0.003, 0.002,
0.02, 0.014, 0.003, 0.003, -0.001, 0.002, 0.008, 0.011, 0.014, 0.017,
0, 0.015, 0.008, 0.001, 0.005, 0.002, 0.001, 0.012, 0.002, 0.004, 0,
0.012, 0, 0.001, 0.005, 0.001, 0.008, 0.011, 0.006, 0.007, 0.005, 0,
0, 0.003, 0.004, 0.002, 0, 0.015, -0.001, 0.002, -0.001, 0.006, 0.005,
0.006, 0.003, 0.002, 0.001, 0.001, 0, 0.007, 0.002, -0.001, 0.033,
0.01, 0.007, 0.003, 0, 0.001, -0.001, 0.011, 0.006, 0.015, 0.013,
0.01, 0.015, 0, 0.014, 0.001, 0.001, 0.003, 0, 0.012, -0.001, 0.002,
0.027, -0.001, 0.009, 0.021, 0.001, 0.001, -0.001, 0.008, 0.001,
0.017, 0.002, 0.027, 0.003, -0.001, 0.018, 0.026, -0.001, 0.004,
0.003, 0.001, 0.019, 0.001, 0.001, 0.019, 0.005, 0.001, -0.001, 0.002,
0.001, 0.001, 0.004, 0.002, 0.007, 0.003, 0.01, 0.002, 0.003, 0.002,
0.005, 0.003, 0, 0.004, 0.032, 0.005, 0.018, 0.002, 0.001, 0.002,
0.003, 0.005)
df <- data.frame(xval,yval)

d.rv = density(df$xval)
plot(df$yval ~ df$xval, ylim=c(0,0.05))
lines(d.rv)
d.x = d.rv$x
d.y = d.rv$y
runs <- rle(sign(diff(d.rv$y)))
length(runs$lengths)
mode1 <- runs$lengths[1]+1
mode2 <- length(d.rv$x)- runs$lengths[4]
Y1 <- d.rv$y[mode1]
X1 <- d.rv$x[d.rv$y == Y1]
abline(v=X1, col="red")
Y2 <- d.rv$y[mode2]
X2 <- d.rv$x[d.rv$y == Y2]
abline(v=X2, col="red")

arrows(21, 0.03, 21, 0.01, col="green")
arrows(10, 0.03, 10, 0.01, col="blue")



More information about the R-help mailing list