[R] to change the size of the line in the plot created in ggplot2

Jeff Newmiller jdnewmil at dcn.davis.ca.us
Fri Dec 25 23:38:43 CET 2015


Giorgio... beware of using cbind to form data frames from vectors.
It is inefficient in its use of memory, doesn't set column names, and
will convert all columns to character if any one of the vectors you
combine is of character type. Below are three revamps of this
example.

Using cbind with data frames as input fixes most of these problems,
but you are still better off using the "data.frame" function in most 
cases.

#--- base R, simplified
# Build one "long" data frame with the columns x, value and variable,
# holding one run of rows per curve (y1, y2, y3 and their average).
x <- seq_len( 100 )

y1 <- x * x
df1 <- data.frame( x = x, value = y1, stringsAsFactors = FALSE )
# scalar automatically repeated to number of rows
df1$variable <- "y1"

y2 <- y1 + 1500
df2 <- data.frame( x = x, value = y2, stringsAsFactors = FALSE )
df2$variable <- "y2"

y3 <- y1 + 6000
df3 <- data.frame( x = x, value = y3, stringsAsFactors = FALSE )
df3$variable <- "y3"

avg <- ( y1 + y2 + y3 ) / 3
# no separate label vector is needed: the label is assigned as a scalar on
# the next line, exactly as for df1..df3
df4 <- data.frame( x = x, value = avg, stringsAsFactors = FALSE )
df4$variable <- "average"

# stack the four pieces; they share the same column names
df <- rbind( df1, df2, df3, df4 )

# the explicit levels fix the order of the curves (otherwise factor() would
# sort the labels alphabetically and put "average" first)
df$variable <- factor( df$variable
                      , levels = c( "y1", "y2", "y3", "average" ) )

# this is the data to start with ggplot()
df

library(ggplot2)

# this example you made goes to all the effort to put the data into one
# data frame, and then fails to make use of the automatic legend creation
# feature of ggplot
#
# `sel` flags the rows that hold the "average" curve. It has to be defined
# before the plot call; without it the subsetting below stops with
# "object 'sel' not found". A logical mask is used instead of which()
# indices so that the complement `!sel` still keeps every row when nothing
# matches (a negative index built from an empty which() selects no rows).
sel <- df$variable == "average"

# NOTE: ggplot2 3.4.0 and later prefer `linewidth` over `size` for lines;
# `size` is kept so the example also runs on older ggplot2 versions.
ggplot( data = df[ !sel, ]
       , aes( x=x, y=value, group=variable ) ) +
   geom_line() +
   # the average is a separate layer with fixed (unmapped) styling, which is
   # why no legend is produced for it
   geom_line( data = df[ sel, ]
            , mapping = aes( x=x, y=value, group=variable )
            , size=0.5, linetype="dashed", color="blue" )

# Easier to interpret: colour, linetype and size are all mapped to
# `variable`, so ggplot builds the legend itself. The manual scales list
# their values in the order of the factor levels of `variable`.
ggplot(
  data = df,
  mapping = aes(
    x = x,
    y = value,
    colour = variable,
    linetype = variable,
    size = variable
  )
) +
  geom_line() +
  scale_colour_manual(
    name = "Curve",
    values = c("red", "green", "blue", "black")
  ) +
  scale_linetype_manual(name = "Curve", values = c(1, 1, 1, 2)) +
  scale_size_manual(name = "Curve", values = c(1, 1, 1, 0.5))

#----- base R data manipulation, a little more sophisticated

library( ggplot2 )

# Rather than creating y1, y2, ... as free-standing vectors, start from a
# data frame that holds only x and derive the dependent columns inside it.
df0 <- within(data.frame(x = seq_len(100)), {
  y1 <- x * x
  y2 <- y1 + 1500
  y3 <- y1 + 6000
})
# rowMeans averages across however many columns are selected
df0$average <- rowMeans(df0[c("y1", "y2", "y3")])
# inspect the "wide" layout: one column per curve
df0

# Base R converts "wide" to "long" with reshape(). The argument list is hard
# to remember (compare with the dplyr/tidyr version): the columns named in
# `varying` are stacked into the single column `v.names`, and `timevar`
# records which entry of `times` each stacked row came from.
vars <- c("y1", "y2", "y3", "average")
df <- reshape(
  df0,
  direction = "long",
  idvar = "x",
  varying = vars,
  v.names = "value",
  timevar = "variable",
  times = vars
)

# Turn the character labels into a factor; the order of its levels is the
# order in which colours and linetypes are specified when plotting.
df$variable <- factor(df$variable, levels = vars)

# colour, linetype and size are all driven by the same column (and the three
# scales share the name "Curve"), so the three legends are combined into one
ggplot(
  data = df,
  mapping = aes(
    x = x,
    y = value,
    colour = variable,
    linetype = variable,
    size = variable
  )
) +
  geom_line() +
  scale_colour_manual(
    name = "Curve",
    values = c("red", "green", "blue", "black")
  ) +
  scale_linetype_manual(name = "Curve", values = c(1, 1, 1, 2)) +
  scale_size_manual(name = "Curve", values = c(1, 1, 1, 0.5))

#---- Nonstandard syntax from dplyr, easier to remember and use on the fly
#     but requires some more contributed packages

library( ggplot2 )
library( dplyr )
library( tidyr )

# dplyr chains steps with the "%>%" pipe: the result on the left is handed
# to the function on the right. See the vignettes of the "dplyr" and
# "magrittr" packages for details.
df0 <- data.frame(x = seq_len(100)) %>%
  mutate(
    y1 = x * x,
    y2 = y1 + 1500,
    y3 = y1 + 6000
  )
# remember every column name except the first one (x): these are the curves
vars <- names(df0)[-1]
# when a step needs the whole data set, "." stands for the data frame as it
# exists at that point in the pipeline
df0 <- df0 %>%
  mutate(average = rowMeans(.[, vars]))
# the same list of names, now including average
allvars <- names(df0)[-1]

# gather() stacks every column except x into a "value" column, with the
# source column name recorded in "variable"; the labels then become a factor
# whose levels follow the order in allvars.
# (newer tidyr releases offer pivot_longer() for this kind of reshaping)
df <- df0 %>%
  gather(variable, value, -x) %>%
  mutate(variable = factor(variable, levels = allvars))

# Styling values, listed in the order of the levels of "variable": one entry
# per curve named in `vars`, followed by a final entry for the average
# (black, linetype 2, size 0.5).
colv <- c(rainbow(n = length(vars)), "black")
lntypv <- rep(c(1, 2), times = c(length(vars), 1))
szv <- rep(c(1, 0.5), times = c(length(vars), 1))

ggplot(
  data = df,
  mapping = aes(
    x = x,
    y = value,
    colour = variable,
    linetype = variable,
    size = variable
  )
) +
  geom_line() +
  scale_colour_manual(name = "Curve", values = colv) +
  scale_linetype_manual(name = "Curve", values = lntypv) +
  scale_size_manual(name = "Curve", values = szv)

On Fri, 25 Dec 2015, Giorgio Garziano wrote:

> Hi Marna,
>
> here is another example that should appear more similar to your scenario
> than my previous one.
>
> x <- seq(1:100)
>
> y1 <- x*x
> g1 <- rep("y1", 100)
> df1 <- as.data.frame(cbind(x, y1), stringsAsFactors=FALSE)
> df1 <- as.data.frame(cbind(df1, g1))
> colnames(df1)<- c("x", "value", "variable")
>
> y2 <- y1+1500
> g2 <- rep("y2", 100)
> df2 <- as.data.frame(cbind(x, y2), stringsAsFactors=FALSE)
> df2 <- as.data.frame(cbind(df2, g2))
> colnames(df2)<- c("x", "value", "variable")
>
> y3 <- y1+6000
> g3 <- rep("y3", 100)
> df3 <- as.data.frame(cbind(x, y3), stringsAsFactors=FALSE)
> df3 <- as.data.frame(cbind(df3, g3))
> colnames(df3)<- c("x", "value", "variable")
>
> avg <- (y1+y2+y3)/3
> df4 <- as.data.frame(cbind(x, avg))
> g4 <- rep("average", 100)
> df4 <- as.data.frame(cbind(df4, g4))
> colnames(df4) <- c("x", "value", "variable")
>
> df <- data.frame(rbind(df1, df2, df3, df4))
>
> # this is the data to start with ggplot()
> df
>
> # the df rows where the average value is stored
> sel <- which(df[,"variable"]=="average")
>
> library(ggplot2)
>
> ggplot(data = df[-sel,], aes(x=x, y=value, group=variable)) + geom_line() +
>  geom_line(data = df[sel,], aes(x=x, y=value, group=variable), size=0.5, linetype="dashed", color="blue")
>
>
> Merry Christmas,
>
> --
> GG
>
>
> 	[[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>

---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnewmil at dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
                                       Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k



More information about the R-help mailing list