What are lists?

Lists in R are collections of objects that can be of any mix of types (or all the same type). They can be useful when dealing with multiple data.frames that each correspond to a different unit of study (note that we solved this before by considering a single data.frame with a column corresponding to country). Lists tend to be useful in my research when I’m simulating ecological dynamics, where each list item can be the results of a single simulation, or when I’m writing functions to return data that is not really structured as a data.frame. Before we go too far into use cases, let’s refresh on how we form and index list objects.

lst <- list(runif(100), data.frame(x=runif(100), y=runif(100)), letters[1:10], 'a')

# index single list elements
lst[[1]]
##   [1] 0.90704945 0.77652364 0.90730792 0.18234435 0.16822273 0.17585736
##   [7] 0.74447123 0.99180202 0.52654958 0.69324169 0.85301198 0.53240988
##  [13] 0.86396784 0.18200349 0.65977339 0.37382909 0.66034647 0.33585775
##  [19] 0.94423552 0.47108947 0.64014545 0.14299730 0.83758049 0.09136402
##  [25] 0.83603546 0.65112385 0.79237058 0.04926343 0.60975753 0.81184443
##  [31] 0.71950415 0.34576116 0.69708172 0.71480971 0.43852277 0.91306516
##  [37] 0.33709835 0.64484189 0.23874983 0.03005836 0.85905594 0.62504449
##  [43] 0.46302086 0.85274002 0.66683303 0.25209227 0.57347857 0.94172352
##  [49] 0.60845584 0.87653416 0.80555945 0.12586274 0.10692094 0.65519174
##  [55] 0.53719319 0.77821136 0.52197263 0.77467111 0.19578390 0.13651637
##  [61] 0.94467557 0.40883423 0.80625228 0.88294754 0.93449914 0.54107256
##  [67] 0.73536660 0.57453253 0.56101505 0.58099235 0.79263289 0.02114419
##  [73] 0.45113483 0.16046333 0.55647937 0.06394116 0.65338370 0.76742226
##  [79] 0.52071744 0.51420978 0.19473846 0.27844844 0.66024552 0.61151509
##  [85] 0.27119185 0.61293326 0.43017132 0.88117462 0.73549543 0.86919798
##  [91] 0.18498700 0.44791807 0.98227175 0.07176038 0.04772194 0.24734227
##  [97] 0.64038550 0.32221512 0.27069574 0.05654550
lst[[2]]
##                x           y
## 1   0.9711248630 0.163197247
## 2   0.7570663751 0.270849837
## 3   0.9294406015 0.210958205
## 4   0.5364000814 0.623438141
## 5   0.1478103679 0.343618301
## 6   0.0776882314 0.984681295
## 7   0.5223464179 0.342049163
## 8   0.9478134413 0.532526935
## 9   0.4807977404 0.229018181
## 10  0.5042717066 0.051131312
## 11  0.9878702404 0.765741721
## 12  0.7547797987 0.830578280
## 13  0.8516403732 0.307384243
## 14  0.2377978710 0.905092597
## 15  0.0147246015 0.078250922
## 16  0.7972173975 0.879985108
## 17  0.4303299964 0.071435214
## 18  0.3909151922 0.870034968
## 19  0.4469986777 0.765329620
## 20  0.4262235162 0.557005505
## 21  0.8393820038 0.066578333
## 22  0.4331469364 0.152505867
## 23  0.9786027721 0.345341266
## 24  0.4867427261 0.028881846
## 25  0.0536901939 0.827337408
## 26  0.6377427704 0.006246690
## 27  0.4677336896 0.434778638
## 28  0.6986140336 0.943243005
## 29  0.7970075202 0.483888759
## 30  0.2506599759 0.633864311
## 31  0.9226017850 0.800301642
## 32  0.0025038649 0.626590540
## 33  0.9791661468 0.822802631
## 34  0.2556128427 0.353439753
## 35  0.8965716977 0.228846215
## 36  0.2377629776 0.150140253
## 37  0.9181363718 0.576929338
## 38  0.0227344930 0.007893186
## 39  0.4970574481 0.189509989
## 40  0.3314644506 0.987734135
## 41  0.0939055309 0.860263986
## 42  0.1094001110 0.587914045
## 43  0.1992902977 0.598895860
## 44  0.2197697689 0.416871586
## 45  0.7620014367 0.099557628
## 46  0.9508141740 0.400835748
## 47  0.3105879214 0.265780705
## 48  0.0001503848 0.693386884
## 49  0.4055376027 0.848887042
## 50  0.9087191643 0.941244133
## 51  0.0330890527 0.147223553
## 52  0.2824787481 0.503735947
## 53  0.2147192874 0.401763608
## 54  0.3223300606 0.810525108
## 55  0.3014176236 0.598305455
## 56  0.7976211212 0.103607750
## 57  0.3001318870 0.305545563
## 58  0.3210178553 0.788012243
## 59  0.8012628120 0.759338537
## 60  0.0374469769 0.079603650
## 61  0.7665565868 0.920609603
## 62  0.1607759397 0.527411877
## 63  0.2617601696 0.625022524
## 64  0.9111487148 0.950359337
## 65  0.1428027758 0.912524596
## 66  0.1014903188 0.319150291
## 67  0.2758274928 0.215314365
## 68  0.0643036880 0.675780897
## 69  0.8788718681 0.240681072
## 70  0.0586141017 0.576686268
## 71  0.4017654506 0.493577742
## 72  0.2588958980 0.311832946
## 73  0.5271591337 0.301856225
## 74  0.4924153527 0.197087383
## 75  0.2448883604 0.659983148
## 76  0.6887018394 0.949690158
## 77  0.5053683585 0.956632456
## 78  0.0644460099 0.852933277
## 79  0.6971812530 0.268945243
## 80  0.9590696895 0.466347856
## 81  0.4175283399 0.473655002
## 82  0.6887016196 0.727598679
## 83  0.1077271374 0.787842710
## 84  0.7235806210 0.396830022
## 85  0.9123932591 0.420076610
## 86  0.0309946393 0.261360365
## 87  0.7591738373 0.875259334
## 88  0.0739150322 0.109421942
## 89  0.7174504709 0.278115505
## 90  0.5633311914 0.118831564
## 91  0.6141034598 0.823268995
## 92  0.1190678899 0.442317624
## 93  0.9592662000 0.242461875
## 94  0.6394779414 0.102035899
## 95  0.1984358446 0.086960612
## 96  0.8283020621 0.926463483
## 97  0.9358594120 0.329809410
## 98  0.7497626538 0.500541312
## 99  0.4889136362 0.807810561
## 100 0.1254644105 0.396967957
lst[[3]]
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
lst[[4]]
## [1] "a"
# index multiple list elements
lst[1:2]
## [[1]]
##   [1] 0.90704945 0.77652364 0.90730792 0.18234435 0.16822273 0.17585736
##   [7] 0.74447123 0.99180202 0.52654958 0.69324169 0.85301198 0.53240988
##  [13] 0.86396784 0.18200349 0.65977339 0.37382909 0.66034647 0.33585775
##  [19] 0.94423552 0.47108947 0.64014545 0.14299730 0.83758049 0.09136402
##  [25] 0.83603546 0.65112385 0.79237058 0.04926343 0.60975753 0.81184443
##  [31] 0.71950415 0.34576116 0.69708172 0.71480971 0.43852277 0.91306516
##  [37] 0.33709835 0.64484189 0.23874983 0.03005836 0.85905594 0.62504449
##  [43] 0.46302086 0.85274002 0.66683303 0.25209227 0.57347857 0.94172352
##  [49] 0.60845584 0.87653416 0.80555945 0.12586274 0.10692094 0.65519174
##  [55] 0.53719319 0.77821136 0.52197263 0.77467111 0.19578390 0.13651637
##  [61] 0.94467557 0.40883423 0.80625228 0.88294754 0.93449914 0.54107256
##  [67] 0.73536660 0.57453253 0.56101505 0.58099235 0.79263289 0.02114419
##  [73] 0.45113483 0.16046333 0.55647937 0.06394116 0.65338370 0.76742226
##  [79] 0.52071744 0.51420978 0.19473846 0.27844844 0.66024552 0.61151509
##  [85] 0.27119185 0.61293326 0.43017132 0.88117462 0.73549543 0.86919798
##  [91] 0.18498700 0.44791807 0.98227175 0.07176038 0.04772194 0.24734227
##  [97] 0.64038550 0.32221512 0.27069574 0.05654550
## 
## [[2]]
##                x           y
## 1   0.9711248630 0.163197247
## 2   0.7570663751 0.270849837
## 3   0.9294406015 0.210958205
## 4   0.5364000814 0.623438141
## 5   0.1478103679 0.343618301
## 6   0.0776882314 0.984681295
## 7   0.5223464179 0.342049163
## 8   0.9478134413 0.532526935
## 9   0.4807977404 0.229018181
## 10  0.5042717066 0.051131312
## 11  0.9878702404 0.765741721
## 12  0.7547797987 0.830578280
## 13  0.8516403732 0.307384243
## 14  0.2377978710 0.905092597
## 15  0.0147246015 0.078250922
## 16  0.7972173975 0.879985108
## 17  0.4303299964 0.071435214
## 18  0.3909151922 0.870034968
## 19  0.4469986777 0.765329620
## 20  0.4262235162 0.557005505
## 21  0.8393820038 0.066578333
## 22  0.4331469364 0.152505867
## 23  0.9786027721 0.345341266
## 24  0.4867427261 0.028881846
## 25  0.0536901939 0.827337408
## 26  0.6377427704 0.006246690
## 27  0.4677336896 0.434778638
## 28  0.6986140336 0.943243005
## 29  0.7970075202 0.483888759
## 30  0.2506599759 0.633864311
## 31  0.9226017850 0.800301642
## 32  0.0025038649 0.626590540
## 33  0.9791661468 0.822802631
## 34  0.2556128427 0.353439753
## 35  0.8965716977 0.228846215
## 36  0.2377629776 0.150140253
## 37  0.9181363718 0.576929338
## 38  0.0227344930 0.007893186
## 39  0.4970574481 0.189509989
## 40  0.3314644506 0.987734135
## 41  0.0939055309 0.860263986
## 42  0.1094001110 0.587914045
## 43  0.1992902977 0.598895860
## 44  0.2197697689 0.416871586
## 45  0.7620014367 0.099557628
## 46  0.9508141740 0.400835748
## 47  0.3105879214 0.265780705
## 48  0.0001503848 0.693386884
## 49  0.4055376027 0.848887042
## 50  0.9087191643 0.941244133
## 51  0.0330890527 0.147223553
## 52  0.2824787481 0.503735947
## 53  0.2147192874 0.401763608
## 54  0.3223300606 0.810525108
## 55  0.3014176236 0.598305455
## 56  0.7976211212 0.103607750
## 57  0.3001318870 0.305545563
## 58  0.3210178553 0.788012243
## 59  0.8012628120 0.759338537
## 60  0.0374469769 0.079603650
## 61  0.7665565868 0.920609603
## 62  0.1607759397 0.527411877
## 63  0.2617601696 0.625022524
## 64  0.9111487148 0.950359337
## 65  0.1428027758 0.912524596
## 66  0.1014903188 0.319150291
## 67  0.2758274928 0.215314365
## 68  0.0643036880 0.675780897
## 69  0.8788718681 0.240681072
## 70  0.0586141017 0.576686268
## 71  0.4017654506 0.493577742
## 72  0.2588958980 0.311832946
## 73  0.5271591337 0.301856225
## 74  0.4924153527 0.197087383
## 75  0.2448883604 0.659983148
## 76  0.6887018394 0.949690158
## 77  0.5053683585 0.956632456
## 78  0.0644460099 0.852933277
## 79  0.6971812530 0.268945243
## 80  0.9590696895 0.466347856
## 81  0.4175283399 0.473655002
## 82  0.6887016196 0.727598679
## 83  0.1077271374 0.787842710
## 84  0.7235806210 0.396830022
## 85  0.9123932591 0.420076610
## 86  0.0309946393 0.261360365
## 87  0.7591738373 0.875259334
## 88  0.0739150322 0.109421942
## 89  0.7174504709 0.278115505
## 90  0.5633311914 0.118831564
## 91  0.6141034598 0.823268995
## 92  0.1190678899 0.442317624
## 93  0.9592662000 0.242461875
## 94  0.6394779414 0.102035899
## 95  0.1984358446 0.086960612
## 96  0.8283020621 0.926463483
## 97  0.9358594120 0.329809410
## 98  0.7497626538 0.500541312
## 99  0.4889136362 0.807810561
## 100 0.1254644105 0.396967957

Let’s think about how we might use lists. For one, many functions in R output data in list format. For instance, working with network data in R through igraph, most things are lists. Let’s explore this, both as a way to introduce lists and to talk about how to analyze network data in R.

#install.packages('igraph')
library(igraph)

g <- igraph::sample_gnm(100, 200)

str(g)
## Class 'igraph'  hidden list of 10
##  $ : num 100
##  $ : logi FALSE
##  $ : num [1:200] 4 5 7 10 10 13 13 16 17 19 ...
##  $ : num [1:200] 2 3 5 3 5 6 8 3 11 0 ...
##  $ : num [1:200] 0 1 2 3 4 5 6 7 8 9 ...
##  $ : num [1:200] 9 19 94 133 144 12 46 56 90 134 ...
##  $ : num [1:101] 0 0 0 0 0 1 2 2 3 3 ...
##  $ : num [1:101] 0 5 10 12 19 22 24 30 34 39 ...
##  $ :List of 4
##   ..$ : num [1:3] 1 0 1
##   ..$ :List of 4
##   .. ..$ name : chr "Erdos-Renyi (gnm) graph"
##   .. ..$ type : chr "gnm"
##   .. ..$ loops: logi FALSE
##   .. ..$ m    : num 200
##   ..$ : list()
##   ..$ : list()
##  $ :<environment: 0x557c08cae6d0>

Recall that when we introduced the plot function, I said that packages build in functionality such that some base functions will work with more complex objects (the igraph object above is a list). Try it here.

plot(g)

Nice. That’s neat. We can also write a wrapper function (we will not go over function writing now, but will save that for the coming weeks), which can be useful across multiple projects. This is a function I routinely use for visualizing networks in a prettier way.

#' Plot an igraph graph with cleaner defaults
#'
#' Thin wrapper around the `plot()` call for graph objects: vertex labels are
#' switched off, every vertex gets the same fill colour and size, and edge
#' widths are taken from the `weight` edge attribute.
#'
#' @param g graph object
#' @param lay layout handed to `plot()` as `layout`; defaults to
#'   `layout_nicely(g)`
#' @param colz vertex fill colour, handed to `plot()` as `vertex.color`
#' @param nodeSize vertex size, handed to `plot()` as `vertex.size`; the
#'   default of 10 is the value that used to be hard-coded
#'
#' @return the value of the `plot()` call; the function is called for its
#'   side effect of drawing the graph
plotGraph <- function(g, lay=layout_nicely(g), colz='dodgerblue', nodeSize=10){
    # NOTE(review): `directed` does not look like a documented igraph plotting
    # argument -- kept as in the original call; confirm whether it is needed.
    plot(g, layout=lay, edge.width=E(g)$weight, vertex.size=nodeSize,
        directed=FALSE,
        vertex.color=colz,
        vertex.label=NA)
}


plotGraph(g)

But let’s get back to lists. We’ve seen that igraph graph objects are lists, but also many of the outputs of functions within igraph are lists (or even lists of lists!). I will not defend nested lists as being all that useful, but we will see them in a couple of weeks when we talk about APIs and spatial data.

So one of the functions built into igraph is the sir function. This is a function which runs a model on the network known as the Susceptible-Infected-Recovered model (or SIR for short), which aims to capture how diseases spread through populations.

\[\begin{align} \frac{dS}{dt} & = -\beta SI \\ \frac{dI}{dt} & = \beta SI - \gamma I \\ \frac{dR}{dt} & = \gamma I \end{align}\]

The default behavior of the function (?sir) will run 100 simulations of the SIR model on a graph object that you provide to the function, and store the output as a list.

sims <- igraph::sir(g, beta=0.5, gamma=0.5, no.sim=100)
typeof(sims)
## [1] "list"
class(sims)
## [1] "sir"
# explore the structure of each list element 
sims[[1]] 
## $times
## [1] 0.00000000 0.03404459 0.13798784 0.77099134
## 
## $NS
## [1] 99 98 98 98
## 
## $NI
## [1] 1 2 1 0
## 
## $NR
## [1] 0 0 1 2
# each list element is another list
sims[[1]][[1]]
## [1] 0.00000000 0.03404459 0.13798784 0.77099134
sims[[1]]$times 
## [1] 0.00000000 0.03404459 0.13798784 0.77099134

And just to go back to plotting for a quick second, igraph has written functionality into the sir class to work well with the base R plot function.

plot(sims)

Neat, right?

But back to lists. Let’s work through the rest of working with lists through some exercises. Given the simulations above (sims list) …

Calculate the mean number of infected individuals for each simulation

Find the time associated with the maximum number of infected nodes

Calculate the fraction of all simulations in which fewer than 5 nodes are infected

apply statements

How did we approach the above questions? You almost certainly used a for loop, right? This is perfectly fine, but there is another way. apply statements allow you to take a function and run it over all elements of a vector, columns/rows of a matrix, or a list.

apply statements typically have a prefix which gives information about what type of data it works well with. For instance, working with lists, we will use lapply. The base apply function is to work with matrices, where we want to apply a function to every row or column of a matrix (e.g., apply(matrix, 2, sum) is the same as colSums(matrix)). We will go over more examples of apply statements at some point, but for now we will focus on lapply and our problem of working with lists.

And here we hit an issue. lapply statements take a function argument, where the function needs to take the list object as an argument and then does something with it. So we’ll have to learn a bare minimum of function writing now. Let’s use a problem above as a motivating example, where we try to calculate the mean number of infected individuals for each simulation.

meanInfections <- function(x){
  # x is one simulation: a list that carries the infected counts in `NI`
  # definition 1: mean infecteds = average of the infected vector
  infected <- x$NI
  avgInfected <- mean(infected)
  # definition 2: mean infecteds = mean number of nodes infected
  # return((x$NS[1]+1)-tail(x$NS,1))
  return(avgInfected)
}


meanInfs <- lapply(sims, meanInfections)
str(meanInfs)
## List of 100
##  $ : num 1
##  $ : num 23.6
##  $ : num 0.5
##  $ : num 21.2
##  $ : num 0.5
##  $ : num 20.8
##  $ : num 11.5
##  $ : num 22.9
##  $ : num 19.5
##  $ : num 20.8
##  $ : num 0.5
##  $ : num 20.1
##  $ : num 10.9
##  $ : num 1
##  $ : num 16.2
##  $ : num 22.8
##  $ : num 18.2
##  $ : num 15.2
##  $ : num 15.9
##  $ : num 12.3
##  $ : num 1
##  $ : num 22.5
##  $ : num 8.91
##  $ : num 12.3
##  $ : num 16.2
##  $ : num 22.7
##  $ : num 1
##  $ : num 0.5
##  $ : num 1
##  $ : num 1
##  $ : num 0.5
##  $ : num 24.1
##  $ : num 18.8
##  $ : num 1
##  $ : num 1.17
##  $ : num 23.7
##  $ : num 19
##  $ : num 1
##  $ : num 16.1
##  $ : num 0.5
##  $ : num 17.1
##  $ : num 0.5
##  $ : num 23.2
##  $ : num 15.9
##  $ : num 19.8
##  $ : num 23.4
##  $ : num 15.9
##  $ : num 19.9
##  $ : num 14.5
##  $ : num 17
##  $ : num 19.8
##  $ : num 17.1
##  $ : num 16.5
##  $ : num 25.7
##  $ : num 0.5
##  $ : num 15.5
##  $ : num 17.8
##  $ : num 19.7
##  $ : num 1
##  $ : num 0.5
##  $ : num 12.9
##  $ : num 1
##  $ : num 12.6
##  $ : num 0.5
##  $ : num 21
##  $ : num 28.3
##  $ : num 19.6
##  $ : num 1
##  $ : num 1.5
##  $ : num 14.3
##  $ : num 14
##  $ : num 18.6
##  $ : num 23.5
##  $ : num 0.5
##  $ : num 0.5
##  $ : num 17
##  $ : num 19.2
##  $ : num 22.5
##  $ : num 1
##  $ : num 10.6
##  $ : num 17.9
##  $ : num 0.5
##  $ : num 1
##  $ : num 1
##  $ : num 0.5
##  $ : num 16.3
##  $ : num 19.7
##  $ : num 14.1
##  $ : num 1
##  $ : num 26.2
##  $ : num 0.5
##  $ : num 18.2
##  $ : num 22
##  $ : num 23.1
##  $ : num 15.6
##  $ : num 24
##  $ : num 18.7
##  $ : num 18
##  $ : num 0.5
##   [list output truncated]

lapply is nice in that if we give it a list object, it gives us a list object back. This makes analytical pipelines that deal with lists pretty straightforward, but if the output is a single value, we may want this to be a vector instead of a list.

unlist(meanInfs) 
##   [1]  1.000000 23.614943  0.500000 21.212644  0.500000 20.833333 11.484375
##   [8] 22.897436 19.538961 20.790698  0.500000 20.054217 10.897260  1.000000
##  [15] 16.234940 22.837349 18.157895 15.224138 15.879747 12.291045  1.000000
##  [22] 22.475904  8.909091 12.330986 16.222222 22.737500  1.000000  0.500000
##  [29]  1.000000  1.000000  0.500000 24.139535 18.837349  1.000000  1.166667
##  [36] 23.656627 18.967532  1.000000 16.143836  0.500000 17.100000  0.500000
##  [43] 23.178161 15.949275 19.791139 23.353659 15.922535 19.937500 14.475000
##  [50] 16.969136 19.837838 17.089041 16.524691 25.651163  0.500000 15.536145
##  [57] 17.750000 19.740964  1.000000  0.500000 12.913793  1.000000 12.636986
##  [64]  0.500000 20.994505 28.264706 19.591954  1.000000  1.500000 14.270492
##  [71] 14.033333 18.597561 23.547059  0.500000  0.500000 16.962500 19.212644
##  [78] 22.487342  1.000000 10.597222 17.858025  0.500000  1.000000  1.000000
##  [85]  0.500000 16.305195 19.746753 14.057377  1.000000 26.241176  0.500000
##  [92] 18.200000 21.969136 23.133333 15.550000 24.012500 18.731884 18.013158
##  [99]  0.500000 24.861446
#or
meanInfs2 <- sapply(sims, meanInfections)

sapply statements are essentially just lapply statements that simplify the result to a vector. This is useful when the output of the function is a single value, and not so useful when the function returns multiple values.

A side note: some people will criticize for loops in R, and say “just use apply, it’s faster”. It’s not, really. Write however you feel comfortable. For a while, apply statements were super confusing to me, so I tended to use for loops instead. After more practice, I shifted and now tend to use apply statements when they fit, as they are less code and are more intuitive to me for many situations.

Let’s practice a bit.

Calculate the maximum number of infected individuals at any time in the sims list using the apply approach.

What is the mean duration (the total time the epidemic took before it stopped) across all the epidemics in sims?

plyr apply functionality tweaks

XYply statements are nice wrappers to more classic apply statements. Here, X and Y can take values of ‘a’, ‘l’, or ‘d’, depending on the input or output data structure desired. For instance, if we have a list that we would like to apply over and return a data.frame, we would use ldply, where the l is claiming that the input is a list object, and the d is claiming that the output should be formatted as a data.frame. Other examples of this syntax would be adply, ddply, laply, aaply, etc. etc.

Below, I provide an example of the aXply syntax (e.g., adply, alply, aaply).

# Build the 3 x 3 x 3 array of 1..27 and label all three dimensions in the
# same call: rows first, then columns, then the third dimension.
arr <- array(
  1:27,
  dim = c(3, 3, 3),
  dimnames = list(
    c("Curly", "Larry", "Moe"),
    c("Groucho", "Harpo", "Zeppo"),
    c("Bart", "Lisa", "Maggie")
  )
)

arr
## , , Bart
## 
##       Groucho Harpo Zeppo
## Curly       1     4     7
## Larry       2     5     8
## Moe         3     6     9
## 
## , , Lisa
## 
##       Groucho Harpo Zeppo
## Curly      10    13    16
## Larry      11    14    17
## Moe        12    15    18
## 
## , , Maggie
## 
##       Groucho Harpo Zeppo
## Curly      19    22    25
## Larry      20    23    26
## Moe        21    24    27

Arrays are something that we did not introduce when we talked about R basics, and that is because they really are not used too often. Think of a matrix. It has two dimensions (x and y), so it can be viewed as a rectangle of data. Arrays simply add more dimensions. In the example above, there is another dimension, forming a data cube (in the rectangle analogy).

We can use plyr functionality to operate on this array and return different forms. For instance, aaply takes an array and returns a simplified array (here a vector).

plyr::aaply(arr, 1, sum) 
## Curly Larry   Moe 
##   117   126   135

We can change one letter and now return a data.frame containing two columns. This is also a good time to point out the flexibility of the XYply statements to different margins. The margins (given by the .margins argument in R) specify along which axis you would like to operate on the array. If we set .margins=1, this corresponds to a row-wise operation, so we calculate the sum across the array for Curly, Larry, and Moe. If we change this to .margins=2, we operate on columns, and will return sums for Groucho, Harpo, and Zeppo. And if we use .margins=3, we will return sums for Bart, Lisa, and Maggie.

plyr::adply(.data=arr, .margins=1, .fun=sum) 
##      X1  V1
## 1 Curly 117
## 2 Larry 126
## 3   Moe 135
plyr::adply(.data=arr, .margins=2, .fun=sum) 
##        X1  V1
## 1 Groucho  99
## 2   Harpo 126
## 3   Zeppo 153
plyr::adply(.data=arr, .margins=3, .fun=sum) 
##       X1  V1
## 1   Bart  45
## 2   Lisa 126
## 3 Maggie 207

Finally, we can return a list object. In this use case, this is not super helpful, but in other use cases the list output is pretty helpful.

plyr::alply(.data=arr, .margins=1, .fun=sum) 
## $`1`
## [1] 117
## 
## $`2`
## [1] 126
## 
## $`3`
## [1] 135
## 
## attr(,"split_type")
## [1] "array"
## attr(,"split_labels")
##      X1
## 1 Curly
## 2 Larry
## 3   Moe

A pitch for plyr::ldply. I really like this function, as I often find myself with lists of similar structures that I want to operate on and get a single clean object back. I will not go into an example, but this is a pretty useful function (though all the utility is basically contained in vapply).

Finally, you may wonder why I am pushing apply statements so hard. It has nothing to do with speed, and only a bit to do with code clarity. The main advantage is understanding the programmatic nature of apply statements (which will be similar but less chronological than a for loop), and many parallel computing packages have their own little versions of apply statements ready to go (e.g., parallel::mclapply, parallel::parLapply, parallel::clusterApplyLB).

Let’s do one practice problem to showcase the utility of ldply specifically.

Calculate the correlation between number of infections and time for each simulation, reporting the estimate, p-value, and confidence intervals around the estimate. (you will use cor.test to do this, whose output is a list object as well)

A note about do.call and Reduce

Speaking of data manipulation functions that are useful but a bit conceptually difficult, do.call and Reduce are solid base R functions. While a bit opaque, these functions are pretty useful in a variety of situations.

do.call calls a function a single time, passing the elements of a list as its arguments, and may have similar output to Reduce, which instead applies a two-argument function successively, combining the list elements one at a time.

lst <- list(1:10, 1:10, 1:10, 1:10, 1:10)
lst
## [[1]]
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## [[2]]
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## [[3]]
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## [[4]]
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## [[5]]
##  [1]  1  2  3  4  5  6  7  8  9 10
#this makes a single rbind call with each element of the list as an argument
str(do.call(rbind, lst))
##  int [1:5, 1:10] 1 1 1 1 1 2 2 2 2 2 ...
#this does it iteratively (so makes n-1 rbind calls)
Reduce(rbind, lst)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## init    1    2    3    4    5    6    7    8    9    10
##         1    2    3    4    5    6    7    8    9    10
##         1    2    3    4    5    6    7    8    9    10
##         1    2    3    4    5    6    7    8    9    10
##         1    2    3    4    5    6    7    8    9    10

sessionInfo

sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] geodata_0.5-8  terra_1.7-29   maps_3.4.1     gbm_2.1.8.1    igraph_1.4.3  
##  [6] dplyr_1.1.2    plyr_1.8.8     DBI_1.1.3      rgbif_3.7.7    jsonlite_1.8.5
## [11] httr_1.4.6     rmarkdown_2.11 fastmap_1.1.1 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.3      xfun_0.29         ggplot2_3.4.2     lattice_0.21-8   
##  [5] vctrs_0.6.2       tools_4.3.0       generics_0.1.2    parallel_4.3.0   
##  [9] curl_4.3.3        tibble_3.2.1      fansi_1.0.2       RSQLite_2.3.1    
## [13] highr_0.9         blob_1.2.4        pkgconfig_2.0.3   Matrix_1.5-1     
## [17] data.table_1.14.6 dbplyr_2.3.2      lifecycle_1.0.3   compiler_4.3.0   
## [21] stringr_1.5.0     munsell_0.5.0     codetools_0.2-19  htmltools_0.5.2  
## [25] yaml_2.3.6        lazyeval_0.2.2    pillar_1.9.0      jquerylib_0.1.4  
## [29] whisker_0.4.1     cachem_1.0.8      viridis_0.6.3     tidyselect_1.2.0 
## [33] digest_0.6.31     stringi_1.7.12    purrr_1.0.1       splines_4.3.0    
## [37] grid_4.3.0        colorspace_2.1-0  cli_3.6.1         magrittr_2.0.2   
## [41] triebeard_0.4.1   survival_3.5-3    crul_1.4.0        utf8_1.2.2       
## [45] withr_2.5.0       scales_1.2.1      bit64_4.0.5       oai_0.4.0        
## [49] bit_4.0.5         gridExtra_2.3     memoise_2.0.1     evaluate_0.15    
## [53] knitr_1.37        viridisLite_0.4.2 rlang_1.1.1       urltools_1.7.3   
## [57] Rcpp_1.0.10       glue_1.6.2        httpcode_0.3.0    xml2_1.3.4       
## [61] R6_2.5.1