data_list() constructs a data list object, which inherits from the classes
data_list and list. This object is the primary way in which features to be
used throughout the metasnf clustering pipeline are stored. The data list is
fundamentally a 2-level nested list object in which each inner list contains
a data frame and the associated metadata for that data frame. The metadata
includes the name of the data frame, the 'domain' of that data frame (the
broader source of information that the input data frame is capturing,
determined by the user's domain knowledge), and the type of feature stored in
the data frame (continuous, discrete, ordinal, categorical, or mixed).
Examples
# Toy inputs: four small data frames describing the same three patients.
# Every data frame carries the shared "patient_id" column (stored as
# character) alongside its own feature columns.

# Two numeric features, var1 and var2.
heart_rate_df <- data.frame(
    patient_id = as.character(1:3),
    var1 = c(0.04, 0.1, 0.3),
    var2 = c(30, 2, 0.3)
)

# Two numeric features, var3 and var4.
personality_test_df <- data.frame(
    patient_id = as.character(1:3),
    var3 = c(900, 1990, 373),
    var4 = c(509, 2209, 83)
)

# Two numeric features, var5 and var6, holding small whole-number values.
survey_response_df <- data.frame(
    patient_id = as.character(1:3),
    var5 = c(1, 3, 3),
    var6 = c(2, 3, 3)
)

# A single character feature, var7.
city_df <- data.frame(
    patient_id = as.character(1:3),
    var7 = c("toronto", "montreal", "vancouver")
)
# Explicit loading: name every element of each nested list. Each inner list
# supplies a data frame (`data`) together with its metadata: a `name`, a
# `domain`, and the feature `type`.
dl <- data_list(
    list(
        data = heart_rate_df,
        name = "heart_rate",
        domain = "clinical",
        type = "continuous"
    ),
    # The next two data frames are both assigned to the "surveys" domain.
    list(
        data = personality_test_df,
        name = "personality_test",
        domain = "surveys",
        type = "continuous"
    ),
    list(
        data = survey_response_df,
        name = "survey_response",
        domain = "surveys",
        type = "ordinal"
    ),
    list(
        data = city_df,
        name = "city",
        domain = "location",
        type = "categorical"
    ),
    # "patient_id" is the one column present in every data frame passed in;
    # presumably `uid` names the column of unique observation identifiers --
    # confirm against the `uid` argument documentation.
    uid = "patient_id"
)
# Compact loading: the inner lists are left unnamed, with elements given in
# the order data frame, name, domain, type. `uid` is still passed by name.
# This reassigns `dl`.
dl <- data_list(
    list(heart_rate_df, "heart_rate", "clinical", "continuous"),
    list(personality_test_df, "personality_test", "surveys", "continuous"),
    list(survey_response_df, "survey_response", "surveys", "ordinal"),
    list(city_df, "city", "location", "categorical"),
    uid = "patient_id"
)
# Print a summary of the data list: one row per data frame, reporting its
# name, type, and domain. In the output below, `length` equals the number of
# patients (3) and `width` equals the number of feature columns in each data
# frame, not counting "patient_id".
summary(dl)
#>               name        type   domain length width
#> 1       heart_rate  continuous clinical      3     2
#> 2 personality_test  continuous  surveys      3     2
#> 3  survey_response     ordinal  surveys      3     2
#> 4             city categorical location      3     1
# Alternative loading: build a single list of lists first, then pass it to
# data_list() as one argument instead of supplying each inner list
# separately. Each inner list here is unnamed and ordered as data frame,
# name, domain, type.
list_of_lists <- list(
    list(heart_rate_df, "data1", "domain1", "continuous"),
    list(personality_test_df, "data2", "domain2", "continuous")
)
# `uid` is passed as its own named argument alongside the list of lists.
# This reassigns `dl`.
dl <- data_list(
    list_of_lists,
    uid = "patient_id"
)