Title: | Exploring Social Network Structures Through Friendship-Driven Community Detection with Association Rules Mining |
---|---|
Description: | Implements an innovative approach to community detection in social networks using Association Rules Learning. The package provides tools for processing graph and rules objects, generating association rules, and detecting communities based on node interactions. Designed to facilitate advanced research in Social Network Analysis, this package leverages association rules learning for enhanced community detection. This approach is described in El-Moussaoui et al. (2021) <doi:10.1007/978-3-030-66840-2_3>. |
Authors: | Mohamed El-Moussaoui [aut, cre], Mohamed Hanine [aut], Ali Kartit [ths], Tarik Agouti [rev] |
Maintainer: | Mohamed El-Moussaoui <[email protected]> |
License: | GPL-3 |
Version: | 1.0.5 |
Built: | 2024-10-23 06:01:53 UTC |
Source: | https://github.com/assuom44/arlclustering |
This function calculates the mode of a vector.
arlc_calculate_mode(x)
arlc_calculate_mode(x)
x |
A vector. |
The mode of the vector.
arlc_calculate_mode(c(1, 2, 2, 3, 4))
arlc_calculate_mode(c(1, 2, 2, 3, 4))
This function cleans the final set of association rules.
arlc_clean_final_rules(final_rules)
arlc_clean_final_rules(final_rules)
final_rules |
A set of final rules to be cleaned. |
A cleaned set of rules.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) message(cleaned_rules)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) message(cleaned_rules)
This function plots a graph with specified aesthetics and highlights communities.
arlc_clusters_plot(g, graphLabel, clusters)
arlc_clusters_plot(g, graphLabel, clusters)
g |
An igraph object representing the graph. |
graphLabel |
A character string for the graph label to be displayed in the title. |
clusters |
A list of clusters to highlight in the plot. |
The function produces a plot as a side effect.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) clusters <- arlc_generate_clusters(cleaned_rules) arlc_clusters_plot(g$graph, "Karate Club", clusters$Clusters)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) clusters <- arlc_generate_clusters(cleaned_rules) arlc_clusters_plot(g$graph, "Karate Club", clusters$Clusters)
This function converts a date from one format to another.
arlc_convert_date_format(date_str, from_format, to_format)
arlc_convert_date_format(date_str, from_format, to_format)
date_str |
A date string. |
from_format |
The current format of the date string. |
to_format |
The desired format of the date string. |
The date string in the new format.
arlc_convert_date_format("2023-01-01", "%Y-%m-%d", "%d-%m-%Y")
arlc_convert_date_format("2023-01-01", "%Y-%m-%d", "%d-%m-%Y")
This function counts the number of NA values in each column of a data frame.
arlc_count_na(df)
arlc_count_na(df)
df |
A data frame. |
A named vector with the count of NA values in each column.
arlc_count_na(data.frame(a = c(1, NA, 3), b = c(NA, NA, 3)))
arlc_count_na(data.frame(a = c(1, NA, 3), b = c(NA, NA, 3)))
This function creates a summary of a data frame including count, mean, median, and standard deviation for numeric columns.
arlc_df_summary(df)
arlc_df_summary(df)
df |
A data frame. |
A data frame summarizing the statistics of the input data frame.
arlc_df_summary(data.frame(a = c(1, 2, 3, 4, 5), b = c(5, 4, 3, 2, 1)))
arlc_df_summary(data.frame(a = c(1, 2, 3, 4, 5), b = c(5, 4, 3, 2, 1)))
This function processes a list of sets and removes those that are fully overlapped by other sets.
arlc_fct_clean_transactions(all_sets)
arlc_fct_clean_transactions(all_sets)
all_sets |
A list of sets where each set is a vector of elements. |
The function iterates through each set and checks if it is fully overlapped by any other set. If a set is fully overlapped, it is excluded from the final list of sets. The result is a list of sets with no fully overlapped sets.
A list of sets with fully overlapped sets removed.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) vec <- lapply(cleaned_rules, function(v) unique(unlist(v))) vec2 <- split(vec, sapply(vec, `[`, 1)) sorted_result <- lapply(vec2, function(v) sort(unique(unlist(v)))) clusters <- arlc_fct_clean_transactions(sorted_result) message (clusters)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) vec <- lapply(cleaned_rules, function(v) unique(unlist(v))) vec2 <- split(vec, sapply(vec, `[`, 1)) sorted_result <- lapply(vec2, function(v) sort(unique(unlist(v)))) clusters <- arlc_fct_clean_transactions(sorted_result) message (clusters)
This function finds the best support and confidence thresholds for the Apriori algorithm to maximize the lift of the generated association rules.
arlc_fct_get_best_apriori_thresholds(transactions, support_range, conf)
arlc_fct_get_best_apriori_thresholds(transactions, support_range, conf)
transactions |
A transaction dataset of class `transactions` (as used by the arules package). |
support_range |
A numeric vector specifying the range of support values to be tested. |
conf |
A numeric value (0.5 or 1.0) specifying the confidence value. |
This function iterates through the given ranges of support and confidence values, applies the Apriori algorithm to find association rules for each pair of values, and selects the pair that produces rules with the highest lift.
A numeric vector containing the best support, best confidence, highest lift, and the number of rules found.
The return value is a named vector with elements `best_support`, `best_confidence`, `best_lift`, and `len_rules`.
library(arlclustering) sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 best_thresholds <- arlc_fct_get_best_apriori_thresholds(trans, supportRange, Conf)
library(arlclustering) sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 best_thresholds <- arlc_fct_get_best_apriori_thresholds(trans, supportRange, Conf)
This function checks if a file exists and is readable.
arlc_file_exists_readable(filepath)
arlc_file_exists_readable(filepath)
filepath |
The path to the file. |
TRUE if the file exists and is readable, FALSE otherwise.
arlc_file_exists_readable("example.txt")
arlc_file_exists_readable("example.txt")
This function generates gross association rules from transactions.
arlc_gen_gross_rules(transactions, minSupp, minConf, minLenRules, maxLenRules)
arlc_gen_gross_rules(transactions, minSupp, minConf, minLenRules, maxLenRules)
transactions |
A transactions object. |
minSupp |
Minimum support threshold. |
minConf |
Minimum confidence threshold. |
minLenRules |
Minimum length of rules. |
maxLenRules |
Maximum length of rules. |
A set of gross association rules.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules)
This function generates a transactional dataset from a graph.
arlc_gen_transactions(graph)
arlc_gen_transactions(graph)
graph |
A graph object. |
A transactional dataset.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph)
This function takes a vector of preprocessed rules, combines elements starting with the same value, groups segments by the starting value, sorts elements within each segment, and returns potential clusters.
arlc_generate_clusters(vec)
arlc_generate_clusters(vec)
vec |
A vector of preprocessed rules. |
This function generates potential clusters based on preprocessed rules.
A list of unique and potential clusters.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) clusters <- arlc_generate_clusters(cleaned_rules)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules) cleaned_rules <- arlc_clean_final_rules(NonRRSig_rules$FiltredRules) clusters <- arlc_generate_clusters(cleaned_rules)
This function generates a sequence of dates between two given dates.
arlc_generate_date_sequence(start_date, end_date, by = "day")
arlc_generate_date_sequence(start_date, end_date, by = "day")
start_date |
The start date. |
end_date |
The end date. |
by |
The step size for the sequence (e.g., "day", "week", "month"). |
A vector of dates.
arlc_generate_date_sequence("2023-01-01", "2023-01-10", "day")
arlc_generate_date_sequence("2023-01-01", "2023-01-10", "day")
This function generates a unique identifier string.
arlc_generate_uid(length = 10)
arlc_generate_uid(length = 10)
length |
The length of the unique identifier. Default is 10. |
A unique identifier string.
arlc_generate_uid() arlc_generate_uid(15)
arlc_generate_uid() arlc_generate_uid(15)
This function takes a transaction dataset and ranges for support and confidence, computes the best thresholds, and returns the best minimum support, minimum confidence, best lift, total number of gross rules, and ratio of generated rules to total number of transactions.
arlc_get_apriori_thresholds(trx, supportRange, Conf)
arlc_get_apriori_thresholds(trx, supportRange, Conf)
trx |
A transaction dataset of class `transactions` (as used by the arules package). |
supportRange |
A sequence of values representing the range for minimum support. |
Conf |
A sequence of values representing the range for minimum confidence. |
This function generates gross rules based on the best obtained thresholds.
This function iterates through the given ranges of support and confidence values, applies the Apriori algorithm to find association rules for each pair of values, and selects the pair that produces rules with the highest lift. The function then returns the best thresholds along with the lift, number of rules, and their ratio to the total transactions.
A list containing:
minSupp |
The best minimum support value. |
minConf |
The best minimum confidence value. |
bestLift |
The highest lift value obtained. |
lenRules |
The total number of gross rules generated. |
ratio |
The ratio of generated rules to the total number of transactions. |
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) message(params$minSupp) message(params$minConf) message(params$bestLift) message(params$lenRules) message(params$ratio)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) message(params$minSupp) message(params$minConf) message(params$bestLift) message(params$lenRules) message(params$ratio)
This function reads a network dataset from a GML file, assigns node names, and calculates various properties of the graph such as total edges, total nodes, and average degree.
arlc_get_network_dataset(file_path, label)
arlc_get_network_dataset(file_path, label)
file_path |
The file path to the GML file to be loaded. |
label |
A label for the graph. |
This function loads a network dataset from a specified GML file and computes basic graph properties.
A list containing the graph object and its properties: total edges, total nodes, and average degree.
# Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") loaded_karate <- arlc_get_network_dataset(sample_gml_file, "Karate Club") message(loaded_karate$graph) message(loaded_karate$graphLabel) message(loaded_karate$totalEdges) message(loaded_karate$graphEdges) message(loaded_karate$totalNodes) message(loaded_karate$graphNodes) message(loaded_karate$averageDegree)
# Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") loaded_karate <- arlc_get_network_dataset(sample_gml_file, "Karate Club") message(loaded_karate$graph) message(loaded_karate$graphLabel) message(loaded_karate$totalEdges) message(loaded_karate$graphEdges) message(loaded_karate$totalNodes) message(loaded_karate$graphNodes) message(loaded_karate$averageDegree)
This function takes a set of gross rules, removes redundant rules, and returns the total number of non-redundant rules along with the non-redundant rules.
arlc_get_NonR_rules(gross_rules)
arlc_get_NonR_rules(gross_rules)
gross_rules |
A vector or dataframe of gross rules. |
This function cleans the gross rules and provides non-redundant rules.
A list containing the total number of non-redundant rules and the non-redundant rules.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules)
This function takes all transactions and a set of non-redundant rules as input, and returns significant rules based on a specified method and adjustment.
arlc_get_significant_rules(all_trans, nonRR_rules)
arlc_get_significant_rules(all_trans, nonRR_rules)
all_trans |
A dataframe containing all transactions. |
nonRR_rules |
A list of non-redundant rules. |
This function filters significant rules from a set of non-redundant rules.
A list containing the total number of significant non-redundant rules and the significant rules themselves.
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules)
library(arlclustering) # Create a sample transactions dataset sample_gml_file <- system.file("extdata", "karate.gml", package = "arlclustering") g <- arlc_get_network_dataset(sample_gml_file, "Karate Club") trans <- arlc_gen_transactions(g$graph) supportRange <- seq(0.1, 0.2, by = 0.1) Conf <- 0.5 params <- arlc_get_apriori_thresholds(trans, supportRange, Conf) grossRules <- arlc_gen_gross_rules(trans, params$minSupp, params$minConf, 1, params$lenRules) nonRR_rules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRRSig_rules <- arlc_get_significant_rules(trans, nonRR_rules$FiltredRules)
This function checks if all elements of a vector are numeric.
arlc_is_numeric_vector(x)
arlc_is_numeric_vector(x)
x |
A vector. |
TRUE if all elements are numeric, FALSE otherwise.
arlc_is_numeric_vector(c(1, 2, 3)) arlc_is_numeric_vector(c(1, "a", 3))
arlc_is_numeric_vector(c(1, 2, 3)) arlc_is_numeric_vector(c(1, "a", 3))
This function converts a list of named vectors to a data frame.
arlc_list_to_df(lst)
arlc_list_to_df(lst)
lst |
A list of named vectors. |
A data frame with each element of the list as a row.
lst <- list(a = c(x = 1, y = 2), b = c(x = 3, y = 4)) arlc_list_to_df(lst)
lst <- list(a = c(x = 1, y = 2), b = c(x = 3, y = 4)) arlc_list_to_df(lst)
This function measures the execution time of a given function.
arlc_measure_time(func, ...)
arlc_measure_time(func, ...)
func |
The function to measure. |
... |
Additional arguments to pass to the function. |
The result of the function execution.
arlc_measure_time(Sys.sleep, 1)
arlc_measure_time(Sys.sleep, 1)
This function normalizes a numeric vector to have values between 0 and 1.
arlc_normalize_vector(x)
arlc_normalize_vector(x)
x |
A numeric vector. |
A normalized numeric vector.
arlc_normalize_vector(c(1, 2, 3, 4, 5))
arlc_normalize_vector(c(1, 2, 3, 4, 5))
This function replaces NA values in a vector or data frame with a specified value.
arlc_replace_na(x, value)
arlc_replace_na(x, value)
x |
A vector or data frame. |
value |
The value to replace NA with. |
The vector or data frame with NA values replaced.
arlc_replace_na(c(1, NA, 3), 0) arlc_replace_na(data.frame(a = c(1, NA, 3), b = c(NA, NA, 3)), 0)
arlc_replace_na(c(1, NA, 3), 0) arlc_replace_na(data.frame(a = c(1, NA, 3), b = c(NA, NA, 3)), 0)