Commit 084ae8bd authored by Anthony Coutant's avatar Anthony Coutant
Browse files

ELSA init commit

parents
^.*\.Rproj$
^\.Rproj\.user$
.Rproj.user
.Rhistory
.RData
tmp/*
Package: bnmixture
Type: Package
Title: Dynamic Bayesian network ensemble learning of spanning arborescences
Version: 0.1
Date: 2016-09-22
Authors@R: c(person("Anthony", "Coutant", email = "coutant@lipn.univ-paris13.fr", role = c("cre", "aut")))
Description: Learn dynamic Bayesian networks from ensemble learning of very simple component models, such as Edmonds spanning arborescences.
License:
LazyData: TRUE
Imports: entropy,
bnlearn,
R.cache,
igraph (>= 1.0.0),
parallel,
plyr,
data.table,
Rcpp
Suggests: testthat
LinkingTo: Rcpp
SystemRequirements: C++11
RoxygenNote: 6.0.1
Collate:
'RcppExports.R'
'_package.R'
'arbre.R'
'edgelists.R'
'edges.R'
'edgespool.R'
'learners.R'
'utils.R'
# Generated by roxygen2: do not edit by hand
S3method("+",Edgelist)
S3method("+",EdgesPool)
S3method("[",EdgesPool)
S3method("[[",EdgesPool)
export(ComponentsAggregator.concat)
export(ComponentsAggregator.merge)
export(EdgeWeighting.bdeu)
export(EdgeWeighting.bdeu.generateWithPrior)
export(EdgeWeighting.bge)
export(EdgeWeighting.bge.generateWithPrior)
export(EdgeWeighting.condentropy)
export(EdgeWeighting.mi)
export(EdgeWeightingPrior.fromWhiteList)
export(Edgelist.all)
export(Edgelist.bipartite.all)
export(Edgelist.buildWeightedGraph)
export(Edgelist.dynrev)
export(Edgelist.nodynloop)
export(Edgelist.noloop)
export(Edgelist.reverse)
export(Edgelist.sample)
export(Edgelist.sample.ids)
export(Edgelist.undyn)
export(Learning.arborescence.fromWeightedGraph)
export(Pruning.components.number)
export(Pruning.components.ratio)
export(Pruning.components.unsignificant)
export(combineComponentsInPool)
export(dyn)
export(getAdmissibleEdgeSpace)
export(getDataPerturbationPool)
export(getWeightedGraphsPool)
export(getWeightedGraphsPoolSubset)
export(getdyn)
export(learnComponentsPool)
export(loadPool)
export(logThresholdEdgePred)
export(pairComponentPools)
export(savePool)
export(undyn)
exportClasses(Edgelist)
exportClasses(EdgesPool)
exportMethods(getEdgelist)
exportMethods(getEdges)
exportMethods(getFromIds)
exportMethods(getIds)
exportMethods(getIdsFT)
exportMethods(getToIds)
useDynLib(bnmixture)
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
fastEdmonds <- function(froms, tos, weights, addDummyRoot) {
.Call('_bnmixture_fastEdmonds', PACKAGE = 'bnmixture', froms, tos, weights, addDummyRoot)
}
fastBipartiteEdgeIds <- function(nodesFrom, nodesTo) {
.Call('_bnmixture_fastBipartiteEdgeIds', PACKAGE = 'bnmixture', nodesFrom, nodesTo)
}
fastIndicesTable <- function(numWithDuplicates) {
.Call('_bnmixture_fastIndicesTable', PACKAGE = 'bnmixture', numWithDuplicates)
}
#' bnmixture
#'
#' Learn dynamic Bayesian networks from ensemble learning of very simple component models, such as Edmonds spanning arborescences.
#'
#' @docType bnmixture
#' @author Anthony Coutant
#' @useDynLib bnmixture
#' @name bnmixture
NULL
\ No newline at end of file
This diff is collapsed.
library(methods)
#'an S4 compact attributed edgelist representation
#'
#'This class allows the representation of compact edgelists, with nodes names sequence and edge indices using the node names sequence order
#'
#'@slot nodes the character vector of node names
#'@slot edgeIds the numeric vector of edge indices. Indices maps to endpoints using the \code{nodes} order
#'@slot attributes the edge attributes list containing a key for each attribute. Each key is associated to an attribute vector of \code{edgeIds} length and ith attribute values are considered to be associated to the ith edge in \code{edgeIds}
#'@name Edgelist class
#'@rdname Edgelist-class
#'@export
#'
Edgelist <- setClass(
"Edgelist",
slots = c(
nodes = "character",
edgeIds = "numeric",
attributes = "list"
),
validity = function(object){
for(att in (object)@attributes){
if(length(att) != length((object)@edgeIds)){
return("Some edge attribute is not set to every edge.")
}
}
return(TRUE)
}
)
## Edgelist dimensions
##
## Retrieves the edgelist dimensions. Dimension 1 corresponds to the number of edges. Dimension 2 is 2.
##
## @param x an Edgelist object
##
## @rdname Edgelist-methods
## @include edgelists.R
## @seealso \code{\link{Edgelist-class}}
##
## @examples
## #Build an Edgelist object from a 2 nodes x 2 timesteps set of node names
## edgelist = Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = c(2, 3))
##
## #Retrieve edgelist dimension = c(2, 2)
## dim(edgelist)
##
## @export
setMethod(
f="dim",
signature="Edgelist",
definition=function(x){
return(c(length(x@edgeIds), 2))
}
)
setGeneric(
name="getEdges",
def=function(edgelist, ids){
standardGeneric("getEdges")
}
)
#' Edgelist class methods
#'
#' \code{getEdges} retrieves the human readable edgelist from a compact Edgelist representation.
#'
#' @param edgelist an Edgelist object
#' @param ids the integer vector of edge ids, in the order implicitly defined in \code{edgelist}, to translate into human readable form
#' @return \code{getEdges(edgelist, ids)} returns a two columns data.frame object respectively representing from and to endpoints node names of edges at \code{ids} indices.
#'
#' @rdname Edgelist-methods
#' @include edgelists.R
#' @seealso \code{\link{Edgelist-class}}
#'
#' @examples
#' #Build an Edgelist object from a 2 nodes x 2 timesteps set of node names
#' edgelist = Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = c(2, 3))
#'
#' #Retrieve human readable data.frame of edge id 1, in the edges order defined by edgelist (can be outside edgelist@edgeIds!):
#' # V1 V2
#' # 1 A.t. A.t.1.
#' # 2 B.t. B.t.1.
#' hrEdges = getEdges(edgelist, c(1, 4))
#'
#' @export
setMethod(
f="getEdges",
signature=c("Edgelist", "numeric"),
definition=function(edgelist, ids){
fromIds = floor((ids - 1) / length(edgelist@nodes)) + 1
toIds = ((ids - 1) %% length(edgelist@nodes)) + 1
return(data.frame(V1 = edgelist@nodes[fromIds], V2 = edgelist@nodes[toIds], stringsAsFactors = FALSE))
}
)
#'
#' @return \code{getEdges(edgelist)} returns a two columns data.frame object respectively representing from and to endpoints node names of edges at \code{edgelist@edgeIds} indices.
#'
#' @rdname Edgelist-methods
#' @include edgelists.R
#'
#' @examples
#'
#' #Retrieve human readable data.frame:
#' # V1 V2
#' # 1 A.t. B.t.
#' # 2 A.t. A.t.1.
#' hrEdges = getEdges(edgelist)
#'
#' @export
setMethod(
f="getEdges",
signature=c("Edgelist"),
definition=function(edgelist){
fromIds = floor((edgelist@edgeIds - 1) / length(edgelist@nodes)) + 1
toIds = ((edgelist@edgeIds - 1) %% length(edgelist@nodes)) + 1
return(data.frame(V1 = edgelist@nodes[fromIds], V2 = edgelist@nodes[toIds], stringsAsFactors = FALSE))
}
)
setGeneric(
name="getIds",
def=function(edgelist, edges){
standardGeneric("getIds")
}
)
#' Edges indexing from human readable edgelist
#'
#' \code{getIds} retrieves the edge indices of all rows of a human readable edgelist
#'
#' @param edges a 2 columns data.frame object, respectively containing the "from" and "to" node names of each edge. The union of from and to node names set must be included in \code{edgelist@nodes} for the method to run.
#' @return \code{getIds} returns an integer vector containing all \code{edges} corresponding edge ids, as defined by \code{edgelist} internal order
#'
#' @rdname Edgelist-methods
#' @include edgelists.R
#'
#' @examples
#' #Build a human readable edgelist
#' edges = data.frame(c("A.t.", "B.t."), c("B.t.1.", "A.t.1."), stringsAsFactors = FALSE)
#'
#' #Retrieve edge ids, considering lexicographic order grouped by time steps
#' #Result is: c(2, 3)
#' edgeIds = getIds(Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = 1:4), edges)
#'
#' @export
setMethod(
f="getIds",
signature=c("Edgelist", "data.frame"),
definition=function(edgelist, edges){
fromIds = sapply(edges[,1], function(e){which(edgelist@nodes == e)})
toIds = sapply(edges[,2], function(e){which(edgelist@nodes == e)})
res = (fromIds - 1) * length(edgelist@nodes) + toIds
names(res) = NULL
return(res)
}
)
setGeneric(
name="getIdsFT",
def=function(edgelist, froms, tos){
standardGeneric("getIdsFT")
}
)
#' Edges indices using both from and to endpoint nodes indices
#'
#' \code{getIdsFT} retrieves the edge indices of edgelist defined by two "from" and "to" node indice vectors
#'
#' @param froms an integer vector containing each edge to encode "from" node id. Each node id must be in \code{{1, ..., length(edgelist@nodes)}} to be valid
#' @param tos an integer vector containing each edge to encode "to" node id. Each node id must be in \code{{1, ..., length(edgelist@nodes)}} to be valid
#' @return \code{getIdsFT} returns an integer vector containing all \code{edges} corresponding edge ids, as defined by \code{edgelist} internal order
#'
#' @rdname Edgelist-methods
#' @include edgelists.R
#'
#' @examples
#'
#' #Retrieve edge ids, considering lexicographic order grouped by time steps
#' #Result is: c(2, 3)
#' edgeIds = getIdsFT(Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = 1:4), c(1, 2), c(4, 3))
#'
#' @export
setMethod(
f="getIdsFT",
signature=c("Edgelist", "integer", "integer"),
definition=function(edgelist, froms, tos){
res = (froms - 1) * length(edgelist@nodes) + tos
names(res) = NULL
return(res)
}
)
setGeneric(
name="getFromIds",
def=function(edgelist, ids){
standardGeneric("getFromIds")
}
)
#' "From" nodes indices from edge ids
#'
#' \code{getFromIds} retrieves the "from" endpoint node indices from a compact Edgelist representation.
#'
#' @return \code{getFromIds} returns an integer vector containing all "from" node ids, as defined by \code{edgelist} internal order
#'
#' @rdname Edgelist-methods
#' @include edgelists.R
#'
#' @examples
#'
#' #Retrieve node ids, considering lexicographic order grouped by time steps
#' #Result is: c(1, 2)
#' edgeIds = getFromIds(Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = 1:4), c(2, 3))
#'
#' @export
setMethod(
f="getFromIds",
signature=c("Edgelist", "numeric"),
definition=function(edgelist, ids){
fromIds = floor((ids - 1) / length(edgelist@nodes)) + 1
names(fromIds) = NULL
return(fromIds)
}
)
#' @rdname Edgelist-methods
#' @include edgelists.R
#'
#' @examples
#'
#' #Retrieve from node ids, considering lexicographic order grouped by time steps
#' #Result is: c(1, 2)
#' edgeIds = getFromIds(Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = c(2, 3)))
#'
#' @export
setMethod(
f="getFromIds",
signature=c("Edgelist"),
definition=function(edgelist){
fromIds = floor((edgelist@edgeIds - 1) / length(edgelist@nodes)) + 1
names(fromIds) = NULL
return(fromIds)
}
)
setGeneric(
name="getToIds",
def=function(edgelist, ids){
standardGeneric("getToIds")
}
)
#' "To" nodes indices from edge ids
#'
#' \code{getToIds} retrieves the "to" endpoint node indices from a compact Edgelist representation.
#'
#' @return \code{getToIds} returns an integer vector containing all "to" node ids, as defined by \code{edgelist} internal order
#'
#' @rdname Edgelist-methods
#' @include edgelists.R
#'
#' @examples
#'
#' #Retrieve node ids, considering lexicographic order grouped by time steps
#' #Result is: c(4, 3)
#' edgeIds = getToIds(Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = 1:4), c(2, 3))
#'
#' @export
setMethod(
f="getToIds",
signature=c("Edgelist", "numeric"),
definition=function(edgelist, ids){
toIds = ((ids - 1) %% length(edgelist@nodes)) + 1
names(toIds) = NULL
return(toIds)
}
)
#' @rdname Edgelist-methods
#' @include edgelists.R
#'
#' @examples
#'
#' #Retrieve from node ids, considering lexicographic order grouped by time steps
#' #Result is: c(4, 3)
#' edgeIds = getToIds(Edgelist(nodes = c("A.t.", "B.t.", "A.t.1.", "B.t.1."), edgeIds = c(2, 3)))
#'
#' @export
setMethod(
f="getToIds",
signature=c("Edgelist"),
definition=function(edgelist){
toIds = ((edgelist@edgeIds - 1) %% length(edgelist@nodes)) + 1
names(toIds) = NULL
return(toIds)
}
)
\ No newline at end of file
#' Complete graph Edgelist construction from nodes set
#'
#' Instantiate a new Edgelist object from a node names set corresponding to the complete graph in this nodes space.
#'
#' @param nodes a character vector containing all node names of the graph Edgelist to instantiate
#'
#' @return the Edgelist object with all possible edge ids within the input nodes space
#'
#' @examples
#' #Equivalent to Edgelist(nodes = c("A", "B", "C"), edgeIds = 1:9)
#' edgelist = Edgelist.all(c("A", "B", "C"))
#'
#' @export
#'
Edgelist.all = function(nodes){
Edgelist(nodes = nodes, edgeIds = seq_len(length(nodes) * length(nodes)))
}
#' Complete bipartite graph Edgelist construction from two node sets
#'
#' Instantiate a new Edgelist object from node name sets corresponding to the complete bipartite graph in this nodes space.
#'
#' @param nodesFrom a character vector containing all parent node names of the graph Edgelist to instantiate
#' @param nodesTo a character vector containing all child node names of the graph Edgelist to instantiate
#'
#' @return the Edgelist object with all possible edge ids within the input bipartite nodes space
#'
#' @examples
#' #Equivalent to Edgelist(nodes = c("A", "B", "C", "D", "E"), edgeIds = c(4, 5, 9, 10, 14, 15))
#' #Corresponds to all (X, Y) such that X in {"A", "B", "C"} and Y in {"D", "E"}
#' edgelist = Edgelist.bipartite.all(c("A", "B", "C"), c("D", "E"))
#'
#' @export
#'
Edgelist.bipartite.all = function(nodesFrom, nodesTo){
edgeIds = fastBipartiteEdgeIds(nodesFrom, nodesTo)
Edgelist(nodes = c(nodesFrom, nodesTo), edgeIds = edgeIds)
}
#' Edgelist sampling
#'
#' Retrieves a subset of an Edgelist edge ids by random sampling
#'
#' @param edgelist an Edgelist object containing the whole edge ids space to sample from
#' @param keepRatio a single number in [0; 1] describing the ratio of all \code{edgelist} edge ids to select at most
#' @param pred a predicate function filtering non admissible edges among sampled ones. By default, no filtering is performed.
#'
#' @return an integer vector of sampled edge ids not filtered by \code{pred}
#'
#' @examples
#' #Build an Edgelist with all (X, Y) such that X in {"A", "B", "C"} and Y in {"D", "E"}
#' edgelist = Edgelist.bipartite.all(c("A", "B", "C"), c("D", "E"))
#' #Randomly select 50% of edge ids, filtering out edges starting from "A"
#' sampledIds = Edgelist.sample.ids(edgelist, 0.5, function(e){
#' e[1] > 1
#' })
#'
#' @export
#'
Edgelist.sample.ids = function(edgelist, keepRatio, pred = function(edge){TRUE}){
shuffledIds = if(length(edgelist@edgeIds) <= 1) edgelist@edgeIds else sample(edgelist@edgeIds)
keepNum = keepRatio * nrow(edgelist)
keptIds = shuffledIds[seq_len(keepNum)]
remainingIds = shuffledIds[-seq_len(keepNum)]
checks = apply(cbind(getFromIds(edgelist, keptIds), getToIds(edgelist, keptIds)), 1, pred)
kos = which(checks == FALSE)
kosLength = length(kos)
while(kosLength > 0){
keptIds = setdiff(keptIds, keptIds[kos])
if(length(remainingIds) == 0) break
keptIds = union(keptIds, remainingIds[seq_len(min(kosLength, length(remainingIds)))])
remainingIds = remainingIds[-seq_len(kosLength)]
checks = apply(cbind(getFromIds(edgelist, keptIds), getToIds(edgelist, keptIds)), 1, pred)
kos = which(checks == FALSE)
kosLength = length(kos)
}
return(keptIds)
}
#' Edgelist sampling
#'
#' Retrieves a subset of an Edgelist edge ids by random sampling and return a new Edgelist with the result
#'
#' @param edgelist an Edgelist object containing the whole edge ids space to sample from
#' @param keepRatio a single number in [0; 1] describing the ratio of all \code{edgelist} edge ids to select at most
#' @param pred a predicate function filtering non admissible edges among sampled ones. By default, no filtering is performed.
#'
#' @return an Edgelist with same nodes as \code{edgelist} and sampled edge ids not filtered by \code{pred} as edgeIds slot value
#'
#' @examples
#' #Build an Edgelist with all (X, Y) such that X in {"A", "B", "C"} and Y in {"D", "E"}
#' edgelist = Edgelist.bipartite.all(c("A", "B", "C"), c("D", "E"))
#' #Build an Edgelist from a random selection of 50% of edgelist edge, filtering out edges starting from "A"
#' subEdgelist = Edgelist.sample(edgelist, 0.5, function(e){
#' e[1] > 1
#' })
#'
#' @export
#'
Edgelist.sample = function(edgelist, keepRatio, pred = function(edge){TRUE}){
ids = Edgelist.sample.ids(edgelist, keepRatio, pred)
iids = sapply(ids, function(i){which(edgelist@edgeIds == i)})
atts = lapply(seq_along(edgelist@attributes), function(aid){
edgelist@attributes[[aid]][iids]
})
names(atts) = names(edgelist@attributes)
Edgelist(nodes = edgelist@nodes, edgeIds = ids,
attributes = atts)
}
#' Edgelist reversal
#'
#' Retrieves an Edgelist with reversed edges from an Edgelist given as input
#'
#' @param edgelist an Edgelist object
#' @return an Edgelist with same nodes as \code{edgelist} but reversed edges
#'
#' @examples
#' #Build an Edgelist with all (X, Y) such that X in {"A", "B", "C"} and Y in {"D", "E"}
#' edgelist = Edgelist.bipartite.all(c("A", "B", "C"), c("D", "E"))
#' #Get the reversed Edgelist which then have the following edgeIds: c(16, 21, 17, 22, 18, 23)
#' #Corresponds to all (X, Y) such that X in {"D", "E"} and Y in {"A", "B", "C"}
#' revEdgelist = Edgelist.reverse(edgelist)
#'
#' @export
#'
Edgelist.reverse = function(edgelist){
nodesLen = length(edgelist@nodes)
Edgelist(nodes = edgelist@nodes,
edgeIds = sapply(edgelist@edgeIds - 1,
function(id){id %% nodesLen * nodesLen + floor(id / nodesLen)}) + 1,
attributes = edgelist@attributes)
}
#' Edgelist dynamic reversal
#'
#' Retrieves an Edgelist with edges dynamic time step reversal from an Edgelist given as input
#'
#' @param edgelist an Edgelist object
#' @return an Edgelist with same nodes as \code{edgelist} but reversed dynamic time steps between edge endpoints
#'
#' @examples
#' #Build a typical dynamic edgelist
#' edgelist = Edgelist.bipartite.all(c("A.t.", "B.t."), c("A.t.1.", "B.t.1."))
#' #Get the dynamically reversed Edgelist which then have the following edgeIds: c(9, 10, 13, 14)
#' #Corresponds to all (X, Y) such that X in {"A.t.1.", "B.t.1."} and Y in {"A.t.", "B.t."}
#' revEdgelist = Edgelist.dynrev(edgelist)
#'
#' @export
#'
Edgelist.dynrev = function(edgelist){
edgelistStr = getEdges(edgelist)
dyns1 = getdyn(edgelistStr[,1])
dyns2 = getdyn(edgelistStr[,2])
D = as.data.frame(do.call("rbind", lapply(seq_len(dim(edgelist)[1]), function(eid){
c(V1 = dyn(edgelistStr[eid, 1], dyns2[eid]), V2 = dyn(edgelistStr[eid, 2], dyns1[eid]))
})), stringsAsFactors = FALSE)
Edgelist(nodes = edgelist@nodes, edgeIds = getIds(edgelist, D), attributes = edgelist@attributes)
}
#' Edgelist static transformation
#'
#' Retrieves an Edgelist with static transformation of input Edgelist edges
#'
#' @param edgelist an Edgelist object
#' @return an Edgelist with static node version of \code{edgelist} and corresponding static edges
#'
#' @examples
#' #Build a typical dynamic edgelist
#' edgelist = Edgelist.bipartite.all(c("A.t.", "B.t."), c("A.t.1.", "B.t.1."))
#' #Get the static Edgelist which then have the nodes c("A", "B") and the following edgeIds: c(1, 2, 3, 4)
#' #Corresponds to all (X, Y) such that X and Y are in {"A", "B"}
#' undynEdgelist = Edgelist.undyn(edgelist)
#'
#' @export
#'
Edgelist.undyn = function(edgelist){
edgelistStr = getEdges(edgelist)
d = data.frame(undyn(edgelistStr[,1]), undyn(edgelistStr[,2]), stringsAsFactors = FALSE)
newNodes = unique(undyn(edgelist@nodes))
newEdgelist = Edgelist(nodes = newNodes, edgeIds = 1)
Edgelist(nodes = newNodes, edgeIds = getIds(newEdgelist, d), attributes = edgelist@attributes)
}
#' Edgelist loop removal
#'
#' Retrieves an Edgelist without loop edges of an input Edgelist
#'
#' @param edgelist an Edgelist object
#' @return an Edgelist with no loop, i.e. no ego edge.
#'
#' @examples
#' #Build a simple edgelist
#' edgelist = Edgelist.all(c("A", "B"))
#' #Get the no loop Edgelist which then have the following edgeIds: c(2, 4)
#' #Corresponds to removing (X, X) edges
#' noloopEdgelist = Edgelist.noloop(edgelist)
#'
#' @export
#'
Edgelist.noloop = function(edgelist){
nodesLen = length(edgelist@nodes)