# gSpan is a subgraph mining algorithm that uses DFS and DFS codes to mine subgraphs
# **Importing this algorithm into a python program**
# --------------------------------------------------------
#
# from PAMI.subgraphMining.basic import gspan as alg
#
# obj = alg.GSpan(iFile, minSupport)
#
# obj.mine()
#
# frequentGraphs = obj.getFrequentSubgraphs()
#
# memUSS = obj.getMemoryUSS()
#
# obj.save(oFile)
#
# print("Total Memory in USS:", memUSS)
#
# memRSS = obj.getMemoryRSS()
#
# print("Total Memory in RSS", memRSS)
#
# run = obj.getRuntime()
#
# print("Total ExecutionTime in seconds:", run)
#
from PAMI.subgraphMining.basic import abstract as _ab
[docs]
class GSpan(_ab._gSpan):
eliminate_infrequent_vertices = True
eliminate_infrequent_vertex_pairs = True
eliminate_infrequent_edge_labels = True
edge_count_pruning = True
def __init__(self, iFile, minSupport, outputSingleVertices=True, maxNumberOfEdges=float('inf'), outputGraphIds=False) -> None:
"""
Initialize variables
"""
self.minSup = minSupport
self.frequentSubgraphs = []
self._runtime = 0
self.maxMem = 0
self.graphCount = 0
self.patternCount = 0
self.frequentVertexLabels = []
self.infrequentVerticesRemovedCount = 0
self.infrequentVertexPairsRemoved = 0
self.edgeRemovedByLabel = 0
self.eliminatedWithMaxSize = 0
self.emptyGraphsRemoved = 0
self.pruneByEdgeCount = 0
self.inPath = iFile
self.outPath = None
self.outputSingleVertices = outputSingleVertices
self.maxNumberOfEdges = maxNumberOfEdges
self.outputGraphIds = outputGraphIds
self._memoryUSS = float()
self._memoryRSS = float()
self.label_mapping = {}
self.current_label = 0
self.edge_label_mapping = {}
self.current_edge_label = 0
[docs]
def mine(self):
if self.maxNumberOfEdges <= 0:
return
self.frequentSubgraphs = []
self.patternCount = 0
# Record the start time
t1 = _ab.time.time()
# Read graphs
graphDb = self.readGraphs(self.inPath)
# Calculate minimum support as a number of graphs
self.minSup = _ab.math.ceil(self.minSup * len(graphDb))
# Mining
self.gSpan(graphDb, self.outputSingleVertices)
# Output
# self.writeResultToFile(self.outPath)
t2 = _ab.time.time()
#Calculate runtime
self._runtime = (t2 - t1)
process = _ab._psutil.Process(_ab._os.getpid())
self._memoryUSS = float()
self._memoryRSS = float()
self._memoryUSS = process.memory_full_info().uss
self._memoryRSS = process.memory_info().rss
self.patternCount = len(self.frequentSubgraphs)
[docs]
def save(self, oFile):
"""
The `save` function writes information about frequent subgraphs to a specified
output file in a specific format.
:param oFile: The `save` method is used to write the results of frequent
subgraphs to a file specified by the `outputPath` parameter. The method iterates over each
frequent subgraph in `self.frequentSubgraphs` and writes the subgraph information to the file
"""
reverse_label_mapping = {v: k for k, v in self.label_mapping.items()}
reverse_edge_label_mapping = {v: k for k, v in self.edge_label_mapping.items()}
with open(oFile, 'w') as bw:
i = 0
for subgraph in self.frequentSubgraphs:
sb = []
dfsCode = subgraph.dfsCode
sb.append(f"t # {i} * {subgraph.support}\n")
if dfsCode.size == 1:
ee = dfsCode.getEeList()[0]
vLabel1 = reverse_label_mapping.get(ee.vLabel1, ee.vLabel1)
sb.append(f"v 0 {vLabel1}\n")
if ee.edgeLabel != -1:
vLabel2 = reverse_label_mapping.get(ee.vLabel2, ee.vLabel2)
edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel)
sb.append(f"v 1 {vLabel2}\n")
sb.append(f"e 0 1 {edgeLabel}\n")
else:
vLabels = [reverse_label_mapping.get(label, label) for label in dfsCode.getAllVLabels()]
for j, vLabel in enumerate(vLabels):
sb.append(f"v {j} {vLabel}\n")
for ee in dfsCode.getEeList():
edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel)
sb.append(f"e {ee.v1} {ee.v2} {edgeLabel}\n")
if self.outputGraphIds:
sb.append("x " + " ".join(str(iD) for iD in subgraph.setOfGraphsIds))
sb.append("\n\n")
bw.write("".join(sb))
i += 1
[docs]
def readGraphs(self, path):
"""
The `readGraphs` function reads graph data from a file and constructs a list of graphs with vertices
and edges.
:param path: The `path` parameter in the `readGraphs` method is the file path to the text file
containing the graph data that needs to be read and processed.
:return: The `readGraphs` method reads graph data from a file specified by the `path` parameter. It
parses the data to create a list of graph objects and returns this list. Each graph object contains
information about vertices and edges within the graph.
"""
with open(path, 'r') as br:
graphDatabase = []
vMap = {}
gId = None
for line in br:
line = line.strip()
if line.startswith("t"):
if vMap: # If vMap is not empty, it means a graph was read
graphDatabase.append(_ab.Graph(gId, vMap))
vMap = {} # Reset for the next graph
gId = int(line.split(" ")[2])
elif line.startswith("v"):
items = line.split(" ")
vId = int(items[1])
# Map vertex label
if items[2].isdigit():
vLabel = int(items[2])
else:
vLabel = self.get_label(items[2])
vMap[vId] = _ab.Vertex(vId, vLabel)
elif line.startswith("e"):
items = line.split(" ")
v1 = int(items[1])
v2 = int(items[2])
# Map edge label
if items[3].isdigit():
eLabel = int(items[3])
else:
eLabel = self.get_edge_label(items[3])
e = _ab.Edge(v1, v2, eLabel)
vMap[v1].addEdge(e)
vMap[v2].addEdge(e)
if vMap:
graphDatabase.append(_ab.Graph(gId, vMap))
self.graphCount = len(graphDatabase)
return graphDatabase
[docs]
def get_label(self, label_char):
if label_char not in self.label_mapping:
self.label_mapping[label_char] = self.current_label
self.current_label += 1
return self.label_mapping[label_char]
[docs]
def get_edge_label(self, label_char):
if label_char not in self.edge_label_mapping:
self.edge_label_mapping[label_char] = self.current_edge_label
self.current_edge_label += 1
return self.edge_label_mapping[label_char]
[docs]
def subgraphIsomorphisms(self, c: _ab.DFSCode, g: _ab.Graph):
"""
The function `subgraphIsomorphisms` takes a DFS code and a graph as input, and finds all subgraph
isomorphisms between the DFS code and the graph.
:param c: The parameter `c` in the `subgraphIsomorphisms` function is of type `_ab.DFSCode`, which
seems to represent a Depth-First Search code.
:param g: The parameter `g` in the `subgraphIsomorphisms` function represents a graph object. The
function is trying to find subgraph isomorphisms between a given DFS code `c` and the graph `g`. It
iterates through the vertices of the graph starting with a specific
:return: The function `subgraphIsomorphisms` returns a list of dictionaries, where each dictionary
represents a subgraph isomorphism mapping between the input DFS code `c` and the input graph `g`.
Each dictionary in the list maps vertex IDs from the DFS code to corresponding vertex IDs in the
graph, indicating a valid subgraph isomorphism.
"""
isoms = []
startLabel = c.getEeList()[0].getVLabel1()
# Find all vertices in the graph that match the start label and initialize isomorphisms with them
for vId in g.findAllWithLabel(startLabel):
hMap = {0: vId}
isoms.append(hMap)
# For each edge in the DFS code, try to extend each partial isomorphism
for ee in c.getEeList():
v1, v2, v2Label, eLabel = ee.getV1(), ee.getV2(), ee.getVLabel2(), ee.getEdgeLabel()
updateIsoms = []
# Try to extend each current isomorphism with the current edge
for iso in isoms:
mappedV1 = iso.get(v1)
# Forward edge
if v1 < v2:
mappedVertices = list(iso.values())
for mappedV2 in g.getAllNeighbors(mappedV1):
if (v2Label == mappedV2.getLabel() and
mappedV2.getId() not in mappedVertices and
eLabel == g.getEdgeLabel(mappedV1, mappedV2.getId())):
tempM = iso.copy()
tempM[v2] = mappedV2.getId()
updateIsoms.append(tempM)
# Backward edge
else:
mappedV2 = iso.get(v2)
# Check if the backward edge exists in the graph matching the DFS code edge
if g.isNeighboring(mappedV1, mappedV2) and eLabel == g.getEdgeLabel(mappedV1, mappedV2):
updateIsoms.append(iso)
isoms = updateIsoms
return isoms
[docs]
def rightMostPathExtensionsFromSingle(self, c: _ab.DFSCode, g: _ab.Graph):
"""
The function `rightMostPathExtensionsFromSingle` generates extensions for a given DFS code and
graph, focusing on the rightmost path.
:param c: The parameter `c` is of type `_ab.DFSCode`, which seems to represent a Depth-First Search
code. It is used in the `rightMostPathExtensionsFromSingle` method to perform operations related to
DFS codes
:param g: The parameter `g` in the provided code snippet represents a graph object. It seems to be
an instance of a graph data structure that contains vertices and edges. The code is designed to
find and return extensions from a given DFS code `c` based on the provided graph `g`. The function
`
:return: The function `rightMostPathExtensionsFromSingle` returns a dictionary `extensions`
containing extended edges as keys and sets of graph IDs as values.
"""
# Get the unique identifier for the given graph
gid = g.getId()
# Initialize a dictionary to store potential extensions
extensions = {}
# If the DFS code is empty, consider all edges of the graph for extension
if c.isEmpty():
for vertex in g.vertices:
for e in vertex.getEdgeList():
# Determine the order of vertex labels to maintain consistency
v1Label = g.getVLabel(e.v1)
v2Label = g.getVLabel(e.v2)
if v1Label < v2Label:
ee1 = _ab.ExtendedEdge(0, 1, v1Label, v2Label, e.getEdgeLabel())
else:
ee1 = _ab.ExtendedEdge(0, 1, v2Label, v1Label, e.getEdgeLabel())
# Update the extensions dictionary with new or existing extended edges
setOfGraphIds = extensions.get(ee1, set())
setOfGraphIds.add(gid)
extensions[ee1] = setOfGraphIds
else:
# For non-empty DFS code, focus on extending from the rightmost path
rightMost = c.getRightMost()
isoms = self.subgraphIsomorphisms(c, g)
# Iterate through all isomorphisms to find valid extensions
for isom in isoms:
invertedIsom = {v: k for k, v in isom.items()}
mappedRm = isom[rightMost]
mappedRmLabel = g.getVLabel(mappedRm)
for x in g.getAllNeighbors(mappedRm):
invertedX = invertedIsom.get(x.getId())
if invertedX is not None and c.onRightMostPath(invertedX) and c.notPreOfRm(invertedX) and not c.containEdge(rightMost, invertedX):
ee = _ab.ExtendedEdge(rightMost, invertedX, mappedRmLabel, x.getLabel(), g.getEdgeLabel(mappedRm, x.getId()))
extensions.setdefault(ee, set()).add(gid)
mappedVertices = set(isom.values())
for v in c.getRightMostPath():
mappedV = isom[v]
mappedVLabel = g.getVLabel(mappedV)
for x in g.getAllNeighbors(mappedV):
if x.getId() not in mappedVertices:
ee = _ab.ExtendedEdge(v, rightMost + 1, mappedVLabel, x.getLabel(), g.getEdgeLabel(mappedV, x.getId()))
extensions.setdefault(ee, set()).add(gid)
return extensions
[docs]
def rightMostPathExtensions(self, c: _ab.DFSCode, graphDb, graphIds):
"""
The function `rightMostPathExtensions` generates extensions for a given DFS code by considering
rightmost paths in a graph database.
:param c: The parameter `c` in the `rightMostPathExtensions` method is of type `_ab.DFSCode`. It
seems to represent a Depth-First Search code used in graph algorithms. The method is responsible
for generating extensions based on the rightmost path in a graph
:param graphDb: The `graphDb` parameter in the `rightMostPathExtensions` method is a
database that stores graph data. It is used to retrieve graph objects based on
their IDs, which are provided in the `graphIds` parameter. The method then performs operations on
these graph objects to generate
:param graphIds: The `graphIds` parameter in the `rightMostPathExtensions` function represents a
list of graph identifiers. These identifiers are used to retrieve specific graphs from the
`graphDb` database in order to perform operations on them within the function. Each ID in the
`graphIds` list corresponds to an identifier.
:return: The function `rightMostPathExtensions` returns a dictionary `extensions` containing
extended edges as keys and sets of graph IDs as values.
"""
extensions = {}
if c.isEmpty():
for iD in graphIds:
g = graphDb[iD]
# Skip graphs if pruning based on edge count is enabled and applicable
if GSpan.edge_count_pruning and c.size >= g.getEdgeCount():
self.pruneByEdgeCount += 1
continue
for v in g.vertices:
for e in v.getEdgeList():
# Organize the vertex labels to maintain consistent ordering
v1L = g.getVLabel(e.v1)
v2L = g.getVLabel(e.v2)
if v1L < v2L:
ee1 = _ab.ExtendedEdge(0, 1, v1L, v2L, e.getEdgeLabel())
else:
ee1 = _ab.ExtendedEdge(0, 1, v2L, v1L, e.getEdgeLabel())
# Add the new or existing extensions to the dictionary
setOfGraphIds = extensions.get(ee1, set())
setOfGraphIds.add(iD)
extensions[ee1] = setOfGraphIds
else:
# For non-empty DFS codes, extend based on the rightmost path of each graph
rightMost = c.getRightMost()
for iD in graphIds:
g = graphDb[iD]
if GSpan.edge_count_pruning and c.size >= g.getEdgeCount():
self.pruneByEdgeCount += 1
continue
isoms = self.subgraphIsomorphisms(c, g)
for isom in isoms:
invertedIsom = {}
for key, value in isom.items():
invertedIsom[value] = key
mappedRM = isom.get(rightMost)
mappedRMLabel = g.getVLabel(mappedRM)
for x in g.getAllNeighbors(mappedRM):
invertedX = invertedIsom.get(x.getId())
if invertedX is not None and c.onRightMostPath(invertedX) and \
c.notPreOfRm(invertedX) and not c.containEdge(rightMost, invertedX):
ee = _ab.ExtendedEdge(rightMost, invertedX, mappedRMLabel, x.getLabel(),
g.getEdgeLabel(mappedRM, x.getId()))
if ee not in extensions:
extensions[ee] = set()
extensions[ee].add(g.getId())
mappedVertices = isom.values()
for v in c.getRightMostPath():
mappedV = isom[v]
mappedVLabel = g.getVLabel(mappedV)
for x in g.getAllNeighbors(mappedV):
if x.getId() not in mappedVertices:
ee = _ab.ExtendedEdge(v, rightMost + 1, mappedVLabel, x.getLabel(),
g.getEdgeLabel(mappedV, x.getId()))
if ee not in extensions:
extensions[ee] = set()
extensions[ee].add(g.getId())
return extensions
[docs]
def gspanDFS(self, c: _ab.DFSCode, graphDb, subgraphId):
"""
The `gspanDFS` function recursively explores graph patterns using the gSpan algorithm to find
frequent subgraphs in a graph database.
:param c: In the provided code snippet, the parameter `c` is an instance of the `_ab.DFSCode` class.
It is used as an input to the `gspanDFS` method for performing Depth-First Search (DFS) traversal in
a graph mining algorithm. The `c` parameter represents
:type c: _ab.DFSCode
:param graphDb: The `graphDb` parameter refers to a graph database that the algorithm is
operating on.
:param subgraphId: The `subgraphId` parameter in the `gspanDFS` method refers to an
ID represents a specific subgraph within the graph database `graphDb`.
:return: The `gspanDFS` method is a recursive function that is called within itself to explore the graph
structure and find frequent subgraphs. The function does not have a return value, but it modifies
the `self.frequentSubgraphs` list by appending new frequent subgraphs found during the DFS traversal.
"""
if c.size == self.maxNumberOfEdges - 1:
return
extensions = self.rightMostPathExtensions(c, graphDb, subgraphId)
for extension, newGraphIds in extensions.items():
sup = len(newGraphIds)
if sup >= self.minSup:
newC = c.copy()
newC.add(extension)
if self.isCanonical(newC):
subgraph = _ab.FrequentSubgraph(newC, newGraphIds, sup)
self.frequentSubgraphs.append(subgraph)
self.gspanDFS(newC, graphDb, newGraphIds)
[docs]
def isCanonical(self, c: _ab.DFSCode):
"""
The function `isCanonical` checks if a given DFS code is canonical by comparing it with its
rightmost path extensions.
:param c: The parameter `c` is an instance of the `_ab.DFSCode` class
:type c: _ab.DFSCode
:return: a boolean value. It returns True if the input DFSCode `c` is canonical, and False if it is
not canonical.
"""
canC = _ab.DFSCode()
for i in range(c.size):
extensions = self.rightMostPathExtensionsFromSingle(canC, _ab.Graph(-1, None, c))
minEe = None
for ee in extensions.keys():
if minEe is None or ee.smallerThan(minEe):
minEe = ee
if minEe is not None and minEe.smallerThan(c.getAt(i)):
return False
if minEe is not None:
canC.add(minEe)
return True
[docs]
def gSpan(self, graphDb, outputSingleVertices):
"""
The gSpan function in Python processes a graph database by precalculating vertex lists, removing
infrequent vertex pairs, and performing a depth-first search algorithm.
:param graphDb: The `graphDb` parameter refers to a graph database that the algorithm is
operating on.
:param outputSingleVertices: The `outputFrequentVertices` parameter is a boolean flag that
determines whether single vertices should be output or not.
"""
if outputSingleVertices or GSpan.eliminate_infrequent_vertices:
self.findAllOnlyOneVertex(graphDb, outputSingleVertices)
for g in graphDb:
g.precalculateVertexList()
if GSpan.eliminate_infrequent_vertex_pairs or GSpan.eliminate_infrequent_edge_labels:
self.removeInfrequentVertexPairs(graphDb)
graphIds = set()
for i, g in enumerate(graphDb):
if g.vertices is not None and len(g.vertices) != 0:
if self.infrequentVerticesRemovedCount > 0:
g.precalculateVertexList()
graphIds.add(i)
g.precalculateVertexNeighbors()
g.precalculateLabelsToVertices()
else:
self.emptyGraphsRemoved += 1
if len(self.frequentVertexLabels) != 0:
self.gspanDFS(_ab.DFSCode(), graphDb, graphIds)
[docs]
class Pair:
def __init__(self, x, y):
if x < y:
self.x = x
self.y = y
else:
self.x = y
self.y = x
def __eq__(self, other):
if isinstance(other, GSpan.Pair):
return self.x == other.x and self.y == other.y
return False
def __hash__(self):
return self.x + 100 * self.y
[docs]
def findAllOnlyOneVertex(self, graphDb, outputFrequentVertices):
"""
The function `findAllOnlyOneVertex` iterates through a graph database to find frequent vertices
based on a minimum support threshold, storing the results and optionally removing infrequent
vertices.
:param graphDb: The `graphDb` parameter refers to a graph database that the algorithm is
operating on.
:param outputFrequentVertices: The `outputFrequentVertices` parameter is a boolean flag that
determines whether single vertices should be included in the output or not.
"""
self.frequentVertexLabels = []
labelM = {}
for g in graphDb:
for v in g.getNonPrecalculatedAllVertices():
if v.getEdgeList():
vLabel = v.getLabel()
labelM.setdefault(vLabel, set()).add(g.getId())
# Check each label for frequency against the minimum support threshold
for label, tempSupG in labelM.items():
sup = len(tempSupG)
if sup >= self.minSup:
self.frequentVertexLabels.append(label)
if outputFrequentVertices:
tempD = _ab.DFSCode()
tempD.add(_ab.ExtendedEdge(0, 0, label, label, -1))
self.frequentSubgraphs.append(_ab.FrequentSubgraph(tempD, tempSupG, sup))
elif GSpan.eliminate_infrequent_vertices:
for graphId in tempSupG:
g = graphDb[graphId]
g.removeInfrequentLabel(label)
self.infrequentVerticesRemovedCount += 1
[docs]
def removeInfrequentVertexPairs(self, graphDb):
"""
The function `removeInfrequentVertexPairs` processes a graph database by removing infrequent vertex
pairs and edge labels based on specified support thresholds.
:param graphDb: The `graphDb` parameter refers to a graph database that the algorithm is
operating on.
"""
alreadySeenPair = None
matrix = None
mapEdgeLabelToSupport = None
alreadySeenEdgeLabel = None
if GSpan.eliminate_infrequent_edge_labels:
matrix = _ab.SparseTriangularMatrix()
alreadySeenPair = set() # To avoid double counting pairs in the same graph
if GSpan.eliminate_infrequent_edge_labels:
mapEdgeLabelToSupport = {}
alreadySeenEdgeLabel = set() # To avoid double counting edge labels in the same graph
for g in graphDb:
vertices = g.getAllVertices()
# Check each vertex and its edges for infrequent pairs and labels
for v1 in vertices:
labelV1 = v1.getLabel()
for edge in v1.getEdgeList():
v2 = edge.another(v1.getId())
labelV2 = g.getVLabel(v2)
# Track vertex label pairs for infrequency analysis
if GSpan.eliminate_infrequent_edge_labels:
pair = self.Pair(labelV1, labelV2)
if pair not in alreadySeenPair:
matrix.incrementCount(labelV1, labelV2)
alreadySeenPair.add(pair)
# Track edge labels for infrequency analysis
if GSpan.eliminate_infrequent_edge_labels:
edgeLabel = edge.getEdgeLabel()
if edgeLabel not in alreadySeenEdgeLabel:
alreadySeenEdgeLabel.add(edgeLabel)
edgeSupport = mapEdgeLabelToSupport.get(edgeLabel, 0)
mapEdgeLabelToSupport[edgeLabel] = edgeSupport + 1
if GSpan.eliminate_infrequent_vertex_pairs:
alreadySeenPair.clear()
if GSpan.eliminate_infrequent_edge_labels:
alreadySeenEdgeLabel.clear()
if GSpan.eliminate_infrequent_vertex_pairs:
matrix.removeInfrequentEntriesFromMatrix(self.minSup)
if GSpan.eliminate_infrequent_vertex_pairs or GSpan.eliminate_infrequent_edge_labels:
for g in graphDb:
vertices = g.getAllVertices()
for v1 in vertices:
iterEdges = iter(v1.getEdgeList())
for edge in iterEdges:
v2 = edge.another(v1.getId())
labelV2 = g.getVLabel(v2)
count = matrix.getSupportForItems(v1.getLabel(), labelV2)
# Remove edges based on infrequency criteria
if GSpan.eliminate_infrequent_vertex_pairs and count < self.minSup:
v1.removeEdge(edge)
self.infrequentVertexPairsRemoved += 1
elif GSpan.eliminate_infrequent_edge_labels and \
mapEdgeLabelToSupport.get(edge.getEdgeLabel(), 0) < self.minSup:
v1.removeEdge(edge)
self.edgeRemovedByLabel += 1
[docs]
def getMemoryUSS(self):
return self._memoryUSS
[docs]
def getRuntime(self):
return self._runtime
[docs]
def getFrequentSubgraphs(self):
sb = []
for i, subgraph in enumerate(self.frequentSubgraphs):
dfsCode = subgraph.dfsCode
subgraphDescription = [f"t # {i} * {subgraph.support}"]
if dfsCode.size == 1:
ee = dfsCode.getEeList()[0]
subgraphDescription.append(f"v 0 {ee.vLabel1}")
if ee.edgeLabel != -1:
subgraphDescription.append(f"v 1 {ee.vLabel2}")
subgraphDescription.append(f"e 0 1 {ee.edgeLabel}")
else:
vLabels = dfsCode.getAllVLabels()
for j, vLabel in enumerate(vLabels):
subgraphDescription.append(f"v {j} {vLabel}")
for ee in dfsCode.getEeList():
subgraphDescription.append(f"e {ee.v1} {ee.v2} {ee.edgeLabel}")
sb.append('\n'.join(subgraphDescription))
return '\n'.join(sb)
[docs]
def getSubgraphGraphMapping(self):
"""
Return a list of mappings from subgraphs to the graph IDs they belong to in the format <FID, Clabel, GIDs[]>.
"""
mappings = []
for i, subgraph in enumerate(self.frequentSubgraphs):
mapping = {
"FID": i,
"Clabel": str(subgraph.dfsCode),
"GIDs": list(subgraph.setOfGraphsIds)
}
mappings.append(mapping)
return mappings
[docs]
def saveSubgraphsByGraphId(self, oFile):
"""
Save subgraphs by graph ID as a flat transaction, such that each row represents the graph ID and each row can contain multiple subgraph IDs.
"""
graphToSubgraphs = {}
for i, subgraph in enumerate(self.frequentSubgraphs):
for graphId in subgraph.setOfGraphsIds:
if graphId not in graphToSubgraphs:
graphToSubgraphs[graphId] = []
graphToSubgraphs[graphId].append(i)
graphToSubgraphs = {k: graphToSubgraphs[k] for k in sorted(graphToSubgraphs)}
with open(oFile, 'w') as f:
for _, subgraphIds in graphToSubgraphs.items():
f.write(f"{' '.join(map(str, subgraphIds))}\n")