from torch_geometric.datasets import KarateClub
# Load the KarateClub dataset and pull out its first graph object.
dataset = KarateClub()
data = dataset[0]
print(data)
print("==============================================================")
# Dimensionality of the node attributes.
print(f"Number of node features:{data.num_node_features}")
# Reported a second time through the `num_features` attribute.
print(f"Number of node features: {data.num_features}")
# Dimensionality of the edge attributes.
print(f"Number of edge features: {data.num_edge_features}")
# Average node degree, computed as num_edges / num_nodes.
print(f"Average node degree: {data.num_edges / data.num_nodes:.2f}")
# Whether the edge indices are ordered and free of duplicate entries.
print(f"if edge indices are ordered and do not contain duplicate entries.: {data.is_coalesced()}")
# Number of nodes flagged by the training mask.
print(f"Number of training nodes: {data.train_mask.sum()}")
# Fraction of all nodes that are flagged by the training mask.
print(f"Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}")
# Whether the graph contains isolated nodes.
# NOTE(review): newer PyG releases appear to deprecate contains_isolated_nodes() /
# contains_self_loops() in favor of has_isolated_nodes() / has_self_loops() —
# confirm against the installed version before switching.
print(f"Contains isolated nodes: {data.contains_isolated_nodes()}")
# Whether the graph contains self-loop edges.
print(f"Contains self-loops: {data.contains_self_loops()}")
# Whether the graph is undirected.
print(f"Is undirected: {data.is_undirected()}")
Data(edge_index=[2, 156], train_mask=[34], x=[34, 34], y=[34])
==============================================================
Number of node features:34
Number of node features: 34
Number of edge features: 0
Average node degree: 4.59
if edge indices are ordered and do not contain duplicate entries.: True
Number of training nodes: 4
Training node label rate: 0.12
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True
import numpy as np
import torch
from torch_geometric.data import Data
class MyGraphData(Data):
    """Graph assembled from a table of department, author and paper IDs.

    Every distinct (value, label) pair found in the table becomes one node.
    Each row contributes an undirected department-author edge and an
    undirected author-paper edge, stored as two directed edges each.
    """

    def __init__(self, data, labels, **args):
        """Build the graph from a structural table.

        data: a table (the code uses ``data.values`` and column lookups such
            as ``data['dept']``, i.e. a pandas DataFrame). Columns are read
            by position: column 0 is the department, column 1 the author and
            column 2 the paper. Each cell is the value/ID of one node.
            Example:

                dept  author  paper
                1     1       1
                1     2       2

            Author #1 in department 1 wrote paper 1, and author #2 in
            department 1 wrote paper 2.
            Missing cells (None / NaN) create no node and no edge, so a row
            may leave isolated nodes behind.
        labels: mapping with the keys 'dept', 'author' and 'paper'; the
            mapped value is stored as the target of every node of that type,
            which is what distinguishes the node types in this graph.
        **args: other keyword arguments forwarded to ``Data.__init__``.
        """
        # Forward the extra keyword arguments as documented; they used to be
        # accepted and then silently discarded.
        super().__init__(**args)
        self.data = data
        self.edge_index = None
        self.x = None
        self.y = None
        self.node_list = []
        self.labels = labels
        self.create_graph(self.data)

    def create_graph(self, data):
        """Populate ``node_list``, ``edge_index``, ``x`` and ``y`` from *data*.

        Rows are read positionally as (department, author, paper). Nodes are
        appended to ``self.node_list`` (it is not cleared first) and the edge
        list is rebuilt from scratch. A pair that occurs in several rows is
        appended once per row, so ``edge_index`` may contain duplicates.

        Returns the tuple ``(x, edge_index, y)``:
            x: 1-D float tensor holding the raw value of every node.
               NOTE(review): x is 1-D here, while the `# Data(...)` comment
               after the class shows x=[3, 1]; confirm which shape is wanted.
            edge_index: long tensor of shape (2, num_edges); first row is the
                source node index, second row the destination node index.
            y: 1-D float tensor holding the label of every node.
        """
        edges = []
        # Iterate the table that was passed in (previously the argument was
        # ignored in favor of self.data).
        for row in data.values:
            # Three nodes per row: department, author, paper respectively.
            dept, author, paper = row[0], row[1], row[2]
            print(dept, author, paper)
            dept_idx = self.__add_node(self.node_list, dept, self.labels['dept'])
            author_idx = self.__add_node(self.node_list, author, self.labels['author'])
            paper_idx = self.__add_node(self.node_list, paper, self.labels['paper'])
            # Undirected author-department edge, stored in both directions.
            if dept_idx is not None and author_idx is not None:
                edges.append([dept_idx, author_idx])
                edges.append([author_idx, dept_idx])
            # Undirected author-paper edge, stored in both directions.
            if author_idx is not None and paper_idx is not None:
                edges.append([author_idx, paper_idx])
                edges.append([paper_idx, author_idx])
        # reshape(-1, 2) keeps the result at shape (2, 0) when there are no
        # edges at all; a plain transpose of an empty array would be 1-D.
        self.edge_index = (
            torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()
        )
        # Gather the value of each node into the feature tensor x.
        self.x = torch.tensor([node[0] for node in self.node_list], dtype=torch.float)
        # Gather the target (node-type label) of each node.
        self.y = torch.tensor([node[1] for node in self.node_list], dtype=torch.float)
        return self.x, self.edge_index, self.y

    def __add_node(self, node_ls, node, target):
        """Return the index of ``[node, target]`` in *node_ls*, adding it if new.

        Returns None, without touching *node_ls*, when *node* is None or NaN.
        """
        if node is None or np.isnan(node):
            return None
        entry = [node, target]
        try:
            # Single scan: an existing node is found and returned directly.
            return node_ls.index(entry)
        except ValueError:
            node_ls.append(entry)
            return len(node_ls) - 1

    @property
    def dept_nums(self):
        """Number of distinct values in the 'dept' column of the stored table."""
        return self.data['dept'].nunique()

    @property
    def author_nums(self):
        """Number of distinct values in the 'author' column of the stored table."""
        return self.data['author'].nunique()

    @property
    def paper_nums(self):
        """Number of distinct values in the 'paper' column of the stored table."""
        return self.data['paper'].nunique()

    @property
    def isolated_nodes(self):
        """List of ``[value, label]`` nodes that appear in no edge."""
        # Collect the connected indices once instead of scanning the edge
        # tensor separately for every node.
        connected = set(self.edge_index.reshape(-1).tolist())
        return [
            node for idx, node in enumerate(self.node_list) if idx not in connected
        ]
# Data(edge_index=[2, 4], x=[3, 1])
import pandas as pd
# Three sample rows; the last one has no department and no author ID.
x = pd.DataFrame(
    columns=["dept", "author", "paper"],
    data=[
        [1, 1, 1],
        [2, 2, 1],
        [None, None, 3],
    ],
)
# Build the graph, labelling department/author/paper nodes 0/1/2.
data = MyGraphData(labels={"dept": 0, "author": 1, "paper": 2}, data=x)
1.0 1.0 1.0
2.0 2.0 1.0
nan nan 3.0
# Report the summary of the sample graph, one item per output line.
print(
    "Test Results:",
    f"Number of authors: {data.author_nums}",
    f"Number of papers: {data.paper_nums}",
    f"Number of departments: {data.dept_nums}",
    f"Number of isolated nodes: {len(data.isolated_nodes)}",
    f"Edge index: {data.edge_index}",
    f"x representation matrix: {data.x}",
    f"Node index [value, label]: {data.node_list}",
    sep="\n",
)
Test Results:
Number of authors: 2
Number of papers: 2
Number of departments: 2
Number of isolated nodes: 1
Edge index: tensor([[0, 1, 1, 2, 3, 4, 4, 2],
[1, 0, 2, 1, 4, 3, 2, 4]])
x representation matrix: tensor([1., 1., 1., 2., 2., 3.])
Node index [value, label]: [[1.0, 0], [1.0, 1], [1.0, 2], [2.0, 0], [2.0, 1], [3.0, 2]]