from torch_geometric.datasets import KarateClub

# Load the Karate Club dataset and pick its first graph object.
dataset = KarateClub()
data = dataset[0]

print(data)
print('==============================================================')

# Dimensionality of the node attributes.
node_feature_dim = data.num_node_features
print(f'Number of node features:{node_feature_dim}')

# `num_features` reports the same node-attribute dimensionality.
feature_dim = data.num_features
print(f'Number of node features: {feature_dim}')

# Dimensionality of the edge attributes.
edge_feature_dim = data.num_edge_features
print(f'Number of edge features: {edge_feature_dim}')

# Average node degree: entries in edge_index divided by the node count.
mean_degree = data.num_edges / data.num_nodes
print(f'Average node degree: {mean_degree:.2f}')

# Whether the edge indices are ordered and free of duplicate entries.
coalesced = data.is_coalesced()
print(f'if edge indices are ordered and do not contain duplicate entries.: {coalesced}')

# Number of nodes flagged by the training mask.
train_total = data.train_mask.sum()
print(f'Number of training nodes: {train_total}')

# Fraction of all nodes that are flagged by the training mask.
train_fraction = int(train_total) / data.num_nodes
print(f'Training node label rate: {train_fraction:.2f}')

# Whether the graph contains isolated nodes.
# NOTE(review): newer PyG releases appear to expose has_isolated_nodes() /
# has_self_loops() instead of the contains_* names; confirm against the
# installed version before upgrading.
has_isolated = data.contains_isolated_nodes()
print(f'Contains isolated nodes: {has_isolated}')

# Whether the graph contains self-loop edges.
has_loops = data.contains_self_loops()
print(f'Contains self-loops: {has_loops}')

# Whether the graph is undirected.
undirected = data.is_undirected()
print(f'Is undirected: {undirected}')
Data(edge_index=[2, 156], train_mask=[34], x=[34, 34], y=[34])
==============================================================
Number of node features:34
Number of node features: 34
Number of edge features: 0
Average node degree: 4.59
if edge indices are ordered and do not contain duplicate entries.: True
Number of training nodes: 4
Training node label rate: 0.12
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True
from torch_geometric.data import Data
import numpy as np
import torch


class MyGraphData(Data):
    """Graph of department / author / paper nodes built from a table.

    Each row of the input table contributes up to three nodes, read
    positionally: column 0 is the department, column 1 the author and
    column 2 the paper. The cell value becomes the node's feature value
    and ``labels[<type>]`` becomes its target. For every row an undirected
    department-author edge and an undirected author-paper edge are added
    (each stored as two directed edges).

    Cells that are ``None`` or NaN produce no node, so a row with missing
    values may leave the remaining nodes of that row without edges; such
    nodes are reported by :attr:`isolated_nodes`.

    Example table (values are IDs)::

        dept  author  paper
        1     1       1
        2     2       1

    Here author 1 of department 1 and author 2 of department 2 both wrote
    paper 1.
    """

    def __init__(self, data, labels, **args):
        """Store the table and immediately build the graph from it.

        Args:
            data: Table exposing ``.values`` (rows) and column access by
                the names ``'dept'``, ``'author'`` and ``'paper'``
                (presumably a pandas DataFrame -- confirm with the caller).
            labels: Mapping with keys ``'dept'``, ``'author'`` and
                ``'paper'`` giving the target value for each node type.
            **args: Extra keyword arguments forwarded to ``Data.__init__``.
        """
        # Forward the extra keyword arguments as the docstring promises;
        # previously they were accepted but silently dropped.
        super().__init__(**args)
        self.data = data
        self.edge_index = None
        self.x = None
        self.y = None
        # Unique nodes in insertion order; each entry is [value, target].
        self.node_list = []
        self.labels = labels
        self.create_graph(self.data)

    def create_graph(self, data):
        """Build ``x``, ``edge_index`` and ``y`` from the rows of ``data``.

        Args:
            data: Table to read rows from. ``None`` falls back to the table
                stored on the instance.

        Returns:
            Tuple ``(x, edge_index, y)``: ``x`` is a float tensor with one
            value per node, ``edge_index`` a long tensor of shape
            ``[2, num_edges]`` (row 0 = source, row 1 = destination) and
            ``y`` a float tensor with one target per node.
        """
        table = self.data if data is None else data
        edges = []
        for row in table.values:
            # Positional layout of a row: department, author, paper.
            dept, author, paper = row[0], row[1], row[2]
            print(dept, author, paper)

            dept_idx = self.__add_node(self.node_list, dept, self.labels['dept'])
            author_idx = self.__add_node(self.node_list, author, self.labels['author'])
            paper_idx = self.__add_node(self.node_list, paper, self.labels['paper'])

            # Undirected author-department edge, stored in both directions.
            if dept_idx is not None and author_idx is not None:
                edges.append([dept_idx, author_idx])
                edges.append([author_idx, dept_idx])

            # Undirected author-paper edge, stored in both directions.
            if author_idx is not None and paper_idx is not None:
                edges.append([author_idx, paper_idx])
                edges.append([paper_idx, author_idx])

        # reshape(-1, 2) keeps the result at shape [2, 0] when there are no
        # edges, instead of collapsing to a 1-D empty tensor.
        self.edge_index = (
            torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()
        )
        # Node values gathered into the (1-D) feature tensor.
        self.x = torch.tensor(
            [entry[0] for entry in self.node_list], dtype=torch.float
        )
        # Per-node targets, stored as floats.
        self.y = torch.tensor(
            [entry[1] for entry in self.node_list], dtype=torch.float
        )
        return self.x, self.edge_index, self.y

    def __add_node(self, node_ls, node, target):
        """Return the index of ``[node, target]`` in ``node_ls``, adding it if new.

        Args:
            node_ls: List of ``[value, target]`` entries, mutated in place.
            node: Node value; ``None`` or NaN means "missing".
            target: Target/type label of the node.

        Returns:
            Index of the entry in ``node_ls``, or ``None`` when ``node`` is
            missing (no entry is added in that case).
        """
        if node is None or np.isnan(node):
            return None
        entry = [node, target]
        try:
            # One scan both detects an existing node and yields its index.
            return node_ls.index(entry)
        except ValueError:
            node_ls.append(entry)
            return len(node_ls) - 1

    @property
    def dept_nums(self):
        """Number of distinct values in the ``'dept'`` column of the table."""
        return self.data['dept'].nunique()

    @property
    def author_nums(self):
        """Number of distinct values in the ``'author'`` column of the table."""
        return self.data['author'].nunique()

    @property
    def paper_nums(self):
        """Number of distinct values in the ``'paper'`` column of the table."""
        return self.data['paper'].nunique()

    @property
    def isolated_nodes(self):
        """``[value, target]`` entries of nodes that appear in no edge."""
        # Collect the connected indices once rather than testing each node
        # against the whole tensor.
        connected = set(self.edge_index.reshape(-1).tolist())
        return [
            entry
            for idx, entry in enumerate(self.node_list)
            if idx not in connected
        ]
# Summary report for `data`, which must already be defined at this point
# (presumably a MyGraphData instance built earlier -- not visible here).
print("Test Results:")

# Distinct counts per node type.
author_total = data.author_nums
print(f"Number of authors: {author_total}")

paper_total = data.paper_nums
print(f"Number of papers: {paper_total}")

dept_total = data.dept_nums
print(f"Number of departments: {dept_total}")

# Nodes that do not take part in any edge.
isolated_total = len(data.isolated_nodes)
print(f"Number of isolated nodes: {isolated_total}")

# Raw graph tensors and the node bookkeeping list.
edges = data.edge_index
print(f"Edge index: {edges}")

features = data.x
print(f"x representation matrix: {features}")

nodes = data.node_list
print(f"Node index [value, label]: {nodes}")
Test Results:
Number of authors: 2
Number of papers: 2
Number of departments: 2
Number of isolated nodes: 1
Edge index: tensor([[0, 1, 1, 2, 3, 4, 4, 2],
[1, 0, 2, 1, 4, 3, 2, 4]])
x representation matrix: tensor([1., 1., 1., 2., 2., 3.])
Node index [value, label]: [[1.0, 0], [1.0, 1], [1.0, 2], [2.0, 0], [2.0, 1], [3.0, 2]]