- 60
- 0
- 约9.99千字
- 约 11页
- 2023-01-08 发布于上海
- 举报
cora数据集的预处理
一:
出自:《深入浅出图神经网络》机械工业出版社,刘忠雨、李彦霖、周洋
# Load the Cora data and convert every piece to a torch.Tensor on DEVICE.
dataset = CoraData().data

# Row-normalise the features so that each row sums to 1.
# NOTE(review): a node whose feature row sums to 0 would divide by zero here
# -- confirm that the dataset has no all-zero rows.
node_feature = dataset.x / dataset.x.sum(1, keepdims=True)

tensor_x = tensor_from_numpy(node_feature, DEVICE)
tensor_y = tensor_from_numpy(dataset.y, DEVICE)
tensor_train_mask = tensor_from_numpy(dataset.train_mask, DEVICE)
tensor_val_mask = tensor_from_numpy(dataset.val_mask, DEVICE)
tensor_test_mask = tensor_from_numpy(dataset.test_mask, DEVICE)

# Normalise the adjacency matrix.  The result is read through .row/.col/.data
# below, so it is expected to be a scipy COO-format sparse matrix.
normalize_adjacency = CoraData.normalization(dataset.adjacency)
num_nodes, input_dim = node_feature.shape

# 2 x nnz index matrix (row indices on top, column indices below).
# `np.int64` replaces the bare name `int64`, which is undefined and raised
# NameError.
indices = torch.from_numpy(
    np.asarray(
        [normalize_adjacency.row, normalize_adjacency.col]
    ).astype(np.int64)
).long()
values = torch.from_numpy(normalize_adjacency.data.astype(np.float32))

# torch.sparse_coo_tensor is the supported replacement for the deprecated
# legacy constructor torch.sparse.FloatTensor; the dtype (float32) is taken
# from `values`.
tensor_adjacency = torch.sparse_coo_tensor(
    indices, values, (num_nodes, num_nodes)
).to(DEVICE)
out :
Process data ...
Nodes feature shape: (2708, 1433)
Nodes label shape: (2708,)
Adjacencys shape: (2708, 2708)
Number of training nodes: 140
Number of validation nodes: 500
Number of test nodes: 1000
Cached file: cora/processed_cora.pkl
二. gcn里的
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg.eigen.arpack import eigsh
import sys
def parse_index_file(filename):
    """Parse an index file into a list of integers.

    Args:
        filename: Path of a text file holding one integer per line.
            Leading and trailing whitespace on each line is ignored.

    Returns:
        A list with the integers in the order they appear in the file
        (empty if the file is empty).

    Raises:
        ValueError: If a line is not a valid integer once stripped
            (this includes blank lines).
    """
    # The context manager closes the handle deterministically; opening the
    # file directly in the ``for`` header left it to the garbage collector.
    with open(filename) as index_file:
        return [int(line.strip()) for line in index_file]
def sample_mask(idx, l):
    """Create a boolean mask that is True at the given positions.

    Args:
        idx: Index or indices (anything accepted by NumPy indexing) to set.
        l: Length (shape) of the mask, passed straight to ``np.zeros``.

    Returns:
        A NumPy array of dtype ``bool`` and shape ``l`` that is True at
        ``idx`` and False everywhere else.
    """
    # Use the builtin ``bool``: the ``np.bool`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    # Allocating the array as bool directly also avoids the float copy.
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask
def load_data(dataset_str):
Loads input data from gcn/data directory
ind.dataset_str.x = the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.tx = the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.allx = the feature vectors of both labeled and unlabeled training instances
(a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix
原创力文档

文档评论(0)