temu跨境电商入驻,河北网站seo地址,阳谷网站建设公司,网站改版后 搜索不到一、确认cuda工具包和n卡相关驱动是否安装
在终端中输入以下命令#xff1a;
nvcc -V如果出现以下提示#xff0c;则已经成功安装 在终端中输入以下命令#xff1a;
nvidia-smi如果出现即为成功#xff0c;我在这里就不去介绍怎么下载cuda和驱动怎么下载了#xff0c;…一、确认cuda工具包和n卡相关驱动是否安装
在终端中输入以下命令
nvcc -V如果出现以下提示则已经成功安装 在终端中输入以下命令
nvidia-smi如果出现即为成功我在这里就不去介绍怎么下载cuda和驱动怎么下载了大家可以看一下网上的其他安装教程
二、pip安装tensorRT API
此步骤在python的虚拟环境下进行
python3 -m pip install --upgrade tensorrt三、验证tensort库安装结果 import tensorrtprint(tensorrt.__version__)
8.6.1assert tensorrt.Builder(tensorrt.Logger())
[11/04/2023-11:19:33] [TRT] [W] CUDA lazy loading is not enabled.
Enabling it can significantly reduce device memory usage and speed up
TensorRT initialization.
See Lazy Loading section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading四、安装Cuda-python
这里如果遇到网络问题可以加上清华源-i [https://pypi.tuna.tsinghua.edu.cn/simple](https://pypi.tuna.tsinghua.edu.cn/simple)
pip install cuda-python五、验证cuda-python是否安装成功
进入python如果能成功导入库则成功安装开库
python
import cuda六、克隆TensorRT HelloWorld程序
项目地址https://github.com/NVIDIA/TensorRT/tree/main/samples/python/network_api_pytorch_mnist 1克隆整个项目
git clone https://github.com/NVIDIA/TensorRT.git这里如果网络不行可以直接上github下载zip文件一样的 2进入pytorch版本的例子目录安装依赖
cd TensorRT/samples/python/network_api_pytorch_mnist
pip install -r requirements.txt这里面包含了两个python源代码文件其中model.py是一个卷积神经网络的代码sample.py是调用这个网络对minist数据集进行训练预测的代码并将训练好的模型转换文tensorRT的格式进行推理。 3运行sample.py文件
python sample.py运行上述命令后会产生以下输出 训练过程 最终结果 **此时你就已经成功运行了tensorRT的pytorch版本的HelloWorld程序**下面我们对这个HelloWorld进行深入分析。
六、代码分析-Todo
这个样例采用了经典的mnist数据集它是一些单通道的二维图像组成的数据集可视化后如下 从下述代码可以知道作者构造了一个由两个卷积层和两个全连接层组成的简单神经网络用来训练在mnist数据集上的预测模型并且提供了get_weights方法方便下载训练好的参数。
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np
import os
from random import randint# 这是一个简单的神经的网络由两个卷积层和两个全连接层组成
# Network
class Net(nn.Module):def __init__(self):super(Net, self).__init__()self.conv1 nn.Conv2d(1, 20, kernel_size5)self.conv2 nn.Conv2d(20, 50, kernel_size5)self.fc1 nn.Linear(800, 500)self.fc2 nn.Linear(500, 10)def forward(self, x):x F.max_pool2d(self.conv1(x), kernel_size2, stride2)x F.max_pool2d(self.conv2(x), kernel_size2, stride2)x x.view(-1, 800)x F.relu(self.fc1(x))x self.fc2(x)return F.log_softmax(x, dim1)# 这个类主要实现用上述网络结构来训练的功能
class MnistModel(object):def __init__(self):# 一系列超参数self.batch_size 64self.test_batch_size 100self.learning_rate 0.0025self.sgd_momentum 0.9self.log_interval 100# 加载mnist的训练数据和测试数据self.train_loader torch.utils.data.DataLoader(datasets.MNIST(/tmp/mnist/data,trainTrue,downloadTrue,transformtransforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),),batch_sizeself.batch_size,shuffleTrue,num_workers1,timeout600,)self.test_loader torch.utils.data.DataLoader(datasets.MNIST(/tmp/mnist/data,trainFalse,transformtransforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),),batch_sizeself.test_batch_size,shuffleTrue,num_workers1,timeout600,)# 初始化网络对象self.network Net()def learn(self, num_epochs2):这个函数用来训练网络默认训练两轮# 每一个单轮训练def train(epoch):# 切换到训练模式self.network.train()# 使用随机梯度下降法来作为优化器optimizer optim.SGD(self.network.parameters(), lrself.learning_rate, momentumself.sgd_momentum)# 每一个batch训练数据for batch, (data, target) in enumerate(self.train_loader):data, target Variable(data), Variable(target)optimizer.zero_grad()output self.network(data)loss F.nll_loss(output, target)loss.backward()optimizer.step()# 输出损失信息if batch % self.log_interval 0:print(Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}.format(epoch,batch * len(data),len(self.train_loader.dataset),100.0 * batch / len(self.train_loader),loss.data.item(),))# 测试函数def test(epoch):# 切换到验证模式self.network.eval()test_loss 0correct 0for data, target in self.test_loader:with torch.no_grad():data, target Variable(data), Variable(target)output self.network(data)test_loss F.nll_loss(output, target).data.item()pred output.data.max(1)[1]correct pred.eq(target.data).cpu().sum()test_loss / len(self.test_loader)# 输出测试损失print(\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n.format(test_loss, correct, len(self.test_loader.dataset), 100.0 * correct / len(self.test_loader.dataset)))# 训练num_epochs轮for e in range(num_epochs):train(e 1)test(e 1)def get_weights(self):返回network的权重参数return self.network.state_dict()def get_random_testcase(self):从名字可以看出这是一个从测试数据中随机抽取样本来进行推理data, target next(iter(self.test_loader))case_num randint(0, len(data) - 1)test_case data.numpy()[case_num].ravel().astype(np.float32)test_name target.numpy()[case_num]return test_case, test_name
从下述代码可知作者使用了tensorRT对使用pytorch构建的神经网络进行了再构建没有使用到parser自动解析网络框架。随着网络层数越深这种方式会越来越麻烦。
import os
import sys
import model
import numpy as np
import tensorrt as trt
sys.path.insert(1, os.path.join(sys.path[0], ..))
import common
TRT_LOGGER trt.Logger(trt.Logger.WARNING)class ModelData(object):INPUT_NAME dataINPUT_SHAPE (1, 1, 28, 28)OUTPUT_NAME probOUTPUT_SIZE 10DTYPE trt.float32def populate_network(network, weights):# 从这个函数可以看出这里对model.py中的网络架构又重新进行了创建并没有使用parser来自动构建网络如果网络层数更深使用这种方法会非常麻烦# Configure the network layers based on the weights provided.input_tensor network.add_input(nameModelData.INPUT_NAME, dtypeModelData.DTYPE, shapeModelData.INPUT_SHAPE)def add_matmul_as_fc(net, input, outputs, w, b):assert len(input.shape) 3m 1 if len(input.shape) 3 else input.shape[0]k int(np.prod(input.shape) / m)assert np.prod(input.shape) m * kn int(w.size / k)assert w.size n * kassert b.size ninput_reshape net.add_shuffle(input)input_reshape.reshape_dims trt.Dims2(m, k)filter_const net.add_constant(trt.Dims2(n, k), w)mm net.add_matrix_multiply(input_reshape.get_output(0),trt.MatrixOperation.NONE,filter_const.get_output(0),trt.MatrixOperation.TRANSPOSE,)bias_const net.add_constant(trt.Dims2(1, n), b)bias_add net.add_elementwise(mm.get_output(0), bias_const.get_output(0), trt.ElementWiseOperation.SUM)output_reshape net.add_shuffle(bias_add.get_output(0))output_reshape.reshape_dims trt.Dims4(m, n, 1, 1)return output_reshapeconv1_w weights[conv1.weight].numpy()conv1_b weights[conv1.bias].numpy()conv1 network.add_convolution(inputinput_tensor, num_output_maps20, kernel_shape(5, 5), kernelconv1_w, biasconv1_b)conv1.stride (1, 1)pool1 network.add_pooling(inputconv1.get_output(0), typetrt.PoolingType.MAX, window_size(2, 2))pool1.stride (2, 2)conv2_w weights[conv2.weight].numpy()conv2_b weights[conv2.bias].numpy()conv2 network.add_convolution(pool1.get_output(0), 50, (5, 5), conv2_w, conv2_b)conv2.stride (1, 1)pool2 network.add_pooling(conv2.get_output(0), trt.PoolingType.MAX, (2, 2))pool2.stride (2, 2)fc1_w weights[fc1.weight].numpy()fc1_b weights[fc1.bias].numpy()fc1 add_matmul_as_fc(network, pool2.get_output(0), 500, fc1_w, fc1_b)relu1 network.add_activation(inputfc1.get_output(0), typetrt.ActivationType.RELU)fc2_w weights[fc2.weight].numpy()fc2_b weights[fc2.bias].numpy()fc2 add_matmul_as_fc(network, relu1.get_output(0), ModelData.OUTPUT_SIZE, fc2_w, fc2_b)fc2.get_output(0).name ModelData.OUTPUT_NAMEnetwork.mark_output(tensorfc2.get_output(0))def build_engine(weights):# For more information on TRT basics, refer to the introductory samples.builder trt.Builder(TRT_LOGGER)network builder.create_network(common.EXPLICIT_BATCH)config builder.create_builder_config()runtime trt.Runtime(TRT_LOGGER)config.max_workspace_size common.GiB(1)# Populate the network using weights from the PyTorch model.populate_network(network, weights)# Build and return an engine.plan builder.build_serialized_network(network, config)return runtime.deserialize_cuda_engine(plan)# Loads a random test case from pytorchs DataLoader
def load_random_test_case(model, pagelocked_buffer):# Select an image at random to be the test case.img, expected_output model.get_random_testcase()# Copy to the pagelocked input buffernp.copyto(pagelocked_buffer, img)return expected_outputdef main():common.add_help(descriptionRuns an MNIST network using a PyTorch model)# 训练pytorch模型mnist_model model.MnistModel()mnist_model.learn()# 训练结束后可以获得训练后的权重字典weights mnist_model.get_weights()# 使用训练好的权重来构建tensorrt的引擎对象engine build_engine(weights)# Build an engine, allocate buffers and create a stream.# For more information on buffer allocation, refer to the introductory samples.inputs, outputs, bindings, stream common.allocate_buffers(engine)context engine.create_execution_context()# 随机抽取推理样本保存在inputs中case_num是抽出的样本的真实值case_num load_random_test_case(mnist_model, pagelocked_bufferinputs[0].host)# For more information on performing inference, refer to the introductory samples.# The common.do_inference function will return a list of outputs - we only have one in this case.# 开始推理并产生推理结果output[output] common.do_inference_v2(context, bindingsbindings, inputsinputs, outputsoutputs, streamstream)pred np.argmax(output)# 清除缓存common.free_buffers(inputs, outputs, stream)# 输出真实值print(Test Case: str(case_num))# 输出测试值print(Prediction: str(pred))if __name__ __main__:main()
代码没时间看的后续完成后我再补上-~