
HarmonyOS Advanced — MindSpore Lite AI Framework Source Code Walkthrough: Model Loading in Detail (Part 5)

Outline

  • Introduction
  • I. LiteSession::CompileGraph(Model *model)
  • II. Core Flow of LiteSession::CompileGraph(Model *model)
    • 1. MindirModel::ConvertTensors
      • 1.1 Iterating and Executing MindirModel::ConvertTensor
        • 1.1.1 MindirModel::LoadTensorData
  • III. LiteSession::InitGraphInputTensors(model)
  • IV. LiteSession::InitGraphOutputTensors(model)
  • V. Scheduler::Schedule(std::vector<mindspore::kernel::KernelExec *> *dst_kernels)

Introduction

Picking up where the previous article left off, this post continues the flow of LiteSession::CompileGraph(Model *model). The previous installment covered part of the Init flow of LiteSession, the core of MindSpore Lite AI; this one walks through how LiteSession processes the Model object from there.

I. LiteSession::CompileGraph(Model *model)

When a Model * (obtained by wrapping the buffer read out of the weight-pack manager's cache) is passed into CompileGraph, LiteSession first unpacks and converts the Model into tensors. Before converting, it checks the model type, because different model types use different conversion routines: a non-MSLite model is cast to the abstract interface AbstractBaseModel and its own ConvertTensors is invoked, while an MSLite model goes through LiteSession::ConvertTensors. At present AbstractBaseModel has only one implementation, MindirModel, which describes MINDIR models. Once conversion completes, the processed Model is used to initialize the graph's tensors, i.e. the session's std::vector<mindspore::lite::Tensor *>.

```cpp
int LiteSession::CompileGraph(Model *model) {
  auto ret = PreCheck(model);
  MS_LOG(ERROR) << "NH#CompileGraph START";
  ...
  if (model->model_type_ != ModelType_MSLite) {
    MS_LOG(ERROR) << "NH#model typeModelType_MSLite: " << model->model_type_;
    ret = reinterpret_cast<AbstractBaseModel *>(model)->ConvertTensors(&this->tensors_);
  } else {
    MS_LOG(ERROR) << "NH#model (model->model_type_" << model->model_type_;
    // Convert to abstract base model interface
    ret = ConvertTensors(model);
    context_->set_schema_version(reinterpret_cast<LiteModel *>(model)->GetSchemaVersion());
  }
  ...
  ret = lite::PackWeightManager::GetInstance()->StoreOriginTensorData(model, &tensors_);
  ...
  InitGraphInputTensors(model);
  InitGraphOutputTensors(model);
  PackedNodePass::GetInstance().Run(model, tensors_);
  MS_LOG(ERROR) << "NH#CompileGraph  create Scheduler";
  // scheduler kernels
  Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
                      &is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
                      delegate_device_type_);
  scheduler.SetupSchedulerCb(std::move(sched_cb_));
  scheduler.SetConfig(config_info_);
  MS_LOG(ERROR) << "NH#CompileGraph scheduler.Schedule";
  ret = scheduler.Schedule(&kernels_);
  ...
  if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && is_control_flow_) {
    context_->DeleteThreadPool();
    (void)context_->CreateThreadPool(is_control_flow_);
  }
  infer_along_running_ = infer_along_running_ && !is_control_flow_ && !is_train_session_ && (is_infershape_ != RET_OK);
  InitGraphInOutTensorsMap(model);
  non_tail_call_kernels_ = scheduler.NonTailCallNodes();
  ret = PrepareKernels(model);
  ...
  if (is_train_session_ || is_prepare_session_) {
    is_running_.store(false);
    return RET_OK;
  }
  ret = InitExecutor();
  ...
  MarkSharedWeight(kernels_);
  FreePackOpWeight(kernels_);
  infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr);
  if (infer_along_running_) {
    this->context_->set_infer_checker(InferCheckerAll);
  }
  is_running_.store(false);
  return RET_OK;
}
```
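
For context, here is a minimal caller-side sketch of how execution reaches CompileGraph. It assumes the classic MindSpore Lite 1.x C++ API (Model::Import, session::LiteSession::CreateSession and the listed headers); the HarmonyOS-side wrappers may name these differently:

```cpp
// Hedged sketch: the classic MindSpore Lite 1.x calling sequence (API names assumed).
#include <cstddef>
#include "include/context.h"
#include "include/lite_session.h"
#include "include/model.h"

int LoadAndCompile(const char *model_buf, size_t buf_size) {
  // Parse the model buffer (e.g. the one read from the weight-pack cache) into a Model.
  auto *model = mindspore::lite::Model::Import(model_buf, buf_size);
  if (model == nullptr) {
    return -1;
  }

  mindspore::lite::Context context;  // defaults to single-thread CPU
  auto *session = mindspore::session::LiteSession::CreateSession(&context);
  if (session == nullptr) {
    delete model;
    return -1;
  }

  // Entry point analyzed above: tensors are converted, graph inputs/outputs
  // initialized, and kernels scheduled inside this call.
  return session->CompileGraph(model);  // RET_OK (0) on success
}
```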

II. Core Flow of LiteSession::CompileGraph(Model *model)

1. MindirModel::ConvertTensors

This step converts the tensors in the model into the format the Lite framework can work with:

```cpp
int MindirModel::ConvertTensors(std::vector<mindspore::lite::Tensor *> *lite_tensors) {
  if (lite_tensors == nullptr) {
    MS_LOG(ERROR) << "lite tensors is null.";
    return mindspore::lite::RET_NULL_PTR;
  }
  // Get the tensor count and the input/output indices
  uint32_t tensor_count = this->all_mindir_tensors_.size();
  auto model_input_indices = this->graph_.input_indices_;
  auto model_output_indices = this->graph_.output_indices_;
  // Iterate over all MindIR tensors and convert each into a Lite tensor via ConvertTensor
  for (uint32_t i = 0; i < tensor_count; ++i) {
    auto src_tensor = this->all_mindir_tensors_[i];
    auto *dst_tensor = ConvertTensor(src_tensor);
    ...
    if (mindspore::lite::IsContain(model_input_indices, i)) {
      dst_tensor->set_category(mindspore::lite::Category::GRAPH_INPUT);
    }
    if (mindspore::lite::IsContain(model_output_indices, i)) {
      // a tensor is as both input and output, would be treated as an input.
      if (!dst_tensor->IsGraphInput()) {
        dst_tensor->set_category(mindspore::lite::Category::GRAPH_OUTPUT);
      }
    }
    auto ret = CheckTensorValid(dst_tensor);
    ...
    lite_tensors->emplace_back(dst_tensor);
  }
  return mindspore::lite::RET_OK;
}
```
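
The one subtle point above is the category rule: a tensor index listed in both input_indices_ and output_indices_ is treated as an input. Below is a standalone sketch of that rule; IsContain here is a local stand-in for the framework helper of the same name:

```cpp
// Hedged sketch of the GRAPH_INPUT / GRAPH_OUTPUT categorization rule.
#include <algorithm>
#include <cstdint>
#include <vector>

enum class Category { VAR, GRAPH_INPUT, GRAPH_OUTPUT };

static bool IsContain(const std::vector<uint32_t> &vec, uint32_t v) {
  return std::find(vec.begin(), vec.end(), v) != vec.end();
}

Category Categorize(uint32_t idx, const std::vector<uint32_t> &inputs,
                    const std::vector<uint32_t> &outputs) {
  if (IsContain(inputs, idx)) {
    return Category::GRAPH_INPUT;  // input takes precedence over output
  }
  if (IsContain(outputs, idx)) {
    return Category::GRAPH_OUTPUT;
  }
  return Category::VAR;  // ordinary weight/intermediate tensor
}
```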

1.1 Iterating and Executing MindirModel::ConvertTensor

```cpp
mindspore::lite::Tensor *MindirModel::ConvertTensor(TensorProtoWrap mindir_tensor_wrap) {
  auto mindir_tensor = mindir_tensor_wrap.tensor_proto();
  auto data_type = MindirModelUtil::ProtoTypeToTypeId(mindir_tensor.data_type());
  std::vector<int> shape;
  for (int i = 0; i < mindir_tensor.dims_size(); i++) {
    shape.push_back(mindir_tensor.dims(i));
  }
  auto format = Format::NCHW;
  mindspore::lite::NodeType node_type;
  if (mindir_tensor.has_raw_data() || mindir_tensor.has_external_data()) {
    node_type = mindspore::lite::NodeType_ValueNode;
  } else {
    node_type = mindspore::lite::NodeType_CNode;
  }
  auto category = TensorCategory(node_type, mindir_tensor.dims_size(), data_type, mindir_tensor.raw_data().size());
  auto *lite_tensor = new mindspore::lite::Tensor(data_type, shape, format, category);
  lite_tensor->set_tensor_name(mindir_tensor_wrap.name());
  if (this->LoadTensorData(lite_tensor, mindir_tensor) != RET_OK) {
    MS_LOG(WARNING) << "MindirModel: Convert tensor failed, load tensor data failed, tensor data will be empty.";
  }
  return lite_tensor;
}
```
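
Note how the node type is derived purely from whether the proto carries data: a tensor with raw or external data is a constant weight (ValueNode), while everything else is produced at runtime by an operator (CNode). A condensed sketch of that decision, with TensorProto as a stand-in for mind_ir::TensorProto:

```cpp
// Hedged sketch: deriving the node type from the presence of tensor data.
enum class NodeType { ValueNode, CNode };

template <typename TensorProto>
NodeType DeriveNodeType(const TensorProto &t) {
  // Constants (weights) ship their bytes inside the proto or in an external file;
  // activation tensors have no data yet and are computed by a CNode at runtime.
  return (t.has_raw_data() || t.has_external_data()) ? NodeType::ValueNode
                                                     : NodeType::CNode;
}
```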
1.1.1 MindirModel::LoadTensorData
```cpp
int MindirModel::LoadTensorData(mindspore::lite::Tensor *lite_tensor, const mind_ir::TensorProto &mindir_tensor) {
  if (mindir_tensor.has_raw_data()) {
    return memcpy_s(lite_tensor->MutableData(), lite_tensor->Size(), mindir_tensor.raw_data().data(),
                    mindir_tensor.raw_data().size());
  }
  if (mindir_tensor.has_external_data()) {
    std::string file = this->GetModelPath() + "/" + mindir_tensor.external_data().location();
    // Read file
    std::basic_ifstream<char> fid(file, std::ios::in | std::ios::binary);
    ...
    fid.seekg(0, std::ios_base::end);
    size_t file_size = static_cast<size_t>(fid.tellg());
    fid.clear();
    fid.seekg(0);
    auto plain_data = std::make_unique<char[]>(file_size);
    constexpr uint8_t is_little_endian = 1;
    constexpr int byte_order_index = 0;
    fid.read(plain_data.get(), file_size);
    fid.close();
    // if byte order is not same return false
    if ((plain_data[byte_order_index] == is_little_endian) != common::IsLittleByteOrder()) {
      MS_LOG(ERROR) << "The byte order of export MindIr device and load MindIr device is not same!";
      return mindspore::lite::RET_ERROR;
    }
    const uint8_t *data = reinterpret_cast<const uint8_t *>(plain_data.get());
    auto ret =
      common::huge_memcpy(reinterpret_cast<uint8_t *>(lite_tensor->MutableData()), lite_tensor->Size(),
                          data + mindir_tensor.external_data().offset(), mindir_tensor.external_data().length());
    return mindspore::lite::RET_OK;
  }
  return mindspore::lite::RET_NOT_SUPPORT;
}
```
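
The byte-order guard compares the first byte of the external-data file (written by the exporter) against the loading host's endianness. A self-contained sketch of that check, assuming the same leading-endianness-byte convention:

```cpp
// Hedged sketch: host endianness detection matching the guard above.
#include <cstdint>
#include <cstring>

bool IsLittleByteOrder() {
  const uint16_t probe = 1;
  uint8_t first_byte = 0;
  std::memcpy(&first_byte, &probe, 1);  // low byte stored first => little-endian
  return first_byte == 1;
}

// Usage mirroring LoadTensorData: the file's first byte is 1 iff the exporting
// device was little-endian; loading must happen on a host with matching order.
bool ByteOrderMatches(const char *file_data) {
  constexpr uint8_t kIsLittleEndian = 1;
  return (static_cast<uint8_t>(file_data[0]) == kIsLittleEndian) == IsLittleByteOrder();
}
```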

III. LiteSession::InitGraphInputTensors(model)

```cpp
void LiteSession::InitGraphInputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  auto graph_in_size = model->graph_.input_indices_.size();
  MS_LOG(ERROR) << "NH#InitGraphInputTensors in_size: " << graph_in_size;
  for (size_t i = 0; i < graph_in_size; ++i) {
    auto in_tensor_idx = model->graph_.input_indices_[i];
    MS_ASSERT(in_tensor_idx < this->tensors_.size());
    auto *in_tensor = this->tensors_.at(in_tensor_idx);
    MS_ASSERT(in_tensor != nullptr);
    this->inputs_.emplace_back(in_tensor);
  }
}
```

IV. LiteSession::InitGraphOutputTensors(model)

```cpp
void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(this->outputs_.empty());
  auto graph_out_size = model->graph_.output_indices_.size();
  for (size_t i = 0; i < graph_out_size; ++i) {
    auto out_tensor_idx = model->graph_.output_indices_[i];
    MS_ASSERT(out_tensor_idx < this->tensors_.size());
    auto *out_tensor = this->tensors_.at(out_tensor_idx);
    MS_ASSERT(out_tensor != nullptr);
    this->outputs_.emplace_back(out_tensor);
  }
}
```
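
After these two steps, inputs_ and outputs_ back the session's public accessors. A minimal usage sketch, assuming the MindSpore Lite 1.x interface (GetInputs returning a tensor vector, GetOutputs returning a name-to-tensor map); newer releases may differ:

```cpp
// Hedged sketch: reading the tensors populated by InitGraphInput/OutputTensors.
#include <iostream>
#include "include/lite_session.h"

void DumpGraphIo(mindspore::session::LiteSession *session) {
  // GetInputs() exposes the inputs_ vector filled from graph_.input_indices_.
  for (auto *in : session->GetInputs()) {
    std::cout << "input tensor, byte size: " << in->Size() << std::endl;
  }
  // GetOutputs() exposes the outputs_ side as a name -> tensor map (1.x API).
  for (const auto &out : session->GetOutputs()) {
    std::cout << "output tensor: " << out.first << std::endl;
  }
}
```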

V. Scheduler::Schedule(std::vector<mindspore::kernel::KernelExec *> *dst_kernels)

When is Scheduler::Schedule called? It is triggered by LiteSession::CompileGraph. Once inside Schedule, execution reaches InitDelegateKernels: on the plain CPU path the delegate passed in is nullptr, so the function returns immediately; on the NNRT path, the delegate created earlier by LiteSession::CreateNNRTDelegate() is in place and its delegate kernels replace the matching ones.

In other words, loading a model the CPU way simply converts the model-file buffer into the corresponding objects and stores them in LiteSession's std::vector<mindspore::lite::Tensor *> inputs_; the NNRT path has to create the corresponding Delegate first, while the plain MindSpore path skips delegate creation and returns RET_OK directly.
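
Below is a condensed sketch of that dispatch. Delegate, KernelExec and the member names are illustrative stand-ins modeled on the flow just described, not the exact framework types:

```cpp
// Hedged sketch of the delegate dispatch inside Scheduler::Schedule.
#include <memory>
#include <vector>

struct KernelExec {};
struct Delegate {
  // Replaces supported kernel segments with delegate kernels (e.g. NNRT ops).
  int Replace(std::vector<KernelExec *> *kernels) { return 0; }
};
constexpr int RET_OK = 0;

int InitDelegateKernelsSketch(std::shared_ptr<Delegate> delegate,
                              std::vector<KernelExec *> *dst_kernels) {
  if (delegate == nullptr) {
    return RET_OK;  // plain CPU / MindSpore path: nothing to delegate
  }
  // NNRT path: the delegate built by LiteSession::CreateNNRTDelegate() takes
  // over the kernels it supports and replaces them in dst_kernels.
  return delegate->Replace(dst_kernels);
}
```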

```cpp
int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
  MS_LOG(DEBUG) << "Start schedule.";
  int check_input_ret = CheckInputParam(dst_kernels);
  ...
  shape_fusion_pass_ =
    std::make_shared<ShapeFusionPass>(context_, reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
  int ret = SchedulePreProcess();
  ...
  if (*is_control_flow_) {
    control_flow_scheduler_ = std::make_shared<ControlFlowScheduler>(context_, ms_context_, src_tensors_);
  }
  ret = ScheduleGraphToKernels(dst_kernels);
  FreeOpParameters();
  op_parameters_.clear();
  ...
  if (context_->float_mode) {
    kernel::KernelExecUtil::FindAllInoutKernels(*dst_kernels);
    ret = DelQuantDTypeCastKernel(dst_kernels);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Delete quant_dtype_cast kernel failed.";
      return ret;
    }
  }
  shape_fusion_pass_->StoreStateAndReset();
  MS_LOG(DEBUG) << "Start to init delegate kernels.";
  ret = InitDelegateKernels(dst_kernels);
  ...
  MS_LOG(DEBUG) << "Finish to init delegate kernels.";
  ret = CheckCpuValid(dst_kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "kernels invalid in set devices.";
    return ret;
  }
  kernel::KernelExecUtil::FindAllInoutKernels(*dst_kernels);
  ret = ConstructSubGraphs(dst_kernels);
  ret = ProcessSubGraphTranspose(dst_kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Process SubGraph with multi layout failed.";
    return ret;
  }
  if (*is_control_flow_) {
    control_flow_scheduler_->SetSubgraphForPartialNode(&partial_kernel_subgraph_index_map_,
                                                       &subgraph_index_subgraph_kernel_map_);
    ret = control_flow_scheduler_->Schedule(dst_kernels);
  }
  auto status = RuntimePass(dst_kernels, src_tensors_);
  ret = InitKernels(std::move(*dst_kernels));
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "InitKernels failed.";
    return ret;
  }
  shape_fusion_pass_->RestoreState();
  return RET_OK;
}
```

To be continued.

