OpenCV3.4 的神经网络功能主要提供了以下三种:
ml 模块中的多层感知机 (Artificial Neural Networks - Multi-Layer Perceptrons), 提供了 MLP 的创建, 训练, 参数设置等函数. 如:
- static Ptr<ANN_MLP> create ()
- Creates empty model.
- static Ptr<ANN_MLP> load (const String &filepath)
- Loads and creates a serialized ANN from a file.
- void setAnnealFinalT (double val)
- void setAnnealInitialT (double val)
- void setAnnealItePerStep (int val)
- virtual void setBackpropMomentumScale (double val)=0
- virtual void setBackpropWeightScale (double val)=0
- virtual void setLayerSizes (InputArray _layer_sizes)=0
- virtual void setRpropDW0 (double val)=0
- virtual void setRpropDWMax (double val)=0
- enum ActivationFunctions {
- IDENTITY = 0,
- SIGMOID_SYM = 1,
- GAUSSIAN = 2,
- RELU = 3,
- LEAKYRELU = 4
- }
- enum TrainFlags {
- UPDATE_WEIGHTS = 1,
- NO_INPUT_SCALE = 2,
- NO_OUTPUT_SCALE = 4
- }
- enum TrainingMethods {
- BACKPROP =0,
- RPROP = 1,
- ANNEAL = 2
- }
请参看帮助文档 https://docs.opencv.org/3.4.0/d0/dce/classcv_1_1ml_1_1ANN__MLP.html .
DNN 模块, 提供了用于创建, 加载深度网络, 设置参数并执行前向推理的方法和类 (该模块只支持前向计算, 不支持训练网络), 可加载由 Caffe, TensorFlow, Torch, Darknet 等框架训练好的模型, 如:
- class cv::dnn::BackendNode
- Derivatives of this class encapsulate functions of certain backends.
- class cv::dnn::BackendWrapper
- Derivatives of this class wrap cv::Mat for different backends and targets.
- class cv::dnn::Dict
- This class implements name-value dictionary, values are instances of DictValue.
- struct cv::dnn::DictValue
- This struct stores the scalar value (or array) of one of the following type: double, cv::String or int64.
- class cv::dnn::Layer
- This interface class allows building new Layers, which are the building blocks of networks.
- class cv::dnn::LayerParams
- This class provides all data needed to initialize layer.
- class cv::dnn::Net
- This class allows to create and manipulate comprehensive artificial neural networks.
- Mat cv::dnn::blobFromImages (const std::vector<Mat> &images, double scalefactor=1.0, Size size=Size(), const Scalar &mean=Scalar(), bool swapRB=true, bool crop=true)
- Creates 4-dimensional blob from series of images. Optionally resizes and crops images from center, subtract mean values, scales values by scalefactor, swap Blue and Red channels.
- void cv::dnn::NMSBoxes (const std::vector<Rect> &bboxes, const std::vector<float> &scores, const float score_threshold, const float nms_threshold, std::vector<int> &indices, const float eta=1.f, const int top_k=0)
- Performs non maximum suppression given boxes and corresponding scores.
- Net cv::dnn::readNetFromCaffe (const String &prototxt, const String &caffeModel=String())
- Reads a network model stored in Caffe framework's format.
- Net cv::dnn::readNetFromDarknet (const String &cfgFile, const String &darknetModel=String())
- Reads a network model stored in Darknet model files.
- Net cv::dnn::readNetFromTensorflow (const String &model, const String &config=String())
- Reads a network model stored in TensorFlow framework's format.
- Net cv::dnn::readNetFromTorch (const String &model, bool isBinary=true)
- Reads a network model stored in Torch7 framework's format.
参看帮助文档 https://docs.opencv.org/3.4.0/d6/d0f/group__dnn.html .
第三方深度网络工具, 详情请查看帮助文档.
下面给出示例.
1. 基于 MLP 的识别. 该程序人工生成四类动物数据, 用这些数据训练 MLP 网络, 然后预测测试样本所属的类别.
# exam1.py
import cv2
import numpy as np
from random import randint

# Create the MLP network and configure its training method, activation
# function, layer sizes and termination criteria.
animals_net = cv2.ml.ANN_MLP_create()
# RPROP is a training *method*; UPDATE_WEIGHTS is a flag for train() and does
# not belong here.  Both constants equal 1, so OR-ing them selected RPROP
# anyway -- the flag is dropped to avoid implying it has an effect.
animals_net.setTrainMethod(cv2.ml.ANN_MLP_RPROP)
animals_net.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
# Layer sizes: 3 inputs (one per sample feature), 6 hidden, 4 outputs
# (one per animal class).
animals_net.setLayerSizes(np.array([3, 6, 4]))
# Termination criteria tuple: (type, max iteration count, epsilon).
animals_net.setTermCriteria(
    (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1))
# Generators for the four animal classes: each *_sample() returns a random
# 3-feature sample and each *_class() returns the matching one-hot label.
def dog_sample():
    """Return a random dog sample: [10..20, 1, 38..42] (bounds inclusive)."""
    return [randint(10, 20), 1, randint(38, 42)]
def dog_class():
    """Return the one-hot label of the dog class (index 0)."""
    return [1, 0, 0, 0]
def condor_sample():
    """Return a random condor sample: [3..10, 3..5, 0] (bounds inclusive)."""
    return [randint(3, 10), randint(3, 5), 0]
def condor_class():
    """Return the one-hot label of the condor class (index 1)."""
    return [0, 1, 0, 0]
def dolphin_sample():
    """Return a random dolphin sample: [30..190, 5..15, 80..100] (inclusive)."""
    return [randint(30, 190), randint(5, 15), randint(80, 100)]
def dolphin_class():
    """Return the one-hot label of the dolphin class (index 2)."""
    return [0, 0, 1, 0]
def dragon_sample():
    """Return a random dragon sample: [1200..1800, 30..40, 160..180] (inclusive)."""
    return [randint(1200, 1800), randint(30, 40), randint(160, 180)]
def dragon_class():
    """Return the one-hot label of the dragon class (index 3)."""
    return [0, 0, 0, 1]
# Bundle a sample and its class label into one training record.
def record(sample, classification):
    """Return ``(sample, classification)`` as a pair of float32 arrays.

    Each input sequence is wrapped in an outer list, so each returned array
    has a single row: shape ``(1, len(sample))`` and
    ``(1, len(classification))`` respectively.
    """
    features = np.array([sample], dtype=np.float32)
    label = np.array([classification], dtype=np.float32)
    return (features, label)
# Build the training set: RECORDS samples for each of the four classes
# (4 * RECORDS records in total), interleaved dog/condor/dolphin/dragon.
records = []
RECORDS = 5000
for _ in range(RECORDS):
    records.append(record(dog_sample(), dog_class()))
    records.append(record(condor_sample(), condor_class()))
    records.append(record(dolphin_sample(), dolphin_class()))
    records.append(record(dragon_sample(), dragon_class()))
# Train the MLP network.
# A train() call made without the UPDATE_WEIGHTS flag computes the weights
# from scratch, so training on one record per call leaves a model fitted only
# to the very last record.  Instead, stack every record into one batch, train
# from scratch in the first epoch and keep refining the existing weights
# (UPDATE_WEIGHTS) in the following epochs.
EPOCHS = 2
train_samples = np.vstack([sample for sample, _ in records])
train_labels = np.vstack([label for _, label in records])
train_data = cv2.ml.TrainData_create(
    train_samples, cv2.ml.ROW_SAMPLE, train_labels)
for e in range(EPOCHS):
    print("Epoch %d:" % e)
    flags = cv2.ml.ANN_MLP_UPDATE_WEIGHTS if e > 0 else 0
    animals_net.train(train_data, flags)
# Predict the class of freshly generated test samples.
TESTS = 100


def _count_correct(make_sample, expected_class):
    """Predict TESTS samples from ``make_sample`` and count the correct ones.

    ``expected_class`` is the class index the prediction must equal to count
    as correct.  Every predicted class index is printed as it is produced.
    """
    hits = 0
    for _ in range(TESTS):
        sample = np.array([make_sample()], dtype=np.float32)
        # predict() returns a tuple; element 0 is converted to the class index.
        clas = int(animals_net.predict(sample)[0])
        print("class: %d" % clas)
        if clas == expected_class:
            hits += 1
    return hits


dog_results = _count_correct(dog_sample, 0)
condor_results = _count_correct(condor_sample, 1)
dolphin_results = _count_correct(dolphin_sample, 2)
dragon_results = _count_correct(dragon_sample, 3)
# Report the test accuracy of each class as a percentage of TESTS.
# (Printing the raw hit count is only a valid percentage when TESTS == 100.)
print("Dog accuracy: %f%%" % (100.0 * dog_results / TESTS))
print("condor accuracy: %f%%" % (100.0 * condor_results / TESTS))
print("dolphin accuracy: %f%%" % (100.0 * dolphin_results / TESTS))
print("dragon accuracy: %f%%" % (100.0 * dragon_results / TESTS))
2. 基于 DNN 的识别. 该程序加载预先训练的 caffe 模型在摄像头获取的图像上检测人脸.
import numpy as np
import argparse

# cv2 is imported only inside the guard below: an unguarded import placed
# before it would raise first and the explanatory message would never be shown.
# If ImportError is raised, point the PYTHONPATH environment variable at the
# directory named in the message; if that does not help, update (or uninstall
# and reinstall) the related packages.
try:
    import cv2 as cv
except ImportError:
    raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, '
                      'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory '
                      '(with "python3" subdirectory if required)')
# Import the DNN module.
from cv2 import dnn
# Size of the blob fed to the network and the minimum confidence a detection
# needs in order to be drawn.
inWidth = 300
inHeight = 300
confThreshold = 0.5
# This file ships in the opencv3.4\sources\samples\dnn\face_detector directory;
# the parent of that path is the OpenCV 3.4 download or installation directory.
prototxt = 'face_detector/deploy.prototxt'
# The caffe model file must be downloaded first; see the text file in the
# opencv3.4\sources\samples\dnn\face_detector directory.
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'
# Load the caffe model and detect faces in frames grabbed from the camera.
if __name__ == '__main__':
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    cap = cv.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            # Stop cleanly when no frame could be grabbed instead of failing
            # on frame.shape below.
            if not ret or frame is None:
                break
            cols = frame.shape[1]
            rows = frame.shape[0]
            # Use the grabbed frame as the network input and run a forward
            # pass to detect faces.
            net.setInput(dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), (104.0, 177.0, 123.0), False, False))
            detections = net.forward()
            perf_stats = net.getPerfProfile()
            print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if confidence > confThreshold:
                    # Entries 3..6 are multiplied by the frame size to get the
                    # pixel coordinates of the box corners.
                    xLeftBottom = int(detections[0, 0, i, 3] * cols)
                    yLeftBottom = int(detections[0, 0, i, 4] * rows)
                    xRightTop = int(detections[0, 0, i, 5] * cols)
                    yRightTop = int(detections[0, 0, i, 6] * rows)
                    cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                                 (0, 255, 0))
                    label = "face: %.4f" % confidence
                    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                    # Filled white box behind the label text.
                    cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                                 (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                                 (255, 255, 255), cv.FILLED)
                    cv.putText(frame, label, (xLeftBottom, yLeftBottom),
                               cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
            cv.imshow("detections", frame)
            # Any key press ends the loop.
            if cv.waitKey(1) != -1:
                break
    finally:
        # Free the camera and close the preview window on every exit path.
        cap.release()
        cv.destroyAllWindows()
来源: http://www.bubuko.com/infodetail-2662734.html