Caffe源码中syncedmem文件分析
Caffe源码(caffe version: 09868ac,date: 2015.08.15)中有一些重要文件,这里介绍下syncedmem文件。
1. include文件:
(1)、<caffe/common.hpp>:此文件的介绍可以参考:http://blog.csdn.net/fengbingchun/article/details/54955236 ;
(2)、<caffe/util/math_functions.hpp>:此文件的介绍可以参考:http://blog.csdn.net/fengbingchun/article/details/56280708 ;
2. 内联函数CaffeMallocHost/CaffeFreeHost:
(1)、CaffeMallocHost:CPU模式下,通过调用C语言的malloc函数分配内存;GPU模式下(未定义CPU_ONLY时),调用cudaMallocHost分配内存;
(2)、CaffeFreeHost:CPU模式下,通过调用C语言的free函数释放内存;GPU模式下(未定义CPU_ONLY时),调用cudaFreeHost释放内存;
3. 类SyncedMemory:在主机(CPU)和设备(GPU)之间管理内存分配和数据同步,封装CPU和GPU之间数据交互操作。
<caffe/syncedmem.hpp>文件的详细介绍如下:
#ifndef CAFFE_SYNCEDMEM_HPP_
#define CAFFE_SYNCEDMEM_HPP_#include <cstdlib>#include "caffe/common.hpp"
#include "caffe/util/math_functions.hpp"namespace caffe {// If CUDA is available and in GPU mode, host memory will be allocated pinned,
// using cudaMallocHost. It avoids dynamic pinning for transfers (DMA).
// The improvement in performance seems negligible in the single GPU case,
// but might be more significant for parallel training. Most importantly,
// it improved stability for large models on many GPUs.
// CPU模式下,通過調(diào)用C語言的malloc函數(shù)分配內(nèi)存
inline void CaffeMallocHost(void** ptr, size_t size) {
#ifndef CPU_ONLYif (Caffe::mode() == Caffe::GPU) {CUDA_CHECK(cudaMallocHost(ptr, size));return;}
#endif*ptr = malloc(size);CHECK(*ptr) << "host allocation of size " << size << " failed";
}// CPU模式下,通過調(diào)用C語言的free函數(shù)釋放內(nèi)存
inline void CaffeFreeHost(void* ptr) {
#ifndef CPU_ONLYif (Caffe::mode() == Caffe::GPU) {CUDA_CHECK(cudaFreeHost(ptr));return;}
#endiffree(ptr);
}/*** @brief Manages memory allocation and synchronization between the host (CPU)* and device (GPU).** TODO(dox): more thorough description.*/
// 在主機(jī)(Host/CPU)和設(shè)備(Device/GPU)之間管理內(nèi)存分配和數(shù)據(jù)同步,封裝CPU和GPU之間數(shù)據(jù)交互操作
class SyncedMemory {public:
// 默認(rèn)構(gòu)造函數(shù),簡單初始化,數(shù)據(jù)狀態(tài)置為UNINITIALIZEDSyncedMemory(): cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {}
// 帶size參數(shù)的顯示構(gòu)造函數(shù),并未分配內(nèi)存,數(shù)據(jù)狀態(tài)置為UNINITIALIZEDexplicit SyncedMemory(size_t size): cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {}
// 析構(gòu)函數(shù),CPU模式下,當(dāng)cpu_ptr_非空并且own_cpu_data_為true時,僅會調(diào)用CaffeFreeHost函數(shù)釋放內(nèi)存~SyncedMemory();
// 獲取CPU數(shù)據(jù)指針,數(shù)據(jù)不可更改,內(nèi)部會調(diào)用to_cpu函數(shù),在CPU模式下,數(shù)據(jù)狀態(tài)為HEAD_AT_CPU,在GPU模式下,數(shù)據(jù)狀態(tài)置為SYNCEDconst void* cpu_data();
// 調(diào)用CaffeFreeHost釋放內(nèi)存,如果own_cpu_data_為非空,則調(diào)用CaffeFreeHost釋放內(nèi)存,并修改CPU數(shù)據(jù)指針使其指向data,并置own_cpu_data_為false,數(shù)據(jù)狀態(tài)置為HEAD_AT_CPUvoid set_cpu_data(void* data);
// 獲取GPU數(shù)據(jù)指針,數(shù)據(jù)不可更改,在GPU模式下,數(shù)據(jù)狀態(tài)為HEAD_AT_GPU,在CPU模式下,數(shù)據(jù)狀態(tài)置為SYNCEDconst void* gpu_data();
// 在GPU模式下,內(nèi)部會調(diào)用to_gpu函數(shù),如果own_gpu_data_為非空,調(diào)用cudaFree釋放顯存,并修改GPU數(shù)據(jù)指針使其指向data,并置own_gpu_data_為false,在GPU模式下,數(shù)據(jù)狀態(tài)置為HEAD_AT_GPUvoid set_gpu_data(void* data);
// 獲取CPU數(shù)據(jù)指針,數(shù)據(jù)可更改,內(nèi)部會調(diào)用to_cpu函數(shù),數(shù)據(jù)狀態(tài)置為HEAD_AT_CPUvoid* mutable_cpu_data();
// 獲取GPU數(shù)據(jù)指針,數(shù)據(jù)可更改,在GPU模式下,內(nèi)部會調(diào)用to_gpu函數(shù),數(shù)據(jù)狀態(tài)置為HEAD_AT_GPUvoid* mutable_gpu_data();
// SyncedHead為枚舉類型,數(shù)據(jù)存放的位置,包括四種數(shù)據(jù)狀態(tài),依次為未初始化、在CPU、在GPU、已同步enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
// 返回數(shù)據(jù)狀態(tài),即數(shù)據(jù)存放的位置SyncedHead head() { return head_; }
// 返回數(shù)據(jù)大小(字節(jié))size_t size() { return size_; }#ifndef CPU_ONLY
// 異步推送數(shù)據(jù)從CPU到GPU,并置數(shù)據(jù)狀態(tài)為SYNCEDvoid async_gpu_push(const cudaStream_t& stream);
#endifprivate:
// 把數(shù)據(jù)存放到CPU上,
// 如果數(shù)據(jù)狀態(tài)為UNINITIALIZED,則調(diào)用CaffeMallocHost分配內(nèi)存,并初始化數(shù)據(jù)內(nèi)容為0,置own_cpu_data_為true,置數(shù)據(jù)狀態(tài)為HEAD_AT_CPU,
// 如果數(shù)據(jù)狀態(tài)為HEAD_AT_GPU,如果在GPU模式下,如果cpu_ptr_為空,則調(diào)用CaffeMallocHost分配內(nèi)存,并置own_cpu_data_為true,然后則將顯存數(shù)據(jù)拷貝到內(nèi)存(數(shù)據(jù)同步),并將數(shù)據(jù)狀態(tài)置為SYNCED// 其它數(shù)據(jù)狀態(tài)不作任何操作void to_cpu();
// 把數(shù)據(jù)存放到GPU上,僅在GPU模式作操作,在CPU模式下不作任何操作,
// 如果數(shù)據(jù)狀態(tài)為UNINITIALIZED,則調(diào)用cudaMalloc分配顯存,并初始化數(shù)據(jù)內(nèi)容為0,置數(shù)據(jù)狀態(tài)為HEAD_AT_GPU,并置own_gpu_data_為true
// 如果數(shù)據(jù)狀態(tài)為HEAD_AT_CPU,如果gpu_ptr_為空,則調(diào)用cudaMalloc分配顯存,并置own_gpu_data_為true,然后將內(nèi)存數(shù)據(jù)拷貝到顯存(數(shù)據(jù)同步),并將數(shù)據(jù)狀態(tài)置為SYNCED
// 其它數(shù)據(jù)狀態(tài)不作任何操作void to_gpu();
// 指向CPU的數(shù)據(jù)指針void* cpu_ptr_;
// 指向GPU的數(shù)據(jù)指針void* gpu_ptr_;
// 數(shù)據(jù)大小(字節(jié))size_t size_;
// 數(shù)據(jù)狀態(tài),當(dāng)前數(shù)據(jù)存放的位置SyncedHead head_;
// 是否通過SyncedMemory類分配了CPU內(nèi)存bool own_cpu_data_;
// 是否通過SyncedMemory類分配了GPU顯存bool own_gpu_data_;
// 設(shè)備編號int gpu_device_;// 禁止使用SyncedMemory類的拷貝和賦值操作DISABLE_COPY_AND_ASSIGN(SyncedMemory);
}; // class SyncedMemory} // namespace caffe#endif // CAFFE_SYNCEDMEM_HPP_
测试代码如下:
int test_caffe_syncedmem()
{caffe::SyncedMemory mem(10);caffe::SyncedMemory* p_mem = new caffe::SyncedMemory(10 * sizeof(float));if (mem.head() != caffe::SyncedMemory::UNINITIALIZED ||mem.size() != 10 ||p_mem->size() != 10 * sizeof(float) ||mem.cpu_data() == nullptr ||mem.mutable_cpu_data() == nullptr ||mem.head() != caffe::SyncedMemory::HEAD_AT_CPU) {fprintf(stderr, "Error\n");return -1;}fprintf(stderr, "p_mem size: %d\n", p_mem->size());fprintf(stderr, "mem size: %d\n", mem.size());void* cpu_data = mem.mutable_cpu_data();if (mem.head() != caffe::SyncedMemory::HEAD_AT_CPU) {fprintf(stderr, "Error\n");return -1;}caffe::caffe_memset(mem.size(), 1, cpu_data);for (int i = 0; i < mem.size(); ++i) {if ((static_cast<char*>(cpu_data))[i] != 1) {fprintf(stderr, "Error\n");return -1;}}cpu_data = mem.mutable_cpu_data();if (mem.head() != caffe::SyncedMemory::HEAD_AT_CPU) {fprintf(stderr, "Error\n");return -1;}caffe::caffe_memset(mem.size(), 2, cpu_data);for (int i = 0; i < mem.size(); ++i) {if ((static_cast<char*>(cpu_data))[i] != 2) {fprintf(stderr, "Error\n");return -1;}}delete p_mem;return 0;
}
测试结果如下:
GitHub: https://github.com/fengbingchun/Caffe_Test
总结
以上是生活随笔为你收集整理的Caffe源码中syncedmem文件分析的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: Caffe源码中math_functio
- 下一篇: Intel TBB简介及在Windows