

Deep Learning: Implementing the "3D Fully Convolutional Network for Vehicle Detection in Point Cloud" Model


1. Reference

3D Fully Convolutional Network for Vehicle Detection in Point Cloud

2. Model Implementation

''' Baidu Inc. Ref: 3D Fully Convolutional Network for Vehicle Detection in Point CloudAuthor: HSW Date: 2018-05-02 '''import sys import numpy as np import tensorflow as tf from prepare_data2 import * from baidu_cnn_3d import * KITTI_TRAIN_DATA_CNT = 7481 KITTI_TEST_DATA_CNT = 7518# create 3D-CNN Model def create_graph(sess, modelType = 0, voxel_shape = (400, 400, 20), activation=tf.nn.relu, is_train = True): '''Inputs: sess: tensorflow Session Object voxel_shape: voxel shape for network first layer activation: phrase_train: Outputs: voxel, graph, sess '''voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])phase_train = tf.placeholder(tf.bool, name="phase_train") if is_train else None with tf.variable_scope("3D_CNN_Model") as scope: model = Full_CNN_3D_Model()model.cnn3d_graph(voxel, modelType = modelType, activation=activation, phase_train = is_train)if is_train: initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_model")sess.run(tf.variables_initializer(initialized_var))return voxel, model, phase_train# read batch data def read_batch_data(batch_size, data_set_dir,objectType = "Car", split = "training", resolution=(0.2, 0.2, 0.2), scale=0.25, limitX = (0,80), limitY=(-40,40), limitZ=(-2.5,1.5)): '''Inputs: batch_size: data_set_dir: objectType: default is "Car"split: default is "training"resolution: scale: outputSize / inputSize limitX: limitY: limitZ: Outputs: '''kitti_3DVoxel = kitti_3DVoxel_interface(data_set_dir, objectType = objectType, split=split, scale = scale, resolution = resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)TRAIN_PROCESSED_IDX = 0TEST_PROCESSED_IDX = 0if split == "training": while TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT: batch_voxel = []batch_g_obj = []batch_g_cord = []idx = 0 while idx < batch_size and TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT: print(TRAIN_PROCESSED_IDX)voxel, g_obj, g_cord = kitti_3DVoxel.read_kitti_data(TRAIN_PROCESSED_IDX)TRAIN_PROCESSED_IDX += 1if voxel is None:continueidx += 1 # print(voxel.shape)batch_voxel.append(voxel)batch_g_obj.append(g_obj)batch_g_cord.append(g_cord)yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_obj, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32)elif split == "testing": while TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT: batch_voxel = []idx = 0while idx < batch_size and TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT: voxel = kitti_3DVoxel.read_kitti_data(iter * batch_size + idx)TEST_PROCESSED_IDX += 1if voxel is None: continueidx += 1 batch_voxel.append(voxel)yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis]# train 3D-CNN Model def train(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2,0.2,0.2), scale = 0.25, lr=0.01, limitX=(0,80), limitY=(-40,40), limitZ=(-2.5,1.5), epoch=101): '''Inputs: batch_num: data_set_dir: modelType: objectType: resolution: scale: lr: limitX, limitY, limitZ: Outputs: None'''batch_size = batch_numtraining_epochs = epochsizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))sizeY = int(round((limitY[1] - limitY[0]) / resolution[0]))sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[0]))voxel_shape = (sizeX, sizeY, sizeZ)with tf.Session() as sess: voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = True)saver = tf.train.Saver()total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y_pred = model.loss_Fun(lossType = 0, 
cord_loss_weight = 0.02)optimizer = model.create_optimizer(total_loss, optType = "Adam", learnRate = 0.001)init = tf.global_variables_initializer()sess.run(init)for epoch in range(training_epochs): batchCnt = 0; for (batch_voxel, batch_g_obj, batch_g_cord) in read_batch_data(batch_size, data_set_dir, objectType = objectType, split = "training", resolution = resolution, scale = scale, limitX = limitX, limitY = limitY, limitZ = limitZ): # print("batch_g_obj")# print(batch_g_obj.shape)sess.run(optimizer, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})cord_cost = sess.run(cord_loss, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})obj_cost = sess.run(is_obj_loss, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})non_obj_cost = sess.run(non_obj_loss, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "cord_cost = ", "{:.9f}".format(cord_cost))print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "obj_cost = ", "{:.9f}".format(obj_cost))print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "non_obj_cost = ", "{:.9f}".format(non_obj_cost))batchCnt += 1if (epoch > 0) and (epoch % 10 == 0): saver.save(sess, "velodyne_kitti_train_" + str(epoch) + ".ckpt")print("Training Finishied !")# test 3D-CNN Model def test(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2, 0.2, 0.2), scale = 0.25, limitX = (0, 80), limitY = (-40, 40), limitZ=(-2.5, 1.5)): '''Inputs: batch_num: data_set_dir: resolution: scale:limitX, limitY, limitZ: Outputs: None '''sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))sizeY = int(round((limitY[1] - limitY[0]) / resolution[0]))sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[0]))voxel_shape = (sizeX, sizeY, sizeZ)batch_size = batch_num; batch_voxel = read_batch_data(batch_num, data_set_dir, objectType = objectType, split="Testing", resolution=resolution, scale=scale, limitX=limitX, limitY=limitY, limitZ=limitZ)batch_voxel_x = batch_voxel.reshape(1, batch_voxel.shape[0], batch_voxel.shape[1], batch_voxel.shape[2], 1)with tf.Session() as sess: is_train = Falsevoxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = False)new_saver = tf.train.import_meta_graph("velodyne_kitti_train_40.ckpt.meta")last_model = "./velodyne_kitti_train_40.ckpt"saver.restore(sess, last_model)objectness = model.objectnesscordinate = model.cordinatey_pred = model.y objectness = sess.run(objectness, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]cordinate = sess.run(cordinate, feed_dict={voxel:batch_voxel_x})[0]y_pred = sess.run(y_pred, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]idx = np.where(y_pred >= 0.995)spheres = np.vstack((index[0], np.vstack((index[1], index[2])))).transpose()centers = spheres_to_centers(spheres, scale = scale, resolution=resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)corners = cordinate[idx].reshape[-1, 8, 3] + centers[:, np.newaxis]print(centers)print(corners)if __name__ == "__main__":batch_num = 3data_set_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"modelType = 1objectType = "Car"resolution = (0.2, 0.2, 0.2)scale = 0.25 lr = 0.001limitX = (0, 80)limitY = (-40, 40)limitZ = (-2.5, 1.5) epoch = 101 train(batch_num, data_set_dir = data_set_dir, modelType = 
modelType, objectType = objectType, resolution=resolution, scale=scale, lr=lr, limitX=limitX, limitY=limitY, limitZ=limitZ)
saver = tf.train.Saver()

2.1 Network Model
''' Baidu Inc. Ref: 3D Fully Convolutional Network for Vehicle Detection in Point CloudAuthor: HSW Date: 2018-05-02 '''import numpy as np import tensorflow as tf class Full_CNN_3D_Model(object): '''Define Full CNN Model'''def __init__(self): pass; def cnn3d_graph(self, voxel, modelType = 0, activation = tf.nn.relu, phase_train = True): if modelType == 0: # Modefied 3D-CNN, 該網絡結構不可使用,因為降采樣太嚴重(降采樣1/8)導致在預測時會出現較大誤差 self.layer1 = self.conv3d_layer(voxel , 1, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)self.layer2 = self.conv3d_layer(self.layer1, 16, 32, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)self.layer3 = self.conv3d_layer(self.layer2, 32, 64, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)self.layer4 = self.conv3d_layer(self.layer3, 64, 64, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, phase_train=phase_train)self.objectness = self.conv3D_to_output(self.layer4, 64, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None)self.cordinate = self.conv3D_to_output(self.layer4, 64, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None)self.y = tf.nn.softmax(self.objectness, dim=-1)elif modelType == 1: # 3D-CNN(論文網絡結構: 降采樣1/4,即InputSize / OutputSize = 0.25)self.layer1 = self.conv3d_layer(voxel , 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)self.layer2 = self.conv3d_layer(self.layer1, 10, 20, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)self.layer3 = self.conv3d_layer(self.layer2, 20, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)base_shape = self.layer2.get_shape().as_list()obj_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2]cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24]self.objectness = self.deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)self.cordinate = self.deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None)self.y = tf.nn.softmax(self.objectness, dim=-1)# batch Normalize def batch_norm(self, inputs, phase_train = True, decay = 0.9, eps = 1e-5): '''Inputs: inputs: input data for last layer phase_train: True / False, = True is train, = False is Test Outputs: norm data for next layer '''gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))pop_mean = tf.get_variable("pop_mean", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))pop_var = tf.get_variable("pop_var", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))axes = range(len(inputs.get_shape()) - 1)if phase_train == True:batch_mean, batch_var = tf.nn.moments(inputs, axes = [0, 1, 2, 3])train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean*(1 - decay))train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))with tf.control_dependencies([train_mean, train_var]):return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps)else: return tf.nn.batch_normalization(inputs, pop_mean, 
pop_var, beta, gamma, eps)# 3D Conv Layer def conv3d_layer(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): '''Inputs: inputs: pre-Layer output inputs_dims: pre-Layer output channels outputs_dims: cur-Layer output channels [length, height, width]: cur-Layer conv3d kernel size stride: conv3d kernel move step in length/height/width axisactivation: default use relu activation function padding: conv3d 'padding' parameter Outputs: 3D Conv. Layer outputs '''with tf.variable_scope("conv3D" + name): # conv3d layer kernel kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims], dtype = tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01))# conv3d layer bias bias = tf.get_variable("bias", shape=[outputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.0))# conv3d conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)bias = tf.nn.bias_add(conv, bias)if activation:bias = activation(bias, name="activation")bias = self.batch_norm(bias, phase_train)return bias# 3D Conv to Classification Layer def conv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): '''Inputs: inputs: pre-Layer outputs inputs_dims: pre-Layer output channels outputs_dims: cur-Layer output channels stride: conv3d kernel move step in length/height/width axisactivation: default use relu activation function padding: conv3d 'padding' parameter outputs_shape: de-conv outputs shape Outputs: conv outputs '''with tf.variable_scope("conv3D" + name):kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.01))conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)return conv # 3D Deconv. 
to Classification Layer def deconv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, output_shape, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): '''Inputs: inputs: pre-Layer outputs inputs_dims: pre-Layer output channels outputs_dims: cur-Layer output channels stride: conv3d kernel move step in length/height/width axisactivation: default use relu activation function padding: conv3d 'padding' parameter outputs_shape: de-conv outputs shape Outputs: de-conv outputs '''with tf.variable_scope("deconv3D"+name):kernel = tf.get_variable("weights", shape=[length, height, width, outputs_dims, inputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.01))deconv = tf.nn.conv3d_transpose(inputs, kernel, output_shape, stride, padding="SAME")return deconv # define loss def loss_Fun(self, lossType = 0, cord_loss_weight = 0.02): '''Inputs: lossType: = for difference loss Type cord_loss_weight: 0.02 Outputs: '''if lossType == 0: # print("g_obj")# print(self.cordinate.get_shape())g_obj = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list()[:4])g_cord = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list())non_g_obj = tf.subtract(tf.ones_like(g_obj, dtype=tf.float32), g_obj )elosion = 0.00001y = self.y is_obj_loss = -tf.reduce_sum(tf.multiply(g_obj , tf.log(y[:,:,:,:,0] + elosion))) # object loss non_obj_loss = tf.reduce_sum(tf.multiply(non_g_obj, tf.log(y[:, :, :, :, 0] + elosion))) # non-object loss cross_entropy = tf.add(is_obj_loss, non_obj_loss)obj_loss = cross_entropycord_diff = tf.multiply(g_obj , tf.reduce_sum(tf.square(tf.subtract(self.cordinate, g_cord)), 4)) # cord loss cord_loss = tf.multiply(tf.reduce_sum(cord_diff), cord_loss_weight)return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y # Create Optimizer def create_optimizer(self, all_loss, optType = "Adam", learnRate = 0.001): '''Inputs: all_loss: graph all_loss lr: learn rate Outputs: optimizer '''if optType == "Adam": opt = tf.train.AdamOptimizer(learnRate)optimizer = opt.minimize(all_loss)return optimizer
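
As a quick sanity check on the modelType = 1 comment above (the paper's network downsamples by 1/4, i.e. outputSize / inputSize = 0.25), here is a small shape-arithmetic sketch. It is not part of the original code; it just reproduces the stride-2, SAME-padding convolution sizes for the default 80 m x 80 m x 4 m range at 0.2 m resolution:

import math

# Input voxel grid for limitX=(0,80), limitY=(-40,40), limitZ=(-2.5,1.5), resolution=0.2 m
input_grid = (400, 400, 20)

def conv_same_stride2(size):
    # Output size of a stride-2 convolution with SAME padding (ceil division)
    return math.ceil(size / 2)

layer1 = tuple(conv_same_stride2(s) for s in input_grid)   # (200, 200, 10)
layer2 = tuple(conv_same_stride2(s) for s in layer1)       # (100, 100, 5)
layer3 = tuple(conv_same_stride2(s) for s in layer2)       # (50, 50, 3)

# The stride-2 deconvolution upsamples layer3 back to layer2's grid, so the
# objectness / cordinate maps have shape (100, 100, 5): 1/4 of the input grid.
output_grid = layer2
print(layer1, layer2, layer3, output_grid)
assert all(o == i // 4 for o, i in zip(output_grid, input_grid))

This is also why train() and the data pipeline below use scale = 0.25 when building the label maps.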

2.2 Data Preprocessing

'''Prepase KITTI data for 3D Object detection Ref: 3D Fully Convolutional Network for Vehicle Detection in Point CloudAuthor: Shiwen He Date: 28 April 2018 '''import numpy as np from kitti_object import kitti_object as kittiReader import kitti_util # lidar data => 3D Grid Voxel # filter lidar data by camera FoV def filter_camera_fov(pc): '''Inputs: pc: n x 3 Outputs: filter_pc: m x 3, m <= 3 Notices: FoV: from -45 degree to 45 degree '''logic_fov = np.logical_and((pc[:, 1] < pc[:, 0] - 0.27), (-pc[:, 1] < pc[:, 0] - 0.27))filter_pc = pc[logic_fov]return filter_pc # filter lidar data by detection range def filter_lidar_range(pc, limitX, limitY, limitZ):''' Inputs: pc: n x 3, limitX, limitY, limitZ: 1 x 2Outputs: filter_pc: m x 3, m <= n '''logic_x = np.logical_and(pc[:, 0] >= limitX[0], pc[:, 0] < limitX[1])logic_y = np.logical_and(pc[:, 1] >= limitY[0], pc[:, 1] < limitY[1])logic_z = np.logical_and(pc[:, 2] >= limitZ[0], pc[:, 2] < limitZ[1])logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))filter_pc = pc[:, :3][logic_xyz]return filter_pc# filter center + corners def filter_center_corners(centers, corners, boxsizes, limitX, limitY, limitZ): '''Inputs: centers: n x 3 corners: n x 8 x 3 limitX, limitY, limitZ: 1 x 2Outputs: filter_centers: m x 3, m <= n filter_corners: m x 3, m <= n '''logic_x = np.logical_and(centers[:, 0] >= limitX[0], centers[:, 0] < limitX[1])logic_y = np.logical_and(centers[:, 1] >= limitY[0], centers[:, 1] < limitY[1])logic_z = np.logical_and(centers[:, 2] >= limitZ[0], centers[:, 2] < limitZ[1])logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))filter_centers_1 = centers[logic_xyz, :]filter_corners_1 = corners[logic_xyz, :, :]filter_boxsizes_1 = boxsizes[logic_xyz, :]shape_centers = filter_centers_1.shape; filter_centers = np.zeros([shape_centers[0], 3])filter_corners = np.zeros([shape_centers[0], 8, 3]); filter_boxsizes = np.zeros([shape_centers[0], 3]); idx = 0for idx2 in range(shape_centers[0]): logic_x = np.logical_and(filter_corners_1[idx2, :, 0] >= limitX[0], filter_corners_1[idx2, :, 0] < limitX[1])logic_y = np.logical_and(filter_corners_1[idx2, :, 1] >= limitY[0], filter_corners_1[idx2, :, 1] < limitY[1])logic_z = np.logical_and(filter_corners_1[idx2, :, 2] >= limitZ[0], filter_corners_1[idx2, :, 2] < limitZ[1])logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))if logic_xyz.all(): filter_centers[idx, :3] = filter_centers_1[idx2, :]filter_corners[idx, :8, :3] = filter_corners_1[idx2, :, :] filter_boxsizes[idx, :3] = filter_boxsizes_1[idx2, :]idx += 1 if idx > 0:return filter_centers[:idx, :], filter_corners[:idx, :, :], filter_boxsizes[:idx, :]else:return None, None, Nonedef filter_label(object3Ds, objectType = 'Car'): '''Inputs: object3Ds:objectType: Outputs: centers, corners, rotatey '''idx = 0data = np.zeros([50, 7]).astype(np.float32)for iter in object3Ds: if iter.type == "DontCare": continue;if iter.type == objectType: # position data[idx, 0] = iter.t[0]data[idx, 1] = iter.t[1]data[idx, 2] = iter.t[2]# size data[idx, 3] = iter.hdata[idx, 4] = iter.wdata[idx, 5] = iter.l # rotate data[idx, 6] = iter.ryidx += 1 if idx > 0:return data[:idx, :3], data[:idx, 3:6], data[:idx, 6]else:return None, None, Nonedef proj_to_velo(calib_data):"""Inputs: calib_data: Outputs: project matrix: from camera cordination to velodyne cordination"""rect = calib_data.R0; # calib_data["R0_rect"].reshape(3, 3)velo_to_cam = calib_data.V2C; # calib_data["Tr_velo_to_cam"].reshape(3, 4)inv_rect = np.linalg.inv(rect)inv_velo_to_cam = 
np.linalg.pinv(velo_to_cam[:, :3])return np.dot(inv_velo_to_cam, inv_rect)# corners_3d def compute_3d_corners(centers, sizes, rotates): ''' Inputs: centers: rotates: sizes: Outputs: corners_3d: n x 8 x 3 array in Lidar coord.'''# print(centers) corners = []for place, rotate, sz in zip(centers, rotates, sizes):x, y, z = placeh, w, l = szif l > 10:continuecorner = np.array([[x - l / 2., y - w / 2., z],[x + l / 2., y - w / 2., z],[x - l / 2., y + w / 2., z],[x - l / 2., y - w / 2., z + h],[x - l / 2., y + w / 2., z + h],[x + l / 2., y + w / 2., z],[x + l / 2., y - w / 2., z + h],[x + l / 2., y + w / 2., z + h],])corner -= np.array([x, y, z])rotate_matrix = np.array([[np.cos(rotate), -np.sin(rotate), 0],[np.sin(rotate), np.cos(rotate), 0],[0, 0, 1]])a = np.dot(corner, rotate_matrix.transpose())a += np.array([x, y, z])corners.append(a)corners_3d = np.array(corners) return corners_3d# lidar data to 3D Grid Voxel def lidar_to_binary_voxel(pc, resolution, limitX, limitY, limitZ):''' Inputs: pc: n x 3, resolution: 1 x 3, limitX, limitY, limitZ: 1 x 2Outputs: voxel: shape is inputSize '''voxel_pc = np.zeros_like(pc).astype(np.int32)# Compute PointCloud Position in 3D Grid voxel_pc[:, 0] = ((pc[:, 0] - limitX[0]) / resolution[0]).astype(np.int32)voxel_pc[:, 1] = ((pc[:, 1] - limitY[0]) / resolution[1]).astype(np.int32)voxel_pc[:, 2] = ((pc[:, 2] - limitZ[0]) / resolution[2]).astype(np.int32)# 3D Grid voxel = np.zeros((int(round(limitX[1] - limitX[0]) / resolution[0]), int(round(limitY[1] - limitY[0]) / resolution[1]), \int(round((limitZ[1] - limitZ[0]) / resolution[2])))) # 3D Grid Value voxel[voxel_pc[:, 0], voxel_pc[:, 1], voxel_pc[:, 2]] = 1return voxel # label center to 3D Grid Voxel Center(sphere) def center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ):''' Inputs: center: n x 3 boxsize: n x 3scale: 1 x 1, = outputSize / inputSizeresolution: 1 x 3limitX, limitY, limitZ: 1 x 2Outputs: spheres: m x 3, m <= n '''# from 3D Box's bottom center => 3D center move_center = centers.copy(); print("centers")print(centers)print("boxsize")print(boxsize)move_center[:, 2] = centers[:, 2] + boxsize[:, 0] / 2; # compute Label Center PointCloud Position in 3D Grid spheres = np.zeros_like(move_center).astype(np.int32)spheres[:, 0] = ((move_center[:, 0] - limitX[0]) / resolution[0] * scale).astype(np.int32)spheres[:, 1] = ((move_center[:, 1] - limitY[0]) / resolution[1] * scale).astype(np.int32)spheres[:, 2] = ((move_center[:, 2] - limitZ[0]) / resolution[2] * scale).astype(np.int32)print("move_center")print(move_center)print("spheres")print(spheres)return spheres# 3D Grid Voxel Center(sphere) to label center def sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ): '''Inputs: spheres: n x 3 scale: 1 x 1, = outputSize / inputSize resolution: 1 x 3limitX, limitY, limitZ: 1 x 2 Outputs: centers: m x 3, m <= 3 '''centers = np.zeros_like(spheres).astype(np.float32); centers[:, 0] = spheres[:, 0] * resolution[0] / scale + limitX[0]centers[:, 1] = spheres[:, 1] * resolution[1] / scale + limitY[0]centers[:, 2] = spheres[:, 2] * resolution[2] / scale + limitZ[0]return centers# label corners to 3D Grid Voxel: corners - centers def corners_to_train(spheres, corners, scale, resolution, limitX, limitY, limitZ):'''Inputs: spheres: n x 3corners: n x 8 x 3 scale: 1 x 1, = outputSize / inputSizeresolution: 1 x 3limitX, limitY, limitZ: 1 x 2 Outputs: train_corners: m x 3, m <= n '''# 3D Grid Voxel Center => label center centers = sphere_to_center(spheres, scale, resolution, limitX, limitY, 
limitZ)train_corners = np.zeros_like(corners).astype(np.float32)# train corners for regression loss for index, (corner, center) in enumerate(zip(corners, centers)):train_corners[index] = corner - center return train_corners# create center and cordination for train def create_train_label(centers, corners, boxsize, scale, resolution, limitX, limitY, limitZ):'''Inputs: centers: n x 3 corners: n x 8 x 3 boxsize: n x 3 scale: 1 x 1, outputSize / inputSizeresolution: 1 x 3 limitX. limitY, limitZ: 1 x 2 Outputs: train_centers: m x 3, m <= n train_corners: m x 3, m <= n '''train_centers = center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ)train_corners = corners_to_train(train_centers, corners, scale, resolution, limitX, limitY, limitZ)return train_centers, train_corners def create_obj_map(train_centers, scale, resolution, limitX, limitY, limitZ):'''Inputs: centers: n x 3 scale: 1 x 1, outputSize / inputSizeresolution: 1 x 3limitX, limitY, limitZ: 1 x 2Outputs: obj_map: shape is scale * inputSize '''# 3D Grid sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))obj_map = np.zeros([sizeX, sizeY, sizeZ]) # print("sizeX, sizeY, sizeZ")# print(sizeX, sizeY, sizeZ)# objectness map: label center in objectness map where value is 1 obj_map[train_centers[:,0], train_centers[:, 1], train_centers[:, 2]] = 1; return obj_map def create_cord_map(train_centers, train_corners, scale, resolution, limitX, limitY, limitZ):'''Inputs: train_centers: n x 3 train_corners: n x 8 x 3 scale: 1 x 1, outputSize / inputSizeresolution: 1 x 3 limitX, limitY, limitZ: 1 x 2Outputs: cord_map: shape is inputSize * scale ''' # reshape train_corners: n x 8 x 3 => n x 24 corners = train_corners.reshape(train_corners.shape[0], -1) # 3D Grid sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))sizeD = 24cord_map = np.zeros([sizeX, sizeY, sizeZ, sizeD]) # print(train_centers)cord_map[train_centers[:,0], train_centers[:, 1], train_centers[:, 2]] = cornersreturn cord_map # kitti data interface: class kitti_3DVoxel_interface(object): def __init__(self, root_dir, objectType = 'Car', split='training', scale = 0.25, resolution = (0.2, 0.2, 0.2), limitX = (0, 80), limitY = (-40, 40), limitZ = (-2.5, 1.5)):'''Inputs: case1 root_dir: train or val. 
data dir, train or val.'s file struct like: root_dir->training->velodyneroot_dir->training->calibroot_dir->training->label_2 case2 root_dir: test data dir, test's file struct like: root_dir->testing->velodyneroot_dir->testing->calib Outputs: -None '''self.root_dir = root_dirself.split = splitself.object = kittiReader(self.root_dir, self.split)self.objectType = objectTypeself.scale = scaleself.resolution = resolution self.limitX = limitXself.limitY = limitYself.limitZ = limitZdef read_kitti_data(self, idx = 0): '''Inputs:idx: training or testing sample indexOutputs:voxel : inputSizeobj_map : scale * inputSizecord_map : scale * inputSize'''kitti_Object3Ds = Nonekitti_Lidar = None kitti_Calib = Noneif self.split == 'training':# read Lidar data + Lidar Label + Calib data kitti_Object3Ds = self.object.get_label_objects(idx); kitti_Lidar = self.object.get_lidar(idx); kitti_Calib = self.object.get_calibration(idx); # lidar data filter filter_fov = filter_camera_fov(kitti_Lidar) filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)# label filter centers, boxsizes, rotates = filter_label(kitti_Object3Ds, self.objectType)if centers is None:return None, None, None # label center: Notice from camera Coordination to velo. Coordination if not(kitti_Calib is None): proj_velo = proj_to_velo(kitti_Calib)[:, :3]centers = np.dot(centers, proj_velo.transpose())[:, :3] # label corners: corners = compute_3d_corners(centers, boxsizes, rotates)# print(corners)# print(corners.shape)# filter centers + corners filter_centers, filter_corners, boxsizes = filter_center_corners(centers, corners, boxsizes, self.limitX, self.limitY, self.limitZ)# print(filter_centers)# print(filter_corners)if not(filter_centers is None): # training centertrain_centers, train_corners = create_train_label(filter_centers, filter_corners, boxsizes, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)# print("filter_centers")# print(filter_centers)# print("train_centers")# print(train_centers)# obj_map / cord_map / voxel obj_map = create_obj_map(train_centers, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)cord_map = create_cord_map(train_centers, train_corners, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)voxel = lidar_to_binary_voxel(filter_range, self.resolution, self.limitX, self.limitY, self.limitZ)return voxel, obj_map, cord_mapelse: return None, None, None elif self.split == 'testing':# read Lidar Data + Calib + Data kitti_Lidar = self.object.get_lidar(idx); kitti_Calib = self.object.get_calibration(idx); # lidar data filter filter_fov = filter_camera_fov(kitti_Lidar) filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)voxel = lidar_to_binary_voxel(filter_range, self.resolution, self.limitX, self.limitY, self.limitZ)return voxelif __name__ == '__main__':data_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"kitti_3DVoxel = kitti_3DVoxel_interface(data_dir, objectType = 'Car', split='training', scale = 0.25, resolution = (0.2, 0.2, 0.2), limitX = (0, 80), limitY = (-40, 40), limitZ = (-2.5, 1.5))sampleIdx = 195; voxel, obj_map, cord_map = kitti_3DVoxel.read_kitti_data(sampleIdx)if not(voxel is None): print(voxel.shape)print(obj_map.shape) print(cord_map.shape)
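
To make the label pipeline above easier to follow, here is a simplified, self-contained NumPy sketch (not the original functions) of how a single ground-truth box turns into the objectness map and coordinate-offset map. The box values and the corner array are made up purely for illustration:

import numpy as np

resolution, scale = 0.2, 0.25
limitX, limitY, limitZ = (0, 80), (-40, 40), (-2.5, 1.5)

# Output grid is scale * inputSize, matching create_obj_map / create_cord_map above
out_shape = (int(round((limitX[1] - limitX[0]) / resolution * scale)),   # 100
             int(round((limitY[1] - limitY[0]) / resolution * scale)),   # 100
             int(round((limitZ[1] - limitZ[0]) / resolution * scale)))   # 5

# Hypothetical ground-truth box: bottom center (x, y, z) in velodyne coords, height h
center = np.array([20.0, 5.0, -1.5])
h = 1.6
box_center_3d = center + np.array([0.0, 0.0, h / 2.0])      # center_to_sphere lifts the bottom center first

# center_to_sphere: metric center -> index in the downscaled output grid
origin = np.array([limitX[0], limitY[0], limitZ[0]], dtype=np.float32)
sphere = ((box_center_3d - origin) / resolution * scale).astype(np.int32)

obj_map = np.zeros(out_shape, dtype=np.float32)
obj_map[tuple(sphere)] = 1.0                                 # objectness target (g_obj)

# corners_to_train: store the 8 corners as offsets from the cell's metric center
corners = np.random.randn(8, 3)                              # stand-in for the real rotated corners
cell_center = sphere * resolution / scale + origin           # sphere_to_center
cord_map = np.zeros(out_shape + (24,), dtype=np.float32)
cord_map[tuple(sphere)] = (corners - cell_center).reshape(-1)    # coordinate target (g_cord)

print(sphere, int(obj_map.sum()), cord_map[tuple(sphere)].shape)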

2.3 KITTI Data Reading Helpers

''' Helper class and functions for loading KITTI objectsAuthor: Charles R. Qi Date: September 2017 ''' from __future__ import print_functionimport os import sys import numpy as np import cv2 from PIL import Image BASE_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(BASE_DIR) sys.path.append(os.path.join(ROOT_DIR, 'mayavi')) import kitti_util as utilstry:raw_input # Python 2 except NameError:raw_input = input # Python 3# 3D static data class kitti_object(object):'''Load and parse object data into a usable format.'''def __init__(self, root_dir, split='training'):'''root_dir contains training and testing folders'''self.root_dir = root_dirself.split = splitself.split_dir = os.path.join(root_dir, split)if split == 'training':self.num_samples = 7481elif split == 'testing':self.num_samples = 7518else:print('Unknown split: %s' % (split))exit(-1)# data dir self.image_dir = os.path.join(self.split_dir, 'image_2')self.calib_dir = os.path.join(self.split_dir, 'calib')self.lidar_dir = os.path.join(self.split_dir, 'velodyne')self.label_dir = os.path.join(self.split_dir, 'label_2')def __len__(self):return self.num_samples# read image: return image def get_image(self, idx):assert(idx<self.num_samples) img_filename = os.path.join(self.image_dir, '%06d.png'%(idx))return utils.load_image(img_filename)# read lidar: return n x 4 def get_lidar(self, idx): assert(idx<self.num_samples) lidar_filename = os.path.join(self.lidar_dir, '%06d.bin'%(idx))return utils.load_velo_scan(lidar_filename)# read calib file: def get_calibration(self, idx):assert(idx<self.num_samples) calib_filename = os.path.join(self.calib_dir, '%06d.txt'%(idx))return utils.Calibration(calib_filename)# read label def get_label_objects(self, idx):assert(idx<self.num_samples and self.split=='training') label_filename = os.path.join(self.label_dir, '%06d.txt'%(idx))return utils.read_label(label_filename)# read depth map def get_depth_map(self, idx):pass# read top_down image def get_top_down(self, idx):passclass kitti_object_video(object):''' Load data for KITTI videos '''def __init__(self, img_dir, lidar_dir, calib_dir):self.calib = utils.Calibration(calib_dir, from_video=True)self.img_dir = img_dirself.lidar_dir = lidar_dirself.img_filenames = sorted([os.path.join(img_dir, filename) \for filename in os.listdir(img_dir)])self.lidar_filenames = sorted([os.path.join(lidar_dir, filename) \for filename in os.listdir(lidar_dir)])print(len(self.img_filenames))print(len(self.lidar_filenames))#assert(len(self.img_filenames) == len(self.lidar_filenames))self.num_samples = len(self.img_filenames)def __len__(self):return self.num_samplesdef get_image(self, idx):assert(idx<self.num_samples) img_filename = self.img_filenames[idx]return utils.load_image(img_filename)def get_lidar(self, idx): assert(idx<self.num_samples) lidar_filename = self.lidar_filenames[idx]return utils.load_velo_scan(lidar_filename)def get_calibration(self, unused):return self.calibdef viz_kitti_video():video_path = os.path.join(ROOT_DIR, 'dataset/2011_09_26/')dataset = kitti_object_video(\os.path.join(video_path, '2011_09_26_drive_0023_sync/image_02/data'),os.path.join(video_path, '2011_09_26_drive_0023_sync/velodyne_points/data'),video_path)print(len(dataset))for i in range(len(dataset)):img = dataset.get_image(0)pc = dataset.get_lidar(0)Image.fromarray(img).show()draw_lidar(pc)raw_input()pc[:,0:3] = dataset.get_calibration().project_velo_to_rect(pc[:,0:3])draw_lidar(pc)raw_input()returndef show_image_with_boxes(img, objects, calib, show3d=True):''' Show image 
with 2D bounding boxes '''img1 = np.copy(img) # for 2d bboximg2 = np.copy(img) # for 3d bboxfor obj in objects:if obj.type=='DontCare':continuecv2.rectangle(img1, (int(obj.xmin),int(obj.ymin)),(int(obj.xmax),int(obj.ymax)), (0,255,0), 2)box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)img2 = utils.draw_projected_box3d(img2, box3d_pts_2d)Image.fromarray(img1).show()if show3d:Image.fromarray(img2).show()def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,return_more=False, clip_distance=2.0):''' Filter lidar points, keep those in image FOV '''pts_2d = calib.project_velo_to_image(pc_velo)fov_inds = (pts_2d[:,0]<xmax) & (pts_2d[:,0]>=xmin) & \(pts_2d[:,1]<ymax) & (pts_2d[:,1]>=ymin)fov_inds = fov_inds & (pc_velo[:,0]>clip_distance)imgfov_pc_velo = pc_velo[fov_inds,:]if return_more:return imgfov_pc_velo, pts_2d, fov_indselse:return imgfov_pc_velodef show_lidar_with_boxes(pc_velo, objects, calib,img_fov=False, img_width=None, img_height=None): ''' Show all LiDAR points.Draw 3d box in LiDAR point cloud (in velo coord system) '''if 'mlab' not in sys.modules: import mayavi.mlab as mlabfrom viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3dprint(('All point num: ', pc_velo.shape[0]))fig = mlab.figure(figure=None, bgcolor=(0,0,0),fgcolor=None, engine=None, size=(1000, 500))if img_fov:pc_velo = get_lidar_in_image_fov(pc_velo, calib, 0, 0,img_width, img_height)print(('FOV point num: ', pc_velo.shape[0]))draw_lidar(pc_velo, fig=fig)for obj in objects:if obj.type=='DontCare':continue# Draw 3d bounding boxbox3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P) box3d_pts_3d_velo = calib.project_rect_to_velo(box3d_pts_3d)# Draw heading arrowori3d_pts_2d, ori3d_pts_3d = utils.compute_orientation_3d(obj, calib.P)ori3d_pts_3d_velo = calib.project_rect_to_velo(ori3d_pts_3d)x1,y1,z1 = ori3d_pts_3d_velo[0,:]x2,y2,z2 = ori3d_pts_3d_velo[1,:]draw_gt_boxes3d([box3d_pts_3d_velo], fig=fig)mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.5,0.5,0.5),tube_radius=None, line_width=1, figure=fig)mlab.show(1)def show_lidar_on_image(pc_velo, img, calib, img_width, img_height):''' Project LiDAR points to image '''imgfov_pc_velo, pts_2d, fov_inds = get_lidar_in_image_fov(pc_velo,calib, 0, 0, img_width, img_height, True)imgfov_pts_2d = pts_2d[fov_inds,:]imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo)import matplotlib.pyplot as pltcmap = plt.cm.get_cmap('hsv', 256)cmap = np.array([cmap(i) for i in range(256)])[:,:3]*255for i in range(imgfov_pts_2d.shape[0]):depth = imgfov_pc_rect[i,2]color = cmap[int(640.0/depth),:]cv2.circle(img, (int(np.round(imgfov_pts_2d[i,0])),int(np.round(imgfov_pts_2d[i,1]))),2, color=tuple(color), thickness=-1)Image.fromarray(img).show() return imgdef dataset_viz():dataset = kitti_object(os.path.join(ROOT_DIR, 'dataset/KITTI/object'))for data_idx in range(len(dataset)):# Load data from datasetobjects = dataset.get_label_objects(data_idx)objects[0].print_object()img = dataset.get_image(data_idx)img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_height, img_width, img_channel = img.shapeprint(('Image shape: ', img.shape))pc_velo = dataset.get_lidar(data_idx)[:,0:3]calib = dataset.get_calibration(data_idx)# Draw 2d and 3d boxes on imageshow_image_with_boxes(img, objects, calib, False)raw_input()# Show all LiDAR points. 
Draw 3d box in LiDAR point cloudshow_lidar_with_boxes(pc_velo, objects, calib, True, img_width, img_height)raw_input()if __name__=='__main__':import mayavi.mlab as mlabfrom viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3ddataset_viz()
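
For reference, a minimal usage sketch of the reader above. The dataset path is a placeholder, and the import assumes this helper is saved as kitti_object.py (with kitti_util.py next to it) under a KITTI object folder laid out as root/training/{velodyne, calib, label_2}:

# Hypothetical usage of the kitti_object reader; adjust dataset_root to your local KITTI path.
from kitti_object import kitti_object

dataset_root = "/path/to/KITTI/object"
dataset = kitti_object(dataset_root, split='training')

idx = 0
pc = dataset.get_lidar(idx)                 # (n, 4) array: x, y, z, reflectance
calib = dataset.get_calibration(idx)        # Calibration object (P2, R0_rect, Tr_velo_to_cam)
objects = dataset.get_label_objects(idx)    # list of Object3d labels
print(pc.shape, len(objects))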


""" Helper methods for loading and parsing KITTI data.Author: Charles R. Qi Date: September 2017 """ from __future__ import print_functionimport numpy as np import cv2 import osclass Object3d(object):''' 3d object label '''def __init__(self, label_file_line):data = label_file_line.split(' ')data[1:] = [float(x) for x in data[1:]]# extract label, truncation, occlusionself.type = data[0] # 'Car', 'Pedestrian', ...self.truncation = data[1] # truncated pixel ratio [0..1]self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknownself.alpha = data[3] # object observation angle [-pi..pi]# extract 2d bounding box in 0-based coordinatesself.xmin = data[4] # leftself.ymin = data[5] # topself.xmax = data[6] # rightself.ymax = data[7] # bottomself.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax])# extract 3d bounding box informationself.h = data[8] # box heightself.w = data[9] # box widthself.l = data[10] # box length (in meters)self.t = (data[11],data[12],data[13]) # location (x,y,z) in camera coord.self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi]def print_object(self):print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \(self.type, self.truncation, self.occlusion, self.alpha))print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \(self.xmin, self.ymin, self.xmax, self.ymax))print('3d bbox h,w,l: %f, %f, %f' % \(self.h, self.w, self.l))print('3d bbox location, ry: (%f, %f, %f), %f' % \(self.t[0],self.t[1],self.t[2],self.ry))class Calibration(object):''' Calibration matrices and utils3d XYZ in <label>.txt are in rect camera coord.2d box xy are in image2 coordPoints in <lidar>.bin are in Velodyne coord.y_image2 = P^2_rect * x_recty_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velox_ref = Tr_velo_to_cam * x_velox_rect = R0_rect * x_refP^2_rect = [f^2_u, 0, c^2_u, -f^2_u b^2_x;0, f^2_v, c^2_v, -f^2_v b^2_y;0, 0, 1, 0]= K * [1|t]image2 coord:----> x-axis (u)||v y-axis (v)velodyne coord:front x, left y, up zrect/ref camera coord:right x, down y, front zRef (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdfTODO(rqi): do matrix multiplication only once for each projection.'''def __init__(self, calib_filepath, from_video=False):if from_video:calibs = self.read_calib_from_video(calib_filepath)else:calibs = self.read_calib_file(calib_filepath)# Projection matrix from rect camera coord to image2 coordself.P = calibs['P2'] self.P = np.reshape(self.P, [3,4])# Rigid transform from Velodyne coord to reference camera coordself.V2C = calibs['Tr_velo_to_cam']self.V2C = np.reshape(self.V2C, [3,4])self.C2V = inverse_rigid_trans(self.V2C)# Rotation from reference camera coord to rect camera coordself.R0 = calibs['R0_rect']self.R0 = np.reshape(self.R0,[3,3])# Camera intrinsics and extrinsicsself.c_u = self.P[0,2]self.c_v = self.P[1,2]self.f_u = self.P[0,0]self.f_v = self.P[1,1]self.b_x = self.P[0,3]/(-self.f_u) # relative self.b_y = self.P[1,3]/(-self.f_v)def read_calib_file(self, filepath):''' Read in a calibration file and parse into a dictionary.Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py'''data = {}with open(filepath, 'r') as f:for line in f.readlines():line = line.rstrip()if len(line)==0: continuekey, value = line.split(':', 1)# The only non-float values in these files are dates, which# we don't care about anywaytry:data[key] = np.array([float(x) for x in value.split()])except ValueError:passreturn datadef read_calib_from_video(self, calib_root_dir):''' Read calibration for camera 2 from video 
calib files.there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir'''data = {}cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))Tr_velo_to_cam = np.zeros((3,4))Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])Tr_velo_to_cam[:,3] = velo2cam['T']data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])data['R0_rect'] = cam2cam['R_rect_00']data['P2'] = cam2cam['P_rect_02']return datadef cart2hom(self, pts_3d):''' Input: nx3 points in CartesianOupput: nx4 points in Homogeneous by pending 1'''n = pts_3d.shape[0]pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))return pts_3d_hom# =========================== # ------- 3d to 3d ---------- # =========================== def project_velo_to_ref(self, pts_3d_velo):pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4return np.dot(pts_3d_velo, np.transpose(self.V2C))def project_ref_to_velo(self, pts_3d_ref):pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4return np.dot(pts_3d_ref, np.transpose(self.C2V))def project_rect_to_ref(self, pts_3d_rect):''' Input and Output are nx3 points '''return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))def project_ref_to_rect(self, pts_3d_ref):''' Input and Output are nx3 points '''return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))def project_rect_to_velo(self, pts_3d_rect):''' Input: nx3 points in rect camera coord.Output: nx3 points in velodyne coord.''' pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)return self.project_ref_to_velo(pts_3d_ref)def project_velo_to_rect(self, pts_3d_velo):pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)return self.project_ref_to_rect(pts_3d_ref)# =========================== # ------- 3d to 2d ---------- # =========================== def project_rect_to_image(self, pts_3d_rect):''' Input: nx3 points in rect camera coord.Output: nx2 points in image2 coord.'''pts_3d_rect = self.cart2hom(pts_3d_rect)pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3pts_2d[:,0] /= pts_2d[:,2]pts_2d[:,1] /= pts_2d[:,2]return pts_2d[:,0:2]def project_velo_to_image(self, pts_3d_velo):''' Input: nx3 points in velodyne coord.Output: nx2 points in image2 coord.'''pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)return self.project_rect_to_image(pts_3d_rect)# =========================== # ------- 2d to 3d ---------- # =========================== def project_image_to_rect(self, uv_depth):''' Input: nx3 first two channels are uv, 3rd channelis depth in rect camera coord.Output: nx3 points in rect camera coord.'''n = uv_depth.shape[0]x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_xy = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_ypts_3d_rect = np.zeros((n,3))pts_3d_rect[:,0] = xpts_3d_rect[:,1] = ypts_3d_rect[:,2] = uv_depth[:,2]return pts_3d_rectdef project_image_to_velo(self, uv_depth):pts_3d_rect = self.project_image_to_rect(uv_depth)return self.project_rect_to_velo(pts_3d_rect)def rotx(t):''' 3D Rotation about the x-axis. '''c = np.cos(t)s = np.sin(t)return np.array([[1, 0, 0],[0, c, -s],[0, s, c]])def roty(t):''' Rotation about the y-axis. '''c = np.cos(t)s = np.sin(t)return np.array([[c, 0, s],[0, 1, 0],[-s, 0, c]])def rotz(t):''' Rotation about the z-axis. '''c = np.cos(t)s = np.sin(t)return np.array([[c, -s, 0],[s, c, 0],[0, 0, 1]])def transform_from_rot_trans(R, t):''' Transforation matrix from rotation matrix and translation vector. 
'''R = R.reshape(3, 3)t = t.reshape(3, 1)return np.vstack((np.hstack([R, t]), [0, 0, 0, 1]))def inverse_rigid_trans(Tr):''' Inverse a rigid body transform matrix (3x4 as [R|t])[R'|-R't; 0|1]'''inv_Tr = np.zeros_like(Tr) # 3x4inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3])inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3])return inv_Trdef read_label(label_filename):lines = [line.rstrip() for line in open(label_filename)]objects = [Object3d(line) for line in lines]return objectsdef load_image(img_filename):return cv2.imread(img_filename)def load_velo_scan(velo_filename):scan = np.fromfile(velo_filename, dtype=np.float32)scan = scan.reshape((-1, 4))return scandef project_to_image(pts_3d, P):''' Project 3d points to image plane.Usage: pts_2d = projectToImage(pts_3d, P)input: pts_3d: nx3 matrixP: 3x4 projection matrixoutput: pts_2d: nx2 matrixP(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)=> normalize projected_pts_2d(2xn)<=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)=> normalize projected_pts_2d(nx2)'''n = pts_3d.shape[0]pts_3d_extend = np.hstack((pts_3d, np.ones((n,1))))print(('pts_3d_extend shape: ', pts_3d_extend.shape))pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3pts_2d[:,0] /= pts_2d[:,2]pts_2d[:,1] /= pts_2d[:,2]return pts_2d[:,0:2]# corners_2d + corners_3d def compute_box_3d(obj, P):''' Takes an object and a projection matrix (P) and projects the 3dbounding box into the image plane.Returns:corners_2d: (8,2) array in left image coord.corners_3d: (8,3) array in in rect camera coord.'''# compute rotational matrix around yaw axisR = roty(obj.ry) # 3d bounding box dimensionsl = obj.l;w = obj.w;h = obj.h;# 3d bounding box cornersx_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2];y_corners = [0,0,0,0,-h,-h,-h,-h];z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2];# rotate and translate 3d bounding boxcorners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))#print corners_3d.shapecorners_3d[0,:] = corners_3d[0,:] + obj.t[0];corners_3d[1,:] = corners_3d[1,:] + obj.t[1];corners_3d[2,:] = corners_3d[2,:] + obj.t[2];#print 'cornsers_3d: ', corners_3d # only draw 3d bounding box for objs in front of the cameraif np.any(corners_3d[2,:]<0.1):corners_2d = Nonereturn corners_2d, np.transpose(corners_3d)# project the 3d bounding box into the image planecorners_2d = project_to_image(np.transpose(corners_3d), P);#print 'corners_2d: ', corners_2dreturn corners_2d, np.transpose(corners_3d)def compute_orientation_3d(obj, P):''' Takes an object and a projection matrix (P) and projects the 3dobject orientation vector into the image plane.Returns:orientation_2d: (2,2) array in left image coord.orientation_3d: (2,3) array in in rect camera coord.'''# compute rotational matrix around yaw axisR = roty(obj.ry)# orientation in object coordinate systemorientation_3d = np.array([[0.0, obj.l],[0,0],[0,0]])# rotate and translate in camera coordinate system, project in imageorientation_3d = np.dot(R, orientation_3d)orientation_3d[0,:] = orientation_3d[0,:] + obj.t[0]orientation_3d[1,:] = orientation_3d[1,:] + obj.t[1]orientation_3d[2,:] = orientation_3d[2,:] + obj.t[2]# vector behind image plane?if np.any(orientation_3d[2,:]<0.1):orientation_2d = Nonereturn orientation_2d, np.transpose(orientation_3d)# project orientation into the image planeorientation_2d = project_to_image(np.transpose(orientation_3d), P);return orientation_2d, np.transpose(orientation_3d)def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):''' Draw 3d bounding box in imageqs: (8,3) array of 
vertices for the 3d box in following order:1 -------- 0/| /|2 -------- 3 .| | | |. 5 -------- 4|/ |/6 -------- 7'''qs = qs.astype(np.int32)for k in range(0,4):# Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.htmli,j=k,(k+1)%4# use LINE_AA for opencv3cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)i,j=k+4,(k+1)%4 + 4cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)i,j=k,k+4cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)return image
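
The Calibration docstring above summarizes the KITTI projection chain y_image2 = P2 * R0_rect * Tr_velo_to_cam * x_velo. The following NumPy sketch walks through that chain once with made-up calibration values (the real matrices come from the calib files parsed by Calibration), just to show the homogeneous-coordinate bookkeeping:

import numpy as np

# Hypothetical calibration values, only to illustrate the coordinate chain.
P2  = np.array([[721.5,   0.0, 609.6, 44.9],
                [  0.0, 721.5, 172.9,  0.2],
                [  0.0,   0.0,   1.0,  0.003]])
R0  = np.eye(3)
V2C = np.array([[0.0, -1.0,  0.0,  0.00],    # velodyne (x fwd, y left, z up)
                [0.0,  0.0, -1.0, -0.08],    # -> reference camera (x right, y down, z fwd)
                [1.0,  0.0,  0.0, -0.27]])

x_velo = np.array([10.0, 1.0, -1.0, 1.0])    # homogeneous point in velodyne coords

x_ref  = V2C @ x_velo                        # 3-vector in reference camera coords
x_rect = R0 @ x_ref                          # rectified camera coords
uvw    = P2 @ np.append(x_rect, 1.0)         # homogeneous image coords
u, v   = uvw[0] / uvw[2], uvw[1] / uvw[2]    # perspective divide
print(x_rect, (u, v))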

3. The code passes a test run and training is still in progress; my hardware is fairly weak, so training is slow.

