% Conference paper from the ICME 2017 proceedings (repository export, cleaned up).
%   - The exporter's free-text note was split into structured fields: the
%     copyright line is kept in `copyright` and the conference dates
%     (10 to 14 July 2017) in `eventdate` as an ISO 8601 range. Classic styles
%     ignore both fields, so they no longer leak into the printed reference.
%     The proceedings name that the note repeated is carried by `booktitle`.
%   - Author names use the unambiguous "Last, First" form.
%   - NOTE(review): `address` holds a country, but BibTeX expects the
%     publisher's city here. Confirm the city and replace the value.
@inproceedings{06dd0c6eedc54de2845cbe98afc7d9ef,
  author    = {Sun, Ke and Zhu, Jiasong and Lei, Zhuo and Hou, Xianxu and Zhang, Qian and Duan, Jiang and Qiu, Guoping},
  title     = {Learning deep semantic attributes for user video summarization},
  booktitle = {2017 IEEE International Conference on Multimedia and Expo, ICME 2017},
  series    = {Proceedings - IEEE International Conference on Multimedia and Expo},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2017},
  month     = aug,
  day       = {28},
  pages     = {643--648},
  doi       = {10.1109/ICME.2017.8019411},
  language  = {English},
  keywords  = {Bundling Center Clustering, Deep Convolution Neural Network, Semantic Attribute, Video Summarization},
  abstract  = {This paper presents a Semantic Attribute assisted video SUMmarization framework (SASUM). Compared with traditional methods, SASUM has several innovative features. Firstly, we use a natural language processing tool to discover a set of keywords from an image and text corpora to form the semantic attributes of visual contents. Secondly, we train a deep convolution neural network to extract visual features as well as predict the semantic attributes of video segments which enables us to represent video contents with visual and semantic features simultaneously. Thirdly, we construct a temporally constrained video segment affinity matrix and use a partially near duplicate image discovery technique to cluster visually and semantically consistent video frames together. These frame clusters can then be condensed to form an informative and compact summary of the video. We will present experimental results to show the effectiveness of the semantic attributes in assisting the visual features in video summarization and our new technique achieves state-of-the-art performance.},
  copyright = {{\textcopyright} 2017 IEEE},
  eventdate = {2017-07-10/2017-07-14},
}