@article{M244C88F4,
  title    = "Hybrid All-Reduce Strategy with Layer Overlapping for Reducing Communication Overhead in Distributed Deep Learning",
  journal  = "KIPS Transactions on Computer and Communication Systems",
  year     = "2021",
  issn     = "2287-5891",
  doi      = "10.3745/KTCCS.2021.10.7.191",
  author   = "Daehyun Kim and Sangho Yeo and Sangyoon Oh",
  keywords = "Distributed Deep Learning, Synchronization, Layer Overlapping, Allreduce",
  abstract = "Because training datasets have grown large and models have become deeper to achieve high accuracy, deep neural network training requires a great deal of computation and takes too long on a single node. Distributed deep learning has therefore been proposed to reduce training time by spreading the computation across multiple nodes. In this study, we propose a hybrid all-reduce strategy that considers the characteristics of each layer, combined with a communication-computation overlapping technique, for the synchronization of distributed deep learning. Because a convolution layer has fewer parameters than a fully-connected layer and is located in the upper part of the network, only a short overlapping time is available, so butterfly all-reduce is used to synchronize the convolution layers. The fully-connected layers, on the other hand, are synchronized using ring all-reduce. Empirical experiments on PyTorch show that the proposed scheme reduces training time by up to 33% compared to the PyTorch baseline."
}