Journal article on a parallel implementation of the PIPO block cipher for 64-bit ARM.
NOTE(review): volume, number and pages are not recorded in this entry. The DOI
suffix (2021.10.8.223) suggests vol. 10, no. 8, starting page 223, but that is
an inference; confirm against the publisher record before adding those fields.

@article{M5FDA4A65,
  author   = {Eum, Si Woo and Kwon, Hyeok Dong and Kim, Hyun Jun and Jang, Kyoung Bae and Kim, Hyun Ji and Park, Jae Hoon and Song, Gyeung Ju and Sim, Min Joo and Seo, Hwa Jeong},
  title    = {Optimized Implementation of Block Cipher {PIPO} in Parallel-Way on 64-bit {ARM} Processors},
  journal  = {KIPS Transactions on Computer and Communication Systems},
  year     = {2021},
  issn     = {2287-5891},
  doi      = {10.3745/KTCCS.2021.10.8.223},
  keywords = {PIPO Block Cipher, 64-bit ARM Processor, Parallel Optimal Implementation},
  abstract = {The lightweight block cipher PIPO announced at ICISC'20 has been effectively implemented by applying the bit slice technique. In this
    paper, we propose a parallel optimal implementation of PIPO for ARM processors. The proposed implementation enables parallel encryption
    of 8-plaintexts and 16-plaintexts. The implementation targets the A10x fusion processor. On the target processor, the existing reference
    PIPO code has performance of 34.6 cpb and 44.7 cpb in 64/128 and 64/256 standards. Among the proposed methods, the general
    implementation has a performance of 12.0 cpb and 15.6 cpb in the 8-plaintexts 64/128 and 64/256 standards, and 6.3 cpb and 8.1 cpb
    in the 16-plaintexts 64/128 and 64/256 standards. Compared to the existing reference code implementation, the 8-plaintexts parallel
    implementation for each standard has about 65.3\%, 66.4\%, and the 16-plaintexts parallel implementation, about 81.8\%, and 82.1\% better
    performance. The register minimum alignment implementation shows performance of 8.2 cpb and 10.2 cpb in the 8-plaintexts 64/128
    and 64/256 specifications, and 3.9 cpb and 4.8 cpb in the 16-plaintexts 64/128 and 64/256 specifications. Compared to the existing reference
    code implementation, the 8-plaintexts parallel implementation has improved performance by about 76.3\% and 77.2\%, and the 16-plaintext
    parallel implementation is about 88.7\% and 89.3\% higher for each standard.},
}