% Journal article on a parallel implementation of the PIPO block cipher for 64-bit ARM.
% Authors are separated by " and " (Last, First form); the DOI is stored bare, without a resolver prefix.
@article{M5FDA4A65,
  author   = {Eum, Si Woo and Kwon, Hyeok Dong and Kim, Hyun Jun and Jang, Kyoung Bae and Kim, Hyun Ji and Park, Jae Hoon and Song, Gyeung Ju and Sim, Min Joo and Seo, Hwa Jeong},
  title    = {Optimized Implementation of Block Cipher {PIPO} in Parallel-Way on 64-bit {ARM} Processors},
  journal  = {KIPS Transactions on Computer and Communication Systems},
  year     = {2021},
  issn     = {2287-5891},
  doi      = {10.3745/KTCCS.2021.10.8.223},
  keywords = {PIPO Block Cipher, 64-bit ARM Processor, Parallel Optimal Implementation},
  abstract = {The lightweight block cipher PIPO announced at ICISC'20 has been effectively implemented by applying the bit slice technique. In this paper, we propose a parallel optimal implementation of PIPO for ARM processors. The proposed implementation enables parallel encryption of 8-plaintexts and 16-plaintexts. The implementation targets the A10x fusion processor. On the target processor, the existing reference PIPO code has performance of 34.6 cpb and 44.7 cpb in 64/128 and 64/256 standards. Among the proposed methods, the general implementation has a performance of 12.0 cpb and 15.6 cpb in the 8-plaintexts 64/128 and 64/256 standards, and 6.3 cpb and 8.1 cpb in the 16-plaintexts 64/128 and 64/256 standards. Compared to the existing reference code implementation, the 8-plaintexts parallel implementation for each standard has about 65.3\%, 66.4\%, and the 16-plaintexts parallel implementation, about 81.8\%, and 82.1\% better performance. The register minimum alignment implementation shows performance of 8.2 cpb and 10.2 cpb in the 8-plaintexts 64/128 and 64/256 specifications, and 3.9 cpb and 4.8 cpb in the 16-plaintexts 64/128 and 64/256 specifications. Compared to the existing reference code implementation, the 8-plaintexts parallel implementation has improved performance by about 76.3\% and 77.2\%, and the 16-plaintext parallel implementation is about 88.7\% and 89.3\% higher for each standard.},
}