From: Ira W. Snyder Date: Sun, 21 Oct 2007 23:36:05 +0000 (-0700) Subject: Optimize ShiftRows() and InvShiftRows() X-Git-Tag: optimized~3 X-Git-Url: https://www.irasnyder.com/gitweb/?p=aes.git;a=commitdiff_plain;h=4bfbc559f6d8e186f8b95b48ae9603ee32dbb17d Optimize ShiftRows() and InvShiftRows() This switches the code so that it stores the state matrix in the same column-wise fashion that the FIPS document specifies. This makes it possible to optimize ShiftRows() and InvShiftRows() by accessing the bytes inside it as words instead of as bytes. This is guaranteed to work because the storage allocated by std::vector is guaranteed to be contiguous, so that pointer arithmetic works like normal arrays. Signed-off-by: Ira W. Snyder --- diff --git a/aes.cpp b/aes.cpp index 11980ca..e4aef0b 100644 --- a/aes.cpp +++ b/aes.cpp @@ -1,4 +1,5 @@ #include "aes.hpp" +#include /* static function prototypes */ static byteArray word2bytes (word input); @@ -41,7 +42,15 @@ byteArray AES::encrypt (const byteArray& plaintext) const throw incorrectTextSizeException (); int round; - byteArray state (plaintext); + byteArray state; + + /* Copy the plaintext into the state matrix. It is copied in + * column-wise, because the AES Spec. does it this way. 
+ * + * It also allows us to optimize ShiftRows later */ + for (int c=0; c> ((4-r)*8)); - - /* Unpack the bytes from the word back into the state matrix */ - temp = word2bytes (w); - state[r] = temp.at (0); - state[r+4] = temp.at (1); - state[r+8] = temp.at (2); - state[r+12] = temp.at (3); +#if __BYTE_ORDER == LITTLE_ENDIAN + *w_ptr = (*w_ptr >> r*8) | (*w_ptr << ((4-r)*8)); +#else // BIG_ENDIAN + *w_ptr = (*w_ptr << r*8) | (*w_ptr >> ((4-r)*8)); +#endif + w_ptr++; } } @@ -229,24 +271,32 @@ void AES::InvShiftRows (byteArray& state) const if (state.size() != Nb * 4) throw badStateArrayException (); + /* This is a more-optimized way of doing InvShiftRows than using + * bytes2word() and word2bytes() to pack and unpack the state matrix + * into words in order to use the shift-or method of doing the + * circular shift. It works because the memory used by a std::vector + * is guaranteed to be contiguous. + * + * Since bytes are stored in the byteArray vector, and they are in + * the proper order, we can access it like a word, and then shift that, + * instead of packing and then unpacking later. + * + * This should improve performance a little bit, because we are doing + * fewer assignments now. We do have to do more work in encrypt() and + * decrypt(), but that is 16 assignments, vs. 32 assignments per call + * to InvShiftRows(). */ + int r; - word w; - byteArray temp; + word *w_ptr = (word*)&state[0]; for (r=0; r> (r*8)); - - /* Unpack the bytes from the word back into the state matrix */ - temp = word2bytes (w); - state[r] = temp.at (0); - state[r+4] = temp.at (1); - state[r+8] = temp.at (2); - state[r+12] = temp.at (3); +#if __BYTE_ORDER == LITTLE_ENDIAN + *w_ptr = (*w_ptr << r*8) | (*w_ptr >> ((4-r)*8)); +#else // BIG_ENDIAN + *w_ptr = (*w_ptr >> (4-r)*8) | (*w_ptr << r*8); +#endif + w_ptr++; } } @@ -271,7 +321,7 @@ void AES::MixColumns (byteArray& state) const { /* Get this column */ for (c=0; c