All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode
@ 2018-10-16 10:09 Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 1/8] crypto: expand algorithm coverage for cipher benchmark Daniel P. Berrangé
                   ` (7 more replies)
  0 siblings, 8 replies; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

  v1: https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg01601.html

The XTS cipher mode is significantly slower than CBC mode. This series
approximately doubles the XTS performance which will improve the I/O
rate for LUKS disks.

Changed in v2:

 - Use union for xts_uint128 to allow bytewise access without casts
 - Use byteswapping for endian correct bit gf128 shifting
 - Optimize for aligned buffers, with fallback for unaligned buffers
 - Add tests for unaligned buffer usage

Daniel P. Berrangé (8):
  crypto: expand algorithm coverage for cipher benchmark
  crypto: remove code duplication in tweak encrypt/decrypt
  crypto: introduce a xts_uint128 data type
  crypto: convert xts_tweak_encdec to use xts_uint128 type
  crypto: convert xts_mult_x to use xts_uint128 type
  crypto: annotate xts_tweak_encdec as inlineable
  crypto: refactor XTS cipher mode test suite
  crypto: add testing for unaligned buffers with XTS cipher mode

 crypto/xts.c                    | 199 +++++++++++++++-------------
 tests/benchmark-crypto-cipher.c | 149 +++++++++++++++++----
 tests/test-crypto-xts.c         | 226 +++++++++++++++++++++++---------
 3 files changed, 401 insertions(+), 173 deletions(-)

-- 
2.17.2

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 1/8] crypto: expand algorithm coverage for cipher benchmark
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 2/8] crypto: remove code duplication in tweak encrypt/decrypt Daniel P. Berrangé
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

Add testing coverage for AES with XTS, ECB and CTR modes

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 tests/benchmark-crypto-cipher.c | 149 +++++++++++++++++++++++++++-----
 1 file changed, 126 insertions(+), 23 deletions(-)

diff --git a/tests/benchmark-crypto-cipher.c b/tests/benchmark-crypto-cipher.c
index f5a0d0bc32..67fdf8c31d 100644
--- a/tests/benchmark-crypto-cipher.c
+++ b/tests/benchmark-crypto-cipher.c
@@ -15,17 +15,27 @@
 #include "crypto/init.h"
 #include "crypto/cipher.h"
 
-static void test_cipher_speed(const void *opaque)
+static void test_cipher_speed(size_t chunk_size,
+                              QCryptoCipherMode mode,
+                              QCryptoCipherAlgorithm alg)
 {
     QCryptoCipher *cipher;
     Error *err = NULL;
     double total = 0.0;
-    size_t chunk_size = (size_t)opaque;
     uint8_t *key = NULL, *iv = NULL;
     uint8_t *plaintext = NULL, *ciphertext = NULL;
-    size_t nkey = qcrypto_cipher_get_key_len(QCRYPTO_CIPHER_ALG_AES_128);
-    size_t niv = qcrypto_cipher_get_iv_len(QCRYPTO_CIPHER_ALG_AES_128,
-                                           QCRYPTO_CIPHER_MODE_CBC);
+    size_t nkey;
+    size_t niv;
+
+    if (!qcrypto_cipher_supports(alg, mode)) {
+        return;
+    }
+
+    nkey = qcrypto_cipher_get_key_len(alg);
+    niv = qcrypto_cipher_get_iv_len(alg, mode);
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+        nkey *= 2;
+    }
 
     key = g_new0(uint8_t, nkey);
     memset(key, g_test_rand_int(), nkey);
@@ -38,14 +48,14 @@ static void test_cipher_speed(const void *opaque)
     plaintext = g_new0(uint8_t, chunk_size);
     memset(plaintext, g_test_rand_int(), chunk_size);
 
-    cipher = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_128,
-                                QCRYPTO_CIPHER_MODE_CBC,
+    cipher = qcrypto_cipher_new(alg, mode,
                                 key, nkey, &err);
     g_assert(cipher != NULL);
 
-    g_assert(qcrypto_cipher_setiv(cipher,
-                                  iv, niv,
-                                  &err) == 0);
+    if (mode != QCRYPTO_CIPHER_MODE_ECB)
+        g_assert(qcrypto_cipher_setiv(cipher,
+                                      iv, niv,
+                                      &err) == 0);
 
     g_test_timer_start();
     do {
@@ -55,13 +65,26 @@ static void test_cipher_speed(const void *opaque)
                                         chunk_size,
                                         &err) == 0);
         total += chunk_size;
-    } while (g_test_timer_elapsed() < 5.0);
+    } while (g_test_timer_elapsed() < 1.0);
 
     total /= MiB;
-    g_print("cbc(aes128): ");
-    g_print("Testing chunk_size %zu bytes ", chunk_size);
-    g_print("done: %.2f MB in %.2f secs: ", total, g_test_timer_last());
-    g_print("%.2f MB/sec\n", total / g_test_timer_last());
+    g_print("Enc chunk %zu bytes ", chunk_size);
+    g_print("%.2f MB/sec ", total / g_test_timer_last());
+
+    total = 0.0;
+    g_test_timer_start();
+    do {
+        g_assert(qcrypto_cipher_decrypt(cipher,
+                                        plaintext,
+                                        ciphertext,
+                                        chunk_size,
+                                        &err) == 0);
+        total += chunk_size;
+    } while (g_test_timer_elapsed() < 1.0);
+
+    total /= MiB;
+    g_print("Dec chunk %zu bytes ", chunk_size);
+    g_print("%.2f MB/sec ", total / g_test_timer_last());
 
     qcrypto_cipher_free(cipher);
     g_free(plaintext);
@@ -70,19 +93,99 @@ static void test_cipher_speed(const void *opaque)
     g_free(key);
 }
 
-int main(int argc, char **argv)
+
+static void test_cipher_speed_ecb_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_ECB,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
+
+static void test_cipher_speed_ecb_aes_256(const void *opaque)
 {
-    size_t i;
-    char name[64];
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_ECB,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+static void test_cipher_speed_cbc_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CBC,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
 
+static void test_cipher_speed_cbc_aes_256(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CBC,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+static void test_cipher_speed_ctr_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CTR,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
+
+static void test_cipher_speed_ctr_aes_256(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CTR,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+static void test_cipher_speed_xts_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_XTS,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
+
+static void test_cipher_speed_xts_aes_256(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque;
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_XTS,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+
+int main(int argc, char **argv)
+{
     g_test_init(&argc, &argv, NULL);
     g_assert(qcrypto_init(NULL) == 0);
 
-    for (i = 512; i <= 64 * KiB; i *= 2) {
-        memset(name, 0 , sizeof(name));
-        snprintf(name, sizeof(name), "/crypto/cipher/speed-%zu", i);
-        g_test_add_data_func(name, (void *)i, test_cipher_speed);
-    }
+#define ADD_TEST(mode, cipher, keysize, chunk)                          \
+    g_test_add_data_func(                                               \
+        "/crypto/cipher/" #mode "-" #cipher "-" #keysize "/chunk-" #chunk, \
+        (void *)chunk,                                                  \
+        test_cipher_speed_ ## mode ## _ ## cipher ## _ ## keysize)
+
+#define ADD_TESTS(chunk)                        \
+    do {                                        \
+        ADD_TEST(ecb, aes, 128, chunk);         \
+        ADD_TEST(ecb, aes, 256, chunk);         \
+        ADD_TEST(cbc, aes, 128, chunk);         \
+        ADD_TEST(cbc, aes, 256, chunk);         \
+        ADD_TEST(ctr, aes, 128, chunk);         \
+        ADD_TEST(ctr, aes, 256, chunk);         \
+        ADD_TEST(xts, aes, 128, chunk);         \
+        ADD_TEST(xts, aes, 256, chunk);         \
+    } while (0)
+
+    ADD_TESTS(512);
+    ADD_TESTS(4096);
+    ADD_TESTS(16384);
+    ADD_TESTS(65536);
 
     return g_test_run();
 }
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 2/8] crypto: remove code duplication in tweak encrypt/decrypt
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 1/8] crypto: expand algorithm coverage for cipher benchmark Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 3/8] crypto: introduce a xts_uint128 data type Daniel P. Berrangé
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

The tweak encrypt/decrypt functions are identical except for the
comments, so can be merged. Profiling data shows that the compiler is
in fact already merging the two merges in the object files.

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 crypto/xts.c | 64 ++++++++++++----------------------------------------
 1 file changed, 15 insertions(+), 49 deletions(-)

diff --git a/crypto/xts.c b/crypto/xts.c
index 95212341f6..3c1a92f01d 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -43,20 +43,20 @@ static void xts_mult_x(uint8_t *I)
 
 
 /**
- * xts_tweak_uncrypt:
+ * xts_tweak_encdec:
  * @param ctxt: the cipher context
  * @param func: the cipher function
- * @src: buffer providing the cipher text of XTS_BLOCK_SIZE bytes
- * @dst: buffer to output the plain text of XTS_BLOCK_SIZE bytes
+ * @src: buffer providing the input text of XTS_BLOCK_SIZE bytes
+ * @dst: buffer to output the output text of XTS_BLOCK_SIZE bytes
  * @iv: the initialization vector tweak of XTS_BLOCK_SIZE bytes
  *
- * Decrypt data with a tweak
+ * Encrypt/decrypt data with a tweak
  */
-static void xts_tweak_decrypt(const void *ctx,
-                              xts_cipher_func *func,
-                              const uint8_t *src,
-                              uint8_t *dst,
-                              uint8_t *iv)
+static void xts_tweak_encdec(const void *ctx,
+                             xts_cipher_func *func,
+                             const uint8_t *src,
+                             uint8_t *dst,
+                             uint8_t *iv)
 {
     unsigned long x;
 
@@ -105,7 +105,7 @@ void xts_decrypt(const void *datactx,
     encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv);
 
     for (i = 0; i < lim; i++) {
-        xts_tweak_decrypt(datactx, decfunc, src, dst, T);
+        xts_tweak_encdec(datactx, decfunc, src, dst, T);
 
         src += XTS_BLOCK_SIZE;
         dst += XTS_BLOCK_SIZE;
@@ -117,7 +117,7 @@ void xts_decrypt(const void *datactx,
         xts_mult_x(CC);
 
         /* PP = tweak decrypt block m-1 */
-        xts_tweak_decrypt(datactx, decfunc, src, PP, CC);
+        xts_tweak_encdec(datactx, decfunc, src, PP, CC);
 
         /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */
         for (i = 0; i < mo; i++) {
@@ -129,7 +129,7 @@ void xts_decrypt(const void *datactx,
         }
 
         /* Pm-1 = Tweak uncrypt CC */
-        xts_tweak_decrypt(datactx, decfunc, CC, dst, T);
+        xts_tweak_encdec(datactx, decfunc, CC, dst, T);
     }
 
     /* Decrypt the iv back */
@@ -137,40 +137,6 @@ void xts_decrypt(const void *datactx,
 }
 
 
-/**
- * xts_tweak_crypt:
- * @param ctxt: the cipher context
- * @param func: the cipher function
- * @src: buffer providing the plain text of XTS_BLOCK_SIZE bytes
- * @dst: buffer to output the cipher text of XTS_BLOCK_SIZE bytes
- * @iv: the initialization vector tweak of XTS_BLOCK_SIZE bytes
- *
- * Encrypt data with a tweak
- */
-static void xts_tweak_encrypt(const void *ctx,
-                              xts_cipher_func *func,
-                              const uint8_t *src,
-                              uint8_t *dst,
-                              uint8_t *iv)
-{
-    unsigned long x;
-
-    /* tweak encrypt block i */
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = src[x] ^ iv[x];
-    }
-
-    func(ctx, XTS_BLOCK_SIZE, dst, dst);
-
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = dst[x] ^ iv[x];
-    }
-
-    /* LFSR the tweak */
-    xts_mult_x(iv);
-}
-
-
 void xts_encrypt(const void *datactx,
                  const void *tweakctx,
                  xts_cipher_func *encfunc,
@@ -200,7 +166,7 @@ void xts_encrypt(const void *datactx,
     encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv);
 
     for (i = 0; i < lim; i++) {
-        xts_tweak_encrypt(datactx, encfunc, src, dst, T);
+        xts_tweak_encdec(datactx, encfunc, src, dst, T);
 
         dst += XTS_BLOCK_SIZE;
         src += XTS_BLOCK_SIZE;
@@ -209,7 +175,7 @@ void xts_encrypt(const void *datactx,
     /* if length is not a multiple of XTS_BLOCK_SIZE then */
     if (mo > 0) {
         /* CC = tweak encrypt block m-1 */
-        xts_tweak_encrypt(datactx, encfunc, src, CC, T);
+        xts_tweak_encdec(datactx, encfunc, src, CC, T);
 
         /* Cm = first length % XTS_BLOCK_SIZE bytes of CC */
         for (i = 0; i < mo; i++) {
@@ -222,7 +188,7 @@ void xts_encrypt(const void *datactx,
         }
 
         /* Cm-1 = Tweak encrypt PP */
-        xts_tweak_encrypt(datactx, encfunc, PP, dst, T);
+        xts_tweak_encdec(datactx, encfunc, PP, dst, T);
     }
 
     /* Decrypt the iv back */
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 3/8] crypto: introduce a xts_uint128 data type
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 1/8] crypto: expand algorithm coverage for cipher benchmark Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 2/8] crypto: remove code duplication in tweak encrypt/decrypt Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 12:45   ` Alberto Garcia
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 4/8] crypto: convert xts_tweak_encdec to use xts_uint128 type Daniel P. Berrangé
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

The new type is designed to allow use of 64-bit arithmetic instead
of operating 1-byte at a time. The following patches will use this to
improve performance.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 crypto/xts.c | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/crypto/xts.c b/crypto/xts.c
index 3c1a92f01d..bee23f890e 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -26,6 +26,12 @@
 #include "qemu/osdep.h"
 #include "crypto/xts.h"
 
+typedef union {
+    uint8_t b[XTS_BLOCK_SIZE];
+    uint64_t u[2];
+} xts_uint128;
+
+
 static void xts_mult_x(uint8_t *I)
 {
     int x;
@@ -85,7 +91,7 @@ void xts_decrypt(const void *datactx,
                  uint8_t *dst,
                  const uint8_t *src)
 {
-    uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE];
+    xts_uint128 PP, CC, T;
     unsigned long i, m, mo, lim;
 
     /* get number of blocks */
@@ -102,10 +108,10 @@ void xts_decrypt(const void *datactx,
     }
 
     /* encrypt the iv */
-    encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv);
+    encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
 
     for (i = 0; i < lim; i++) {
-        xts_tweak_encdec(datactx, decfunc, src, dst, T);
+        xts_tweak_encdec(datactx, decfunc, src, dst, T.b);
 
         src += XTS_BLOCK_SIZE;
         dst += XTS_BLOCK_SIZE;
@@ -113,27 +119,27 @@ void xts_decrypt(const void *datactx,
 
     /* if length is not a multiple of XTS_BLOCK_SIZE then */
     if (mo > 0) {
-        memcpy(CC, T, XTS_BLOCK_SIZE);
-        xts_mult_x(CC);
+        memcpy(&CC, &T, XTS_BLOCK_SIZE);
+        xts_mult_x(CC.b);
 
         /* PP = tweak decrypt block m-1 */
-        xts_tweak_encdec(datactx, decfunc, src, PP, CC);
+        xts_tweak_encdec(datactx, decfunc, src, PP.b, CC.b);
 
         /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */
         for (i = 0; i < mo; i++) {
-            CC[i] = src[XTS_BLOCK_SIZE + i];
-            dst[XTS_BLOCK_SIZE + i] = PP[i];
+            CC.b[i] = src[XTS_BLOCK_SIZE + i];
+            dst[XTS_BLOCK_SIZE + i] = PP.b[i];
         }
         for (; i < XTS_BLOCK_SIZE; i++) {
-            CC[i] = PP[i];
+            CC.b[i] = PP.b[i];
         }
 
         /* Pm-1 = Tweak uncrypt CC */
-        xts_tweak_encdec(datactx, decfunc, CC, dst, T);
+        xts_tweak_encdec(datactx, decfunc, CC.b, dst, T.b);
     }
 
     /* Decrypt the iv back */
-    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T);
+    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T.b);
 }
 
 
@@ -146,7 +152,7 @@ void xts_encrypt(const void *datactx,
                  uint8_t *dst,
                  const uint8_t *src)
 {
-    uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE];
+    xts_uint128 PP, CC, T;
     unsigned long i, m, mo, lim;
 
     /* get number of blocks */
@@ -163,10 +169,10 @@ void xts_encrypt(const void *datactx,
     }
 
     /* encrypt the iv */
-    encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv);
+    encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
 
     for (i = 0; i < lim; i++) {
-        xts_tweak_encdec(datactx, encfunc, src, dst, T);
+        xts_tweak_encdec(datactx, encfunc, src, dst, T.b);
 
         dst += XTS_BLOCK_SIZE;
         src += XTS_BLOCK_SIZE;
@@ -175,22 +181,22 @@ void xts_encrypt(const void *datactx,
     /* if length is not a multiple of XTS_BLOCK_SIZE then */
     if (mo > 0) {
         /* CC = tweak encrypt block m-1 */
-        xts_tweak_encdec(datactx, encfunc, src, CC, T);
+        xts_tweak_encdec(datactx, encfunc, src, CC.b, T.b);
 
         /* Cm = first length % XTS_BLOCK_SIZE bytes of CC */
         for (i = 0; i < mo; i++) {
-            PP[i] = src[XTS_BLOCK_SIZE + i];
-            dst[XTS_BLOCK_SIZE + i] = CC[i];
+            PP.b[i] = src[XTS_BLOCK_SIZE + i];
+            dst[XTS_BLOCK_SIZE + i] = CC.b[i];
         }
 
         for (; i < XTS_BLOCK_SIZE; i++) {
-            PP[i] = CC[i];
+            PP.b[i] = CC.b[i];
         }
 
         /* Cm-1 = Tweak encrypt PP */
-        xts_tweak_encdec(datactx, encfunc, PP, dst, T);
+        xts_tweak_encdec(datactx, encfunc, PP.b, dst, T.b);
     }
 
     /* Decrypt the iv back */
-    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T);
+    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T.b);
 }
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 4/8] crypto: convert xts_tweak_encdec to use xts_uint128 type
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
                   ` (2 preceding siblings ...)
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 3/8] crypto: introduce a xts_uint128 data type Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 13:09   ` Alberto Garcia
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x " Daniel P. Berrangé
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

Using 64-bit arithmetic increases the performance for xts-aes-128
when built with gcrypt:

  Encrypt: 272 MB/s -> 355 MB/s
  Decrypt: 275 MB/s -> 362 MB/s

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 crypto/xts.c | 84 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 58 insertions(+), 26 deletions(-)

diff --git a/crypto/xts.c b/crypto/xts.c
index bee23f890e..2e3430672c 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -31,6 +31,13 @@ typedef union {
     uint64_t u[2];
 } xts_uint128;
 
+static inline void xts_uint128_xor(xts_uint128 *D,
+                                   const xts_uint128 *S1,
+                                   const xts_uint128 *S2)
+{
+    D->u[0] = S1->u[0] ^ S2->u[0];
+    D->u[1] = S1->u[1] ^ S2->u[1];
+}
 
 static void xts_mult_x(uint8_t *I)
 {
@@ -60,25 +67,19 @@ static void xts_mult_x(uint8_t *I)
  */
 static void xts_tweak_encdec(const void *ctx,
                              xts_cipher_func *func,
-                             const uint8_t *src,
-                             uint8_t *dst,
-                             uint8_t *iv)
+                             const xts_uint128 *src,
+                             xts_uint128 *dst,
+                             xts_uint128 *iv)
 {
-    unsigned long x;
-
     /* tweak encrypt block i */
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = src[x] ^ iv[x];
-    }
+    xts_uint128_xor(dst, src, iv);
 
-    func(ctx, XTS_BLOCK_SIZE, dst, dst);
+    func(ctx, XTS_BLOCK_SIZE, dst->b, dst->b);
 
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = dst[x] ^ iv[x];
-    }
+    xts_uint128_xor(dst, dst, iv);
 
     /* LFSR the tweak */
-    xts_mult_x(iv);
+    xts_mult_x(iv->b);
 }
 
 
@@ -110,20 +111,34 @@ void xts_decrypt(const void *datactx,
     /* encrypt the iv */
     encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
 
-    for (i = 0; i < lim; i++) {
-        xts_tweak_encdec(datactx, decfunc, src, dst, T.b);
-
-        src += XTS_BLOCK_SIZE;
-        dst += XTS_BLOCK_SIZE;
+    if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) &&
+        QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) {
+        xts_uint128 *S = (xts_uint128 *)src;
+        xts_uint128 *D = (xts_uint128 *)dst;
+        for (i = 0; i < lim; i++, S++, D++) {
+            xts_tweak_encdec(datactx, decfunc, S, D, &T);
+        }
+    } else {
+        xts_uint128 S, D;
+
+        for (i = 0; i < lim; i++) {
+            memcpy(&S, src, XTS_BLOCK_SIZE);
+            xts_tweak_encdec(datactx, decfunc, &S, &D, &T);
+            memcpy(dst, &D, XTS_BLOCK_SIZE);
+            src += XTS_BLOCK_SIZE;
+            dst += XTS_BLOCK_SIZE;
+        }
     }
 
     /* if length is not a multiple of XTS_BLOCK_SIZE then */
     if (mo > 0) {
+        xts_uint128 S, D;
         memcpy(&CC, &T, XTS_BLOCK_SIZE);
         xts_mult_x(CC.b);
 
         /* PP = tweak decrypt block m-1 */
-        xts_tweak_encdec(datactx, decfunc, src, PP.b, CC.b);
+        memcpy(&S, src, XTS_BLOCK_SIZE);
+        xts_tweak_encdec(datactx, decfunc, &S, &PP, &CC);
 
         /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */
         for (i = 0; i < mo; i++) {
@@ -135,7 +150,8 @@ void xts_decrypt(const void *datactx,
         }
 
         /* Pm-1 = Tweak uncrypt CC */
-        xts_tweak_encdec(datactx, decfunc, CC.b, dst, T.b);
+        xts_tweak_encdec(datactx, decfunc, &CC, &D, &T);
+        memcpy(dst, &D, XTS_BLOCK_SIZE);
     }
 
     /* Decrypt the iv back */
@@ -171,17 +187,32 @@ void xts_encrypt(const void *datactx,
     /* encrypt the iv */
     encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
 
-    for (i = 0; i < lim; i++) {
-        xts_tweak_encdec(datactx, encfunc, src, dst, T.b);
+    if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) &&
+        QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) {
+        xts_uint128 *S = (xts_uint128 *)src;
+        xts_uint128 *D = (xts_uint128 *)dst;
+        for (i = 0; i < lim; i++, S++, D++) {
+            xts_tweak_encdec(datactx, encfunc, S, D, &T);
+        }
+    } else {
+        xts_uint128 S, D;
+
+        for (i = 0; i < lim; i++) {
+            memcpy(&S, src, XTS_BLOCK_SIZE);
+            xts_tweak_encdec(datactx, encfunc, &S, &D, &T);
+            memcpy(dst, &D, XTS_BLOCK_SIZE);
 
-        dst += XTS_BLOCK_SIZE;
-        src += XTS_BLOCK_SIZE;
+            dst += XTS_BLOCK_SIZE;
+            src += XTS_BLOCK_SIZE;
+        }
     }
 
     /* if length is not a multiple of XTS_BLOCK_SIZE then */
     if (mo > 0) {
+        xts_uint128 S, D;
         /* CC = tweak encrypt block m-1 */
-        xts_tweak_encdec(datactx, encfunc, src, CC.b, T.b);
+        memcpy(&S, src, XTS_BLOCK_SIZE);
+        xts_tweak_encdec(datactx, encfunc, &S, &CC, &T);
 
         /* Cm = first length % XTS_BLOCK_SIZE bytes of CC */
         for (i = 0; i < mo; i++) {
@@ -194,7 +225,8 @@ void xts_encrypt(const void *datactx,
         }
 
         /* Cm-1 = Tweak encrypt PP */
-        xts_tweak_encdec(datactx, encfunc, PP.b, dst, T.b);
+        xts_tweak_encdec(datactx, encfunc, &PP, &D, &T);
+        memcpy(dst, &D, XTS_BLOCK_SIZE);
     }
 
     /* Decrypt the iv back */
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x to use xts_uint128 type
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
                   ` (3 preceding siblings ...)
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 4/8] crypto: convert xts_tweak_encdec to use xts_uint128 type Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 13:35   ` Alberto Garcia
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 6/8] crypto: annotate xts_tweak_encdec as inlineable Daniel P. Berrangé
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

Using 64-bit arithmetic increases the performance for xts-aes-128
when built with gcrypt:

  Encrypt: 355 MB/s -> 545 MB/s
  Decrypt: 362 MB/s -> 568 MB/s

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 crypto/xts.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/crypto/xts.c b/crypto/xts.c
index 2e3430672c..56c0e4e6ed 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -24,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/bswap.h"
 #include "crypto/xts.h"
 
 typedef union {
@@ -39,19 +40,33 @@ static inline void xts_uint128_xor(xts_uint128 *D,
     D->u[1] = S1->u[1] ^ S2->u[1];
 }
 
-static void xts_mult_x(uint8_t *I)
+static inline void xts_uint128_cpu_to_les(xts_uint128 *v)
 {
-    int x;
-    uint8_t t, tt;
+    cpu_to_le64s(&v->u[0]);
+    cpu_to_le64s(&v->u[1]);
+}
 
-    for (x = t = 0; x < 16; x++) {
-        tt = I[x] >> 7;
-        I[x] = ((I[x] << 1) | t) & 0xFF;
-        t = tt;
-    }
-    if (tt) {
-        I[0] ^= 0x87;
+static inline void xts_uint128_le_to_cpus(xts_uint128 *v)
+{
+    le64_to_cpus(&v->u[0]);
+    le64_to_cpus(&v->u[1]);
+}
+
+static void xts_mult_x(xts_uint128 *I)
+{
+    uint64_t tt;
+
+    xts_uint128_cpu_to_les(I);
+
+    tt = I->u[0] >> 63;
+    I->u[0] = I->u[0] << 1;
+
+    if (I->u[1] >> 63) {
+        I->u[0] ^= 0x87;
     }
+    I->u[1] = (I->u[1] << 1) | tt;
+
+    xts_uint128_le_to_cpus(I);
 }
 
 
@@ -79,7 +94,7 @@ static void xts_tweak_encdec(const void *ctx,
     xts_uint128_xor(dst, dst, iv);
 
     /* LFSR the tweak */
-    xts_mult_x(iv->b);
+    xts_mult_x(iv);
 }
 
 
@@ -134,7 +149,7 @@ void xts_decrypt(const void *datactx,
     if (mo > 0) {
         xts_uint128 S, D;
         memcpy(&CC, &T, XTS_BLOCK_SIZE);
-        xts_mult_x(CC.b);
+        xts_mult_x(&CC);
 
         /* PP = tweak decrypt block m-1 */
         memcpy(&S, src, XTS_BLOCK_SIZE);
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 6/8] crypto: annotate xts_tweak_encdec as inlineable
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
                   ` (4 preceding siblings ...)
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x " Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 7/8] crypto: refactor XTS cipher mode test suite Daniel P. Berrangé
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 8/8] crypto: add testing for unaligned buffers with XTS cipher mode Daniel P. Berrangé
  7 siblings, 0 replies; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

Encouraging the compiler to inline xts_tweak_encdec increases the
performance for xts-aes-128 when built with gcrypt:

  Encrypt: 545 MB/s -> 580 MB/s
  Decrypt: 568 MB/s -> 602 MB/s

Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 crypto/xts.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/crypto/xts.c b/crypto/xts.c
index 56c0e4e6ed..3571607fa7 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -80,11 +80,11 @@ static void xts_mult_x(xts_uint128 *I)
  *
  * Encrypt/decrypt data with a tweak
  */
-static void xts_tweak_encdec(const void *ctx,
-                             xts_cipher_func *func,
-                             const xts_uint128 *src,
-                             xts_uint128 *dst,
-                             xts_uint128 *iv)
+static inline void xts_tweak_encdec(const void *ctx,
+                                    xts_cipher_func *func,
+                                    const xts_uint128 *src,
+                                    xts_uint128 *dst,
+                                    xts_uint128 *iv)
 {
     /* tweak encrypt block i */
     xts_uint128_xor(dst, src, iv);
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 7/8] crypto: refactor XTS cipher mode test suite
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
                   ` (5 preceding siblings ...)
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 6/8] crypto: annotate xts_tweak_encdec as inlineable Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 14:34   ` Alberto Garcia
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 8/8] crypto: add testing for unaligned buffers with XTS cipher mode Daniel P. Berrangé
  7 siblings, 1 reply; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

The current XTS test overloads two different tests in a single function
making the code a little hard to follow. Split it into distinct test
cases.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 tests/test-crypto-xts.c | 140 +++++++++++++++++++++++-----------------
 1 file changed, 80 insertions(+), 60 deletions(-)

diff --git a/tests/test-crypto-xts.c b/tests/test-crypto-xts.c
index 1f1412c45a..81606d90ad 100644
--- a/tests/test-crypto-xts.c
+++ b/tests/test-crypto-xts.c
@@ -1,7 +1,7 @@
 /*
  * QEMU Crypto XTS cipher mode
  *
- * Copyright (c) 2015-2016 Red Hat, Inc.
+ * Copyright (c) 2015-2018 Red Hat, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -340,70 +340,79 @@ static void test_xts_aes_decrypt(const void *ctx,
 static void test_xts(const void *opaque)
 {
     const QCryptoXTSTestData *data = opaque;
-    unsigned char out[512], Torg[16], T[16];
+    uint8_t out[512], Torg[16], T[16];
     uint64_t seq;
-    int j;
-    unsigned long len;
     struct TestAES aesdata;
     struct TestAES aestweak;
 
-    for (j = 0; j < 2; j++) {
-        /* skip the cases where
-         * the length is smaller than 2*blocklen
-         * or the length is not a multiple of 32
-         */
-        if ((j == 1) && ((data->PTLEN < 32) || (data->PTLEN % 32))) {
-            continue;
-        }
-        len = data->PTLEN / 2;
-
-        AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
-        AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
-        AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
-        AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
-
-        seq = data->seqnum;
-        STORE64L(seq, Torg);
-        memset(Torg + 8, 0, 8);
-
-        memcpy(T, Torg, sizeof(T));
-        if (j == 0) {
-            xts_encrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, data->PTLEN, out, data->PTX);
-        } else {
-            xts_encrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, out, data->PTX);
-            xts_encrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, &out[len], &data->PTX[len]);
-        }
+    AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
+    AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
+    AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
+    AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
 
-        g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
-
-        memcpy(T, Torg, sizeof(T));
-        if (j == 0) {
-            xts_decrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, data->PTLEN, out, data->CTX);
-        } else {
-            xts_decrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, out, data->CTX);
-            xts_decrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, &out[len], &data->CTX[len]);
-        }
+    seq = data->seqnum;
+    STORE64L(seq, Torg);
+    memset(Torg + 8, 0, 8);
 
-        g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
-    }
+    memcpy(T, Torg, sizeof(T));
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, data->PTX);
+
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+
+    memcpy(T, Torg, sizeof(T));
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, data->CTX);
+
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
+}
+
+
+static void test_xts_split(const void *opaque)
+{
+    const QCryptoXTSTestData *data = opaque;
+    uint8_t out[512], Torg[16], T[16];
+    uint64_t seq;
+    unsigned long len = data->PTLEN / 2;
+    struct TestAES aesdata;
+    struct TestAES aestweak;
+
+    AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
+    AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
+    AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
+    AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
+
+    seq = data->seqnum;
+    STORE64L(seq, Torg);
+    memset(Torg + 8, 0, 8);
+
+    memcpy(T, Torg, sizeof(T));
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, out, data->PTX);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, &out[len], &data->PTX[len]);
+
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+
+    memcpy(T, Torg, sizeof(T));
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, out, data->CTX);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, &out[len], &data->CTX[len]);
+
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
 }
 
 
@@ -416,7 +425,18 @@ int main(int argc, char **argv)
     g_assert(qcrypto_init(NULL) == 0);
 
     for (i = 0; i < G_N_ELEMENTS(test_data); i++) {
-        g_test_add_data_func(test_data[i].path, &test_data[i], test_xts);
+        gchar *path = g_strdup_printf("%s/basic", test_data[i].path);
+        g_test_add_data_func(path, &test_data[i], test_xts);
+        g_free(path);
+
+        /* skip the cases where the length is smaller than 2*blocklen
+         * or the length is not a multiple of 32
+         */
+        if ((test_data[i].PTLEN >= 32) && !(test_data[i].PTLEN % 32)) {
+            path = g_strdup_printf("%s/split", test_data[i].path);
+            g_test_add_data_func(path, &test_data[i], test_xts_split);
+            g_free(path);
+        }
     }
 
     return g_test_run();
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH v2 8/8] crypto: add testing for unaligned buffers with XTS cipher mode
  2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
                   ` (6 preceding siblings ...)
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 7/8] crypto: refactor XTS cipher mode test suite Daniel P. Berrangé
@ 2018-10-16 10:09 ` Daniel P. Berrangé
  2018-10-16 14:50   ` Alberto Garcia
  7 siblings, 1 reply; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 10:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Daniel P. Berrangé, Alberto Garcia

Validate that the XTS cipher mode will correctly operate with plain
text, cipher text and IV buffers that are not 64-bit aligned.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 tests/test-crypto-xts.c | 86 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/tests/test-crypto-xts.c b/tests/test-crypto-xts.c
index 81606d90ad..6fb61cf635 100644
--- a/tests/test-crypto-xts.c
+++ b/tests/test-crypto-xts.c
@@ -416,6 +416,88 @@ static void test_xts_split(const void *opaque)
 }
 
 
+static void test_xts_unaligned(const void *opaque)
+{
+#define BAD_ALIGN 3
+    const QCryptoXTSTestData *data = opaque;
+    uint8_t in[512 + BAD_ALIGN], out[512 + BAD_ALIGN];
+    uint8_t Torg[16], T[16 + BAD_ALIGN];
+    uint64_t seq;
+    struct TestAES aesdata;
+    struct TestAES aestweak;
+
+    AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
+    AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
+    AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
+    AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
+
+    seq = data->seqnum;
+    STORE64L(seq, Torg);
+    memset(Torg + 8, 0, 8);
+
+    /* IV not aligned */
+    memcpy(T + BAD_ALIGN, Torg, 16);
+    memcpy(in, data->PTX, data->PTLEN);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T + BAD_ALIGN, data->PTLEN, out, in);
+
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+
+    /* plain text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in + BAD_ALIGN, data->PTX, data->PTLEN);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, in + BAD_ALIGN);
+
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+
+    /* cipher text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in, data->PTX, data->PTLEN);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out + BAD_ALIGN, in);
+
+    g_assert(memcmp(out + BAD_ALIGN, data->CTX, data->PTLEN) == 0);
+
+
+    /* IV not aligned */
+    memcpy(T + BAD_ALIGN, Torg, 16);
+    memcpy(in, data->CTX, data->PTLEN);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T + BAD_ALIGN, data->PTLEN, out, in);
+
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
+
+    /* cipher text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in + BAD_ALIGN, data->CTX, data->PTLEN);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, in + BAD_ALIGN);
+
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
+
+    /* plain text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in, data->CTX, data->PTLEN);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out + BAD_ALIGN, in);
+
+    g_assert(memcmp(out + BAD_ALIGN, data->PTX, data->PTLEN) == 0);
+}
+
+
 int main(int argc, char **argv)
 {
     size_t i;
@@ -437,6 +519,10 @@ int main(int argc, char **argv)
             g_test_add_data_func(path, &test_data[i], test_xts_split);
             g_free(path);
         }
+
+        path = g_strdup_printf("%s/unaligned", test_data[i].path);
+        g_test_add_data_func(path, &test_data[i], test_xts_unaligned);
+        g_free(path);
     }
 
     return g_test_run();
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] crypto: introduce a xts_uint128 data type
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 3/8] crypto: introduce a xts_uint128 data type Daniel P. Berrangé
@ 2018-10-16 12:45   ` Alberto Garcia
  0 siblings, 0 replies; 17+ messages in thread
From: Alberto Garcia @ 2018-10-16 12:45 UTC (permalink / raw)
  To: Daniel P. Berrangé, qemu-devel

On Tue 16 Oct 2018 12:09:13 PM CEST, Daniel P. Berrangé wrote:
> The new type is designed to allow use of 64-bit arithmetic instead
> of operating 1-byte at a time. The following patches will use this to
> improve performance.
>
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>

Reviewed-by: Alberto Garcia <berto@igalia.com>

Berto

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 4/8] crypto: convert xts_tweak_encdec to use xts_uint128 type
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 4/8] crypto: convert xts_tweak_encdec to use xts_uint128 type Daniel P. Berrangé
@ 2018-10-16 13:09   ` Alberto Garcia
  2018-10-16 13:51     ` Daniel P. Berrangé
  0 siblings, 1 reply; 17+ messages in thread
From: Alberto Garcia @ 2018-10-16 13:09 UTC (permalink / raw)
  To: Daniel P. Berrangé, qemu-devel

On Tue 16 Oct 2018 12:09:14 PM CEST, Daniel P. Berrangé wrote:

> @@ -110,20 +111,34 @@ void xts_decrypt(const void *datactx,
>      /* encrypt the iv */
>      encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
>  
> -    for (i = 0; i < lim; i++) {
> -        xts_tweak_encdec(datactx, decfunc, src, dst, T.b);
> -
> -        src += XTS_BLOCK_SIZE;
> -        dst += XTS_BLOCK_SIZE;
> +    if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) &&
> +        QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) {
> +        xts_uint128 *S = (xts_uint128 *)src;
> +        xts_uint128 *D = (xts_uint128 *)dst;
> +        for (i = 0; i < lim; i++, S++, D++) {
> +            xts_tweak_encdec(datactx, decfunc, S, D, &T);
> +        }
> +    } else {
> +        xts_uint128 S, D;
> +
> +        for (i = 0; i < lim; i++) {
> +            memcpy(&S, src, XTS_BLOCK_SIZE);
> +            xts_tweak_encdec(datactx, decfunc, &S, &D, &T);
> +            memcpy(dst, &D, XTS_BLOCK_SIZE);
> +            src += XTS_BLOCK_SIZE;
> +            dst += XTS_BLOCK_SIZE;
> +        }

The patch looks good to me, but a couple of comments:

- As far as I can see xts_tweak_encdec() works the same regardless of
  whether src and dst point to the same address or not. As a matter of
  fact both qcrypto_block_decrypt() and qcrypto_block_encrypt() do the
  decryption and encryption in place, and as you can see the
  qcrypto_cipher_*crypt() calls in crypto/block.c pass the same buffer
  as input and output.

  So instead of having S and D you should be fine with just one of them.

- I think this is just a matter of style preference, but in the first
  for loop you can remove the comma operator (i++, S++, D++) and use
  S[i] and D[I] instead in the line after that. I'm fine if you prefer
  the current style, though.

Berto

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x to use xts_uint128 type
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x " Daniel P. Berrangé
@ 2018-10-16 13:35   ` Alberto Garcia
  2018-10-16 13:59     ` Daniel P. Berrangé
  0 siblings, 1 reply; 17+ messages in thread
From: Alberto Garcia @ 2018-10-16 13:35 UTC (permalink / raw)
  To: Daniel P. Berrangé, qemu-devel

On Tue 16 Oct 2018 12:09:15 PM CEST, Daniel P. Berrangé wrote:
> Using 64-bit arithmetic increases the performance for xts-aes-128
> when built with gcrypt:
>
>   Encrypt: 355 MB/s -> 545 MB/s
>   Decrypt: 362 MB/s -> 568 MB/s
>
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>

This patch is also fine, but I have a couple of minor comments:

> +static void xts_mult_x(xts_uint128 *I)
> +{
> +    uint64_t tt;
> +
> +    xts_uint128_cpu_to_les(I);
> +
> +    tt = I->u[0] >> 63;
> +    I->u[0] = I->u[0] << 1;

Perhaps I->u[0] <<= 1 , for clarity and consistency with the following
line (I->u[0] ^= 0x87) ? But I don't mind if you prefer to keep it as is
now.

> +    if (I->u[1] >> 63) {
> +        I->u[0] ^= 0x87;
>      }
> +    I->u[1] = (I->u[1] << 1) | tt;
> +
> +    xts_uint128_le_to_cpus(I);

I think both endianness conversion calls should be flipped. First you
convert from the buffer byte order (LE) to the CPU byte order so you can
do the bit shifts, then back to the original byte order (LE).

Changing this doesn't have any practical effect because both calls
perform the exact same operation, but it documents better what's going
on.

With this changed,

Reviewed-by: Alberto Garcia <berto@igalia.com>

Berto

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 4/8] crypto: convert xts_tweak_encdec to use xts_uint128 type
  2018-10-16 13:09   ` Alberto Garcia
@ 2018-10-16 13:51     ` Daniel P. Berrangé
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 13:51 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: qemu-devel

On Tue, Oct 16, 2018 at 03:09:16PM +0200, Alberto Garcia wrote:
> On Tue 16 Oct 2018 12:09:14 PM CEST, Daniel P. Berrangé wrote:
> 
> > @@ -110,20 +111,34 @@ void xts_decrypt(const void *datactx,
> >      /* encrypt the iv */
> >      encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
> >  
> > -    for (i = 0; i < lim; i++) {
> > -        xts_tweak_encdec(datactx, decfunc, src, dst, T.b);
> > -
> > -        src += XTS_BLOCK_SIZE;
> > -        dst += XTS_BLOCK_SIZE;
> > +    if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) &&
> > +        QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) {
> > +        xts_uint128 *S = (xts_uint128 *)src;
> > +        xts_uint128 *D = (xts_uint128 *)dst;
> > +        for (i = 0; i < lim; i++, S++, D++) {
> > +            xts_tweak_encdec(datactx, decfunc, S, D, &T);
> > +        }
> > +    } else {
> > +        xts_uint128 S, D;
> > +
> > +        for (i = 0; i < lim; i++) {
> > +            memcpy(&S, src, XTS_BLOCK_SIZE);
> > +            xts_tweak_encdec(datactx, decfunc, &S, &D, &T);
> > +            memcpy(dst, &D, XTS_BLOCK_SIZE);
> > +            src += XTS_BLOCK_SIZE;
> > +            dst += XTS_BLOCK_SIZE;
> > +        }
> 
> The patch looks good to me, but a couple of comments:
> 
> - As far as I can see xts_tweak_encdec() works the same regardless of
>   whether src and dst point to the same address or not. As a matter of
>   fact both qcrypto_block_decrypt() and qcrypto_block_encrypt() do the
>   decryption and encryption in place, and as you can see the
>   qcrypto_cipher_*crypt() calls in crypto/block.c pass the same buffer
>   as input and output.
> 
>   So instead of having S and D you should be fine with just one of them.

Yes, I could do that in the 2nd loop.

> 
> - I think this is just a matter of style preference, but in the first
>   for loop you can remove the comma operator (i++, S++, D++) and use
>   S[i] and D[I] instead in the line after that. I'm fine if you prefer
>   the current style, though.

The syntax I used results in slightly more efficient asm code.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x to use xts_uint128 type
  2018-10-16 13:35   ` Alberto Garcia
@ 2018-10-16 13:59     ` Daniel P. Berrangé
  2018-10-16 14:22       ` Alberto Garcia
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel P. Berrangé @ 2018-10-16 13:59 UTC (permalink / raw)
  To: Alberto Garcia; +Cc: qemu-devel

On Tue, Oct 16, 2018 at 03:35:01PM +0200, Alberto Garcia wrote:
> On Tue 16 Oct 2018 12:09:15 PM CEST, Daniel P. Berrangé wrote:
> > Using 64-bit arithmetic increases the performance for xts-aes-128
> > when built with gcrypt:
> >
> >   Encrypt: 355 MB/s -> 545 MB/s
> >   Decrypt: 362 MB/s -> 568 MB/s
> >
> > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> 
> This patch is also fine, but I have a couple of minor comments:
> 
> > +static void xts_mult_x(xts_uint128 *I)
> > +{
> > +    uint64_t tt;
> > +
> > +    xts_uint128_cpu_to_les(I);
> > +
> > +    tt = I->u[0] >> 63;
> > +    I->u[0] = I->u[0] << 1;
> 
> Perhaps I->u[0] <<= 1 , for clarity and consistency with the following
> line (I->u[0] ^= 0x87) ? But I don't mind if you prefer to keep it as is
> now.

In fact I could do the following:

@@ -59,12 +59,13 @@ static void xts_mult_x(xts_uint128 *I)
     xts_uint128_cpu_to_les(I);
 
     tt = I->u[0] >> 63;
-    I->u[0] = I->u[0] << 1;
+    I->u[0] <<= 1;
 
     if (I->u[1] >> 63) {
         I->u[0] ^= 0x87;
     }
-    I->u[1] = (I->u[1] << 1) | tt;
+    I->u[1] <<= 1;
+    I->u[1] |= tt;
 
     xts_uint128_le_to_cpus(I);
 }

either way it generates the exact same asm code

> 
> > +    if (I->u[1] >> 63) {
> > +        I->u[0] ^= 0x87;
> >      }
> > +    I->u[1] = (I->u[1] << 1) | tt;
> > +
> > +    xts_uint128_le_to_cpus(I);
> 
> I think both endianness conversion calls should be flipped. First you
> convert from the buffer byte order (LE) to the CPU byte order so you can
> do the bit shifts, then back to the original byte order (LE).
> 
> Changing this doesn't have any practical effect because both calls
> perform the exact same operation, but it documents better what's going
> on.

Yep, ok

> With this changed,
> 
> Reviewed-by: Alberto Garcia <berto@igalia.com>

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x to use xts_uint128 type
  2018-10-16 13:59     ` Daniel P. Berrangé
@ 2018-10-16 14:22       ` Alberto Garcia
  0 siblings, 0 replies; 17+ messages in thread
From: Alberto Garcia @ 2018-10-16 14:22 UTC (permalink / raw)
  To: Daniel P. Berrangé; +Cc: qemu-devel

On Tue 16 Oct 2018 03:59:27 PM CEST, Daniel P. Berrangé wrote:
> In fact I could do the following:
>
> @@ -59,12 +59,13 @@ static void xts_mult_x(xts_uint128 *I)
>      xts_uint128_cpu_to_les(I);
>  
>      tt = I->u[0] >> 63;
> -    I->u[0] = I->u[0] << 1;
> +    I->u[0] <<= 1;
>  
>      if (I->u[1] >> 63) {
>          I->u[0] ^= 0x87;
>      }
> -    I->u[1] = (I->u[1] << 1) | tt;
> +    I->u[1] <<= 1;
> +    I->u[1] |= tt;
>  
>      xts_uint128_le_to_cpus(I);
>  }

Yep, that looks good.

Berto

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 7/8] crypto: refactor XTS cipher mode test suite
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 7/8] crypto: refactor XTS cipher mode test suite Daniel P. Berrangé
@ 2018-10-16 14:34   ` Alberto Garcia
  0 siblings, 0 replies; 17+ messages in thread
From: Alberto Garcia @ 2018-10-16 14:34 UTC (permalink / raw)
  To: Daniel P. Berrangé, qemu-devel

On Tue 16 Oct 2018 12:09:17 PM CEST, Daniel P. Berrangé wrote:
> The current XTS test overloads two different tests in a single function
> making the code a little hard to follow. Split it into distinct test
> cases.
>
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>

Reviewed-by: Alberto Garcia <berto@igalia.com>

Berto

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH v2 8/8] crypto: add testing for unaligned buffers with XTS cipher mode
  2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 8/8] crypto: add testing for unaligned buffers with XTS cipher mode Daniel P. Berrangé
@ 2018-10-16 14:50   ` Alberto Garcia
  0 siblings, 0 replies; 17+ messages in thread
From: Alberto Garcia @ 2018-10-16 14:50 UTC (permalink / raw)
  To: Daniel P. Berrangé, qemu-devel

On Tue 16 Oct 2018 12:09:18 PM CEST, Daniel P. Berrangé wrote:
> Validate that the XTS cipher mode will correctly operate with plain
> text, cipher text and IV buffers that are not 64-bit aligned.
>
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>

Nice :)

Reviewed-by: Alberto Garcia <berto@igalia.com>

Berto

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2018-10-16 14:50 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-16 10:09 [Qemu-devel] [PATCH v2 0/8] crypto: improve performance of XTS cipher mode Daniel P. Berrangé
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 1/8] crypto: expand algorithm coverage for cipher benchmark Daniel P. Berrangé
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 2/8] crypto: remove code duplication in tweak encrypt/decrypt Daniel P. Berrangé
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 3/8] crypto: introduce a xts_uint128 data type Daniel P. Berrangé
2018-10-16 12:45   ` Alberto Garcia
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 4/8] crypto: convert xts_tweak_encdec to use xts_uint128 type Daniel P. Berrangé
2018-10-16 13:09   ` Alberto Garcia
2018-10-16 13:51     ` Daniel P. Berrangé
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 5/8] crypto: convert xts_mult_x " Daniel P. Berrangé
2018-10-16 13:35   ` Alberto Garcia
2018-10-16 13:59     ` Daniel P. Berrangé
2018-10-16 14:22       ` Alberto Garcia
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 6/8] crypto: annotate xts_tweak_encdec as inlineable Daniel P. Berrangé
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 7/8] crypto: refactor XTS cipher mode test suite Daniel P. Berrangé
2018-10-16 14:34   ` Alberto Garcia
2018-10-16 10:09 ` [Qemu-devel] [PATCH v2 8/8] crypto: add testing for unaligned buffers with XTS cipher mode Daniel P. Berrangé
2018-10-16 14:50   ` Alberto Garcia

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.