linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Joakim Tjernlund <joakim.tjernlund@lumentis.se>
To: torvalds@transmeta.com, alan@lxorguk.ukuu.org.uk
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH]  crc32 improvements for 2.5
Date: Wed, 05 Feb 2003 17:23:11 +0100	[thread overview]
Message-ID: <IGEFJKJNHJDCBKALBJLLGEPOFJAA.joakim.tjernlund@lumentis.se> (raw)
In-Reply-To: <1044365707.4067.4.camel@passion.cambridge.redhat.com>

Hi

I did the optimizations in the crc32 patch Brian Murphy submitted a while ago.
Now I have cleaned it up a little and made some more optimizations.

gcc is quite bad at loop optimizations (at least for PPC), so I have
rewritten the loops to make gcc generate better code. Even recent gcc versions (3.2.x)
produce better code this way.

I also reduced the unrolling, since it did not make a noticeable difference.

           Joakim Tjernlund

--- lib/crc32.c.org	Thu Jan  9 00:19:02 2003
+++ lib/crc32.c	Tue Feb  4 19:05:01 2003
@@ -87,55 +87,51 @@
 {
 # if CRC_LE_BITS == 8
 	const u32      *b =(u32 *)p;
-	const u32      *e;
-	/* load data 32 bits wide, xor data 32 bits wide. */
+	const u32      *tab = crc32table_le;
 
-	crc = __cpu_to_le32(crc);
-	/* Align it */
-	for ( ; ((long)b)&3 && len ; len--){
-# ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_le[ (crc ^ *((u8 *)b)++) & 0xff ];
-# else
-		crc = (crc<<8) ^ crc32table_le[ crc>>24 ^ *((u8 *)b)++ ];
-# endif
-	}
-	e = (u32 *) ( (u8 *)b + (len & ~7));
-	while (b < e) {
-		crc ^= *b++;
-# ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
-# else
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
-# endif
-		crc ^= *b++;
 # ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_le[ crc & 0xff ];
+#  define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
+#  define ENDIAN_SHIFT 0
 # else
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_le[ crc >> 24 ];
+#  define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
+#  define ENDIAN_SHIFT 24
 # endif
+
+	crc = __cpu_to_le32(crc);
+	/* Align it */
+	if(unlikely(((long)b)&3 && len)){
+		do {
+			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
+			DO_CRC;
+		} while ((--len) && ((long)b)&3 );
+	}
+	if(likely(len >= 4)){
+		/* load data 32 bits wide, xor data 32 bits wide. */
+		size_t save_len = len & 3;
+	        len = len >> 2;
+		--b; /* use pre increment below(*++b) for speed */
+		do {
+			crc ^= *++b;
+			DO_CRC;
+			DO_CRC;
+			DO_CRC;
+			DO_CRC;
+		} while (--len);
+		b++; /* point to next byte(s) */
+		len = save_len;
 	}
 	/* And the last few bytes */
-	e = (u32 *)((u8 *)b + (len & 7));
-	while (b < e){
-# ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_le[ (crc ^ *((u8 *)b)++) & 0xff ];
-# else
-		crc = (crc<<8) ^ crc32table_le[ crc>>24 ^ *((u8 *)b)++ ];
-# endif
+	if(len){
+		do {
+			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
+			DO_CRC;
+		} while (--len);
 	}
-	return __le32_to_cpu(crc) ;
+
+	return __le32_to_cpu(crc);
+#undef ENDIAN_SHIFT
+#undef DO_CRC
+
 # elif CRC_LE_BITS == 4
 	while (len--) {
 		crc ^= *p++;
@@ -196,55 +192,50 @@
 {
 # if CRC_BE_BITS == 8
 	const u32      *b =(u32 *)p;
-	const u32      *e;
-	/* load data 32 bits wide, xor data 32 bits wide. */
+	const u32      *tab = crc32table_be;
 
-	crc = __cpu_to_be32(crc);
-	/* Align it */
-	for ( ; ((long)b)&3 && len ; len--){
-# ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_be[ (crc ^ *((u8 *)b)++) & 0xff ];
-# else
-		crc = (crc<<8) ^ crc32table_be[ crc>>24 ^ *((u8 *)b)++ ];
-# endif
-	}
-	e = (u32 *) ( (u8 *)b + (len & ~7));
-	while (b < e) {
-		crc ^= *b++;
-# ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
-# else
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
-# endif
-		crc ^= *b++;
 # ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
-		crc = (crc>>8) ^ crc32table_be[ crc & 0xff ];
+#  define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
+#  define ENDIAN_SHIFT 24
 # else
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
-		crc = (crc<<8) ^ crc32table_be[ crc >> 24 ];
+#  define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
+#  define ENDIAN_SHIFT 0
 # endif
+
+	crc = __cpu_to_be32(crc);
+	/* Align it */
+	if(unlikely(((long)b)&3 && len)){
+		do {
+			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
+			DO_CRC;
+		} while ((--len) && ((long)b)&3 );
+	}
+	if(likely(len >= 4)){
+		/* load data 32 bits wide, xor data 32 bits wide. */
+		size_t save_len = len & 3;
+	        len = len >> 2;
+		--b; /* use pre increment below(*++b) for speed */
+		do {
+			crc ^= *++b;
+			DO_CRC;
+			DO_CRC;
+			DO_CRC;
+			DO_CRC;
+		} while (--len);
+		b++; /* point to next byte(s) */
+		len = save_len;
 	}
 	/* And the last few bytes */
-	e = (u32 *)((u8 *)b + (len & 7));
-	while (b < e){
-# ifdef __LITTLE_ENDIAN
-		crc = (crc>>8) ^ crc32table_be[ (crc ^ *((u8 *)b)++) & 0xff ];
-# else
-		crc = (crc<<8) ^ crc32table_be[ crc>>24 ^ *((u8 *)b)++ ];
-# endif
-	}
-	return __be32_to_cpu(crc) ;
+	if(len){
+		do {
+			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
+			DO_CRC;
+		} while (--len);
+	}
+	return __be32_to_cpu(crc);
+#undef ENDIAN_SHIFT
+#undef DO_CRC
+
 # elif CRC_BE_BITS == 4
 	while (len--) {
 		crc ^= *p++ << 24;



       reply	other threads:[~2003-02-05 16:13 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1044365707.4067.4.camel@passion.cambridge.redhat.com>
2003-02-05 16:23 ` Joakim Tjernlund [this message]
2003-02-13 11:13 ` [PATCH] crc32 improvements for 2.5 [RESEND] Joakim Tjernlund
2003-02-13 17:54   ` Andrew Morton
2003-02-13 21:45     ` Joakim Tjernlund
2003-02-18  9:28 ` [PATCH] crc32 improvements for 2.5, more optimizations Joakim Tjernlund

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=IGEFJKJNHJDCBKALBJLLGEPOFJAA.joakim.tjernlund@lumentis.se \
    --to=joakim.tjernlund@lumentis.se \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).