Commit b7c8e55db7141dcbb9d5305a3260fa0ed62a1bcc

Authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (39 commits)
  random: Reorder struct entropy_store to remove padding on 64bits
  padata: update API documentation
  padata: Remove padata_get_cpumask
  crypto: pcrypt - Update pcrypt cpumask according to the padata cpumask notifier
  crypto: pcrypt - Rename pcrypt_instance
  padata: Pass the padata cpumasks to the cpumask_change_notifier chain
  padata: Rearrange set_cpumask functions
  padata: Rename padata_alloc functions
  crypto: pcrypt - Don't calculate a callback cpu on empty callback cpumask
  padata: Check for valid cpumasks
  padata: Allocate cpumask dependent resources in any case
  padata: Fix cpu index counting
  crypto: geode_aes - Convert pci_table entries to PCI_VDEVICE (if PCI_ANY_ID is used)
  pcrypt: Added sysfs interface to pcrypt
  padata: Added sysfs primitives to padata subsystem
  padata: Make two separate cpumasks
  padata: update documentation
  padata: simplify serialization mechanism
  padata: make padata_do_parallel to return zero on success
  padata: Handle empty padata cpumasks
  ...

Showing 24 changed files

Documentation/padata.txt
1 1 The padata parallel execution mechanism
2   -Last updated for 2.6.34
  2 +Last updated for 2.6.36
3 3  
4 4 Padata is a mechanism by which the kernel can farm work out to be done in
5 5 parallel on multiple CPUs while retaining the ordering of tasks. It was
6 6  
7 7  
8 8  
9 9  
10 10  
11 11  
12 12  
13 13  
... ... @@ -13,32 +13,87 @@
13 13  
14 14 #include <linux/padata.h>
15 15  
16   - struct padata_instance *padata_alloc(const struct cpumask *cpumask,
17   - struct workqueue_struct *wq);
  16 + struct padata_instance *padata_alloc(struct workqueue_struct *wq,
  17 + const struct cpumask *pcpumask,
  18 + const struct cpumask *cbcpumask);
18 19  
19   -The cpumask describes which processors will be used to execute work
20   -submitted to this instance. The workqueue wq is where the work will
21   -actually be done; it should be a multithreaded queue, naturally.
  20 +The pcpumask describes which processors will be used to execute work
  21 +submitted to this instance in parallel. The cbcpumask defines which
  22 +processors are allowed to be used as the serialization callback processor.
  23 +The workqueue wq is where the work will actually be done; it should be
  24 +a multithreaded queue, naturally.
22 25  
  26 +To allocate a padata instance with the cpu_possible_mask for both
  27 +cpumasks, this helper function can be used:
  28 +
  29 + struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq);
  30 +
  31 +Note: Padata maintains two kinds of cpumasks internally. The user supplied
  32 +cpumasks, submitted via padata_alloc/padata_alloc_possible, and the 'usable'
  33 +cpumasks. The usable cpumasks are always a subset of the active cpus in the
  34 +user supplied cpumasks; these are the cpumasks padata actually uses. So
  35 +it is legal to supply a cpumask to padata that contains offline cpus.
  36 +Once an offline cpu in the user supplied cpumask comes online, padata
  37 +will start using it.
  38 +
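To make the allocation step concrete, here is a minimal sketch of a user (the names and error handling are hypothetical, not part of this patch):

    #include <linux/padata.h>
    #include <linux/workqueue.h>

    static struct padata_instance *pinst;
    static struct workqueue_struct *wq;

    static int my_padata_setup(void)
    {
    	wq = create_workqueue("my_parallel_wq");	/* a multithreaded queue */
    	if (!wq)
    		return -ENOMEM;

    	/* cpu_possible_mask is used for both the parallel and callback cpumasks */
    	pinst = padata_alloc_possible(wq);
    	if (!pinst) {
    		destroy_workqueue(wq);
    		return -ENOMEM;
    	}

    	return 0;
    }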
23 39 There are functions for enabling and disabling the instance:
24 40  
25   - void padata_start(struct padata_instance *pinst);
  41 + int padata_start(struct padata_instance *pinst);
26 42 void padata_stop(struct padata_instance *pinst);
27 43  
28   -These functions literally do nothing beyond setting or clearing the
29   -"padata_start() was called" flag; if that flag is not set, other functions
30   -will refuse to work.
  44 +These functions set or clear the "PADATA_INIT" flag;
  45 +if that flag is not set, other functions will refuse to work.
  46 +padata_start returns zero on success (flag set) or -EINVAL if the
  47 +padata cpumask contains no active cpu (flag not set).
  48 +padata_stop clears the flag and blocks until the padata instance
  49 +is unused.
31 50  
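Continuing the hypothetical sketch above, bringing the instance up and tearing it down again could look like this:

    static int my_padata_enable(void)
    {
    	/* fails with -EINVAL if the cpumasks contain no active cpu */
    	return padata_start(pinst);
    }

    static void my_padata_teardown(void)
    {
    	padata_stop(pinst);		/* clears PADATA_INIT, waits until unused */
    	padata_free(pinst);
    	destroy_workqueue(wq);
    }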
32 51 The list of CPUs to be used can be adjusted with these functions:
33 52  
34   - int padata_set_cpumask(struct padata_instance *pinst,
  53 + int padata_set_cpumasks(struct padata_instance *pinst,
  54 + cpumask_var_t pcpumask,
  55 + cpumask_var_t cbcpumask);
  56 + int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
35 57 cpumask_var_t cpumask);
36   - int padata_add_cpu(struct padata_instance *pinst, int cpu);
37   - int padata_remove_cpu(struct padata_instance *pinst, int cpu);
  58 + int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
  59 + int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
38 60  
39   -Changing the CPU mask has the look of an expensive operation, though, so it
40   -probably should not be done with great frequency.
  61 +Changing the CPU masks is an expensive operation, though, so it should not be
  62 +done with great frequency.
41 63  
  64 +It's possible to change both cpumasks of a padata instance with
  65 +padata_set_cpumasks by specifying the cpumasks for parallel execution (pcpumask)
  66 +and for the serial callback function (cbcpumask). padata_set_cpumask is used to
  67 +change just one of the cpumasks. Here cpumask_type is one of PADATA_CPU_SERIAL,
  68 +PADATA_CPU_PARALLEL and cpumask specifies the new cpumask to use.
  69 +To add or remove a single cpu from a certain cpumask, the functions
  70 +padata_add_cpu/padata_remove_cpu are used. cpu specifies the cpu to add or
  71 +remove and mask is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL.
  72 +
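As an illustration of the calls above (again a sketch; the choice of CPU 3 and the error handling are arbitrary), replacing the parallel cpumask and allowing one more CPU to run serial callbacks could be written as:

    static int my_retune_cpumasks(struct padata_instance *pinst)
    {
    	cpumask_var_t new_pmask;
    	int err;

    	if (!alloc_cpumask_var(&new_pmask, GFP_KERNEL))
    		return -ENOMEM;
    	cpumask_copy(new_pmask, cpu_online_mask);

    	/* replace only the parallel cpumask */
    	err = padata_set_cpumask(pinst, PADATA_CPU_PARALLEL, new_pmask);
    	if (!err)
    		/* additionally allow CPU 3 to run serial callbacks */
    		err = padata_add_cpu(pinst, 3, PADATA_CPU_SERIAL);

    	free_cpumask_var(new_pmask);
    	return err;
    }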
  73 +A user who is interested in padata cpumask changes can register with
  74 +the padata cpumask change notifier:
  75 +
  76 + int padata_register_cpumask_notifier(struct padata_instance *pinst,
  77 + struct notifier_block *nblock);
  78 +
  79 +To unregister from that notifier:
  80 +
  81 + int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
  82 + struct notifier_block *nblock);
  83 +
  84 +The padata cpumask change notifier notifies about changes of the usable
  85 +cpumasks, i.e. the subset of active cpus in the user supplied cpumask.
  86 +
  87 +Padata calls the notifier chain with:
  88 +
  89 + blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
  90 + notification_mask,
  91 + &pd_new->cpumask);
  92 +
  93 +Here cpumask_change_notifier is the registered notifier, notification_mask
  94 +is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL and cpumask is a pointer
  95 +to a struct padata_cpumask that contains the new cpumask information.
  96 +
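A hypothetical notifier for this chain (the pcrypt rework later in this merge is the in-tree user) might be structured as follows; the names and printed messages are illustrative only:

    static int my_cpumask_notify(struct notifier_block *self,
    			     unsigned long val, void *data)
    {
    	struct padata_cpumask *cpumask = data;

    	if (val & PADATA_CPU_PARALLEL)
    		pr_info("usable parallel cpumask changed\n");
    	if (val & PADATA_CPU_SERIAL)
    		pr_info("usable serial cpumask changed, first cpu %u\n",
    			cpumask_first(cpumask->cbcpu));

    	return 0;
    }

    static struct notifier_block my_nblock = {
    	.notifier_call = my_cpumask_notify,
    };

    /* somewhere after allocating the instance:
    	err = padata_register_cpumask_notifier(pinst, &my_nblock); */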
42 97 Actually submitting work to the padata instance requires the creation of a
43 98 padata_priv structure:
44 99  
... ... @@ -50,7 +105,7 @@
50 105  
51 106 This structure will almost certainly be embedded within some larger
52 107 structure specific to the work to be done. Most of its fields are private to
53   -padata, but the structure should be zeroed at initialization time, and the
  108 +padata, but the structure should be zeroed at initialisation time, and the
54 109 parallel() and serial() functions should be provided. Those functions will
55 110 be called in the process of getting the work done as we will see
56 111 momentarily.
... ... @@ -63,12 +118,10 @@
63 118 The pinst and padata structures must be set up as described above; cb_cpu
64 119 specifies which CPU will be used for the final callback when the work is
65 120 done; it must be in the current instance's CPU mask. The return value from
66   -padata_do_parallel() is a little strange; zero is an error return
67   -indicating that the caller forgot the padata_start() formalities. -EBUSY
68   -means that somebody, somewhere else is messing with the instance's CPU
69   -mask, while -EINVAL is a complaint about cb_cpu not being in that CPU mask.
70   -If all goes well, this function will return -EINPROGRESS, indicating that
71   -the work is in progress.
  121 +padata_do_parallel() is zero on success, indicating that the work is in
  122 +progress. -EBUSY means that somebody, somewhere else is messing with the
  123 +instance's CPU mask, while -EINVAL is a complaint about cb_cpu not being
  124 +in that CPU mask or about an instance that is not running.
72 125  
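Putting the pieces together, a hypothetical submission path, modelled on the prototypes above and on the pcrypt usage later in this diff, could look like:

    struct my_work {
    	struct padata_priv padata;	/* zeroed before each submission */
    	/* ... per-job data ... */
    };

    static void my_parallel(struct padata_priv *padata)
    {
    	/* the heavy lifting runs here, on one of the parallel cpus */
    	padata_do_serial(padata);	/* hand the job back for ordered completion */
    }

    static void my_serial(struct padata_priv *padata)
    {
    	/* runs on cb_cpu, in the order the jobs were submitted */
    }

    static int my_submit(struct padata_instance *pinst, struct my_work *w,
    		     int cb_cpu)
    {
    	memset(&w->padata, 0, sizeof(w->padata));
    	w->padata.parallel = my_parallel;
    	w->padata.serial = my_serial;

    	return padata_do_parallel(pinst, &w->padata, cb_cpu);
    }

On a zero return the submitter would typically report -EINPROGRESS to its own caller and let the serial() callback complete the request, which is what the reworked pcrypt code below does.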
73 126 Each task submitted to padata_do_parallel() will, in turn, be passed to
74 127 exactly one call to the above-mentioned parallel() function, on one CPU, so
arch/s390/crypto/Makefile
... ... @@ -5,7 +5,7 @@
5 5 obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
6 6 obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
7 7 obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
8   -obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o des_check_key.o
  8 +obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
9 9 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
10 10 obj-$(CONFIG_S390_PRNG) += prng.o
arch/s390/crypto/crypto_des.h
... ... @@ -15,5 +15,5 @@
15 15  
16 16 extern int crypto_des_check_key(const u8*, unsigned int, u32*);
17 17  
18   -#endif //__CRYPTO_DES_H__
  18 +#endif /*__CRYPTO_DES_H__*/
arch/s390/crypto/des_s390.c
... ... @@ -14,32 +14,21 @@
14 14 *
15 15 */
16 16  
17   -#include <crypto/algapi.h>
18 17 #include <linux/init.h>
19 18 #include <linux/module.h>
  19 +#include <linux/crypto.h>
  20 +#include <crypto/algapi.h>
  21 +#include <crypto/des.h>
20 22  
21 23 #include "crypt_s390.h"
22   -#include "crypto_des.h"
23 24  
24   -#define DES_BLOCK_SIZE 8
25   -#define DES_KEY_SIZE 8
26   -
27   -#define DES3_128_KEY_SIZE (2 * DES_KEY_SIZE)
28   -#define DES3_128_BLOCK_SIZE DES_BLOCK_SIZE
29   -
30 25 #define DES3_192_KEY_SIZE (3 * DES_KEY_SIZE)
31   -#define DES3_192_BLOCK_SIZE DES_BLOCK_SIZE
32 26  
33 27 struct crypt_s390_des_ctx {
34 28 u8 iv[DES_BLOCK_SIZE];
35 29 u8 key[DES_KEY_SIZE];
36 30 };
37 31  
38   -struct crypt_s390_des3_128_ctx {
39   - u8 iv[DES_BLOCK_SIZE];
40   - u8 key[DES3_128_KEY_SIZE];
41   -};
42   -
43 32 struct crypt_s390_des3_192_ctx {
44 33 u8 iv[DES_BLOCK_SIZE];
45 34 u8 key[DES3_192_KEY_SIZE];
46 35  
... ... @@ -50,13 +39,16 @@
50 39 {
51 40 struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
52 41 u32 *flags = &tfm->crt_flags;
53   - int ret;
  42 + u32 tmp[DES_EXPKEY_WORDS];
54 43  
55   - /* test if key is valid (not a weak key) */
56   - ret = crypto_des_check_key(key, keylen, flags);
57   - if (ret == 0)
58   - memcpy(dctx->key, key, keylen);
59   - return ret;
  44 + /* check for weak keys */
  45 + if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
  46 + *flags |= CRYPTO_TFM_RES_WEAK_KEY;
  47 + return -EINVAL;
  48 + }
  49 +
  50 + memcpy(dctx->key, key, keylen);
  51 + return 0;
60 52 }
61 53  
62 54 static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
... ... @@ -237,165 +229,6 @@
237 229 * complementation keys. Any weakness is obviated by the use of
238 230 * multiple keys.
239 231 *
240   - * However, if the two independent 64-bit keys are equal,
241   - * then the DES3 operation is simply the same as DES.
242   - * Implementers MUST reject keys that exhibit this property.
243   - *
244   - */
245   -static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key,
246   - unsigned int keylen)
247   -{
248   - int i, ret;
249   - struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
250   - const u8 *temp_key = key;
251   - u32 *flags = &tfm->crt_flags;
252   -
253   - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE)) &&
254   - (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
255   - *flags |= CRYPTO_TFM_RES_WEAK_KEY;
256   - return -EINVAL;
257   - }
258   - for (i = 0; i < 2; i++, temp_key += DES_KEY_SIZE) {
259   - ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags);
260   - if (ret < 0)
261   - return ret;
262   - }
263   - memcpy(dctx->key, key, keylen);
264   - return 0;
265   -}
266   -
267   -static void des3_128_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
268   -{
269   - struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
270   -
271   - crypt_s390_km(KM_TDEA_128_ENCRYPT, dctx->key, dst, (void*)src,
272   - DES3_128_BLOCK_SIZE);
273   -}
274   -
275   -static void des3_128_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
276   -{
277   - struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
278   -
279   - crypt_s390_km(KM_TDEA_128_DECRYPT, dctx->key, dst, (void*)src,
280   - DES3_128_BLOCK_SIZE);
281   -}
282   -
283   -static struct crypto_alg des3_128_alg = {
284   - .cra_name = "des3_ede128",
285   - .cra_driver_name = "des3_ede128-s390",
286   - .cra_priority = CRYPT_S390_PRIORITY,
287   - .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
288   - .cra_blocksize = DES3_128_BLOCK_SIZE,
289   - .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx),
290   - .cra_module = THIS_MODULE,
291   - .cra_list = LIST_HEAD_INIT(des3_128_alg.cra_list),
292   - .cra_u = {
293   - .cipher = {
294   - .cia_min_keysize = DES3_128_KEY_SIZE,
295   - .cia_max_keysize = DES3_128_KEY_SIZE,
296   - .cia_setkey = des3_128_setkey,
297   - .cia_encrypt = des3_128_encrypt,
298   - .cia_decrypt = des3_128_decrypt,
299   - }
300   - }
301   -};
302   -
303   -static int ecb_des3_128_encrypt(struct blkcipher_desc *desc,
304   - struct scatterlist *dst,
305   - struct scatterlist *src, unsigned int nbytes)
306   -{
307   - struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
308   - struct blkcipher_walk walk;
309   -
310   - blkcipher_walk_init(&walk, dst, src, nbytes);
311   - return ecb_desall_crypt(desc, KM_TDEA_128_ENCRYPT, sctx->key, &walk);
312   -}
313   -
314   -static int ecb_des3_128_decrypt(struct blkcipher_desc *desc,
315   - struct scatterlist *dst,
316   - struct scatterlist *src, unsigned int nbytes)
317   -{
318   - struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
319   - struct blkcipher_walk walk;
320   -
321   - blkcipher_walk_init(&walk, dst, src, nbytes);
322   - return ecb_desall_crypt(desc, KM_TDEA_128_DECRYPT, sctx->key, &walk);
323   -}
324   -
325   -static struct crypto_alg ecb_des3_128_alg = {
326   - .cra_name = "ecb(des3_ede128)",
327   - .cra_driver_name = "ecb-des3_ede128-s390",
328   - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
329   - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
330   - .cra_blocksize = DES3_128_BLOCK_SIZE,
331   - .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx),
332   - .cra_type = &crypto_blkcipher_type,
333   - .cra_module = THIS_MODULE,
334   - .cra_list = LIST_HEAD_INIT(
335   - ecb_des3_128_alg.cra_list),
336   - .cra_u = {
337   - .blkcipher = {
338   - .min_keysize = DES3_128_KEY_SIZE,
339   - .max_keysize = DES3_128_KEY_SIZE,
340   - .setkey = des3_128_setkey,
341   - .encrypt = ecb_des3_128_encrypt,
342   - .decrypt = ecb_des3_128_decrypt,
343   - }
344   - }
345   -};
346   -
347   -static int cbc_des3_128_encrypt(struct blkcipher_desc *desc,
348   - struct scatterlist *dst,
349   - struct scatterlist *src, unsigned int nbytes)
350   -{
351   - struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
352   - struct blkcipher_walk walk;
353   -
354   - blkcipher_walk_init(&walk, dst, src, nbytes);
355   - return cbc_desall_crypt(desc, KMC_TDEA_128_ENCRYPT, sctx->iv, &walk);
356   -}
357   -
358   -static int cbc_des3_128_decrypt(struct blkcipher_desc *desc,
359   - struct scatterlist *dst,
360   - struct scatterlist *src, unsigned int nbytes)
361   -{
362   - struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
363   - struct blkcipher_walk walk;
364   -
365   - blkcipher_walk_init(&walk, dst, src, nbytes);
366   - return cbc_desall_crypt(desc, KMC_TDEA_128_DECRYPT, sctx->iv, &walk);
367   -}
368   -
369   -static struct crypto_alg cbc_des3_128_alg = {
370   - .cra_name = "cbc(des3_ede128)",
371   - .cra_driver_name = "cbc-des3_ede128-s390",
372   - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
373   - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
374   - .cra_blocksize = DES3_128_BLOCK_SIZE,
375   - .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx),
376   - .cra_type = &crypto_blkcipher_type,
377   - .cra_module = THIS_MODULE,
378   - .cra_list = LIST_HEAD_INIT(
379   - cbc_des3_128_alg.cra_list),
380   - .cra_u = {
381   - .blkcipher = {
382   - .min_keysize = DES3_128_KEY_SIZE,
383   - .max_keysize = DES3_128_KEY_SIZE,
384   - .ivsize = DES3_128_BLOCK_SIZE,
385   - .setkey = des3_128_setkey,
386   - .encrypt = cbc_des3_128_encrypt,
387   - .decrypt = cbc_des3_128_decrypt,
388   - }
389   - }
390   -};
391   -
392   -/*
393   - * RFC2451:
394   - *
395   - * For DES-EDE3, there is no known need to reject weak or
396   - * complementation keys. Any weakness is obviated by the use of
397   - * multiple keys.
398   - *
399 232 * However, if the first two or last two independent 64-bit keys are
400 233 * equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
401 234 * same as DES. Implementers MUST reject keys that exhibit this
402 235  
... ... @@ -405,9 +238,7 @@
405 238 static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
406 239 unsigned int keylen)
407 240 {
408   - int i, ret;
409 241 struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
410   - const u8 *temp_key = key;
411 242 u32 *flags = &tfm->crt_flags;
412 243  
413 244 if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
... ... @@ -417,11 +248,6 @@
417 248 *flags |= CRYPTO_TFM_RES_WEAK_KEY;
418 249 return -EINVAL;
419 250 }
420   - for (i = 0; i < 3; i++, temp_key += DES_KEY_SIZE) {
421   - ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags);
422   - if (ret < 0)
423   - return ret;
424   - }
425 251 memcpy(dctx->key, key, keylen);
426 252 return 0;
427 253 }
... ... @@ -431,7 +257,7 @@
431 257 struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
432 258  
433 259 crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src,
434   - DES3_192_BLOCK_SIZE);
  260 + DES_BLOCK_SIZE);
435 261 }
436 262  
437 263 static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
... ... @@ -439,7 +265,7 @@
439 265 struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
440 266  
441 267 crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src,
442   - DES3_192_BLOCK_SIZE);
  268 + DES_BLOCK_SIZE);
443 269 }
444 270  
445 271 static struct crypto_alg des3_192_alg = {
... ... @@ -447,7 +273,7 @@
447 273 .cra_driver_name = "des3_ede-s390",
448 274 .cra_priority = CRYPT_S390_PRIORITY,
449 275 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
450   - .cra_blocksize = DES3_192_BLOCK_SIZE,
  276 + .cra_blocksize = DES_BLOCK_SIZE,
451 277 .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
452 278 .cra_module = THIS_MODULE,
453 279 .cra_list = LIST_HEAD_INIT(des3_192_alg.cra_list),
... ... @@ -489,7 +315,7 @@
489 315 .cra_driver_name = "ecb-des3_ede-s390",
490 316 .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
491 317 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
492   - .cra_blocksize = DES3_192_BLOCK_SIZE,
  318 + .cra_blocksize = DES_BLOCK_SIZE,
493 319 .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
494 320 .cra_type = &crypto_blkcipher_type,
495 321 .cra_module = THIS_MODULE,
... ... @@ -533,7 +359,7 @@
533 359 .cra_driver_name = "cbc-des3_ede-s390",
534 360 .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
535 361 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
536   - .cra_blocksize = DES3_192_BLOCK_SIZE,
  362 + .cra_blocksize = DES_BLOCK_SIZE,
537 363 .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
538 364 .cra_type = &crypto_blkcipher_type,
539 365 .cra_module = THIS_MODULE,
... ... @@ -543,7 +369,7 @@
543 369 .blkcipher = {
544 370 .min_keysize = DES3_192_KEY_SIZE,
545 371 .max_keysize = DES3_192_KEY_SIZE,
546   - .ivsize = DES3_192_BLOCK_SIZE,
  372 + .ivsize = DES_BLOCK_SIZE,
547 373 .setkey = des3_192_setkey,
548 374 .encrypt = cbc_des3_192_encrypt,
549 375 .decrypt = cbc_des3_192_decrypt,
550 376  
... ... @@ -553,10 +379,9 @@
553 379  
554 380 static int des_s390_init(void)
555 381 {
556   - int ret = 0;
  382 + int ret;
557 383  
558 384 if (!crypt_s390_func_available(KM_DEA_ENCRYPT) ||
559   - !crypt_s390_func_available(KM_TDEA_128_ENCRYPT) ||
560 385 !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
561 386 return -EOPNOTSUPP;
562 387  
... ... @@ -569,17 +394,6 @@
569 394 ret = crypto_register_alg(&cbc_des_alg);
570 395 if (ret)
571 396 goto cbc_des_err;
572   -
573   - ret = crypto_register_alg(&des3_128_alg);
574   - if (ret)
575   - goto des3_128_err;
576   - ret = crypto_register_alg(&ecb_des3_128_alg);
577   - if (ret)
578   - goto ecb_des3_128_err;
579   - ret = crypto_register_alg(&cbc_des3_128_alg);
580   - if (ret)
581   - goto cbc_des3_128_err;
582   -
583 397 ret = crypto_register_alg(&des3_192_alg);
584 398 if (ret)
585 399 goto des3_192_err;
... ... @@ -589,7 +403,6 @@
589 403 ret = crypto_register_alg(&cbc_des3_192_alg);
590 404 if (ret)
591 405 goto cbc_des3_192_err;
592   -
593 406 out:
594 407 return ret;
595 408  
... ... @@ -598,12 +411,6 @@
598 411 ecb_des3_192_err:
599 412 crypto_unregister_alg(&des3_192_alg);
600 413 des3_192_err:
601   - crypto_unregister_alg(&cbc_des3_128_alg);
602   -cbc_des3_128_err:
603   - crypto_unregister_alg(&ecb_des3_128_alg);
604   -ecb_des3_128_err:
605   - crypto_unregister_alg(&des3_128_alg);
606   -des3_128_err:
607 414 crypto_unregister_alg(&cbc_des_alg);
608 415 cbc_des_err:
609 416 crypto_unregister_alg(&ecb_des_alg);
610 417  
611 418  
... ... @@ -613,21 +420,18 @@
613 420 goto out;
614 421 }
615 422  
616   -static void __exit des_s390_fini(void)
  423 +static void __exit des_s390_exit(void)
617 424 {
618 425 crypto_unregister_alg(&cbc_des3_192_alg);
619 426 crypto_unregister_alg(&ecb_des3_192_alg);
620 427 crypto_unregister_alg(&des3_192_alg);
621   - crypto_unregister_alg(&cbc_des3_128_alg);
622   - crypto_unregister_alg(&ecb_des3_128_alg);
623   - crypto_unregister_alg(&des3_128_alg);
624 428 crypto_unregister_alg(&cbc_des_alg);
625 429 crypto_unregister_alg(&ecb_des_alg);
626 430 crypto_unregister_alg(&des_alg);
627 431 }
628 432  
629 433 module_init(des_s390_init);
630   -module_exit(des_s390_fini);
  434 +module_exit(des_s390_exit);
631 435  
632 436 MODULE_ALIAS("des");
633 437 MODULE_ALIAS("des3_ede");
... ... @@ -80,6 +80,11 @@
80 80  
81 81 config CRYPTO_PCOMP
82 82 tristate
  83 + select CRYPTO_PCOMP2
  84 + select CRYPTO_ALGAPI
  85 +
  86 +config CRYPTO_PCOMP2
  87 + tristate
83 88 select CRYPTO_ALGAPI2
84 89  
85 90 config CRYPTO_MANAGER
... ... @@ -94,7 +99,15 @@
94 99 select CRYPTO_AEAD2
95 100 select CRYPTO_HASH2
96 101 select CRYPTO_BLKCIPHER2
97   - select CRYPTO_PCOMP
  102 + select CRYPTO_PCOMP2
  103 +
  104 +config CRYPTO_MANAGER_TESTS
  105 + bool "Run algorithms' self-tests"
  106 + default y
  107 + depends on CRYPTO_MANAGER2
  108 + help
  109 + Run cryptomanager's tests for the new crypto algorithms being
  110 + registered.
98 111  
99 112 config CRYPTO_GF128MUL
100 113 tristate "GF(2^128) multiplication functions (EXPERIMENTAL)"
... ... @@ -26,7 +26,7 @@
26 26 crypto_hash-objs += shash.o
27 27 obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
28 28  
29   -obj-$(CONFIG_CRYPTO_PCOMP) += pcompress.o
  29 +obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o
30 30  
31 31 cryptomgr-objs := algboss.o testmgr.o
32 32  
... ... @@ -61,7 +61,7 @@
61 61 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
62 62 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
63 63 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
64   -obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
  64 +obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
65 65 obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
66 66 obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o
67 67 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
... ... @@ -206,6 +206,7 @@
206 206 return NOTIFY_OK;
207 207 }
208 208  
  209 +#ifdef CONFIG_CRYPTO_MANAGER_TESTS
209 210 static int cryptomgr_test(void *data)
210 211 {
211 212 struct crypto_test_param *param = data;
... ... @@ -266,6 +267,7 @@
266 267 err:
267 268 return NOTIFY_OK;
268 269 }
  270 +#endif /* CONFIG_CRYPTO_MANAGER_TESTS */
269 271  
270 272 static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
271 273 void *data)
272 274  
... ... @@ -273,8 +275,10 @@
273 275 switch (msg) {
274 276 case CRYPTO_MSG_ALG_REQUEST:
275 277 return cryptomgr_schedule_probe(data);
  278 +#ifdef CONFIG_CRYPTO_MANAGER_TESTS
276 279 case CRYPTO_MSG_ALG_REGISTER:
277 280 return cryptomgr_schedule_test(data);
  281 +#endif
278 282 }
279 283  
280 284 return NOTIFY_DONE;
... ... @@ -616,7 +616,7 @@
616 616 auth = ahash_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH,
617 617 CRYPTO_ALG_TYPE_AHASH_MASK);
618 618 if (IS_ERR(auth))
619   - return ERR_PTR(PTR_ERR(auth));
  619 + return ERR_CAST(auth);
620 620  
621 621 auth_base = &auth->base;
622 622  
... ... @@ -185,7 +185,7 @@
185 185 alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
186 186 CRYPTO_ALG_TYPE_MASK);
187 187 if (IS_ERR(alg))
188   - return ERR_PTR(PTR_ERR(alg));
  188 + return ERR_CAST(alg);
189 189  
190 190 /* Block size must be >= 4 bytes. */
191 191 err = -EINVAL;
... ... @@ -24,13 +24,41 @@
24 24 #include <linux/init.h>
25 25 #include <linux/module.h>
26 26 #include <linux/slab.h>
  27 +#include <linux/notifier.h>
  28 +#include <linux/kobject.h>
  29 +#include <linux/cpu.h>
27 30 #include <crypto/pcrypt.h>
28 31  
29   -static struct padata_instance *pcrypt_enc_padata;
30   -static struct padata_instance *pcrypt_dec_padata;
31   -static struct workqueue_struct *encwq;
32   -static struct workqueue_struct *decwq;
  32 +struct padata_pcrypt {
  33 + struct padata_instance *pinst;
  34 + struct workqueue_struct *wq;
33 35  
  36 + /*
  37 + * Cpumask for callback CPUs. It should be
  38 + * equal to the serial cpumask of the corresponding padata instance,
  39 + * so it is updated when padata notifies us about serial
  40 + * cpumask changes.
  41 + *
  42 + * cb_cpumask is protected by RCU. This fact prevents us from
  43 + * using cpumask_var_t directly because the actual type of
  44 + * cpumask_var_t depends on the kernel configuration (particularly on
  45 + * the CONFIG_CPUMASK_OFFSTACK macro). Depending on the configuration,
  46 + * cpumask_var_t may be either a pointer to the struct cpumask
  47 + * or a variable allocated on the stack. Thus we cannot safely use
  48 + * cpumask_var_t with RCU operations such as rcu_assign_pointer or
  49 + * rcu_dereference. So cpumask_var_t is wrapped with struct
  50 + * pcrypt_cpumask, which makes it possible to use it with RCU.
  51 + */
  52 + struct pcrypt_cpumask {
  53 + cpumask_var_t mask;
  54 + } *cb_cpumask;
  55 + struct notifier_block nblock;
  56 +};
  57 +
  58 +static struct padata_pcrypt pencrypt;
  59 +static struct padata_pcrypt pdecrypt;
  60 +static struct kset *pcrypt_kset;
  61 +
34 62 struct pcrypt_instance_ctx {
35 63 struct crypto_spawn spawn;
36 64 unsigned int tfm_count;
37 65  
38 66  
39 67  
40 68  
41 69  
42 70  
... ... @@ -42,25 +70,32 @@
42 70 };
43 71  
44 72 static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
45   - struct padata_instance *pinst)
  73 + struct padata_pcrypt *pcrypt)
46 74 {
47 75 unsigned int cpu_index, cpu, i;
  76 + struct pcrypt_cpumask *cpumask;
48 77  
49 78 cpu = *cb_cpu;
50 79  
51   - if (cpumask_test_cpu(cpu, cpu_active_mask))
  80 + rcu_read_lock_bh();
  81 + cpumask = rcu_dereference(pcrypt->cb_cpumask);
  82 + if (cpumask_test_cpu(cpu, cpumask->mask))
52 83 goto out;
53 84  
54   - cpu_index = cpu % cpumask_weight(cpu_active_mask);
  85 + if (!cpumask_weight(cpumask->mask))
  86 + goto out;
55 87  
56   - cpu = cpumask_first(cpu_active_mask);
  88 + cpu_index = cpu % cpumask_weight(cpumask->mask);
  89 +
  90 + cpu = cpumask_first(cpumask->mask);
57 91 for (i = 0; i < cpu_index; i++)
58   - cpu = cpumask_next(cpu, cpu_active_mask);
  92 + cpu = cpumask_next(cpu, cpumask->mask);
59 93  
60 94 *cb_cpu = cpu;
61 95  
62 96 out:
63   - return padata_do_parallel(pinst, padata, cpu);
  97 + rcu_read_unlock_bh();
  98 + return padata_do_parallel(pcrypt->pinst, padata, cpu);
64 99 }
65 100  
66 101 static int pcrypt_aead_setkey(struct crypto_aead *parent,
... ... @@ -142,11 +177,9 @@
142 177 req->cryptlen, req->iv);
143 178 aead_request_set_assoc(creq, req->assoc, req->assoclen);
144 179  
145   - err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
146   - if (err)
147   - return err;
148   - else
149   - err = crypto_aead_encrypt(creq);
  180 + err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
  181 + if (!err)
  182 + return -EINPROGRESS;
150 183  
151 184 return err;
152 185 }
... ... @@ -186,11 +219,9 @@
186 219 req->cryptlen, req->iv);
187 220 aead_request_set_assoc(creq, req->assoc, req->assoclen);
188 221  
189   - err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_dec_padata);
190   - if (err)
191   - return err;
192   - else
193   - err = crypto_aead_decrypt(creq);
  222 + err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pdecrypt);
  223 + if (!err)
  224 + return -EINPROGRESS;
194 225  
195 226 return err;
196 227 }
... ... @@ -232,11 +263,9 @@
232 263 aead_givcrypt_set_assoc(creq, areq->assoc, areq->assoclen);
233 264 aead_givcrypt_set_giv(creq, req->giv, req->seq);
234 265  
235   - err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
236   - if (err)
237   - return err;
238   - else
239   - err = crypto_aead_givencrypt(creq);
  266 + err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
  267 + if (!err)
  268 + return -EINPROGRESS;
240 269  
241 270 return err;
242 271 }
... ... @@ -376,6 +405,115 @@
376 405 kfree(inst);
377 406 }
378 407  
  408 +static int pcrypt_cpumask_change_notify(struct notifier_block *self,
  409 + unsigned long val, void *data)
  410 +{
  411 + struct padata_pcrypt *pcrypt;
  412 + struct pcrypt_cpumask *new_mask, *old_mask;
  413 + struct padata_cpumask *cpumask = (struct padata_cpumask *)data;
  414 +
  415 + if (!(val & PADATA_CPU_SERIAL))
  416 + return 0;
  417 +
  418 + pcrypt = container_of(self, struct padata_pcrypt, nblock);
  419 + new_mask = kmalloc(sizeof(*new_mask), GFP_KERNEL);
  420 + if (!new_mask)
  421 + return -ENOMEM;
  422 + if (!alloc_cpumask_var(&new_mask->mask, GFP_KERNEL)) {
  423 + kfree(new_mask);
  424 + return -ENOMEM;
  425 + }
  426 +
  427 + old_mask = pcrypt->cb_cpumask;
  428 +
  429 + cpumask_copy(new_mask->mask, cpumask->cbcpu);
  430 + rcu_assign_pointer(pcrypt->cb_cpumask, new_mask);
  431 + synchronize_rcu_bh();
  432 +
  433 + free_cpumask_var(old_mask->mask);
  434 + kfree(old_mask);
  435 + return 0;
  436 +}
  437 +
  438 +static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name)
  439 +{
  440 + int ret;
  441 +
  442 + pinst->kobj.kset = pcrypt_kset;
  443 + ret = kobject_add(&pinst->kobj, NULL, name);
  444 + if (!ret)
  445 + kobject_uevent(&pinst->kobj, KOBJ_ADD);
  446 +
  447 + return ret;
  448 +}
  449 +
  450 +static int pcrypt_init_padata(struct padata_pcrypt *pcrypt,
  451 + const char *name)
  452 +{
  453 + int ret = -ENOMEM;
  454 + struct pcrypt_cpumask *mask;
  455 +
  456 + get_online_cpus();
  457 +
  458 + pcrypt->wq = create_workqueue(name);
  459 + if (!pcrypt->wq)
  460 + goto err;
  461 +
  462 + pcrypt->pinst = padata_alloc_possible(pcrypt->wq);
  463 + if (!pcrypt->pinst)
  464 + goto err_destroy_workqueue;
  465 +
  466 + mask = kmalloc(sizeof(*mask), GFP_KERNEL);
  467 + if (!mask)
  468 + goto err_free_padata;
  469 + if (!alloc_cpumask_var(&mask->mask, GFP_KERNEL)) {
  470 + kfree(mask);
  471 + goto err_free_padata;
  472 + }
  473 +
  474 + cpumask_and(mask->mask, cpu_possible_mask, cpu_active_mask);
  475 + rcu_assign_pointer(pcrypt->cb_cpumask, mask);
  476 +
  477 + pcrypt->nblock.notifier_call = pcrypt_cpumask_change_notify;
  478 + ret = padata_register_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
  479 + if (ret)
  480 + goto err_free_cpumask;
  481 +
  482 + ret = pcrypt_sysfs_add(pcrypt->pinst, name);
  483 + if (ret)
  484 + goto err_unregister_notifier;
  485 +
  486 + put_online_cpus();
  487 +
  488 + return ret;
  489 +
  490 +err_unregister_notifier:
  491 + padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
  492 +err_free_cpumask:
  493 + free_cpumask_var(mask->mask);
  494 + kfree(mask);
  495 +err_free_padata:
  496 + padata_free(pcrypt->pinst);
  497 +err_destroy_workqueue:
  498 + destroy_workqueue(pcrypt->wq);
  499 +err:
  500 + put_online_cpus();
  501 +
  502 + return ret;
  503 +}
  504 +
  505 +static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
  506 +{
  507 + kobject_put(&pcrypt->pinst->kobj);
  508 + free_cpumask_var(pcrypt->cb_cpumask->mask);
  509 + kfree(pcrypt->cb_cpumask);
  510 +
  511 + padata_stop(pcrypt->pinst);
  512 + padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
  513 + destroy_workqueue(pcrypt->wq);
  514 + padata_free(pcrypt->pinst);
  515 +}
  516 +
379 517 static struct crypto_template pcrypt_tmpl = {
380 518 .name = "pcrypt",
381 519 .alloc = pcrypt_alloc,
382 520  
383 521  
384 522  
385 523  
386 524  
387 525  
388 526  
389 527  
... ... @@ -385,52 +523,39 @@
385 523  
386 524 static int __init pcrypt_init(void)
387 525 {
388   - encwq = create_workqueue("pencrypt");
389   - if (!encwq)
  526 + int err = -ENOMEM;
  527 +
  528 + pcrypt_kset = kset_create_and_add("pcrypt", NULL, kernel_kobj);
  529 + if (!pcrypt_kset)
390 530 goto err;
391 531  
392   - decwq = create_workqueue("pdecrypt");
393   - if (!decwq)
394   - goto err_destroy_encwq;
  532 + err = pcrypt_init_padata(&pencrypt, "pencrypt");
  533 + if (err)
  534 + goto err_unreg_kset;
395 535  
  536 + err = pcrypt_init_padata(&pdecrypt, "pdecrypt");
  537 + if (err)
  538 + goto err_deinit_pencrypt;
396 539  
397   - pcrypt_enc_padata = padata_alloc(cpu_possible_mask, encwq);
398   - if (!pcrypt_enc_padata)
399   - goto err_destroy_decwq;
  540 + padata_start(pencrypt.pinst);
  541 + padata_start(pdecrypt.pinst);
400 542  
401   - pcrypt_dec_padata = padata_alloc(cpu_possible_mask, decwq);
402   - if (!pcrypt_dec_padata)
403   - goto err_free_padata;
404   -
405   - padata_start(pcrypt_enc_padata);
406   - padata_start(pcrypt_dec_padata);
407   -
408 543 return crypto_register_template(&pcrypt_tmpl);
409 544  
410   -err_free_padata:
411   - padata_free(pcrypt_enc_padata);
412   -
413   -err_destroy_decwq:
414   - destroy_workqueue(decwq);
415   -
416   -err_destroy_encwq:
417   - destroy_workqueue(encwq);
418   -
  545 +err_deinit_pencrypt:
  546 + pcrypt_fini_padata(&pencrypt);
  547 +err_unreg_kset:
  548 + kset_unregister(pcrypt_kset);
419 549 err:
420   - return -ENOMEM;
  550 + return err;
421 551 }
422 552  
423 553 static void __exit pcrypt_exit(void)
424 554 {
425   - padata_stop(pcrypt_enc_padata);
426   - padata_stop(pcrypt_dec_padata);
  555 + pcrypt_fini_padata(&pencrypt);
  556 + pcrypt_fini_padata(&pdecrypt);
427 557  
428   - destroy_workqueue(encwq);
429   - destroy_workqueue(decwq);
430   -
431   - padata_free(pcrypt_enc_padata);
432   - padata_free(pcrypt_dec_padata);
433   -
  558 + kset_unregister(pcrypt_kset);
434 559 crypto_unregister_template(&pcrypt_tmpl);
435 560 }
436 561  
... ... @@ -22,6 +22,17 @@
22 22 #include <crypto/rng.h>
23 23  
24 24 #include "internal.h"
  25 +
  26 +#ifndef CONFIG_CRYPTO_MANAGER_TESTS
  27 +
  28 +/* a perfect nop */
  29 +int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
  30 +{
  31 + return 0;
  32 +}
  33 +
  34 +#else
  35 +
25 36 #include "testmgr.h"
26 37  
27 38 /*
... ... @@ -2530,5 +2541,8 @@
2530 2541 non_fips_alg:
2531 2542 return -EINVAL;
2532 2543 }
  2544 +
  2545 +#endif /* CONFIG_CRYPTO_MANAGER_TESTS */
  2546 +
2533 2547 EXPORT_SYMBOL_GPL(alg_test);
crypto/twofish.c
1   -/*
2   - * Twofish for CryptoAPI
3   - *
4   - * Originally Twofish for GPG
5   - * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
6   - * 256-bit key length added March 20, 1999
7   - * Some modifications to reduce the text size by Werner Koch, April, 1998
8   - * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
9   - * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
10   - *
11   - * The original author has disclaimed all copyright interest in this
12   - * code and thus put it in the public domain. The subsequent authors
13   - * have put this under the GNU General Public License.
14   - *
15   - * This program is free software; you can redistribute it and/or modify
16   - * it under the terms of the GNU General Public License as published by
17   - * the Free Software Foundation; either version 2 of the License, or
18   - * (at your option) any later version.
19   - *
20   - * This program is distributed in the hope that it will be useful,
21   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
22   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23   - * GNU General Public License for more details.
24   - *
25   - * You should have received a copy of the GNU General Public License
26   - * along with this program; if not, write to the Free Software
27   - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
28   - * USA
29   - *
30   - * This code is a "clean room" implementation, written from the paper
31   - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
32   - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
33   - * through http://www.counterpane.com/twofish.html
34   - *
35   - * For background information on multiplication in finite fields, used for
36   - * the matrix operations in the key schedule, see the book _Contemporary
37   - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
38   - * Third Edition.
39   - */
40   -
41   -#include <asm/byteorder.h>
42   -#include <crypto/twofish.h>
43   -#include <linux/module.h>
44   -#include <linux/init.h>
45   -#include <linux/types.h>
46   -#include <linux/errno.h>
47   -#include <linux/crypto.h>
48   -#include <linux/bitops.h>
49   -
50   -/* Macros to compute the g() function in the encryption and decryption
51   - * rounds. G1 is the straight g() function; G2 includes the 8-bit
52   - * rotation for the high 32-bit word. */
53   -
54   -#define G1(a) \
55   - (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \
56   - ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24])
57   -
58   -#define G2(b) \
59   - (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \
60   - ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24])
61   -
62   -/* Encryption and decryption Feistel rounds. Each one calls the two g()
63   - * macros, does the PHT, and performs the XOR and the appropriate bit
64   - * rotations. The parameters are the round number (used to select subkeys),
65   - * and the four 32-bit chunks of the text. */
66   -
67   -#define ENCROUND(n, a, b, c, d) \
68   - x = G1 (a); y = G2 (b); \
69   - x += y; y += x + ctx->k[2 * (n) + 1]; \
70   - (c) ^= x + ctx->k[2 * (n)]; \
71   - (c) = ror32((c), 1); \
72   - (d) = rol32((d), 1) ^ y
73   -
74   -#define DECROUND(n, a, b, c, d) \
75   - x = G1 (a); y = G2 (b); \
76   - x += y; y += x; \
77   - (d) ^= y + ctx->k[2 * (n) + 1]; \
78   - (d) = ror32((d), 1); \
79   - (c) = rol32((c), 1); \
80   - (c) ^= (x + ctx->k[2 * (n)])
81   -
82   -/* Encryption and decryption cycles; each one is simply two Feistel rounds
83   - * with the 32-bit chunks re-ordered to simulate the "swap" */
84   -
85   -#define ENCCYCLE(n) \
86   - ENCROUND (2 * (n), a, b, c, d); \
87   - ENCROUND (2 * (n) + 1, c, d, a, b)
88   -
89   -#define DECCYCLE(n) \
90   - DECROUND (2 * (n) + 1, c, d, a, b); \
91   - DECROUND (2 * (n), a, b, c, d)
92   -
93   -/* Macros to convert the input and output bytes into 32-bit words,
94   - * and simultaneously perform the whitening step. INPACK packs word
95   - * number n into the variable named by x, using whitening subkey number m.
96   - * OUTUNPACK unpacks word number n from the variable named by x, using
97   - * whitening subkey number m. */
98   -
99   -#define INPACK(n, x, m) \
100   - x = le32_to_cpu(src[n]) ^ ctx->w[m]
101   -
102   -#define OUTUNPACK(n, x, m) \
103   - x ^= ctx->w[m]; \
104   - dst[n] = cpu_to_le32(x)
105   -
106   -
107   -
108   -/* Encrypt one block. in and out may be the same. */
109   -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
110   -{
111   - struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
112   - const __le32 *src = (const __le32 *)in;
113   - __le32 *dst = (__le32 *)out;
114   -
115   - /* The four 32-bit chunks of the text. */
116   - u32 a, b, c, d;
117   -
118   - /* Temporaries used by the round function. */
119   - u32 x, y;
120   -
121   - /* Input whitening and packing. */
122   - INPACK (0, a, 0);
123   - INPACK (1, b, 1);
124   - INPACK (2, c, 2);
125   - INPACK (3, d, 3);
126   -
127   - /* Encryption Feistel cycles. */
128   - ENCCYCLE (0);
129   - ENCCYCLE (1);
130   - ENCCYCLE (2);
131   - ENCCYCLE (3);
132   - ENCCYCLE (4);
133   - ENCCYCLE (5);
134   - ENCCYCLE (6);
135   - ENCCYCLE (7);
136   -
137   - /* Output whitening and unpacking. */
138   - OUTUNPACK (0, c, 4);
139   - OUTUNPACK (1, d, 5);
140   - OUTUNPACK (2, a, 6);
141   - OUTUNPACK (3, b, 7);
142   -
143   -}
144   -
145   -/* Decrypt one block. in and out may be the same. */
146   -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
147   -{
148   - struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
149   - const __le32 *src = (const __le32 *)in;
150   - __le32 *dst = (__le32 *)out;
151   -
152   - /* The four 32-bit chunks of the text. */
153   - u32 a, b, c, d;
154   -
155   - /* Temporaries used by the round function. */
156   - u32 x, y;
157   -
158   - /* Input whitening and packing. */
159   - INPACK (0, c, 4);
160   - INPACK (1, d, 5);
161   - INPACK (2, a, 6);
162   - INPACK (3, b, 7);
163   -
164   - /* Encryption Feistel cycles. */
165   - DECCYCLE (7);
166   - DECCYCLE (6);
167   - DECCYCLE (5);
168   - DECCYCLE (4);
169   - DECCYCLE (3);
170   - DECCYCLE (2);
171   - DECCYCLE (1);
172   - DECCYCLE (0);
173   -
174   - /* Output whitening and unpacking. */
175   - OUTUNPACK (0, a, 0);
176   - OUTUNPACK (1, b, 1);
177   - OUTUNPACK (2, c, 2);
178   - OUTUNPACK (3, d, 3);
179   -
180   -}
181   -
182   -static struct crypto_alg alg = {
183   - .cra_name = "twofish",
184   - .cra_driver_name = "twofish-generic",
185   - .cra_priority = 100,
186   - .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
187   - .cra_blocksize = TF_BLOCK_SIZE,
188   - .cra_ctxsize = sizeof(struct twofish_ctx),
189   - .cra_alignmask = 3,
190   - .cra_module = THIS_MODULE,
191   - .cra_list = LIST_HEAD_INIT(alg.cra_list),
192   - .cra_u = { .cipher = {
193   - .cia_min_keysize = TF_MIN_KEY_SIZE,
194   - .cia_max_keysize = TF_MAX_KEY_SIZE,
195   - .cia_setkey = twofish_setkey,
196   - .cia_encrypt = twofish_encrypt,
197   - .cia_decrypt = twofish_decrypt } }
198   -};
199   -
200   -static int __init twofish_mod_init(void)
201   -{
202   - return crypto_register_alg(&alg);
203   -}
204   -
205   -static void __exit twofish_mod_fini(void)
206   -{
207   - crypto_unregister_alg(&alg);
208   -}
209   -
210   -module_init(twofish_mod_init);
211   -module_exit(twofish_mod_fini);
212   -
213   -MODULE_LICENSE("GPL");
214   -MODULE_DESCRIPTION ("Twofish Cipher Algorithm");
crypto/twofish_generic.c
  1 +/*
  2 + * Twofish for CryptoAPI
  3 + *
  4 + * Originally Twofish for GPG
  5 + * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
  6 + * 256-bit key length added March 20, 1999
  7 + * Some modifications to reduce the text size by Werner Koch, April, 1998
  8 + * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
  9 + * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
  10 + *
  11 + * The original author has disclaimed all copyright interest in this
  12 + * code and thus put it in the public domain. The subsequent authors
  13 + * have put this under the GNU General Public License.
  14 + *
  15 + * This program is free software; you can redistribute it and/or modify
  16 + * it under the terms of the GNU General Public License as published by
  17 + * the Free Software Foundation; either version 2 of the License, or
  18 + * (at your option) any later version.
  19 + *
  20 + * This program is distributed in the hope that it will be useful,
  21 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  23 + * GNU General Public License for more details.
  24 + *
  25 + * You should have received a copy of the GNU General Public License
  26 + * along with this program; if not, write to the Free Software
  27 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  28 + * USA
  29 + *
  30 + * This code is a "clean room" implementation, written from the paper
  31 + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
  32 + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
  33 + * through http://www.counterpane.com/twofish.html
  34 + *
  35 + * For background information on multiplication in finite fields, used for
  36 + * the matrix operations in the key schedule, see the book _Contemporary
  37 + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
  38 + * Third Edition.
  39 + */
  40 +
  41 +#include <asm/byteorder.h>
  42 +#include <crypto/twofish.h>
  43 +#include <linux/module.h>
  44 +#include <linux/init.h>
  45 +#include <linux/types.h>
  46 +#include <linux/errno.h>
  47 +#include <linux/crypto.h>
  48 +#include <linux/bitops.h>
  49 +
  50 +/* Macros to compute the g() function in the encryption and decryption
  51 + * rounds. G1 is the straight g() function; G2 includes the 8-bit
  52 + * rotation for the high 32-bit word. */
  53 +
  54 +#define G1(a) \
  55 + (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \
  56 + ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24])
  57 +
  58 +#define G2(b) \
  59 + (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \
  60 + ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24])
  61 +
  62 +/* Encryption and decryption Feistel rounds. Each one calls the two g()
  63 + * macros, does the PHT, and performs the XOR and the appropriate bit
  64 + * rotations. The parameters are the round number (used to select subkeys),
  65 + * and the four 32-bit chunks of the text. */
  66 +
  67 +#define ENCROUND(n, a, b, c, d) \
  68 + x = G1 (a); y = G2 (b); \
  69 + x += y; y += x + ctx->k[2 * (n) + 1]; \
  70 + (c) ^= x + ctx->k[2 * (n)]; \
  71 + (c) = ror32((c), 1); \
  72 + (d) = rol32((d), 1) ^ y
  73 +
  74 +#define DECROUND(n, a, b, c, d) \
  75 + x = G1 (a); y = G2 (b); \
  76 + x += y; y += x; \
  77 + (d) ^= y + ctx->k[2 * (n) + 1]; \
  78 + (d) = ror32((d), 1); \
  79 + (c) = rol32((c), 1); \
  80 + (c) ^= (x + ctx->k[2 * (n)])
  81 +
  82 +/* Encryption and decryption cycles; each one is simply two Feistel rounds
  83 + * with the 32-bit chunks re-ordered to simulate the "swap" */
  84 +
  85 +#define ENCCYCLE(n) \
  86 + ENCROUND (2 * (n), a, b, c, d); \
  87 + ENCROUND (2 * (n) + 1, c, d, a, b)
  88 +
  89 +#define DECCYCLE(n) \
  90 + DECROUND (2 * (n) + 1, c, d, a, b); \
  91 + DECROUND (2 * (n), a, b, c, d)
  92 +
  93 +/* Macros to convert the input and output bytes into 32-bit words,
  94 + * and simultaneously perform the whitening step. INPACK packs word
  95 + * number n into the variable named by x, using whitening subkey number m.
  96 + * OUTUNPACK unpacks word number n from the variable named by x, using
  97 + * whitening subkey number m. */
  98 +
  99 +#define INPACK(n, x, m) \
  100 + x = le32_to_cpu(src[n]) ^ ctx->w[m]
  101 +
  102 +#define OUTUNPACK(n, x, m) \
  103 + x ^= ctx->w[m]; \
  104 + dst[n] = cpu_to_le32(x)
  105 +
  106 +
  107 +
  108 +/* Encrypt one block. in and out may be the same. */
  109 +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  110 +{
  111 + struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
  112 + const __le32 *src = (const __le32 *)in;
  113 + __le32 *dst = (__le32 *)out;
  114 +
  115 + /* The four 32-bit chunks of the text. */
  116 + u32 a, b, c, d;
  117 +
  118 + /* Temporaries used by the round function. */
  119 + u32 x, y;
  120 +
  121 + /* Input whitening and packing. */
  122 + INPACK (0, a, 0);
  123 + INPACK (1, b, 1);
  124 + INPACK (2, c, 2);
  125 + INPACK (3, d, 3);
  126 +
  127 + /* Encryption Feistel cycles. */
  128 + ENCCYCLE (0);
  129 + ENCCYCLE (1);
  130 + ENCCYCLE (2);
  131 + ENCCYCLE (3);
  132 + ENCCYCLE (4);
  133 + ENCCYCLE (5);
  134 + ENCCYCLE (6);
  135 + ENCCYCLE (7);
  136 +
  137 + /* Output whitening and unpacking. */
  138 + OUTUNPACK (0, c, 4);
  139 + OUTUNPACK (1, d, 5);
  140 + OUTUNPACK (2, a, 6);
  141 + OUTUNPACK (3, b, 7);
  142 +
  143 +}
  144 +
  145 +/* Decrypt one block. in and out may be the same. */
  146 +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  147 +{
  148 + struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
  149 + const __le32 *src = (const __le32 *)in;
  150 + __le32 *dst = (__le32 *)out;
  151 +
  152 + /* The four 32-bit chunks of the text. */
  153 + u32 a, b, c, d;
  154 +
  155 + /* Temporaries used by the round function. */
  156 + u32 x, y;
  157 +
  158 + /* Input whitening and packing. */
  159 + INPACK (0, c, 4);
  160 + INPACK (1, d, 5);
  161 + INPACK (2, a, 6);
  162 + INPACK (3, b, 7);
  163 +
  164 + /* Encryption Feistel cycles. */
  165 + DECCYCLE (7);
  166 + DECCYCLE (6);
  167 + DECCYCLE (5);
  168 + DECCYCLE (4);
  169 + DECCYCLE (3);
  170 + DECCYCLE (2);
  171 + DECCYCLE (1);
  172 + DECCYCLE (0);
  173 +
  174 + /* Output whitening and unpacking. */
  175 + OUTUNPACK (0, a, 0);
  176 + OUTUNPACK (1, b, 1);
  177 + OUTUNPACK (2, c, 2);
  178 + OUTUNPACK (3, d, 3);
  179 +
  180 +}
  181 +
  182 +static struct crypto_alg alg = {
  183 + .cra_name = "twofish",
  184 + .cra_driver_name = "twofish-generic",
  185 + .cra_priority = 100,
  186 + .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
  187 + .cra_blocksize = TF_BLOCK_SIZE,
  188 + .cra_ctxsize = sizeof(struct twofish_ctx),
  189 + .cra_alignmask = 3,
  190 + .cra_module = THIS_MODULE,
  191 + .cra_list = LIST_HEAD_INIT(alg.cra_list),
  192 + .cra_u = { .cipher = {
  193 + .cia_min_keysize = TF_MIN_KEY_SIZE,
  194 + .cia_max_keysize = TF_MAX_KEY_SIZE,
  195 + .cia_setkey = twofish_setkey,
  196 + .cia_encrypt = twofish_encrypt,
  197 + .cia_decrypt = twofish_decrypt } }
  198 +};
  199 +
  200 +static int __init twofish_mod_init(void)
  201 +{
  202 + return crypto_register_alg(&alg);
  203 +}
  204 +
  205 +static void __exit twofish_mod_fini(void)
  206 +{
  207 + crypto_unregister_alg(&alg);
  208 +}
  209 +
  210 +module_init(twofish_mod_init);
  211 +module_exit(twofish_mod_fini);
  212 +
  213 +MODULE_LICENSE("GPL");
  214 +MODULE_DESCRIPTION ("Twofish Cipher Algorithm");
  215 +MODULE_ALIAS("twofish");
... ... @@ -224,7 +224,7 @@
224 224 alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
225 225 CRYPTO_ALG_TYPE_MASK);
226 226 if (IS_ERR(alg))
227   - return ERR_PTR(PTR_ERR(alg));
  227 + return ERR_CAST(alg);
228 228  
229 229 inst = crypto_alloc_instance("xts", alg);
230 230 if (IS_ERR(inst))
drivers/char/hw_random/n2-drv.c
... ... @@ -387,7 +387,7 @@
387 387  
388 388 static int n2rng_data_read(struct hwrng *rng, u32 *data)
389 389 {
390   - struct n2rng *np = (struct n2rng *) rng->priv;
  390 + struct n2rng *np = rng->priv;
391 391 unsigned long ra = __pa(&np->test_data);
392 392 int len;
393 393  
drivers/char/random.c
... ... @@ -407,8 +407,8 @@
407 407 struct poolinfo *poolinfo;
408 408 __u32 *pool;
409 409 const char *name;
410   - int limit;
411 410 struct entropy_store *pull;
  411 + int limit;
412 412  
413 413 /* read-write data: */
414 414 spinlock_t lock;
drivers/crypto/geode-aes.c
... ... @@ -573,7 +573,7 @@
573 573 }
574 574  
575 575 static struct pci_device_id geode_aes_tbl[] = {
576   - { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LX_AES, PCI_ANY_ID, PCI_ANY_ID} ,
  576 + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_LX_AES), } ,
577 577 { 0, }
578 578 };
579 579  
drivers/crypto/hifn_795x.c
... ... @@ -2018,7 +2018,6 @@
2018 2018 {
2019 2019 unsigned long flags;
2020 2020 struct crypto_async_request *async_req;
2021   - struct hifn_context *ctx;
2022 2021 struct ablkcipher_request *req;
2023 2022 struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
2024 2023 int i;
... ... @@ -2035,7 +2034,6 @@
2035 2034  
2036 2035 spin_lock_irqsave(&dev->lock, flags);
2037 2036 while ((async_req = crypto_dequeue_request(&dev->queue))) {
2038   - ctx = crypto_tfm_ctx(async_req->tfm);
2039 2037 req = container_of(async_req, struct ablkcipher_request, base);
2040 2038 spin_unlock_irqrestore(&dev->lock, flags);
2041 2039  
... ... @@ -2139,7 +2137,6 @@
2139 2137 static int hifn_process_queue(struct hifn_device *dev)
2140 2138 {
2141 2139 struct crypto_async_request *async_req, *backlog;
2142   - struct hifn_context *ctx;
2143 2140 struct ablkcipher_request *req;
2144 2141 unsigned long flags;
2145 2142 int err = 0;
... ... @@ -2156,7 +2153,6 @@
2156 2153 if (backlog)
2157 2154 backlog->complete(backlog, -EINPROGRESS);
2158 2155  
2159   - ctx = crypto_tfm_ctx(async_req->tfm);
2160 2156 req = container_of(async_req, struct ablkcipher_request, base);
2161 2157  
2162 2158 err = hifn_handle_req(req);
drivers/crypto/mv_cesa.c
... ... @@ -1055,20 +1055,20 @@
1055 1055 cp->queue_th = kthread_run(queue_manag, cp, "mv_crypto");
1056 1056 if (IS_ERR(cp->queue_th)) {
1057 1057 ret = PTR_ERR(cp->queue_th);
1058   - goto err_thread;
  1058 + goto err_unmap_sram;
1059 1059 }
1060 1060  
1061 1061 ret = request_irq(irq, crypto_int, IRQF_DISABLED, dev_name(&pdev->dev),
1062 1062 cp);
1063 1063 if (ret)
1064   - goto err_unmap_sram;
  1064 + goto err_thread;
1065 1065  
1066 1066 writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
1067 1067 writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG);
1068 1068  
1069 1069 ret = crypto_register_alg(&mv_aes_alg_ecb);
1070 1070 if (ret)
1071   - goto err_reg;
  1071 + goto err_irq;
1072 1072  
1073 1073 ret = crypto_register_alg(&mv_aes_alg_cbc);
1074 1074 if (ret)
1075 1075  
... ... @@ -1091,9 +1091,9 @@
1091 1091 return 0;
1092 1092 err_unreg_ecb:
1093 1093 crypto_unregister_alg(&mv_aes_alg_ecb);
1094   -err_thread:
  1094 +err_irq:
1095 1095 free_irq(irq, cp);
1096   -err_reg:
  1096 +err_thread:
1097 1097 kthread_stop(cp->queue_th);
1098 1098 err_unmap_sram:
1099 1099 iounmap(cp->sram);
drivers/crypto/n2_core.c
... ... @@ -239,21 +239,57 @@
239 239 }
240 240 #endif
241 241  
242   -struct n2_base_ctx {
243   - struct list_head list;
  242 +struct n2_ahash_alg {
  243 + struct list_head entry;
  244 + const char *hash_zero;
  245 + const u32 *hash_init;
  246 + u8 hw_op_hashsz;
  247 + u8 digest_size;
  248 + u8 auth_type;
  249 + u8 hmac_type;
  250 + struct ahash_alg alg;
244 251 };
245 252  
246   -static void n2_base_ctx_init(struct n2_base_ctx *ctx)
  253 +static inline struct n2_ahash_alg *n2_ahash_alg(struct crypto_tfm *tfm)
247 254 {
248   - INIT_LIST_HEAD(&ctx->list);
  255 + struct crypto_alg *alg = tfm->__crt_alg;
  256 + struct ahash_alg *ahash_alg;
  257 +
  258 + ahash_alg = container_of(alg, struct ahash_alg, halg.base);
  259 +
  260 + return container_of(ahash_alg, struct n2_ahash_alg, alg);
249 261 }
250 262  
251   -struct n2_hash_ctx {
252   - struct n2_base_ctx base;
  263 +struct n2_hmac_alg {
  264 + const char *child_alg;
  265 + struct n2_ahash_alg derived;
  266 +};
253 267  
  268 +static inline struct n2_hmac_alg *n2_hmac_alg(struct crypto_tfm *tfm)
  269 +{
  270 + struct crypto_alg *alg = tfm->__crt_alg;
  271 + struct ahash_alg *ahash_alg;
  272 +
  273 + ahash_alg = container_of(alg, struct ahash_alg, halg.base);
  274 +
  275 + return container_of(ahash_alg, struct n2_hmac_alg, derived.alg);
  276 +}
  277 +
  278 +struct n2_hash_ctx {
254 279 struct crypto_ahash *fallback_tfm;
255 280 };
256 281  
  282 +#define N2_HASH_KEY_MAX 32 /* HW limit for all HMAC requests */
  283 +
  284 +struct n2_hmac_ctx {
  285 + struct n2_hash_ctx base;
  286 +
  287 + struct crypto_shash *child_shash;
  288 +
  289 + int hash_key_len;
  290 + unsigned char hash_key[N2_HASH_KEY_MAX];
  291 +};
  292 +
257 293 struct n2_hash_req_ctx {
258 294 union {
259 295 struct md5_state md5;
... ... @@ -261,9 +297,6 @@
261 297 struct sha256_state sha256;
262 298 } u;
263 299  
264   - unsigned char hash_key[64];
265   - unsigned char keyed_zero_hash[32];
266   -
267 300 struct ahash_request fallback_req;
268 301 };
269 302  
... ... @@ -356,6 +389,94 @@
356 389 crypto_free_ahash(ctx->fallback_tfm);
357 390 }
358 391  
  392 +static int n2_hmac_cra_init(struct crypto_tfm *tfm)
  393 +{
  394 + const char *fallback_driver_name = tfm->__crt_alg->cra_name;
  395 + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
  396 + struct n2_hmac_ctx *ctx = crypto_ahash_ctx(ahash);
  397 + struct n2_hmac_alg *n2alg = n2_hmac_alg(tfm);
  398 + struct crypto_ahash *fallback_tfm;
  399 + struct crypto_shash *child_shash;
  400 + int err;
  401 +
  402 + fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
  403 + CRYPTO_ALG_NEED_FALLBACK);
  404 + if (IS_ERR(fallback_tfm)) {
  405 + pr_warning("Fallback driver '%s' could not be loaded!\n",
  406 + fallback_driver_name);
  407 + err = PTR_ERR(fallback_tfm);
  408 + goto out;
  409 + }
  410 +
  411 + child_shash = crypto_alloc_shash(n2alg->child_alg, 0, 0);
  412 + if (IS_ERR(child_shash)) {
  413 + pr_warning("Child shash '%s' could not be loaded!\n",
  414 + n2alg->child_alg);
  415 + err = PTR_ERR(child_shash);
  416 + goto out_free_fallback;
  417 + }
  418 +
  419 + crypto_ahash_set_reqsize(ahash, (sizeof(struct n2_hash_req_ctx) +
  420 + crypto_ahash_reqsize(fallback_tfm)));
  421 +
  422 + ctx->child_shash = child_shash;
  423 + ctx->base.fallback_tfm = fallback_tfm;
  424 + return 0;
  425 +
  426 +out_free_fallback:
  427 + crypto_free_ahash(fallback_tfm);
  428 +
  429 +out:
  430 + return err;
  431 +}
  432 +
  433 +static void n2_hmac_cra_exit(struct crypto_tfm *tfm)
  434 +{
  435 + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
  436 + struct n2_hmac_ctx *ctx = crypto_ahash_ctx(ahash);
  437 +
  438 + crypto_free_ahash(ctx->base.fallback_tfm);
  439 + crypto_free_shash(ctx->child_shash);
  440 +}
  441 +
  442 +static int n2_hmac_async_setkey(struct crypto_ahash *tfm, const u8 *key,
  443 + unsigned int keylen)
  444 +{
  445 + struct n2_hmac_ctx *ctx = crypto_ahash_ctx(tfm);
  446 + struct crypto_shash *child_shash = ctx->child_shash;
  447 + struct crypto_ahash *fallback_tfm;
  448 + struct {
  449 + struct shash_desc shash;
  450 + char ctx[crypto_shash_descsize(child_shash)];
  451 + } desc;
  452 + int err, bs, ds;
  453 +
  454 + fallback_tfm = ctx->base.fallback_tfm;
  455 + err = crypto_ahash_setkey(fallback_tfm, key, keylen);
  456 + if (err)
  457 + return err;
  458 +
  459 + desc.shash.tfm = child_shash;
  460 + desc.shash.flags = crypto_ahash_get_flags(tfm) &
  461 + CRYPTO_TFM_REQ_MAY_SLEEP;
  462 +
  463 + bs = crypto_shash_blocksize(child_shash);
  464 + ds = crypto_shash_digestsize(child_shash);
  465 + BUG_ON(ds > N2_HASH_KEY_MAX);
  466 + if (keylen > bs) {
  467 + err = crypto_shash_digest(&desc.shash, key, keylen,
  468 + ctx->hash_key);
  469 + if (err)
  470 + return err;
  471 + keylen = ds;
  472 + } else if (keylen <= N2_HASH_KEY_MAX)
  473 + memcpy(ctx->hash_key, key, keylen);
  474 +
  475 + ctx->hash_key_len = keylen;
  476 +
  477 + return err;
  478 +}
  479 +
359 480 static unsigned long wait_for_tail(struct spu_queue *qp)
360 481 {
361 482 unsigned long head, hv_ret;
362 483  
... ... @@ -385,12 +506,12 @@
385 506 return hv_ret;
386 507 }
387 508  
388   -static int n2_hash_async_digest(struct ahash_request *req,
389   - unsigned int auth_type, unsigned int digest_size,
390   - unsigned int result_size, void *hash_loc)
  509 +static int n2_do_async_digest(struct ahash_request *req,
  510 + unsigned int auth_type, unsigned int digest_size,
  511 + unsigned int result_size, void *hash_loc,
  512 + unsigned long auth_key, unsigned int auth_key_len)
391 513 {
392 514 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
393   - struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
394 515 struct cwq_initial_entry *ent;
395 516 struct crypto_hash_walk walk;
396 517 struct spu_queue *qp;
... ... @@ -403,6 +524,7 @@
403 524 */
404 525 if (unlikely(req->nbytes > (1 << 16))) {
405 526 struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
  527 + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
406 528  
407 529 ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
408 530 rctx->fallback_req.base.flags =
... ... @@ -414,8 +536,6 @@
414 536 return crypto_ahash_digest(&rctx->fallback_req);
415 537 }
416 538  
417   - n2_base_ctx_init(&ctx->base);
418   -
419 539 nbytes = crypto_hash_walk_first(req, &walk);
420 540  
421 541 cpu = get_cpu();
422 542  
... ... @@ -430,13 +550,13 @@
430 550 */
431 551 ent = qp->q + qp->tail;
432 552  
433   - ent->control = control_word_base(nbytes, 0, 0,
  553 + ent->control = control_word_base(nbytes, auth_key_len, 0,
434 554 auth_type, digest_size,
435 555 false, true, false, false,
436 556 OPCODE_INPLACE_BIT |
437 557 OPCODE_AUTH_MAC);
438 558 ent->src_addr = __pa(walk.data);
439   - ent->auth_key_addr = 0UL;
  559 + ent->auth_key_addr = auth_key;
440 560 ent->auth_iv_addr = __pa(hash_loc);
441 561 ent->final_auth_state_addr = 0UL;
442 562 ent->enc_key_addr = 0UL;
443 563  
444 564  
445 565  
446 566  
447 567  
448 568  
449 569  
450 570  
451 571  
452 572  
453 573  
454 574  
455 575  
456 576  
457 577  
... ... @@ -475,116 +595,57 @@
475 595 return err;
476 596 }
477 597  
478   -static int n2_md5_async_digest(struct ahash_request *req)
  598 +static int n2_hash_async_digest(struct ahash_request *req)
479 599 {
  600 + struct n2_ahash_alg *n2alg = n2_ahash_alg(req->base.tfm);
480 601 struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
481   - struct md5_state *m = &rctx->u.md5;
  602 + int ds;
482 603  
  604 + ds = n2alg->digest_size;
483 605 if (unlikely(req->nbytes == 0)) {
484   - static const char md5_zero[MD5_DIGEST_SIZE] = {
485   - 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
486   - 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
487   - };
488   -
489   - memcpy(req->result, md5_zero, MD5_DIGEST_SIZE);
  606 + memcpy(req->result, n2alg->hash_zero, ds);
490 607 return 0;
491 608 }
492   - m->hash[0] = cpu_to_le32(0x67452301);
493   - m->hash[1] = cpu_to_le32(0xefcdab89);
494   - m->hash[2] = cpu_to_le32(0x98badcfe);
495   - m->hash[3] = cpu_to_le32(0x10325476);
  609 + memcpy(&rctx->u, n2alg->hash_init, n2alg->hw_op_hashsz);
496 610  
497   - return n2_hash_async_digest(req, AUTH_TYPE_MD5,
498   - MD5_DIGEST_SIZE, MD5_DIGEST_SIZE,
499   - m->hash);
  611 + return n2_do_async_digest(req, n2alg->auth_type,
  612 + n2alg->hw_op_hashsz, ds,
  613 + &rctx->u, 0UL, 0);
500 614 }
501 615  
502   -static int n2_sha1_async_digest(struct ahash_request *req)
  616 +static int n2_hmac_async_digest(struct ahash_request *req)
503 617 {
  618 + struct n2_hmac_alg *n2alg = n2_hmac_alg(req->base.tfm);
504 619 struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
505   - struct sha1_state *s = &rctx->u.sha1;
  620 + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
  621 + struct n2_hmac_ctx *ctx = crypto_ahash_ctx(tfm);
  622 + int ds;
506 623  
507   - if (unlikely(req->nbytes == 0)) {
508   - static const char sha1_zero[SHA1_DIGEST_SIZE] = {
509   - 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,
510   - 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8,
511   - 0x07, 0x09
512   - };
  624 + ds = n2alg->derived.digest_size;
  625 + if (unlikely(req->nbytes == 0) ||
  626 + unlikely(ctx->hash_key_len > N2_HASH_KEY_MAX)) {
  627 + struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
  628 + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
513 629  
514   - memcpy(req->result, sha1_zero, SHA1_DIGEST_SIZE);
515   - return 0;
516   - }
517   - s->state[0] = SHA1_H0;
518   - s->state[1] = SHA1_H1;
519   - s->state[2] = SHA1_H2;
520   - s->state[3] = SHA1_H3;
521   - s->state[4] = SHA1_H4;
  630 + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
  631 + rctx->fallback_req.base.flags =
  632 + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  633 + rctx->fallback_req.nbytes = req->nbytes;
  634 + rctx->fallback_req.src = req->src;
  635 + rctx->fallback_req.result = req->result;
522 636  
523   - return n2_hash_async_digest(req, AUTH_TYPE_SHA1,
524   - SHA1_DIGEST_SIZE, SHA1_DIGEST_SIZE,
525   - s->state);
526   -}
527   -
528   -static int n2_sha256_async_digest(struct ahash_request *req)
529   -{
530   - struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
531   - struct sha256_state *s = &rctx->u.sha256;
532   -
533   - if (req->nbytes == 0) {
534   - static const char sha256_zero[SHA256_DIGEST_SIZE] = {
535   - 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a,
536   - 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae,
537   - 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99,
538   - 0x1b, 0x78, 0x52, 0xb8, 0x55
539   - };
540   -
541   - memcpy(req->result, sha256_zero, SHA256_DIGEST_SIZE);
542   - return 0;
  637 + return crypto_ahash_digest(&rctx->fallback_req);
543 638 }
544   - s->state[0] = SHA256_H0;
545   - s->state[1] = SHA256_H1;
546   - s->state[2] = SHA256_H2;
547   - s->state[3] = SHA256_H3;
548   - s->state[4] = SHA256_H4;
549   - s->state[5] = SHA256_H5;
550   - s->state[6] = SHA256_H6;
551   - s->state[7] = SHA256_H7;
  639 + memcpy(&rctx->u, n2alg->derived.hash_init,
  640 + n2alg->derived.hw_op_hashsz);
552 641  
553   - return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
554   - SHA256_DIGEST_SIZE, SHA256_DIGEST_SIZE,
555   - s->state);
  642 + return n2_do_async_digest(req, n2alg->derived.hmac_type,
  643 + n2alg->derived.hw_op_hashsz, ds,
  644 + &rctx->u,
  645 + __pa(&ctx->hash_key),
  646 + ctx->hash_key_len);
556 647 }
557 648  
558   -static int n2_sha224_async_digest(struct ahash_request *req)
559   -{
560   - struct n2_hash_req_ctx *rctx = ahash_request_ctx(req);
561   - struct sha256_state *s = &rctx->u.sha256;
562   -
563   - if (req->nbytes == 0) {
564   - static const char sha224_zero[SHA224_DIGEST_SIZE] = {
565   - 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
566   - 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
567   - 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
568   - 0x2f
569   - };
570   -
571   - memcpy(req->result, sha224_zero, SHA224_DIGEST_SIZE);
572   - return 0;
573   - }
574   - s->state[0] = SHA224_H0;
575   - s->state[1] = SHA224_H1;
576   - s->state[2] = SHA224_H2;
577   - s->state[3] = SHA224_H3;
578   - s->state[4] = SHA224_H4;
579   - s->state[5] = SHA224_H5;
580   - s->state[6] = SHA224_H6;
581   - s->state[7] = SHA224_H7;
582   -
583   - return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
584   - SHA256_DIGEST_SIZE, SHA224_DIGEST_SIZE,
585   - s->state);
586   -}
587   -
588 649 struct n2_cipher_context {
589 650 int key_len;
590 651 int enc_type;
591 652  
592 653  
593 654  
594 655  
595 656  
596 657  
597 658  
598 659  
... ... @@ -1209,35 +1270,92 @@
1209 1270  
1210 1271 struct n2_hash_tmpl {
1211 1272 const char *name;
1212   - int (*digest)(struct ahash_request *req);
  1273 + const char *hash_zero;
  1274 + const u32 *hash_init;
  1275 + u8 hw_op_hashsz;
1213 1276 u8 digest_size;
1214 1277 u8 block_size;
  1278 + u8 auth_type;
  1279 + u8 hmac_type;
1215 1280 };
  1281 +
  1282 +static const char md5_zero[MD5_DIGEST_SIZE] = {
  1283 + 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
  1284 + 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
  1285 +};
  1286 +static const u32 md5_init[MD5_HASH_WORDS] = {
  1287 + cpu_to_le32(0x67452301),
  1288 + cpu_to_le32(0xefcdab89),
  1289 + cpu_to_le32(0x98badcfe),
  1290 + cpu_to_le32(0x10325476),
  1291 +};
  1292 +static const char sha1_zero[SHA1_DIGEST_SIZE] = {
  1293 + 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,
  1294 + 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8,
  1295 + 0x07, 0x09
  1296 +};
  1297 +static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = {
  1298 + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
  1299 +};
  1300 +static const char sha256_zero[SHA256_DIGEST_SIZE] = {
  1301 + 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a,
  1302 + 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae,
  1303 + 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99,
  1304 + 0x1b, 0x78, 0x52, 0xb8, 0x55
  1305 +};
  1306 +static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = {
  1307 + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
  1308 + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
  1309 +};
  1310 +static const char sha224_zero[SHA224_DIGEST_SIZE] = {
  1311 + 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
  1312 + 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
  1313 + 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
  1314 + 0x2f
  1315 +};
  1316 +static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = {
  1317 + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
  1318 + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
  1319 +};
  1320 +
1216 1321 static const struct n2_hash_tmpl hash_tmpls[] = {
1217 1322 { .name = "md5",
1218   - .digest = n2_md5_async_digest,
  1323 + .hash_zero = md5_zero,
  1324 + .hash_init = md5_init,
  1325 + .auth_type = AUTH_TYPE_MD5,
  1326 + .hmac_type = AUTH_TYPE_HMAC_MD5,
  1327 + .hw_op_hashsz = MD5_DIGEST_SIZE,
1219 1328 .digest_size = MD5_DIGEST_SIZE,
1220 1329 .block_size = MD5_HMAC_BLOCK_SIZE },
1221 1330 { .name = "sha1",
1222   - .digest = n2_sha1_async_digest,
  1331 + .hash_zero = sha1_zero,
  1332 + .hash_init = sha1_init,
  1333 + .auth_type = AUTH_TYPE_SHA1,
  1334 + .hmac_type = AUTH_TYPE_HMAC_SHA1,
  1335 + .hw_op_hashsz = SHA1_DIGEST_SIZE,
1223 1336 .digest_size = SHA1_DIGEST_SIZE,
1224 1337 .block_size = SHA1_BLOCK_SIZE },
1225 1338 { .name = "sha256",
1226   - .digest = n2_sha256_async_digest,
  1339 + .hash_zero = sha256_zero,
  1340 + .hash_init = sha256_init,
  1341 + .auth_type = AUTH_TYPE_SHA256,
  1342 + .hmac_type = AUTH_TYPE_HMAC_SHA256,
  1343 + .hw_op_hashsz = SHA256_DIGEST_SIZE,
1227 1344 .digest_size = SHA256_DIGEST_SIZE,
1228 1345 .block_size = SHA256_BLOCK_SIZE },
1229 1346 { .name = "sha224",
1230   - .digest = n2_sha224_async_digest,
  1347 + .hash_zero = sha224_zero,
  1348 + .hash_init = sha224_init,
  1349 + .auth_type = AUTH_TYPE_SHA256,
  1350 + .hmac_type = AUTH_TYPE_RESERVED,
  1351 + .hw_op_hashsz = SHA256_DIGEST_SIZE,
1231 1352 .digest_size = SHA224_DIGEST_SIZE,
1232 1353 .block_size = SHA224_BLOCK_SIZE },
1233 1354 };
1234 1355 #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls)
1235 1356  
1236   -struct n2_ahash_alg {
1237   - struct list_head entry;
1238   - struct ahash_alg alg;
1239   -};
1240 1357 static LIST_HEAD(ahash_algs);
  1358 +static LIST_HEAD(hmac_algs);
1241 1359  
1242 1360 static int algs_registered;
1243 1361  
1244 1362  
... ... @@ -1245,12 +1363,18 @@
1245 1363 {
1246 1364 struct n2_cipher_alg *cipher, *cipher_tmp;
1247 1365 struct n2_ahash_alg *alg, *alg_tmp;
  1366 + struct n2_hmac_alg *hmac, *hmac_tmp;
1248 1367  
1249 1368 list_for_each_entry_safe(cipher, cipher_tmp, &cipher_algs, entry) {
1250 1369 crypto_unregister_alg(&cipher->alg);
1251 1370 list_del(&cipher->entry);
1252 1371 kfree(cipher);
1253 1372 }
  1373 + list_for_each_entry_safe(hmac, hmac_tmp, &hmac_algs, derived.entry) {
  1374 + crypto_unregister_ahash(&hmac->derived.alg);
  1375 + list_del(&hmac->derived.entry);
  1376 + kfree(hmac);
  1377 + }
1254 1378 list_for_each_entry_safe(alg, alg_tmp, &ahash_algs, entry) {
1255 1379 crypto_unregister_ahash(&alg->alg);
1256 1380 list_del(&alg->entry);
1257 1381  
1258 1382  
... ... @@ -1290,12 +1414,53 @@
1290 1414 list_add(&p->entry, &cipher_algs);
1291 1415 err = crypto_register_alg(alg);
1292 1416 if (err) {
  1417 + pr_err("%s alg registration failed\n", alg->cra_name);
1293 1418 list_del(&p->entry);
1294 1419 kfree(p);
  1420 + } else {
  1421 + pr_info("%s alg registered\n", alg->cra_name);
1295 1422 }
1296 1423 return err;
1297 1424 }
1298 1425  
  1426 +static int __devinit __n2_register_one_hmac(struct n2_ahash_alg *n2ahash)
  1427 +{
  1428 + struct n2_hmac_alg *p = kzalloc(sizeof(*p), GFP_KERNEL);
  1429 + struct ahash_alg *ahash;
  1430 + struct crypto_alg *base;
  1431 + int err;
  1432 +
  1433 + if (!p)
  1434 + return -ENOMEM;
  1435 +
  1436 + p->child_alg = n2ahash->alg.halg.base.cra_name;
  1437 + memcpy(&p->derived, n2ahash, sizeof(struct n2_ahash_alg));
  1438 + INIT_LIST_HEAD(&p->derived.entry);
  1439 +
  1440 + ahash = &p->derived.alg;
  1441 + ahash->digest = n2_hmac_async_digest;
  1442 + ahash->setkey = n2_hmac_async_setkey;
  1443 +
  1444 + base = &ahash->halg.base;
  1445 + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", p->child_alg);
  1446 + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s-n2", p->child_alg);
  1447 +
  1448 + base->cra_ctxsize = sizeof(struct n2_hmac_ctx);
  1449 + base->cra_init = n2_hmac_cra_init;
  1450 + base->cra_exit = n2_hmac_cra_exit;
  1451 +
  1452 + list_add(&p->derived.entry, &hmac_algs);
  1453 + err = crypto_register_ahash(ahash);
  1454 + if (err) {
  1455 + pr_err("%s alg registration failed\n", base->cra_name);
  1456 + list_del(&p->derived.entry);
  1457 + kfree(p);
  1458 + } else {
  1459 + pr_info("%s alg registered\n", base->cra_name);
  1460 + }
  1461 + return err;
  1462 +}
  1463 +
1299 1464 static int __devinit __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
1300 1465 {
1301 1466 struct n2_ahash_alg *p = kzalloc(sizeof(*p), GFP_KERNEL);
1302 1467  
... ... @@ -1307,12 +1472,19 @@
1307 1472 if (!p)
1308 1473 return -ENOMEM;
1309 1474  
  1475 + p->hash_zero = tmpl->hash_zero;
  1476 + p->hash_init = tmpl->hash_init;
  1477 + p->auth_type = tmpl->auth_type;
  1478 + p->hmac_type = tmpl->hmac_type;
  1479 + p->hw_op_hashsz = tmpl->hw_op_hashsz;
  1480 + p->digest_size = tmpl->digest_size;
  1481 +
1310 1482 ahash = &p->alg;
1311 1483 ahash->init = n2_hash_async_init;
1312 1484 ahash->update = n2_hash_async_update;
1313 1485 ahash->final = n2_hash_async_final;
1314 1486 ahash->finup = n2_hash_async_finup;
1315   - ahash->digest = tmpl->digest;
  1487 + ahash->digest = n2_hash_async_digest;
1316 1488  
1317 1489 halg = &ahash->halg;
1318 1490 halg->digestsize = tmpl->digest_size;
1319 1491  
1320 1492  
... ... @@ -1331,9 +1503,14 @@
1331 1503 list_add(&p->entry, &ahash_algs);
1332 1504 err = crypto_register_ahash(ahash);
1333 1505 if (err) {
  1506 + pr_err("%s alg registration failed\n", base->cra_name);
1334 1507 list_del(&p->entry);
1335 1508 kfree(p);
  1509 + } else {
  1510 + pr_info("%s alg registered\n", base->cra_name);
1336 1511 }
  1512 + if (!err && p->hmac_type != AUTH_TYPE_RESERVED)
  1513 + err = __n2_register_one_hmac(p);
1337 1514 return err;
1338 1515 }
1339 1516  
drivers/crypto/omap-sham.c
... ... @@ -15,7 +15,6 @@
15 15  
16 16 #define pr_fmt(fmt) "%s: " fmt, __func__
17 17  
18   -#include <linux/version.h>
19 18 #include <linux/err.h>
20 19 #include <linux/device.h>
21 20 #include <linux/module.h>
drivers/crypto/talitos.c
... ... @@ -720,7 +720,6 @@
720 720 #define TALITOS_MDEU_MAX_CONTEXT_SIZE TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512
721 721  
722 722 struct talitos_ahash_req_ctx {
723   - u64 count;
724 723 u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
725 724 unsigned int hw_context_size;
726 725 u8 buf[HASH_MAX_BLOCK_SIZE];
... ... @@ -729,6 +728,7 @@
729 728 unsigned int first;
730 729 unsigned int last;
731 730 unsigned int to_hash_later;
  731 + u64 nbuf;
732 732 struct scatterlist bufsl[2];
733 733 struct scatterlist *psrc;
734 734 };
... ... @@ -1613,6 +1613,7 @@
1613 1613 if (!req_ctx->last && req_ctx->to_hash_later) {
1614 1614 /* Position any partial block for next update/final/finup */
1615 1615 memcpy(req_ctx->buf, req_ctx->bufnext, req_ctx->to_hash_later);
  1616 + req_ctx->nbuf = req_ctx->to_hash_later;
1616 1617 }
1617 1618 common_nonsnoop_hash_unmap(dev, edesc, areq);
1618 1619  
... ... @@ -1728,7 +1729,7 @@
1728 1729 struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
1729 1730  
1730 1731 /* Initialize the context */
1731   - req_ctx->count = 0;
  1732 + req_ctx->nbuf = 0;
1732 1733 req_ctx->first = 1; /* first indicates h/w must init its context */
1733 1734 req_ctx->swinit = 0; /* assume h/w init of context */
1734 1735 req_ctx->hw_context_size =
1735 1736  
1736 1737  
1737 1738  
1738 1739  
1739 1740  
1740 1741  
1741 1742  
1742 1743  
... ... @@ -1776,52 +1777,54 @@
1776 1777 crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
1777 1778 unsigned int nbytes_to_hash;
1778 1779 unsigned int to_hash_later;
1779   - unsigned int index;
  1780 + unsigned int nsg;
1780 1781 int chained;
1781 1782  
1782   - index = req_ctx->count & (blocksize - 1);
1783   - req_ctx->count += nbytes;
1784   -
1785   - if (!req_ctx->last && (index + nbytes) < blocksize) {
1786   - /* Buffer the partial block */
  1783 + if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
  1784 + /* Buffer up to one whole block */
1787 1785 sg_copy_to_buffer(areq->src,
1788 1786 sg_count(areq->src, nbytes, &chained),
1789   - req_ctx->buf + index, nbytes);
  1787 + req_ctx->buf + req_ctx->nbuf, nbytes);
  1788 + req_ctx->nbuf += nbytes;
1790 1789 return 0;
1791 1790 }
1792 1791  
1793   - if (index) {
1794   - /* partial block from previous update; chain it in. */
1795   - sg_init_table(req_ctx->bufsl, (nbytes) ? 2 : 1);
1796   - sg_set_buf(req_ctx->bufsl, req_ctx->buf, index);
1797   - if (nbytes)
1798   - scatterwalk_sg_chain(req_ctx->bufsl, 2,
1799   - areq->src);
  1792 + /* At least (blocksize + 1) bytes are available to hash */
  1793 + nbytes_to_hash = nbytes + req_ctx->nbuf;
  1794 + to_hash_later = nbytes_to_hash & (blocksize - 1);
  1795 +
  1796 + if (req_ctx->last)
  1797 + to_hash_later = 0;
  1798 + else if (to_hash_later)
  1799 + /* There is a partial block. Hash the full block(s) now */
  1800 + nbytes_to_hash -= to_hash_later;
  1801 + else {
  1802 + /* Keep one block buffered */
  1803 + nbytes_to_hash -= blocksize;
  1804 + to_hash_later = blocksize;
  1805 + }
  1806 +
  1807 + /* Chain in any previously buffered data */
  1808 + if (req_ctx->nbuf) {
  1809 + nsg = (req_ctx->nbuf < nbytes_to_hash) ? 2 : 1;
  1810 + sg_init_table(req_ctx->bufsl, nsg);
  1811 + sg_set_buf(req_ctx->bufsl, req_ctx->buf, req_ctx->nbuf);
  1812 + if (nsg > 1)
  1813 + scatterwalk_sg_chain(req_ctx->bufsl, 2, areq->src);
1800 1814 req_ctx->psrc = req_ctx->bufsl;
1801   - } else {
  1815 + } else
1802 1816 req_ctx->psrc = areq->src;
1803   - }
1804   - nbytes_to_hash = index + nbytes;
1805   - if (!req_ctx->last) {
1806   - to_hash_later = (nbytes_to_hash & (blocksize - 1));
1807   - if (to_hash_later) {
1808   - int nents;
1809   - /* Must copy to_hash_later bytes from the end
1810   - * to bufnext (a partial block) for later.
1811   - */
1812   - nents = sg_count(areq->src, nbytes, &chained);
1813   - sg_copy_end_to_buffer(areq->src, nents,
1814   - req_ctx->bufnext,
1815   - to_hash_later,
1816   - nbytes - to_hash_later);
1817 1817  
1818   - /* Adjust count for what will be hashed now */
1819   - nbytes_to_hash -= to_hash_later;
1820   - }
1821   - req_ctx->to_hash_later = to_hash_later;
  1818 + if (to_hash_later) {
  1819 + int nents = sg_count(areq->src, nbytes, &chained);
  1820 + sg_copy_end_to_buffer(areq->src, nents,
  1821 + req_ctx->bufnext,
  1822 + to_hash_later,
  1823 + nbytes - to_hash_later);
1822 1824 }
  1825 + req_ctx->to_hash_later = to_hash_later;
1823 1826  
1824   - /* allocate extended descriptor */
  1827 + /* Allocate extended descriptor */
1825 1828 edesc = ahash_edesc_alloc(areq, nbytes_to_hash);
1826 1829 if (IS_ERR(edesc))
1827 1830 return PTR_ERR(edesc);
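
The talitos.c hashing change above drops the running byte count in favour of an explicit nbuf counter of currently buffered bytes: updates are buffered until more than one block is available, whole blocks are hashed immediately, and either the trailing partial block or, when the total is an exact multiple of the block size, one full block is kept back so the final round always has data. A small stand-alone sketch of that split (the helper name is made up for illustration):

    #include <stdio.h>

    /*
     * Given the bytes already buffered (nbuf) and the bytes in this update
     * (nbytes), decide how much to hash now and how much to carry over.
     * blocksize is assumed to be a power of two, as for MD5/SHA.
     */
    static void split_request(unsigned int nbuf, unsigned int nbytes,
                              unsigned int blocksize, int last,
                              unsigned int *to_hash_now,
                              unsigned int *to_hash_later)
    {
            unsigned int total = nbuf + nbytes;

            if (!last && total <= blocksize) {
                    /* Not enough for a full block yet: buffer everything. */
                    *to_hash_now = 0;
                    *to_hash_later = total;
                    return;
            }

            if (last)
                    *to_hash_later = 0;                       /* flush it all */
            else if (total & (blocksize - 1))
                    *to_hash_later = total & (blocksize - 1); /* partial block */
            else
                    *to_hash_later = blocksize;               /* keep one block */

            *to_hash_now = total - *to_hash_later;
    }

    int main(void)
    {
            unsigned int now, later;

            split_request(10, 200, 64, 0, &now, &later);
            printf("hash now: %u, buffer for later: %u\n", now, later);
            return 0;
    }
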
include/linux/padata.h
... ... @@ -25,7 +25,12 @@
25 25 #include <linux/spinlock.h>
26 26 #include <linux/list.h>
27 27 #include <linux/timer.h>
  28 +#include <linux/notifier.h>
  29 +#include <linux/kobject.h>
28 30  
  31 +#define PADATA_CPU_SERIAL 0x01
  32 +#define PADATA_CPU_PARALLEL 0x02
  33 +
29 34 /**
30 35 * struct padata_priv - Embedded to the users data structure.
31 36 *
... ... @@ -59,7 +64,20 @@
59 64 };
60 65  
61 66 /**
62   - * struct padata_queue - The percpu padata queues.
  67 + * struct padata_serial_queue - The percpu padata serial queue
  68 + *
  69 + * @serial: List to wait for serialization after reordering.
  70 + * @work: work struct for serialization.
  71 + * @pd: Backpointer to the internal control structure.
  72 + */
  73 +struct padata_serial_queue {
  74 + struct padata_list serial;
  75 + struct work_struct work;
  76 + struct parallel_data *pd;
  77 +};
  78 +
  79 +/**
  80 + * struct padata_parallel_queue - The percpu padata parallel queue
63 81 *
64 82 * @parallel: List to wait for parallelization.
65 83 * @reorder: List to wait for reordering after parallel processing.
66 84  
67 85  
68 86  
69 87  
70 88  
71 89  
... ... @@ -67,44 +85,58 @@
67 85 * @pwork: work struct for parallelization.
68 86 * @swork: work struct for serialization.
69 87 * @pd: Backpointer to the internal control structure.
  88 + * @work: work struct for parallelization.
70 89 * @num_obj: Number of objects that are processed by this cpu.
71 90 * @cpu_index: Index of the cpu.
72 91 */
73   -struct padata_queue {
74   - struct padata_list parallel;
75   - struct padata_list reorder;
76   - struct padata_list serial;
77   - struct work_struct pwork;
78   - struct work_struct swork;
79   - struct parallel_data *pd;
80   - atomic_t num_obj;
81   - int cpu_index;
  92 +struct padata_parallel_queue {
  93 + struct padata_list parallel;
  94 + struct padata_list reorder;
  95 + struct parallel_data *pd;
  96 + struct work_struct work;
  97 + atomic_t num_obj;
  98 + int cpu_index;
82 99 };
83 100  
84 101 /**
  102 + * struct padata_cpumask - The cpumasks for the parallel/serial workers
  103 + *
  104 + * @pcpu: cpumask for the parallel workers.
  105 + * @cbcpu: cpumask for the serial (callback) workers.
  106 + */
  107 +struct padata_cpumask {
  108 + cpumask_var_t pcpu;
  109 + cpumask_var_t cbcpu;
  110 +};
  111 +
  112 +/**
85 113 * struct parallel_data - Internal control structure, covers everything
86 114 * that depends on the cpumask in use.
87 115 *
88 116 * @pinst: padata instance.
89   - * @queue: percpu padata queues.
  117 + * @pqueue: percpu padata queues used for parallelization.
  118 + * @squeue: percpu padata queues used for serialization.
90 119 * @seq_nr: The sequence number that will be attached to the next object.
91 120 * @reorder_objects: Number of objects waiting in the reorder queues.
92 121 * @refcnt: Number of objects holding a reference on this parallel_data.
93 122 * @max_seq_nr: Maximal used sequence number.
94   - * @cpumask: cpumask in use.
  123 + * @cpumask: The cpumasks in use for parallel and serial workers.
95 124 * @lock: Reorder lock.
  125 + * @processed: Number of already processed objects.
96 126 * @timer: Reorder timer.
97 127 */
98 128 struct parallel_data {
99   - struct padata_instance *pinst;
100   - struct padata_queue *queue;
101   - atomic_t seq_nr;
102   - atomic_t reorder_objects;
103   - atomic_t refcnt;
104   - unsigned int max_seq_nr;
105   - cpumask_var_t cpumask;
106   - spinlock_t lock;
107   - struct timer_list timer;
  129 + struct padata_instance *pinst;
  130 + struct padata_parallel_queue *pqueue;
  131 + struct padata_serial_queue *squeue;
  132 + atomic_t seq_nr;
  133 + atomic_t reorder_objects;
  134 + atomic_t refcnt;
  135 + unsigned int max_seq_nr;
  136 + struct padata_cpumask cpumask;
  137 + spinlock_t lock ____cacheline_aligned;
  138 + unsigned int processed;
  139 + struct timer_list timer;
108 140 };
109 141  
110 142 /**
111 143  
112 144  
113 145  
114 146  
115 147  
... ... @@ -113,32 +145,49 @@
113 145 * @cpu_notifier: cpu hotplug notifier.
114 146 * @wq: The workqueue in use.
115 147 * @pd: The internal control structure.
116   - * @cpumask: User supplied cpumask.
  148 + * @cpumask: User supplied cpumasks for parallel and serial workers.
  149 + * @cpumask_change_notifier: Notifier chain for user-defined notify
  150 + * callbacks that will be called when either @pcpu or @cbcpu
  151 + * or both cpumasks change.
  152 + * @kobj: padata instance kernel object.
117 153 * @lock: padata instance lock.
118 154 * @flags: padata flags.
119 155 */
120 156 struct padata_instance {
121   - struct notifier_block cpu_notifier;
122   - struct workqueue_struct *wq;
123   - struct parallel_data *pd;
124   - cpumask_var_t cpumask;
125   - struct mutex lock;
126   - u8 flags;
127   -#define PADATA_INIT 1
128   -#define PADATA_RESET 2
  157 + struct notifier_block cpu_notifier;
  158 + struct workqueue_struct *wq;
  159 + struct parallel_data *pd;
  160 + struct padata_cpumask cpumask;
  161 + struct blocking_notifier_head cpumask_change_notifier;
  162 + struct kobject kobj;
  163 + struct mutex lock;
  164 + u8 flags;
  165 +#define PADATA_INIT 1
  166 +#define PADATA_RESET 2
  167 +#define PADATA_INVALID 4
129 168 };
130 169  
131   -extern struct padata_instance *padata_alloc(const struct cpumask *cpumask,
132   - struct workqueue_struct *wq);
  170 +extern struct padata_instance *padata_alloc_possible(
  171 + struct workqueue_struct *wq);
  172 +extern struct padata_instance *padata_alloc(struct workqueue_struct *wq,
  173 + const struct cpumask *pcpumask,
  174 + const struct cpumask *cbcpumask);
133 175 extern void padata_free(struct padata_instance *pinst);
134 176 extern int padata_do_parallel(struct padata_instance *pinst,
135 177 struct padata_priv *padata, int cb_cpu);
136 178 extern void padata_do_serial(struct padata_priv *padata);
137   -extern int padata_set_cpumask(struct padata_instance *pinst,
  179 +extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
138 180 cpumask_var_t cpumask);
139   -extern int padata_add_cpu(struct padata_instance *pinst, int cpu);
140   -extern int padata_remove_cpu(struct padata_instance *pinst, int cpu);
141   -extern void padata_start(struct padata_instance *pinst);
  181 +extern int padata_set_cpumasks(struct padata_instance *pinst,
  182 + cpumask_var_t pcpumask,
  183 + cpumask_var_t cbcpumask);
  184 +extern int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
  185 +extern int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
  186 +extern int padata_start(struct padata_instance *pinst);
142 187 extern void padata_stop(struct padata_instance *pinst);
  188 +extern int padata_register_cpumask_notifier(struct padata_instance *pinst,
  189 + struct notifier_block *nblock);
  190 +extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
  191 + struct notifier_block *nblock);
143 192 #endif
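
Taken together, the padata.h changes above split the single cpumask into separate parallel (pcpu) and serial callback (cbcpu) masks, let padata_start() fail, and add a cpumask-change notifier chain and a sysfs kobject to the instance. A rough sketch of how a user of the reworked interface might wire this up, using only the functions declared above (the caller-side names and error handling are illustrative, not from this commit):

    #include <linux/padata.h>
    #include <linux/workqueue.h>

    static int my_cpumask_notify(struct notifier_block *nb,
                                 unsigned long mask_type, void *data)
    {
            /* mask_type is PADATA_CPU_PARALLEL, PADATA_CPU_SERIAL or both;
             * data points to the new struct padata_cpumask. */
            return 0;
    }

    static struct notifier_block my_nb = {
            .notifier_call = my_cpumask_notify,
    };

    static int my_setup(struct workqueue_struct *wq, cpumask_var_t par_mask)
    {
            struct padata_instance *pinst;
            int err;

            pinst = padata_alloc_possible(wq);
            if (!pinst)
                    return -ENOMEM;

            err = padata_register_cpumask_notifier(pinst, &my_nb);
            if (err)
                    goto out_free;

            /* Restrict only the parallel workers; the serial mask stays. */
            err = padata_set_cpumask(pinst, PADATA_CPU_PARALLEL, par_mask);
            if (err)
                    goto out_unregister;

            err = padata_start(pinst);
            if (err)
                    goto out_unregister;

            return 0;

    out_unregister:
            padata_unregister_cpumask_notifier(pinst, &my_nb);
    out_free:
            padata_free(pinst);
            return err;
    }
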
kernel/padata.c
... ... @@ -26,18 +26,19 @@
26 26 #include <linux/mutex.h>
27 27 #include <linux/sched.h>
28 28 #include <linux/slab.h>
  29 +#include <linux/sysfs.h>
29 30 #include <linux/rcupdate.h>
30 31  
31   -#define MAX_SEQ_NR INT_MAX - NR_CPUS
  32 +#define MAX_SEQ_NR (INT_MAX - NR_CPUS)
32 33 #define MAX_OBJ_NUM 1000
33 34  
34 35 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
35 36 {
36 37 int cpu, target_cpu;
37 38  
38   - target_cpu = cpumask_first(pd->cpumask);
  39 + target_cpu = cpumask_first(pd->cpumask.pcpu);
39 40 for (cpu = 0; cpu < cpu_index; cpu++)
40   - target_cpu = cpumask_next(target_cpu, pd->cpumask);
  41 + target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
41 42  
42 43 return target_cpu;
43 44 }
44 45  
45 46  
46 47  
47 48  
... ... @@ -53,26 +54,27 @@
53 54 * Hash the sequence numbers to the cpus by taking
54 55 * seq_nr mod. number of cpus in use.
55 56 */
56   - cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask);
  57 + cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask.pcpu);
57 58  
58 59 return padata_index_to_cpu(pd, cpu_index);
59 60 }
60 61  
61   -static void padata_parallel_worker(struct work_struct *work)
  62 +static void padata_parallel_worker(struct work_struct *parallel_work)
62 63 {
63   - struct padata_queue *queue;
  64 + struct padata_parallel_queue *pqueue;
64 65 struct parallel_data *pd;
65 66 struct padata_instance *pinst;
66 67 LIST_HEAD(local_list);
67 68  
68 69 local_bh_disable();
69   - queue = container_of(work, struct padata_queue, pwork);
70   - pd = queue->pd;
  70 + pqueue = container_of(parallel_work,
  71 + struct padata_parallel_queue, work);
  72 + pd = pqueue->pd;
71 73 pinst = pd->pinst;
72 74  
73   - spin_lock(&queue->parallel.lock);
74   - list_replace_init(&queue->parallel.list, &local_list);
75   - spin_unlock(&queue->parallel.lock);
  75 + spin_lock(&pqueue->parallel.lock);
  76 + list_replace_init(&pqueue->parallel.list, &local_list);
  77 + spin_unlock(&pqueue->parallel.lock);
76 78  
77 79 while (!list_empty(&local_list)) {
78 80 struct padata_priv *padata;
... ... @@ -94,7 +96,7 @@
94 96 * @pinst: padata instance
95 97 * @padata: object to be parallelized
96 98 * @cb_cpu: cpu the serialization callback function will run on,
97   - * must be in the cpumask of padata.
  99 + * must be in the serial cpumask of padata (i.e. cpumask.cbcpu).
98 100 *
99 101 * The parallelization callback function will run with BHs off.
100 102 * Note: Every object which is parallelized by padata_do_parallel
101 103  
102 104  
... ... @@ -104,17 +106,20 @@
104 106 struct padata_priv *padata, int cb_cpu)
105 107 {
106 108 int target_cpu, err;
107   - struct padata_queue *queue;
  109 + struct padata_parallel_queue *queue;
108 110 struct parallel_data *pd;
109 111  
110 112 rcu_read_lock_bh();
111 113  
112 114 pd = rcu_dereference(pinst->pd);
113 115  
114   - err = 0;
115   - if (!(pinst->flags & PADATA_INIT))
  116 + err = -EINVAL;
  117 + if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
116 118 goto out;
117 119  
  120 + if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
  121 + goto out;
  122 +
118 123 err = -EBUSY;
119 124 if ((pinst->flags & PADATA_RESET))
120 125 goto out;
... ... @@ -122,11 +127,7 @@
122 127 if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
123 128 goto out;
124 129  
125   - err = -EINVAL;
126   - if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
127   - goto out;
128   -
129   - err = -EINPROGRESS;
  130 + err = 0;
130 131 atomic_inc(&pd->refcnt);
131 132 padata->pd = pd;
132 133 padata->cb_cpu = cb_cpu;
133 134  
... ... @@ -137,13 +138,13 @@
137 138 padata->seq_nr = atomic_inc_return(&pd->seq_nr);
138 139  
139 140 target_cpu = padata_cpu_hash(padata);
140   - queue = per_cpu_ptr(pd->queue, target_cpu);
  141 + queue = per_cpu_ptr(pd->pqueue, target_cpu);
141 142  
142 143 spin_lock(&queue->parallel.lock);
143 144 list_add_tail(&padata->list, &queue->parallel.list);
144 145 spin_unlock(&queue->parallel.lock);
145 146  
146   - queue_work_on(target_cpu, pinst->wq, &queue->pwork);
  147 + queue_work_on(target_cpu, pinst->wq, &queue->work);
147 148  
148 149 out:
149 150 rcu_read_unlock_bh();
150 151  
151 152  
152 153  
153 154  
154 155  
155 156  
156 157  
... ... @@ -171,84 +172,52 @@
171 172 */
172 173 static struct padata_priv *padata_get_next(struct parallel_data *pd)
173 174 {
174   - int cpu, num_cpus, empty, calc_seq_nr;
175   - int seq_nr, next_nr, overrun, next_overrun;
176   - struct padata_queue *queue, *next_queue;
  175 + int cpu, num_cpus;
  176 + int next_nr, next_index;
  177 + struct padata_parallel_queue *queue, *next_queue;
177 178 struct padata_priv *padata;
178 179 struct padata_list *reorder;
179 180  
180   - empty = 0;
181   - next_nr = -1;
182   - next_overrun = 0;
183   - next_queue = NULL;
  181 + num_cpus = cpumask_weight(pd->cpumask.pcpu);
184 182  
185   - num_cpus = cpumask_weight(pd->cpumask);
  183 + /*
  184 + * Calculate the percpu reorder queue and the sequence
  185 + * number of the next object.
  186 + */
  187 + next_nr = pd->processed;
  188 + next_index = next_nr % num_cpus;
  189 + cpu = padata_index_to_cpu(pd, next_index);
  190 + next_queue = per_cpu_ptr(pd->pqueue, cpu);
186 191  
187   - for_each_cpu(cpu, pd->cpumask) {
188   - queue = per_cpu_ptr(pd->queue, cpu);
189   - reorder = &queue->reorder;
190   -
191   - /*
192   - * Calculate the seq_nr of the object that should be
193   - * next in this reorder queue.
194   - */
195   - overrun = 0;
196   - calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
197   - + queue->cpu_index;
198   -
199   - if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
200   - calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
201   - overrun = 1;
202   - }
203   -
204   - if (!list_empty(&reorder->list)) {
205   - padata = list_entry(reorder->list.next,
206   - struct padata_priv, list);
207   -
208   - seq_nr = padata->seq_nr;
209   - BUG_ON(calc_seq_nr != seq_nr);
210   - } else {
211   - seq_nr = calc_seq_nr;
212   - empty++;
213   - }
214   -
215   - if (next_nr < 0 || seq_nr < next_nr
216   - || (next_overrun && !overrun)) {
217   - next_nr = seq_nr;
218   - next_overrun = overrun;
219   - next_queue = queue;
220   - }
  192 + if (unlikely(next_nr > pd->max_seq_nr)) {
  193 + next_nr = next_nr - pd->max_seq_nr - 1;
  194 + next_index = next_nr % num_cpus;
  195 + cpu = padata_index_to_cpu(pd, next_index);
  196 + next_queue = per_cpu_ptr(pd->pqueue, cpu);
  197 + pd->processed = 0;
221 198 }
222 199  
223 200 padata = NULL;
224 201  
225   - if (empty == num_cpus)
226   - goto out;
227   -
228 202 reorder = &next_queue->reorder;
229 203  
230 204 if (!list_empty(&reorder->list)) {
231 205 padata = list_entry(reorder->list.next,
232 206 struct padata_priv, list);
233 207  
234   - if (unlikely(next_overrun)) {
235   - for_each_cpu(cpu, pd->cpumask) {
236   - queue = per_cpu_ptr(pd->queue, cpu);
237   - atomic_set(&queue->num_obj, 0);
238   - }
239   - }
  208 + BUG_ON(next_nr != padata->seq_nr);
240 209  
241 210 spin_lock(&reorder->lock);
242 211 list_del_init(&padata->list);
243 212 atomic_dec(&pd->reorder_objects);
244 213 spin_unlock(&reorder->lock);
245 214  
246   - atomic_inc(&next_queue->num_obj);
  215 + pd->processed++;
247 216  
248 217 goto out;
249 218 }
250 219  
251   - queue = per_cpu_ptr(pd->queue, smp_processor_id());
  220 + queue = per_cpu_ptr(pd->pqueue, smp_processor_id());
252 221 if (queue->cpu_index == next_queue->cpu_index) {
253 222 padata = ERR_PTR(-ENODATA);
254 223 goto out;
... ... @@ -262,7 +231,7 @@
262 231 static void padata_reorder(struct parallel_data *pd)
263 232 {
264 233 struct padata_priv *padata;
265   - struct padata_queue *queue;
  234 + struct padata_serial_queue *squeue;
266 235 struct padata_instance *pinst = pd->pinst;
267 236  
268 237 /*
269 238  
270 239  
... ... @@ -301,13 +270,13 @@
301 270 return;
302 271 }
303 272  
304   - queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
  273 + squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu);
305 274  
306   - spin_lock(&queue->serial.lock);
307   - list_add_tail(&padata->list, &queue->serial.list);
308   - spin_unlock(&queue->serial.lock);
  275 + spin_lock(&squeue->serial.lock);
  276 + list_add_tail(&padata->list, &squeue->serial.list);
  277 + spin_unlock(&squeue->serial.lock);
309 278  
310   - queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
  279 + queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work);
311 280 }
312 281  
313 282 spin_unlock_bh(&pd->lock);
314 283  
315 284  
316 285  
... ... @@ -333,19 +302,19 @@
333 302 padata_reorder(pd);
334 303 }
335 304  
336   -static void padata_serial_worker(struct work_struct *work)
  305 +static void padata_serial_worker(struct work_struct *serial_work)
337 306 {
338   - struct padata_queue *queue;
  307 + struct padata_serial_queue *squeue;
339 308 struct parallel_data *pd;
340 309 LIST_HEAD(local_list);
341 310  
342 311 local_bh_disable();
343   - queue = container_of(work, struct padata_queue, swork);
344   - pd = queue->pd;
  312 + squeue = container_of(serial_work, struct padata_serial_queue, work);
  313 + pd = squeue->pd;
345 314  
346   - spin_lock(&queue->serial.lock);
347   - list_replace_init(&queue->serial.list, &local_list);
348   - spin_unlock(&queue->serial.lock);
  315 + spin_lock(&squeue->serial.lock);
  316 + list_replace_init(&squeue->serial.list, &local_list);
  317 + spin_unlock(&squeue->serial.lock);
349 318  
350 319 while (!list_empty(&local_list)) {
351 320 struct padata_priv *padata;
352 321  
353 322  
354 323  
... ... @@ -372,18 +341,18 @@
372 341 void padata_do_serial(struct padata_priv *padata)
373 342 {
374 343 int cpu;
375   - struct padata_queue *queue;
  344 + struct padata_parallel_queue *pqueue;
376 345 struct parallel_data *pd;
377 346  
378 347 pd = padata->pd;
379 348  
380 349 cpu = get_cpu();
381   - queue = per_cpu_ptr(pd->queue, cpu);
  350 + pqueue = per_cpu_ptr(pd->pqueue, cpu);
382 351  
383   - spin_lock(&queue->reorder.lock);
  352 + spin_lock(&pqueue->reorder.lock);
384 353 atomic_inc(&pd->reorder_objects);
385   - list_add_tail(&padata->list, &queue->reorder.list);
386   - spin_unlock(&queue->reorder.lock);
  354 + list_add_tail(&padata->list, &pqueue->reorder.list);
  355 + spin_unlock(&pqueue->reorder.lock);
387 356  
388 357 put_cpu();
389 358  
390 359  
391 360  
392 361  
393 362  
394 363  
395 364  
396 365  
397 366  
398 367  
399 368  
400 369  
... ... @@ -391,52 +360,89 @@
391 360 }
392 361 EXPORT_SYMBOL(padata_do_serial);
393 362  
394   -/* Allocate and initialize the internal cpumask dependend resources. */
395   -static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
396   - const struct cpumask *cpumask)
  363 +static int padata_setup_cpumasks(struct parallel_data *pd,
  364 + const struct cpumask *pcpumask,
  365 + const struct cpumask *cbcpumask)
397 366 {
398   - int cpu, cpu_index, num_cpus;
399   - struct padata_queue *queue;
400   - struct parallel_data *pd;
  367 + if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
  368 + return -ENOMEM;
401 369  
402   - cpu_index = 0;
  370 + cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask);
  371 + if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
  372 + free_cpumask_var(pd->cpumask.pcpu);
  373 + return -ENOMEM;
  374 + }
403 375  
404   - pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
405   - if (!pd)
406   - goto err;
  376 + cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask);
  377 + return 0;
  378 +}
407 379  
408   - pd->queue = alloc_percpu(struct padata_queue);
409   - if (!pd->queue)
410   - goto err_free_pd;
  380 +static void __padata_list_init(struct padata_list *pd_list)
  381 +{
  382 + INIT_LIST_HEAD(&pd_list->list);
  383 + spin_lock_init(&pd_list->lock);
  384 +}
411 385  
412   - if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
413   - goto err_free_queue;
  386 +/* Initialize all percpu queues used by serial workers */
  387 +static void padata_init_squeues(struct parallel_data *pd)
  388 +{
  389 + int cpu;
  390 + struct padata_serial_queue *squeue;
414 391  
415   - cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
  392 + for_each_cpu(cpu, pd->cpumask.cbcpu) {
  393 + squeue = per_cpu_ptr(pd->squeue, cpu);
  394 + squeue->pd = pd;
  395 + __padata_list_init(&squeue->serial);
  396 + INIT_WORK(&squeue->work, padata_serial_worker);
  397 + }
  398 +}
416 399  
417   - for_each_cpu(cpu, pd->cpumask) {
418   - queue = per_cpu_ptr(pd->queue, cpu);
  400 +/* Initialize all percpu queues used by parallel workers */
  401 +static void padata_init_pqueues(struct parallel_data *pd)
  402 +{
  403 + int cpu_index, num_cpus, cpu;
  404 + struct padata_parallel_queue *pqueue;
419 405  
420   - queue->pd = pd;
421   -
422   - queue->cpu_index = cpu_index;
  406 + cpu_index = 0;
  407 + for_each_cpu(cpu, pd->cpumask.pcpu) {
  408 + pqueue = per_cpu_ptr(pd->pqueue, cpu);
  409 + pqueue->pd = pd;
  410 + pqueue->cpu_index = cpu_index;
423 411 cpu_index++;
424 412  
425   - INIT_LIST_HEAD(&queue->reorder.list);
426   - INIT_LIST_HEAD(&queue->parallel.list);
427   - INIT_LIST_HEAD(&queue->serial.list);
428   - spin_lock_init(&queue->reorder.lock);
429   - spin_lock_init(&queue->parallel.lock);
430   - spin_lock_init(&queue->serial.lock);
431   -
432   - INIT_WORK(&queue->pwork, padata_parallel_worker);
433   - INIT_WORK(&queue->swork, padata_serial_worker);
434   - atomic_set(&queue->num_obj, 0);
  413 + __padata_list_init(&pqueue->reorder);
  414 + __padata_list_init(&pqueue->parallel);
  415 + INIT_WORK(&pqueue->work, padata_parallel_worker);
  416 + atomic_set(&pqueue->num_obj, 0);
435 417 }
436 418  
437   - num_cpus = cpumask_weight(pd->cpumask);
438   - pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
  419 + num_cpus = cpumask_weight(pd->cpumask.pcpu);
  420 + pd->max_seq_nr = num_cpus ? (MAX_SEQ_NR / num_cpus) * num_cpus - 1 : 0;
  421 +}
439 422  
  423 +/* Allocate and initialize the internal cpumask dependent resources. */
  424 +static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
  425 + const struct cpumask *pcpumask,
  426 + const struct cpumask *cbcpumask)
  427 +{
  428 + struct parallel_data *pd;
  429 +
  430 + pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
  431 + if (!pd)
  432 + goto err;
  433 +
  434 + pd->pqueue = alloc_percpu(struct padata_parallel_queue);
  435 + if (!pd->pqueue)
  436 + goto err_free_pd;
  437 +
  438 + pd->squeue = alloc_percpu(struct padata_serial_queue);
  439 + if (!pd->squeue)
  440 + goto err_free_pqueue;
  441 + if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
  442 + goto err_free_squeue;
  443 +
  444 + padata_init_pqueues(pd);
  445 + padata_init_squeues(pd);
440 446 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
441 447 atomic_set(&pd->seq_nr, -1);
442 448 atomic_set(&pd->reorder_objects, 0);
... ... @@ -446,8 +452,10 @@
446 452  
447 453 return pd;
448 454  
449   -err_free_queue:
450   - free_percpu(pd->queue);
  455 +err_free_squeue:
  456 + free_percpu(pd->squeue);
  457 +err_free_pqueue:
  458 + free_percpu(pd->pqueue);
451 459 err_free_pd:
452 460 kfree(pd);
453 461 err:
... ... @@ -456,8 +464,10 @@
456 464  
457 465 static void padata_free_pd(struct parallel_data *pd)
458 466 {
459   - free_cpumask_var(pd->cpumask);
460   - free_percpu(pd->queue);
  467 + free_cpumask_var(pd->cpumask.pcpu);
  468 + free_cpumask_var(pd->cpumask.cbcpu);
  469 + free_percpu(pd->pqueue);
  470 + free_percpu(pd->squeue);
461 471 kfree(pd);
462 472 }
463 473  
464 474  
... ... @@ -465,11 +475,12 @@
465 475 static void padata_flush_queues(struct parallel_data *pd)
466 476 {
467 477 int cpu;
468   - struct padata_queue *queue;
  478 + struct padata_parallel_queue *pqueue;
  479 + struct padata_serial_queue *squeue;
469 480  
470   - for_each_cpu(cpu, pd->cpumask) {
471   - queue = per_cpu_ptr(pd->queue, cpu);
472   - flush_work(&queue->pwork);
  481 + for_each_cpu(cpu, pd->cpumask.pcpu) {
  482 + pqueue = per_cpu_ptr(pd->pqueue, cpu);
  483 + flush_work(&pqueue->work);
473 484 }
474 485  
475 486 del_timer_sync(&pd->timer);
476 487  
477 488  
... ... @@ -477,19 +488,39 @@
477 488 if (atomic_read(&pd->reorder_objects))
478 489 padata_reorder(pd);
479 490  
480   - for_each_cpu(cpu, pd->cpumask) {
481   - queue = per_cpu_ptr(pd->queue, cpu);
482   - flush_work(&queue->swork);
  491 + for_each_cpu(cpu, pd->cpumask.cbcpu) {
  492 + squeue = per_cpu_ptr(pd->squeue, cpu);
  493 + flush_work(&squeue->work);
483 494 }
484 495  
485 496 BUG_ON(atomic_read(&pd->refcnt) != 0);
486 497 }
487 498  
  499 +static void __padata_start(struct padata_instance *pinst)
  500 +{
  501 + pinst->flags |= PADATA_INIT;
  502 +}
  503 +
  504 +static void __padata_stop(struct padata_instance *pinst)
  505 +{
  506 + if (!(pinst->flags & PADATA_INIT))
  507 + return;
  508 +
  509 + pinst->flags &= ~PADATA_INIT;
  510 +
  511 + synchronize_rcu();
  512 +
  513 + get_online_cpus();
  514 + padata_flush_queues(pinst->pd);
  515 + put_online_cpus();
  516 +}
  517 +
488 518 /* Replace the internal control stucture with a new one. */
489 519 static void padata_replace(struct padata_instance *pinst,
490 520 struct parallel_data *pd_new)
491 521 {
492 522 struct parallel_data *pd_old = pinst->pd;
  523 + int notification_mask = 0;
493 524  
494 525 pinst->flags |= PADATA_RESET;
495 526  
496 527  
497 528  
498 529  
499 530  
500 531  
501 532  
502 533  
503 534  
504 535  
505 536  
506 537  
507 538  
508 539  
... ... @@ -497,41 +528,162 @@
497 528  
498 529 synchronize_rcu();
499 530  
  531 + if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
  532 + notification_mask |= PADATA_CPU_PARALLEL;
  533 + if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
  534 + notification_mask |= PADATA_CPU_SERIAL;
  535 +
500 536 padata_flush_queues(pd_old);
501 537 padata_free_pd(pd_old);
502 538  
  539 + if (notification_mask)
  540 + blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
  541 + notification_mask,
  542 + &pd_new->cpumask);
  543 +
503 544 pinst->flags &= ~PADATA_RESET;
504 545 }
505 546  
506 547 /**
507   - * padata_set_cpumask - set the cpumask that padata should use
  548 + * padata_register_cpumask_notifier - Registers a notifier that will be called
  549 + * if either pcpu or cbcpu or both cpumasks change.
508 550 *
509   - * @pinst: padata instance
510   - * @cpumask: the cpumask to use
  551 + * @pinst: A pointer to padata instance
  552 + * @nblock: A pointer to notifier block.
511 553 */
512   -int padata_set_cpumask(struct padata_instance *pinst,
513   - cpumask_var_t cpumask)
  554 +int padata_register_cpumask_notifier(struct padata_instance *pinst,
  555 + struct notifier_block *nblock)
514 556 {
  557 + return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
  558 + nblock);
  559 +}
  560 +EXPORT_SYMBOL(padata_register_cpumask_notifier);
  561 +
  562 +/**
  563 + * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
  564 + * registered earlier using padata_register_cpumask_notifier
  565 + *
  566 + * @pinst: A pointer to padata instance.
  567 + * @nblock: A pointer to notifier block.
  568 + */
  569 +int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
  570 + struct notifier_block *nblock)
  571 +{
  572 + return blocking_notifier_chain_unregister(
  573 + &pinst->cpumask_change_notifier,
  574 + nblock);
  575 +}
  576 +EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
  577 +
  578 +
  579 +/* If cpumask contains no active cpu, we mark the instance as invalid. */
  580 +static bool padata_validate_cpumask(struct padata_instance *pinst,
  581 + const struct cpumask *cpumask)
  582 +{
  583 + if (!cpumask_intersects(cpumask, cpu_active_mask)) {
  584 + pinst->flags |= PADATA_INVALID;
  585 + return false;
  586 + }
  587 +
  588 + pinst->flags &= ~PADATA_INVALID;
  589 + return true;
  590 +}
  591 +
  592 +static int __padata_set_cpumasks(struct padata_instance *pinst,
  593 + cpumask_var_t pcpumask,
  594 + cpumask_var_t cbcpumask)
  595 +{
  596 + int valid;
515 597 struct parallel_data *pd;
516   - int err = 0;
517 598  
  599 + valid = padata_validate_cpumask(pinst, pcpumask);
  600 + if (!valid) {
  601 + __padata_stop(pinst);
  602 + goto out_replace;
  603 + }
  604 +
  605 + valid = padata_validate_cpumask(pinst, cbcpumask);
  606 + if (!valid)
  607 + __padata_stop(pinst);
  608 +
  609 +out_replace:
  610 + pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
  611 + if (!pd)
  612 + return -ENOMEM;
  613 +
  614 + cpumask_copy(pinst->cpumask.pcpu, pcpumask);
  615 + cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
  616 +
  617 + padata_replace(pinst, pd);
  618 +
  619 + if (valid)
  620 + __padata_start(pinst);
  621 +
  622 + return 0;
  623 +}
  624 +
  625 +/**
  626 + * padata_set_cpumasks - Set both parallel and serial cpumasks. The first
  627 + * one is used by parallel workers and the second one
  628 + * by the workers doing serialization.
  629 + *
  630 + * @pinst: padata instance
  631 + * @pcpumask: the cpumask to use for parallel workers
  632 + * @cbcpumask: the cpumask to use for serial workers
  633 + */
  634 +int padata_set_cpumasks(struct padata_instance *pinst, cpumask_var_t pcpumask,
  635 + cpumask_var_t cbcpumask)
  636 +{
  637 + int err;
  638 +
518 639 mutex_lock(&pinst->lock);
  640 + get_online_cpus();
519 641  
  642 + err = __padata_set_cpumasks(pinst, pcpumask, cbcpumask);
  643 +
  644 + put_online_cpus();
  645 + mutex_unlock(&pinst->lock);
  646 +
  647 + return err;
  648 +
  649 +}
  650 +EXPORT_SYMBOL(padata_set_cpumasks);
  651 +
  652 +/**
  653 + * padata_set_cpumask: Set the cpumask corresponding to @cpumask_type to
  654 + * the value of @cpumask.
  655 + *
  656 + * @pinst: padata instance
  657 + * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
  658 + * to parallel and serial cpumasks respectively.
  659 + * @cpumask: the cpumask to use
  660 + */
  661 +int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
  662 + cpumask_var_t cpumask)
  663 +{
  664 + struct cpumask *serial_mask, *parallel_mask;
  665 + int err = -EINVAL;
  666 +
  667 + mutex_lock(&pinst->lock);
520 668 get_online_cpus();
521 669  
522   - pd = padata_alloc_pd(pinst, cpumask);
523   - if (!pd) {
524   - err = -ENOMEM;
525   - goto out;
  670 + switch (cpumask_type) {
  671 + case PADATA_CPU_PARALLEL:
  672 + serial_mask = pinst->cpumask.cbcpu;
  673 + parallel_mask = cpumask;
  674 + break;
  675 + case PADATA_CPU_SERIAL:
  676 + parallel_mask = pinst->cpumask.pcpu;
  677 + serial_mask = cpumask;
  678 + break;
  679 + default:
  680 + goto out;
526 681 }
527 682  
528   - cpumask_copy(pinst->cpumask, cpumask);
  683 + err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
529 684  
530   - padata_replace(pinst, pd);
531   -
532 685 out:
533 686 put_online_cpus();
534   -
535 687 mutex_unlock(&pinst->lock);
536 688  
537 689 return err;
538 690  
539 691  
540 692  
541 693  
542 694  
543 695  
... ... @@ -543,30 +695,48 @@
543 695 struct parallel_data *pd;
544 696  
545 697 if (cpumask_test_cpu(cpu, cpu_active_mask)) {
546   - pd = padata_alloc_pd(pinst, pinst->cpumask);
  698 + pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
  699 + pinst->cpumask.cbcpu);
547 700 if (!pd)
548 701 return -ENOMEM;
549 702  
550 703 padata_replace(pinst, pd);
  704 +
  705 + if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
  706 + padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
  707 + __padata_start(pinst);
551 708 }
552 709  
553 710 return 0;
554 711 }
555 712  
556   -/**
557   - * padata_add_cpu - add a cpu to the padata cpumask
  713 + /**
  714 + * padata_add_cpu - add a cpu to one or both (parallel and serial)
  715 + * padata cpumasks.
558 716 *
559 717 * @pinst: padata instance
560 718 * @cpu: cpu to add
  719 + * @mask: bitmask of flags specifying to which cpumask @cpu should be added.
  720 + * The @mask may be any combination of the following flags:
  721 + * PADATA_CPU_SERIAL - serial cpumask
  722 + * PADATA_CPU_PARALLEL - parallel cpumask
561 723 */
562   -int padata_add_cpu(struct padata_instance *pinst, int cpu)
  724 +
  725 +int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask)
563 726 {
564 727 int err;
565 728  
  729 + if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
  730 + return -EINVAL;
  731 +
566 732 mutex_lock(&pinst->lock);
567 733  
568 734 get_online_cpus();
569   - cpumask_set_cpu(cpu, pinst->cpumask);
  735 + if (mask & PADATA_CPU_SERIAL)
  736 + cpumask_set_cpu(cpu, pinst->cpumask.cbcpu);
  737 + if (mask & PADATA_CPU_PARALLEL)
  738 + cpumask_set_cpu(cpu, pinst->cpumask.pcpu);
  739 +
570 740 err = __padata_add_cpu(pinst, cpu);
571 741 put_online_cpus();
572 742  
573 743  
... ... @@ -578,10 +748,16 @@
578 748  
579 749 static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
580 750 {
581   - struct parallel_data *pd;
  751 + struct parallel_data *pd = NULL;
582 752  
583 753 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
584   - pd = padata_alloc_pd(pinst, pinst->cpumask);
  754 +
  755 + if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
  756 + !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
  757 + __padata_stop(pinst);
  758 +
  759 + pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
  760 + pinst->cpumask.cbcpu);
585 761 if (!pd)
586 762 return -ENOMEM;
587 763  
588 764  
589 765  
590 766  
591 767  
... ... @@ -591,20 +767,32 @@
591 767 return 0;
592 768 }
593 769  
594   -/**
595   - * padata_remove_cpu - remove a cpu from the padata cpumask
  770 + /**
  771 + * padata_remove_cpu - remove a cpu from one or both (serial and parallel)
  772 + * padata cpumasks.
596 773 *
597 774 * @pinst: padata instance
598 775 * @cpu: cpu to remove
  776 + * @mask: bitmask specifying from which cpumask @cpu should be removed
  777 + * The @mask may be any combination of the following flags:
  778 + * PADATA_CPU_SERIAL - serial cpumask
  779 + * PADATA_CPU_PARALLEL - parallel cpumask
599 780 */
600   -int padata_remove_cpu(struct padata_instance *pinst, int cpu)
  781 +int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
601 782 {
602 783 int err;
603 784  
  785 + if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
  786 + return -EINVAL;
  787 +
604 788 mutex_lock(&pinst->lock);
605 789  
606 790 get_online_cpus();
607   - cpumask_clear_cpu(cpu, pinst->cpumask);
  791 + if (mask & PADATA_CPU_SERIAL)
  792 + cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
  793 + if (mask & PADATA_CPU_PARALLEL)
  794 + cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
  795 +
608 796 err = __padata_remove_cpu(pinst, cpu);
609 797 put_online_cpus();
610 798  
611 799  
612 800  
613 801  
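
Removal mirrors the add path. A hedged sketch, again with an illustrative
helper name, dropping a cpu from the serial (callback) cpumask only:

    /* Hypothetical helper: stop using cpu 3 for serialization callbacks
     * while keeping it in the parallel cpumask. */
    static int example_remove_serial_cpu(struct padata_instance *pinst)
    {
            int err;

            err = padata_remove_cpu(pinst, 3, PADATA_CPU_SERIAL);
            if (err)
                    printk(KERN_WARNING "padata_remove_cpu: %d\n", err);

            return err;
    }
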
... ... @@ -619,11 +807,20 @@
619 807 *
620 808 * @pinst: padata instance to start
621 809 */
622   -void padata_start(struct padata_instance *pinst)
  810 +int padata_start(struct padata_instance *pinst)
623 811 {
  812 + int err = 0;
  813 +
624 814 mutex_lock(&pinst->lock);
625   - pinst->flags |= PADATA_INIT;
  815 +
  816 + if (pinst->flags & PADATA_INVALID)
  817 + err = -EINVAL;
  818 +
  819 + __padata_start(pinst);
  820 +
626 821 mutex_unlock(&pinst->lock);
  822 +
  823 + return err;
627 824 }
628 825 EXPORT_SYMBOL(padata_start);
629 826  
630 827  
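
Since padata_start() can now fail for an instance whose cpumasks were marked
invalid, callers should check its return value rather than assume success.
A minimal sketch (the call site is hypothetical):

    /* Hypothetical call site: bail out if the instance cannot start. */
    static int example_start(struct padata_instance *pinst)
    {
            int err;

            err = padata_start(pinst); /* -EINVAL if flagged PADATA_INVALID */
            if (err)
                    return err;

            return 0;
    }
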
... ... @@ -635,12 +832,20 @@
635 832 void padata_stop(struct padata_instance *pinst)
636 833 {
637 834 mutex_lock(&pinst->lock);
638   - pinst->flags &= ~PADATA_INIT;
  835 + __padata_stop(pinst);
639 836 mutex_unlock(&pinst->lock);
640 837 }
641 838 EXPORT_SYMBOL(padata_stop);
642 839  
643 840 #ifdef CONFIG_HOTPLUG_CPU
  841 +
  842 +static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
  843 +{
  844 + return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
  845 + cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
  846 +}
  847 +
  848 +
644 849 static int padata_cpu_callback(struct notifier_block *nfb,
645 850 unsigned long action, void *hcpu)
646 851 {
... ... @@ -653,7 +858,7 @@
653 858 switch (action) {
654 859 case CPU_ONLINE:
655 860 case CPU_ONLINE_FROZEN:
656   - if (!cpumask_test_cpu(cpu, pinst->cpumask))
  861 + if (!pinst_has_cpu(pinst, cpu))
657 862 break;
658 863 mutex_lock(&pinst->lock);
659 864 err = __padata_add_cpu(pinst, cpu);
... ... @@ -664,7 +869,7 @@
664 869  
665 870 case CPU_DOWN_PREPARE:
666 871 case CPU_DOWN_PREPARE_FROZEN:
667   - if (!cpumask_test_cpu(cpu, pinst->cpumask))
  872 + if (!pinst_has_cpu(pinst, cpu))
668 873 break;
669 874 mutex_lock(&pinst->lock);
670 875 err = __padata_remove_cpu(pinst, cpu);
... ... @@ -675,7 +880,7 @@
675 880  
676 881 case CPU_UP_CANCELED:
677 882 case CPU_UP_CANCELED_FROZEN:
678   - if (!cpumask_test_cpu(cpu, pinst->cpumask))
  883 + if (!pinst_has_cpu(pinst, cpu))
679 884 break;
680 885 mutex_lock(&pinst->lock);
681 886 __padata_remove_cpu(pinst, cpu);
... ... @@ -683,7 +888,7 @@
683 888  
684 889 case CPU_DOWN_FAILED:
685 890 case CPU_DOWN_FAILED_FROZEN:
686   - if (!cpumask_test_cpu(cpu, pinst->cpumask))
  891 + if (!pinst_has_cpu(pinst, cpu))
687 892 break;
688 893 mutex_lock(&pinst->lock);
689 894 __padata_add_cpu(pinst, cpu);
690 895  
691 896  
692 897  
693 898  
694 899  
695 900  
696 901  
697 902  
698 903  
699 904  
... ... @@ -694,36 +899,202 @@
694 899 }
695 900 #endif
696 901  
  902 +static void __padata_free(struct padata_instance *pinst)
  903 +{
  904 +#ifdef CONFIG_HOTPLUG_CPU
  905 + unregister_hotcpu_notifier(&pinst->cpu_notifier);
  906 +#endif
  907 +
  908 + padata_stop(pinst);
  909 + padata_free_pd(pinst->pd);
  910 + free_cpumask_var(pinst->cpumask.pcpu);
  911 + free_cpumask_var(pinst->cpumask.cbcpu);
  912 + kfree(pinst);
  913 +}
  914 +
  915 +#define kobj2pinst(_kobj) \
  916 + container_of(_kobj, struct padata_instance, kobj)
  917 +#define attr2pentry(_attr) \
  918 + container_of(_attr, struct padata_sysfs_entry, attr)
  919 +
  920 +static void padata_sysfs_release(struct kobject *kobj)
  921 +{
  922 + struct padata_instance *pinst = kobj2pinst(kobj);
  923 + __padata_free(pinst);
  924 +}
  925 +
  926 +struct padata_sysfs_entry {
  927 + struct attribute attr;
  928 + ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
  929 + ssize_t (*store)(struct padata_instance *, struct attribute *,
  930 + const char *, size_t);
  931 +};
  932 +
  933 +static ssize_t show_cpumask(struct padata_instance *pinst,
  934 + struct attribute *attr, char *buf)
  935 +{
  936 + struct cpumask *cpumask;
  937 + ssize_t len;
  938 +
  939 + mutex_lock(&pinst->lock);
  940 + if (!strcmp(attr->name, "serial_cpumask"))
  941 + cpumask = pinst->cpumask.cbcpu;
  942 + else
  943 + cpumask = pinst->cpumask.pcpu;
  944 +
  945 + len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask),
  946 + nr_cpu_ids);
  947 + if (PAGE_SIZE - len < 2)
  948 + len = -EINVAL;
  949 + else
  950 + len += sprintf(buf + len, "\n");
  951 +
  952 + mutex_unlock(&pinst->lock);
  953 + return len;
  954 +}
  955 +
  956 +static ssize_t store_cpumask(struct padata_instance *pinst,
  957 + struct attribute *attr,
  958 + const char *buf, size_t count)
  959 +{
  960 + cpumask_var_t new_cpumask;
  961 + ssize_t ret;
  962 + int mask_type;
  963 +
  964 + if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
  965 + return -ENOMEM;
  966 +
  967 + ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
  968 + nr_cpumask_bits);
  969 + if (ret < 0)
  970 + goto out;
  971 +
  972 + mask_type = !strcmp(attr->name, "serial_cpumask") ?
  973 + PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
  974 + ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
  975 + if (!ret)
  976 + ret = count;
  977 +
  978 +out:
  979 + free_cpumask_var(new_cpumask);
  980 + return ret;
  981 +}
  982 +
  983 +#define PADATA_ATTR_RW(_name, _show_name, _store_name) \
  984 + static struct padata_sysfs_entry _name##_attr = \
  985 + __ATTR(_name, 0644, _show_name, _store_name)
  986 +#define PADATA_ATTR_RO(_name, _show_name) \
  987 + static struct padata_sysfs_entry _name##_attr = \
  988 + __ATTR(_name, 0400, _show_name, NULL)
  989 +
  990 +PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
  991 +PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
  992 +
  993 +/*
  994 + * Padata sysfs provides the following objects:
  995 + * serial_cpumask [RW] - cpumask for serial workers
  996 + * parallel_cpumask [RW] - cpumask for parallel workers
  997 + */
  998 +static struct attribute *padata_default_attrs[] = {
  999 + &serial_cpumask_attr.attr,
  1000 + &parallel_cpumask_attr.attr,
  1001 + NULL,
  1002 +};
  1003 +
  1004 +static ssize_t padata_sysfs_show(struct kobject *kobj,
  1005 + struct attribute *attr, char *buf)
  1006 +{
  1007 + struct padata_instance *pinst;
  1008 + struct padata_sysfs_entry *pentry;
  1009 + ssize_t ret = -EIO;
  1010 +
  1011 + pinst = kobj2pinst(kobj);
  1012 + pentry = attr2pentry(attr);
  1013 + if (pentry->show)
  1014 + ret = pentry->show(pinst, attr, buf);
  1015 +
  1016 + return ret;
  1017 +}
  1018 +
  1019 +static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
  1020 + const char *buf, size_t count)
  1021 +{
  1022 + struct padata_instance *pinst;
  1023 + struct padata_sysfs_entry *pentry;
  1024 + ssize_t ret = -EIO;
  1025 +
  1026 + pinst = kobj2pinst(kobj);
  1027 + pentry = attr2pentry(attr);
  1028 + if (pentry->store)
  1029 + ret = pentry->store(pinst, attr, buf, count);
  1030 +
  1031 + return ret;
  1032 +}
  1033 +
  1034 +static const struct sysfs_ops padata_sysfs_ops = {
  1035 + .show = padata_sysfs_show,
  1036 + .store = padata_sysfs_store,
  1037 +};
  1038 +
  1039 +static struct kobj_type padata_attr_type = {
  1040 + .sysfs_ops = &padata_sysfs_ops,
  1041 + .default_attrs = padata_default_attrs,
  1042 + .release = padata_sysfs_release,
  1043 +};
  1044 +
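
Both attributes funnel into padata_set_cpumask(), as store_cpumask() above
shows, so the same reconfiguration is available to kernel code directly.
A sketch under that assumption (helper name illustrative):

    /* Hypothetical helper: restrict parallel workers to cpus 0 and 1. */
    static int example_set_parallel_cpus(struct padata_instance *pinst)
    {
            cpumask_var_t mask;
            int err;

            if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                    return -ENOMEM;

            cpumask_clear(mask);
            cpumask_set_cpu(0, mask);
            cpumask_set_cpu(1, mask);

            err = padata_set_cpumask(pinst, PADATA_CPU_PARALLEL, mask);

            free_cpumask_var(mask);
            return err;
    }

From user space the same masks can be read and written through the
serial_cpumask and parallel_cpumask attributes, wherever the padata user
chooses to add the instance's kobject.
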
697 1045 /**
698   - * padata_alloc - allocate and initialize a padata instance
  1046 + * padata_alloc_possible - Allocate and initialize padata instance.
  1047 + * Use the cpu_possible_mask for serial and
  1048 + * parallel workers.
699 1049 *
700   - * @cpumask: cpumask that padata uses for parallelization
701 1050 * @wq: workqueue to use for the allocated padata instance
702 1051 */
703   -struct padata_instance *padata_alloc(const struct cpumask *cpumask,
704   - struct workqueue_struct *wq)
  1052 +struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
705 1053 {
  1054 + return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
  1055 +}
  1056 +EXPORT_SYMBOL(padata_alloc_possible);
  1057 +
  1058 +/**
  1059 + * padata_alloc - allocate and initialize a padata instance and specify
  1060 + * cpumasks for serial and parallel workers.
  1061 + *
  1062 + * @wq: workqueue to use for the allocated padata instance
  1063 + * @pcpumask: cpumask that will be used for padata parallelization
  1064 + * @cbcpumask: cpumask that will be used for padata serialization
  1065 + */
  1066 +struct padata_instance *padata_alloc(struct workqueue_struct *wq,
  1067 + const struct cpumask *pcpumask,
  1068 + const struct cpumask *cbcpumask)
  1069 +{
706 1070 struct padata_instance *pinst;
707   - struct parallel_data *pd;
  1071 + struct parallel_data *pd = NULL;
708 1072  
709 1073 pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
710 1074 if (!pinst)
711 1075 goto err;
712 1076  
713 1077 get_online_cpus();
  1078 + if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
  1079 + goto err_free_inst;
  1080 + if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
  1081 + free_cpumask_var(pinst->cpumask.pcpu);
  1082 + goto err_free_inst;
  1083 + }
  1084 + if (!padata_validate_cpumask(pinst, pcpumask) ||
  1085 + !padata_validate_cpumask(pinst, cbcpumask))
  1086 + goto err_free_masks;
714 1087  
715   - pd = padata_alloc_pd(pinst, cpumask);
  1088 + pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
716 1089 if (!pd)
717   - goto err_free_inst;
  1090 + goto err_free_masks;
718 1091  
719   - if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL))
720   - goto err_free_pd;
721   -
722 1092 rcu_assign_pointer(pinst->pd, pd);
723 1093  
724 1094 pinst->wq = wq;
725 1095  
726   - cpumask_copy(pinst->cpumask, cpumask);
  1096 + cpumask_copy(pinst->cpumask.pcpu, pcpumask);
  1097 + cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
727 1098  
728 1099 pinst->flags = 0;
729 1100  
730 1101  
... ... @@ -735,12 +1106,15 @@
735 1106  
736 1107 put_online_cpus();
737 1108  
  1109 + BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
  1110 + kobject_init(&pinst->kobj, &padata_attr_type);
738 1111 mutex_init(&pinst->lock);
739 1112  
740 1113 return pinst;
741 1114  
742   -err_free_pd:
743   - padata_free_pd(pd);
  1115 +err_free_masks:
  1116 + free_cpumask_var(pinst->cpumask.pcpu);
  1117 + free_cpumask_var(pinst->cpumask.cbcpu);
744 1118 err_free_inst:
745 1119 kfree(pinst);
746 1120 put_online_cpus();
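
Putting allocation together: padata_alloc() now returns NULL when either
supplied cpumask contains no active cpu, so users providing their own masks
should be prepared for that. A hedged sketch (workqueue and helper name are
illustrative); users that do not care about the split can simply call
padata_alloc_possible(wq):

    /* Hypothetical setup: parallelize on all possible cpus, serialize
     * on cpu 0 only. */
    static struct padata_instance *example_setup(struct workqueue_struct *wq)
    {
            struct padata_instance *pinst;
            cpumask_var_t cbmask;

            if (!alloc_cpumask_var(&cbmask, GFP_KERNEL))
                    return NULL;

            cpumask_clear(cbmask);
            cpumask_set_cpu(0, cbmask);

            pinst = padata_alloc(wq, cpu_possible_mask, cbmask);

            free_cpumask_var(cbmask);
            return pinst; /* NULL if allocation or validation failed */
    }
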
... ... @@ -756,20 +1130,7 @@
756 1130 */
757 1131 void padata_free(struct padata_instance *pinst)
758 1132 {
759   - padata_stop(pinst);
760   -
761   - synchronize_rcu();
762   -
763   -#ifdef CONFIG_HOTPLUG_CPU
764   - unregister_hotcpu_notifier(&pinst->cpu_notifier);
765   -#endif
766   - get_online_cpus();
767   - padata_flush_queues(pinst->pd);
768   - put_online_cpus();
769   -
770   - padata_free_pd(pinst->pd);
771   - free_cpumask_var(pinst->cpumask);
772   - kfree(pinst);
  1133 + kobject_put(&pinst->kobj);
773 1134 }
774 1135 EXPORT_SYMBOL(padata_free);
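
Teardown is unchanged from the caller's point of view: stop the instance and
free it. The real release work now runs from padata_sysfs_release() once the
last reference on the embedded kobject is dropped. A minimal sketch matching
the hypothetical setup above (the explicit padata_stop() is what existing
users do; __padata_free() would stop the instance anyway):

    /* Hypothetical teardown for an instance created with padata_alloc*(). */
    static void example_teardown(struct padata_instance *pinst)
    {
            padata_stop(pinst);
            padata_free(pinst);
    }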