/[pkg-loop-aes]/upstream/loop-aes/current/kernel-2.4.36.diff
ViewVC logotype

Contents of /upstream/loop-aes/current/kernel-2.4.36.diff

Parent Directory Parent Directory | Revision Log Revision Log


Revision 376 - (hide annotations) (download)
Wed Sep 8 19:41:38 2004 UTC (8 years, 9 months ago) by max
Original Path: loop-aes-source/vendor/current/kernel-2.4.27.diff
File size: 200248 byte(s)
Load /tmp/tmp.9rEYKU/loop-aes-source-2.2a into
loop-aes-source/vendor/current.
1 max 278 Before this patch can be applied to kernel, drivers/block/loop.c and
2     include/linux/loop.h source files must be removed:
3    
4     rm -f drivers/block/loop.c include/linux/loop.h
5    
6 max 376 diff -urN linux-2.4.27-noloop/Documentation/Configure.help linux-2.4.27-AES/Documentation/Configure.help
7     --- linux-2.4.27-noloop/Documentation/Configure.help Sun Aug 8 14:15:08 2004
8     +++ linux-2.4.27-AES/Documentation/Configure.help Sun Aug 15 16:39:41 2004
9     @@ -620,6 +620,21 @@
10 max 21
11     If unsure, say N.
12    
13     +AES encrypted loop device support
14     +CONFIG_BLK_DEV_LOOP_AES
15     + If you want to use AES encryption algorithm to encrypt loop devices,
16     + say Y here. If you don't know what to do here, say N.
17     +
18 max 376 +loop encryption key scrubbing support
19     +CONFIG_BLK_DEV_LOOP_KEYSCRUB
20     + Loop encryption key scrubbing moves and inverts key bits in
21     + kernel RAM so that the thin oxide which forms the storage
22     + capacitor dielectric of DRAM cells is not permitted to develop
23     + a detectable property. For more info, see Peter Gutmann's paper:
24     + http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
25     +
26     + Paranoid tinfoil hat crowd say Y here, everyone else say N.
27     +
28 max 21 ATA/IDE/MFM/RLL support
29     CONFIG_IDE
30     If you say Y here, your kernel will be able to manage low cost mass
31 max 376 diff -urN linux-2.4.27-noloop/drivers/block/Config.in linux-2.4.27-AES/drivers/block/Config.in
32     --- linux-2.4.27-noloop/drivers/block/Config.in Sun Aug 8 14:15:13 2004
33     +++ linux-2.4.27-AES/drivers/block/Config.in Sun Aug 15 16:39:41 2004
34     @@ -42,6 +42,10 @@
35 max 348 dep_tristate 'Promise SATA SX8 support' CONFIG_BLK_DEV_SX8 $CONFIG_PCI
36 max 21
37     tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
38     +if [ "$CONFIG_BLK_DEV_LOOP" != "n" ]; then
39     + bool ' AES encrypted loop device support' CONFIG_BLK_DEV_LOOP_AES
40 max 376 + bool ' loop encryption key scrubbing support' CONFIG_BLK_DEV_LOOP_KEYSCRUB
41 max 21 +fi
42     dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
43    
44     tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
45 max 376 diff -urN linux-2.4.27-noloop/drivers/block/loop.c linux-2.4.27-AES/drivers/block/loop.c
46     --- linux-2.4.27-noloop/drivers/block/loop.c Thu Jan 1 02:00:00 1970
47     +++ linux-2.4.27-AES/drivers/block/loop.c Sun Aug 15 16:39:41 2004
48     @@ -0,0 +1,2018 @@
49 max 278 +/*
50     + * linux/drivers/block/loop.c
51 max 254 + *
52 max 278 + * Written by Theodore Ts'o, 3/29/93
53     + *
54     + * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
55     + * permitted under the GNU General Public License.
56     + *
57     + * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
58     + * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
59     + *
60     + * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
61     + * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
62     + *
63     + * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
64     + *
65     + * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
66     + *
67     + * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
68     + *
69     + * Loadable modules and other fixes by AK, 1998
70     + *
71     + * Make real block number available to downstream transfer functions, enables
72 max 254 + * CBC (and relatives) mode encryption requiring unique IVs per data block.
73 max 278 + * Reed H. Petty, rhp@draper.net
74 max 254 + *
75 max 278 + * Maximum number of loop devices now dynamic via max_loop module parameter.
76     + * Russell Kroll <rkroll@exploits.org> 19990701
77     + *
78     + * Maximum number of loop devices when compiled-in now selectable by passing
79     + * max_loop=<1-255> to the kernel on boot.
80     + * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
81     + *
82     + * Completely rewrite request handling to be make_request_fn style and
83     + * non blocking, pushing work to a helper thread. Lots of fixes from
84     + * Al Viro too.
85     + * Jens Axboe <axboe@suse.de>, Nov 2000
86     + *
87     + * Support up to 256 loop devices
88     + * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
89     + *
90 max 21 + * AES transfer added. IV is now passed as (512 byte) sector number.
91     + * Jari Ruusu, May 18 2001
92     + *
93     + * External encryption module locking bug fixed.
94     + * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
95     + *
96     + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
97     + * Jari Ruusu, September 2 2001
98 max 278 + *
99 max 21 + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
100     + * Jari Ruusu, May 23 2002
101     + *
102     + * Backported struct loop_info64 ioctls from 2.6 kernels (64 bit offsets and
103     + * 64 bit sizelimits). Added support for removing offset from IV computations.
104     + * Jari Ruusu, September 21 2003
105     + *
106     + * Added support for MD5 IV computation and multi-key operation.
107     + * Jari Ruusu, October 8 2003
108     + *
109     + *
110 max 254 + * Still To Fix:
111     + * - Advisory locking is ignored here.
112     + * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
113     + */
114 max 278 +
115     +#include <linux/config.h>
116     +#include <linux/module.h>
117     +
118     +#include <linux/sched.h>
119     +#include <linux/fs.h>
120     +#include <linux/file.h>
121     +#include <linux/stat.h>
122     +#include <linux/errno.h>
123     +#include <linux/major.h>
124     +#include <linux/wait.h>
125     +#include <linux/blk.h>
126     +#include <linux/blkpg.h>
127     +#include <linux/init.h>
128     +#include <linux/devfs_fs_kernel.h>
129     +#include <linux/smp_lock.h>
130     +#include <linux/swap.h>
131     +#include <linux/slab.h>
132 max 376 +#include <linux/spinlock.h>
133 max 278 +
134     +#include <asm/uaccess.h>
135 max 21 +#include <asm/byteorder.h>
136 max 278 +
137     +#include <linux/loop.h>
138 max 21 +#include "../misc/aes.h"
139     +#include "../misc/md5.h"
140 max 278 +
141     +#define MAJOR_NR LOOP_MAJOR
142     +
143     +static int max_loop = 8;
144     +static struct loop_device *loop_dev;
145     +static int *loop_sizes;
146     +static int *loop_blksizes;
147 max 21 +static int *loop_hardsizes;
148 max 278 +static devfs_handle_t devfs_handle; /* For the directory */
149     +
150     +#if defined(__x86_64__) && defined(CONFIG_IA32_EMULATION)
151     +# include <asm/ioctl32.h>
152     +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl)
153     +#endif
154 max 376 +#if (defined(__sparc__) || defined(__sparc64__)) && defined(CONFIG_SPARC32_COMPAT)
155     + extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *));
156     + extern int unregister_ioctl32_conversion(unsigned int cmd);
157     + extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
158     +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl)
159     +#endif
160 max 278 +
161     +/*
162     + * Transfer functions
163     + */
164     +static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
165     + char *loop_buf, int size, int real_block)
166     +{
167 max 21 + /* this code is only called from file backed loop */
168     + /* and that code expects this function to be no-op */
169 max 278 +
170 max 21 + if (current->need_resched)
171     + {set_current_state(TASK_RUNNING);schedule();}
172 max 278 + return 0;
173     +}
174     +
175     +static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
176     + char *loop_buf, int size, int real_block)
177     +{
178     + char *in, *out, *key;
179     + int i, keysize;
180     +
181     + if (cmd == READ) {
182     + in = raw_buf;
183     + out = loop_buf;
184     + } else {
185     + in = loop_buf;
186     + out = raw_buf;
187     + }
188     +
189     + key = lo->lo_encrypt_key;
190     + keysize = lo->lo_encrypt_key_size;
191     + for (i = 0; i < size; i++)
192     + *out++ = *in++ ^ key[(i & 511) % keysize];
193 max 21 + if (current->need_resched)
194     + {set_current_state(TASK_RUNNING);schedule();}
195 max 278 + return 0;
196     +}
197     +
/* Init hook for the "none" transfer: nothing to validate, always succeeds. */
static int none_status(struct loop_device *lo, struct loop_info *info)
{
	return 0;
}
202     +
203     +static int xor_status(struct loop_device *lo, struct loop_info *info)
204     +{
205     + if (info->lo_encrypt_key_size <= 0)
206     + return -EINVAL;
207     + return 0;
208     +}
209     +
210 max 254 +struct loop_func_table none_funcs = {
211 max 278 + number: LO_CRYPT_NONE,
212     + transfer: transfer_none,
213     + init: none_status,
214     +};
215     +
216 max 254 +struct loop_func_table xor_funcs = {
217 max 278 + number: LO_CRYPT_XOR,
218     + transfer: transfer_xor,
219 max 254 + init: xor_status,
220 max 278 +};
221     +
222 max 21 +#if CONFIG_BLK_DEV_LOOP_AES
/*
 * Number of key context slots per device.  Key scrubbing needs a second
 * slot for every key so contexts can be moved back and forth.
 */
#if !CONFIG_BLK_DEV_LOOP_KEYSCRUB
# define KEY_ALLOC_COUNT  64
#else
# define KEY_ALLOC_COUNT  128
#endif
228     +
229 max 21 +typedef struct {
230 max 376 + aes_context *keyPtr[KEY_ALLOC_COUNT];
231 max 21 + unsigned keyMask;
232 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
233     + rwlock_t rwlock;
234     + unsigned reversed;
235     + unsigned blocked;
236     + struct timer_list timer;
237     +#endif
238 max 21 +} AESmultiKey;
239     +
240 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
241     +static void keyScrubWork(AESmultiKey *m)
242     +{
243     + aes_context *a0, *a1;
244     + u_int32_t *p;
245     + int x, y, z;
246     +
247     + z = m->keyMask + 1;
248     + for(x = 0; x < z; x++) {
249     + a0 = m->keyPtr[x];
250     + a1 = m->keyPtr[x + z];
251     + memcpy(a1, a0, sizeof(aes_context));
252     + m->keyPtr[x] = a1;
253     + m->keyPtr[x + z] = a0;
254     + p = (u_int32_t *) a0;
255     + y = sizeof(aes_context) / sizeof(u_int32_t);
256     + while(y > 0) {
257     + *p ^= 0xFFFFFFFF;
258     + p++;
259     + y--;
260     + }
261     + }
262     + m->reversed ^= 1;
263     +
264     + /* try to flush dirty cache data to RAM */
265     +#if defined(CONFIG_X86_64) || (defined(CONFIG_X86) && !defined(CONFIG_M386) && !defined(CONFIG_CPU_386))
266     + __asm__ __volatile__ ("wbinvd": : :"memory");
267     +#else
268     + mb();
269     +#endif
270     +}
271     +
272     +/* called only from loop thread process context */
273     +static void keyScrubThreadFn(AESmultiKey *m)
274     +{
275     + write_lock(&m->rwlock);
276     + if(!m->blocked) keyScrubWork(m);
277     + write_unlock(&m->rwlock);
278     +}
279     +
280     +static void keyScrubTimerInit(struct loop_device *lo)
281     +{
282     + AESmultiKey *m;
283     + unsigned long expire;
284     + static void keyScrubTimerFn(unsigned long);
285     +
286     + m = (AESmultiKey *)lo->key_data;
287     + expire = jiffies + HZ;
288     + init_timer(&m->timer);
289     + m->timer.expires = expire;
290     + m->timer.data = (unsigned long)lo;
291     + m->timer.function = keyScrubTimerFn;
292     + add_timer(&m->timer);
293     +}
294     +
295     +/* called only from timer handler context */
296     +static void keyScrubTimerFn(unsigned long d)
297     +{
298     + struct loop_device *lo = (struct loop_device *)d;
299     + extern void loop_add_keyscrub_fn(struct loop_device *, void (*)(void *), void *);
300     +
301     + /* rw lock needs process context, so make loop thread do scrubbing */
302     + loop_add_keyscrub_fn(lo, (void (*)(void*))keyScrubThreadFn, lo->key_data);
303     + /* start timer again */
304     + keyScrubTimerInit(lo);
305     +}
306     +#endif
307     +
308 max 21 +static AESmultiKey *allocMultiKey(void)
309     +{
310     + AESmultiKey *m;
311     + aes_context *a;
312 max 376 + int x = 0, n;
313 max 21 +
314     + m = (AESmultiKey *) kmalloc(sizeof(AESmultiKey), GFP_KERNEL);
315     + if(!m) return 0;
316     + memset(m, 0, sizeof(AESmultiKey));
317 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
318     + rwlock_init(&m->rwlock);
319     + init_timer(&m->timer);
320     + again:
321     +#endif
322 max 21 +
323     + n = PAGE_SIZE / sizeof(aes_context);
324     + if(!n) n = 1;
325     +
326     + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
327     + if(!a) {
328 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
329     + if(x) kfree(m->keyPtr[0]);
330     +#endif
331 max 21 + kfree(m);
332 max 254 + return 0;
333 max 21 + }
334     +
335 max 376 + while((x < KEY_ALLOC_COUNT) && n) {
336 max 21 + m->keyPtr[x] = a;
337     + a++;
338     + x++;
339     + n--;
340     + }
341 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
342     + if(x < 2) goto again;
343     +#endif
344 max 21 + return m;
345     +}
346     +
347     +static void clearAndFreeMultiKey(AESmultiKey *m)
348     +{
349     + aes_context *a;
350     + int x, n;
351     +
352 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
353     + /* stop scrub timer. loop thread was killed earlier */
354     + del_timer_sync(&m->timer);
355     + /* make sure allocated keys are in original order */
356     + if(m->reversed) keyScrubWork(m);
357     +#endif
358 max 21 + n = PAGE_SIZE / sizeof(aes_context);
359     + if(!n) n = 1;
360     +
361     + x = 0;
362 max 376 + while(x < KEY_ALLOC_COUNT) {
363 max 21 + a = m->keyPtr[x];
364     + if(!a) break;
365     + memset(a, 0, sizeof(aes_context) * n);
366     + kfree(a);
367     + x += n;
368     + }
369     +
370     + kfree(m);
371     +}
372     +
373     +static int multiKeySetup(struct loop_device *lo, unsigned char *k)
374     +{
375     + AESmultiKey *m;
376     + aes_context *a;
377 max 376 + int x, y, n, err = 0;
378 max 278 + union {
379     + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
380     + unsigned char b[32];
381     + } un;
382 max 21 +
383     + if(lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN))
384     + return -EPERM;
385     +
386     + m = (AESmultiKey *)lo->key_data;
387     + if(!m) return -ENXIO;
388     +
389 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
390     + /* temporarily prevent loop thread from messing with keys */
391     + write_lock(&m->rwlock);
392     + m->blocked = 1;
393     + /* make sure allocated keys are in original order */
394     + if(m->reversed) keyScrubWork(m);
395     + write_unlock(&m->rwlock);
396     +#endif
397 max 21 + n = PAGE_SIZE / sizeof(aes_context);
398     + if(!n) n = 1;
399     +
400     + x = 0;
401 max 376 + while(x < KEY_ALLOC_COUNT) {
402 max 21 + if(!m->keyPtr[x]) {
403     + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
404 max 376 + if(!a) {
405     + err = -ENOMEM;
406     + goto error_out;
407     + }
408 max 21 + y = x;
409 max 376 + while((y < (x + n)) && (y < KEY_ALLOC_COUNT)) {
410 max 21 + m->keyPtr[y] = a;
411     + a++;
412     + y++;
413     + }
414     + }
415 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
416     + if(x >= 64) {
417     + x++;
418     + continue;
419     + }
420     +#endif
421     + if(copy_from_user(&un.b[0], k, 32)) {
422     + err = -EFAULT;
423     + goto error_out;
424     + }
425 max 278 + aes_set_key(m->keyPtr[x], &un.b[0], lo->lo_encrypt_key_size, 0);
426 max 21 + k += 32;
427     + x++;
428     + }
429     + m->keyMask = 0x3F; /* range 0...63 */
430     + lo->lo_flags |= 0x100000; /* multi-key (info exported to user space) */
431 max 278 + memset(&un.b[0], 0, 32);
432 max 376 +error_out:
433     +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
434     + /* re-enable loop thread key scrubbing */
435     + write_lock(&m->rwlock);
436     + m->blocked = 0;
437     + write_unlock(&m->rwlock);
438     +#endif
439     + return err;
440 max 21 +}
441     +
442     +void loop_compute_sector_iv(int devSect, u_int32_t *ivout)
443     +{
444     + ivout[0] = cpu_to_le32(devSect);
445     + ivout[3] = ivout[2] = ivout[1] = 0;
446     +}
447     +
448     +void loop_compute_md5_iv(int devSect, u_int32_t *ivout, u_int32_t *data)
449     +{
450 max 172 + int x;
451     +#if defined(__BIG_ENDIAN)
452     + int y, e;
453     +#endif
454 max 21 + u_int32_t buf[16];
455     +
456     + ivout[0] = 0x67452301;
457     + ivout[1] = 0xefcdab89;
458     + ivout[2] = 0x98badcfe;
459     + ivout[3] = 0x10325476;
460     +
461 max 172 +#if defined(__BIG_ENDIAN)
462 max 21 + y = 7;
463     + e = 16;
464     + do {
465 max 172 + if (!y) {
466 max 21 + e = 12;
467     + /* md5_transform_CPUbyteorder wants data in CPU byte order */
468     + /* devSect is already in CPU byte order -- no need to convert */
469     + /* 32 bits of sector number + 24 zero bits */
470     + buf[12] = devSect;
471     + buf[13] = 0x80000000;
472     + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
473     + buf[14] = 4024;
474     + buf[15] = 0;
475     + }
476 max 172 + x = 0;
477     + do {
478     + buf[x ] = cpu_to_le32(data[0]);
479     + buf[x + 1] = cpu_to_le32(data[1]);
480     + buf[x + 2] = cpu_to_le32(data[2]);
481     + buf[x + 3] = cpu_to_le32(data[3]);
482     + x += 4;
483     + data += 4;
484     + } while (x < e);
485 max 21 + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
486     + } while (--y >= 0);
487     + ivout[0] = cpu_to_le32(ivout[0]);
488     + ivout[1] = cpu_to_le32(ivout[1]);
489     + ivout[2] = cpu_to_le32(ivout[2]);
490     + ivout[3] = cpu_to_le32(ivout[3]);
491 max 172 +#else
492     + x = 6;
493     + do {
494     + md5_transform_CPUbyteorder(&ivout[0], data);
495     + data += 16;
496     + } while (--x >= 0);
497     + memcpy(buf, data, 48);
498     + /* md5_transform_CPUbyteorder wants data in CPU byte order */
499     + /* devSect is already in CPU byte order -- no need to convert */
500     + /* 32 bits of sector number + 24 zero bits */
501     + buf[12] = devSect;
502     + buf[13] = 0x80000000;
503     + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
504     + buf[14] = 4024;
505     + buf[15] = 0;
506     + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
507 max 21 +#endif
508     +}
509     +
510     +static int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf,
511     + char *loop_buf, int size, int devSect)
512     +{
513     + aes_context *a;
514     + AESmultiKey *m;
515     + int x;
516     + unsigned y;
517     + u_int32_t iv[8];
518     +
519     + if(!size || (size & 511)) {
520     + return -EINVAL;
521     + }
522     + m = (AESmultiKey *)lo->key_data;
523     + y = m->keyMask;
524     + if(cmd == READ) {
525     + while(size) {
526 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
527     + read_lock(&m->rwlock);
528     +#endif
529 max 21 + a = m->keyPtr[((unsigned)devSect) & y];
530     + if(y) {
531     + memcpy(&iv[0], raw_buf, 16);
532     + raw_buf += 16;
533     + loop_buf += 16;
534     + } else {
535     + loop_compute_sector_iv(devSect, &iv[0]);
536     + }
537     + x = 15;
538     + do {
539     + memcpy(&iv[4], raw_buf, 16);
540     + aes_decrypt(a, raw_buf, loop_buf);
541     + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[0];
542     + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[1];
543     + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[2];
544     + *((u_int32_t *)(&loop_buf[12])) ^= iv[3];
545     + if(y && !x) {
546     + raw_buf -= 496;
547     + loop_buf -= 496;
548     + loop_compute_md5_iv(devSect, &iv[4], (u_int32_t *)(&loop_buf[16]));
549     + } else {
550     + raw_buf += 16;
551     + loop_buf += 16;
552     + memcpy(&iv[0], raw_buf, 16);
553     + }
554     + aes_decrypt(a, raw_buf, loop_buf);
555     + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[4];
556     + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[5];
557     + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[6];
558     + *((u_int32_t *)(&loop_buf[12])) ^= iv[7];
559     + if(y && !x) {
560     + raw_buf += 512;
561     + loop_buf += 512;
562     + } else {
563     + raw_buf += 16;
564     + loop_buf += 16;
565     + }
566     + } while(--x >= 0);
567 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
568     + read_unlock(&m->rwlock);
569     +#endif
570 max 21 + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
571     + size -= 512;
572     + devSect++;
573     + }
574     + } else {
575     + while(size) {
576 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
577     + read_lock(&m->rwlock);
578     +#endif
579 max 21 + a = m->keyPtr[((unsigned)devSect) & y];
580     + if(y) {
581 max 376 + /* on 2.4 and later kernels, real raw_buf is not doing */
582     + /* any writes now so it can be used as temp buffer */
583 max 74 + memcpy(raw_buf, loop_buf, 512);
584     + loop_compute_md5_iv(devSect, &iv[0], (u_int32_t *)(&raw_buf[16]));
585     + x = 15;
586     + do {
587     + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
588     + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
589     + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
590     + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
591     + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
592     + memcpy(&iv[0], raw_buf, 16);
593     + raw_buf += 16;
594     + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
595     + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
596     + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
597     + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
598     + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
599     + memcpy(&iv[0], raw_buf, 16);
600     + raw_buf += 16;
601     + } while(--x >= 0);
602     + loop_buf += 512;
603 max 21 + } else {
604     + loop_compute_sector_iv(devSect, &iv[0]);
605 max 74 + x = 15;
606     + do {
607     + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
608     + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
609     + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
610     + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
611     + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
612     + memcpy(&iv[0], raw_buf, 16);
613     + loop_buf += 16;
614     + raw_buf += 16;
615     + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
616     + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
617     + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
618     + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
619     + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
620     + memcpy(&iv[0], raw_buf, 16);
621     + loop_buf += 16;
622     + raw_buf += 16;
623     + } while(--x >= 0);
624 max 21 + }
625 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
626     + read_unlock(&m->rwlock);
627     +#endif
628 max 21 + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
629     + size -= 512;
630     + devSect++;
631     + }
632     + }
633     + return(0);
634     +}
635     +
636     +static int keySetup_aes(struct loop_device *lo, struct loop_info *info)
637     +{
638     + AESmultiKey *m;
639 max 278 + union {
640     + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
641     + unsigned char b[32];
642     + } un;
643 max 21 +
644     + lo->key_data = m = allocMultiKey();
645     + if(!m) return(-ENOMEM);
646 max 278 + memcpy(&un.b[0], &info->lo_encrypt_key[0], 32);
647     + aes_set_key(m->keyPtr[0], &un.b[0], info->lo_encrypt_key_size, 0);
648 max 21 + memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key));
649 max 278 + memset(&un.b[0], 0, 32);
650 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
651     + keyScrubTimerInit(lo);
652     +#endif
653 max 21 + return(0);
654     +}
655     +
656     +static int keyClean_aes(struct loop_device *lo)
657     +{
658     + if(lo->key_data) {
659     + clearAndFreeMultiKey((AESmultiKey *)lo->key_data);
660     + lo->key_data = 0;
661     + }
662     + return(0);
663     +}
664     +
665     +static int handleIoctl_aes(struct loop_device *lo, int cmd, unsigned long arg)
666     +{
667     + int err;
668     +
669     + switch (cmd) {
670     + case LOOP_MULTI_KEY_SETUP:
671     + err = multiKeySetup(lo, (unsigned char *)arg);
672     + break;
673     + default:
674     + err = -EINVAL;
675     + }
676     + return err;
677     +}
678     +
679 max 254 +static struct loop_func_table funcs_aes = {
680 max 21 + number: 16, /* 16 == AES */
681     + transfer: transfer_aes,
682     + init: keySetup_aes,
683     + release: keyClean_aes,
684     + ioctl: handleIoctl_aes
685     +};
686     +
687     +EXPORT_SYMBOL(loop_compute_sector_iv);
688     +EXPORT_SYMBOL(loop_compute_md5_iv);
689     +#endif /* CONFIG_BLK_DEV_LOOP_AES */
690     +
691 max 254 +/* xfer_funcs[0] is special - its release function is never called */
692 max 278 +struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
693     + &none_funcs,
694 max 21 + &xor_funcs,
695     +#if CONFIG_BLK_DEV_LOOP_AES
696     + [LO_CRYPT_AES] = &funcs_aes,
697     +#endif
698 max 278 +};
699     +
/*
 * lo_prealloc[0] is the default number of RAM pages pre-allocated for
 * each (configured) device backed loop.  The remaining entries are
 * optional per-device overrides stored as pairs:
 *
 *     loop_index, number_of_pages
 *
 * A pair whose page count is zero is unused.
 */
static int lo_prealloc[9] = {
	125,		/* default */
	999, 0,		/* override pair 1 (unused) */
	999, 0,		/* override pair 2 (unused) */
	999, 0,		/* override pair 3 (unused) */
	999, 0		/* override pair 4 (unused) */
};
#define LO_PREALLOC_MIN 4    /* minimum user defined pre-allocated RAM pages */
#define LO_PREALLOC_MAX 512  /* maximum user defined pre-allocated RAM pages */
710     +
711     +#ifdef MODULE
712     +MODULE_PARM(lo_prealloc, "1-9i");
713     +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
714     +#else
715     +static int __init lo_prealloc_setup(char *str)
716     +{
717     + int x, y, z;
718     +
719     + for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) {
720     + z = get_option(&str, &y);
721     + if (z > 0)
722     + lo_prealloc[x] = y;
723     + if (z < 2)
724     + break;
725     + }
726     + return 1;
727 max 278 +}
728 max 21 +__setup("lo_prealloc=", lo_prealloc_setup);
729     +#endif
730 max 278 +
/*
 * Nice value of the loop helper thread, in range
 * from 0 (low priority) to -20 (high priority).
 */
#if !defined(DEF_NICE) || !defined(DEF_COUNTER)
static int lo_nice = -1;   /* O(1) scheduler default */
#else
static int lo_nice = -20;  /* old scheduler default */
#endif
740     +
741     +#ifdef MODULE
742     +MODULE_PARM(lo_nice, "1i");
743     +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
744     +#else
745     +static int __init lo_nice_setup(char *str)
746     +{
747     + int y;
748     +
749     + if (get_option(&str, &y) == 1)
750     + lo_nice = y;
751     + return 1;
752     +}
753     +__setup("lo_nice=", lo_nice_setup);
754     +#endif
755     +
/*
 * Queue priority table used by loop_get_bh(): q0 is tried first, then q1,
 * then q2; x0..x2 are the list numbers reported for each queue.
 */
typedef struct {
	struct buffer_head **q0, **q1, **q2;
	int x0, x1, x2;
} que_look_up_table;
764 max 278 +
765 max 21 +static void loop_prealloc_cleanup(struct loop_device *lo)
766 max 278 +{
767 max 21 + struct buffer_head *bh;
768 max 278 +
769 max 21 + while ((bh = lo->lo_bh_free)) {
770     + __free_page(bh->b_page);
771     + lo->lo_bh_free = bh->b_reqnext;
772     + bh->b_reqnext = NULL;
773     + kmem_cache_free(bh_cachep, bh);
774 max 278 + }
775     +}
776     +
777 max 21 +static int loop_prealloc_init(struct loop_device *lo, int y)
778 max 278 +{
779 max 21 + struct buffer_head *bh;
780     + int x;
781 max 278 +
782 max 21 + if(!y) {
783     + y = lo_prealloc[0];
784     + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) {
785     + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) {
786     + y = lo_prealloc[x + 1];
787     + break;
788     + }
789     + }
790     + }
791     + lo->lo_bh_flsh = (y * 3) / 4;
792 max 278 +
793 max 21 + for (x = 0; x < y; x++) {
794     + bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
795     + if (!bh) {
796     + loop_prealloc_cleanup(lo);
797     + return 1;
798     + }
799     + bh->b_page = alloc_page(GFP_KERNEL);
800     + if (!bh->b_page) {
801     + bh->b_reqnext = NULL;
802     + kmem_cache_free(bh_cachep, bh);
803     + loop_prealloc_cleanup(lo);
804     + return 1;
805     + }
806     + bh->b_reqnext = lo->lo_bh_free;
807     + lo->lo_bh_free = bh;
808 max 278 + }
809 max 21 + return 0;
810 max 278 +}
811     +
812 max 21 +static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
813 max 278 +{
814 max 21 + unsigned long flags;
815 max 278 +
816 max 21 + spin_lock_irqsave(&lo->lo_lock, flags);
817     + if (*q) {
818     + bh->b_reqnext = (*q)->b_reqnext;
819     + (*q)->b_reqnext = bh;
820     + } else {
821     + bh->b_reqnext = bh;
822     + }
823     + *q = bh;
824     + spin_unlock_irqrestore(&lo->lo_lock, flags);
825 max 278 +
826 max 21 + if (waitqueue_active(&lo->lo_bh_wait))
827     + wake_up_interruptible(&lo->lo_bh_wait);
828 max 278 +}
829     +
830 max 21 +static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
831 max 278 +{
832 max 21 + spin_lock_irq(&lo->lo_lock);
833     + if (*q) {
834     + bh->b_reqnext = (*q)->b_reqnext;
835     + (*q)->b_reqnext = bh;
836     + } else {
837     + bh->b_reqnext = bh;
838     + *q = bh;
839     + }
840     + spin_unlock_irq(&lo->lo_lock);
841 max 278 +}
842     +
843 max 21 +static struct buffer_head *loop_get_bh(struct loop_device *lo, int *list_nr,
844     + que_look_up_table *qt)
845 max 278 +{
846 max 21 + struct buffer_head *bh = NULL, *last;
847 max 278 +
848 max 21 + spin_lock_irq(&lo->lo_lock);
849     + if ((last = *qt->q0)) {
850     + bh = last->b_reqnext;
851     + if (bh == last)
852     + *qt->q0 = NULL;
853     + else
854     + last->b_reqnext = bh->b_reqnext;
855     + bh->b_reqnext = NULL;
856     + *list_nr = qt->x0;
857     + } else if ((last = *qt->q1)) {
858     + bh = last->b_reqnext;
859     + if (bh == last)
860     + *qt->q1 = NULL;
861     + else
862     + last->b_reqnext = bh->b_reqnext;
863     + bh->b_reqnext = NULL;
864     + *list_nr = qt->x1;
865     + } else if ((last = *qt->q2)) {
866     + bh = last->b_reqnext;
867     + if (bh == last)
868     + *qt->q2 = NULL;
869     + else
870     + last->b_reqnext = bh->b_reqnext;
871     + bh->b_reqnext = NULL;
872     + *list_nr = qt->x2;
873 max 278 + }
874 max 21 + spin_unlock_irq(&lo->lo_lock);
875     + return bh;
876 max 278 +}
877     +
878 max 21 +static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b)
879 max 278 +{
880     + unsigned long flags;
881 max 21 + int wk;
882 max 278 +
883     + spin_lock_irqsave(&lo->lo_lock, flags);
884 max 21 + b->b_reqnext = lo->lo_bh_free;
885     + lo->lo_bh_free = b;
886     + wk = lo->lo_bh_need;
887 max 278 + spin_unlock_irqrestore(&lo->lo_lock, flags);
888     +
889 max 21 + if (wk && waitqueue_active(&lo->lo_bh_wait))
890     + wake_up_interruptible(&lo->lo_bh_wait);
891 max 278 +}
892     +
893 max 21 +static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate)
894 max 278 +{
895 max 21 + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
896     + struct buffer_head *rbh = bh->b_private;
897 max 278 +
898 max 21 + rbh->b_reqnext = NULL;
899     + rbh->b_end_io(rbh, uptodate);
900     + loop_put_buffer(lo, bh);
901     + if (atomic_dec_and_test(&lo->lo_pending))
902     + wake_up_interruptible(&lo->lo_bh_wait);
903 max 278 +}
904     +
905 max 21 +static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate)
906 max 278 +{
907     + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
908     +
909 max 21 + if (!uptodate)
910     + loop_end_io_transfer_wr(bh, uptodate);
911     + else
912     + loop_add_queue_last(lo, bh, &lo->lo_bh_que0);
913 max 278 +}
914     +
915     +static struct buffer_head *loop_get_buffer(struct loop_device *lo,
916 max 21 + struct buffer_head *rbh, int from_thread, int rw)
917 max 278 +{
918     + struct buffer_head *bh;
919 max 21 + struct page *p;
920     + unsigned long flags;
921 max 278 +
922 max 21 + spin_lock_irqsave(&lo->lo_lock, flags);
923     + bh = lo->lo_bh_free;
924     + if (bh) {
925     + lo->lo_bh_free = bh->b_reqnext;
926     + if (from_thread)
927     + lo->lo_bh_need = 0;
928     + } else {
929     + if (from_thread)
930     + lo->lo_bh_need = 1;
931 max 278 + }
932 max 21 + spin_unlock_irqrestore(&lo->lo_lock, flags);
933     + if (!bh)
934     + return (struct buffer_head *)0;
935 max 278 +
936 max 21 + p = bh->b_page;
937     + memset(bh, 0, sizeof(struct buffer_head));
938     + bh->b_page = p;
939 max 278 +
940 max 21 + bh->b_private = rbh;
941 max 278 + bh->b_size = rbh->b_size;
942     + bh->b_dev = rbh->b_rdev;
943 max 21 + bh->b_rdev = lo->lo_device;
944 max 278 + bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
945 max 21 + bh->b_data = page_address(bh->b_page);
946     + bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd;
947     + bh->b_rsector = rbh->b_rsector + lo->lo_offs_sec;
948     + init_waitqueue_head(&bh->b_wait);
949     +
950     + return bh;
951     +}
952     +
953     +static int figure_loop_size(struct loop_device *lo)
954     +{
955     + loff_t size, offs;
956     + unsigned int x;
957     + int err = 0;
958     + kdev_t lodev = lo->lo_device;
959     +
960     + offs = lo->lo_offset;
961     + if (S_ISREG(lo->lo_backing_file->f_dentry->d_inode->i_mode)) {
962     + size = lo->lo_backing_file->f_dentry->d_inode->i_size;
963     + } else {
964     + offs &= ~((loff_t)511);
965     + if (blk_size[MAJOR(lodev)])
966     + size = (loff_t)(blk_size[MAJOR(lodev)][MINOR(lodev)]) << BLOCK_SIZE_BITS;
967     + else
968     + size = 1024*1024*1024; /* unknown size */
969     + }
970     + if ((offs > 0) && (offs < size)) {
971     + size -= offs;
972     + } else {
973     + if (offs)
974     + err = -EINVAL;
975     + lo->lo_offset = 0;
976     + lo->lo_offs_sec = lo->lo_iv_remove = 0;
977     + }
978     + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) {
979     + size = lo->lo_sizelimit;
980     + } else {
981     + if (lo->lo_sizelimit)
982     + err = -EINVAL;
983     + lo->lo_sizelimit = 0;
984     + }
985     + size >>= BLOCK_SIZE_BITS;
986 max 278 +
987     + /*
988 max 21 + * Unfortunately, if we want to do I/O on the device,
989     + * the number of 1024-byte blocks has to fit into unsigned int
990 max 278 + */
991 max 21 + x = (unsigned int)size;
992     + if ((loff_t)x != size) {
993     + err = -EFBIG;
994     + size = 0;
995     + }
996 max 278 +
997 max 21 + loop_sizes[lo->lo_number] = size;
998     + return err;
999     +}
1000 max 278 +
1001 max 21 +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w)
1002     +{
1003     + mm_segment_t fs;
1004     + int x, y, z;
1005 max 278 +
1006 max 21 + y = 0;
1007     + do {
1008     + z = size - y;
1009 max 254 + fs = get_fs();
1010 max 21 + set_fs(get_ds());
1011     + if (w) {
1012     + x = file->f_op->write(file, buf + y, z, ppos);
1013     + set_fs(fs);
1014     + } else {
1015     + x = file->f_op->read(file, buf + y, z, ppos);
1016     + set_fs(fs);
1017     + if (!x)
1018     + return 1;
1019     + }
1020     + if (x < 0) {
1021     + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) {
1022     + run_task_queue(&tq_disk);
1023     + set_current_state(TASK_INTERRUPTIBLE);
1024     + schedule_timeout(HZ / 2);
1025     + continue;
1026     + }
1027     + return 1;
1028     + }
1029     + y += x;
1030     + } while (y < size);
1031     + return 0;
1032     +}
1033 max 278 +
1034 max 21 +static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
1035     +{
1036     + loff_t pos;
1037     + struct file *file = lo->lo_backing_file;
1038     + char *data, *buf;
1039     + unsigned int size, len;
1040     + unsigned long IV;
1041     +
1042     + pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
1043     + buf = page_address(lo->lo_bh_free->b_page);
1044     + len = bh->b_size;
1045     + data = bh_kmap(bh);
1046     + IV = bh->b_rsector;
1047     + if (!lo->lo_iv_remove)
1048     + IV += lo->lo_offs_sec;
1049     + while (len > 0) {
1050     + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
1051     + /* this code relies that NONE transfer is a no-op */
1052     + buf = data;
1053     + }
1054     + size = PAGE_SIZE;
1055     + if (size > len)
1056     + size = len;
1057     + if (rw == WRITE) {
1058     + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) {
1059     + printk(KERN_ERR "loop%d: write transfer error, sector %lu\n", lo->lo_number, IV);
1060     + goto kunmap_and_out;
1061     + }
1062     + if (loop_file_io(file, buf, size, &pos, 1)) {
1063     + printk(KERN_ERR "loop%d: write i/o error, sector %lu\n", lo->lo_number, IV);
1064     + goto kunmap_and_out;
1065     + }
1066     + } else {
1067     + if (loop_file_io(file, buf, size, &pos, 0)) {
1068     + printk(KERN_ERR "loop%d: read i/o error, sector %lu\n", lo->lo_number, IV);
1069     + goto kunmap_and_out;
1070     + }
1071     + if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
1072     + printk(KERN_ERR "loop%d: read transfer error, sector %lu\n", lo->lo_number, IV);
1073     + goto kunmap_and_out;
1074     + }
1075 max 348 + flush_dcache_page(bh->b_page);
1076 max 21 + }
1077     + data += size;
1078     + len -= size;
1079     + IV += size >> 9;
1080     + }
1081     + bh_kunmap(bh);
1082     + return 0;
1083     +
1084     +kunmap_and_out:
1085     + bh_kunmap(bh);
1086     + return 1;
1087 max 278 +}
1088     +
1089     +static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
1090     +{
1091 max 21 + struct buffer_head *bh;
1092 max 278 + struct loop_device *lo;
1093 max 21 + char *md;
1094 max 278 +
1095 max 21 + set_current_state(TASK_RUNNING);
1096 max 278 + if (!buffer_locked(rbh))
1097     + BUG();
1098     +
1099     + if (MINOR(rbh->b_rdev) >= max_loop)
1100     + goto out;
1101     +
1102     + lo = &loop_dev[MINOR(rbh->b_rdev)];
1103     + spin_lock_irq(&lo->lo_lock);
1104     + if (lo->lo_state != Lo_bound)
1105     + goto inactive;
1106     + atomic_inc(&lo->lo_pending);
1107     + spin_unlock_irq(&lo->lo_lock);
1108     +
1109     + if (rw == WRITE) {
1110     + if (lo->lo_flags & LO_FLAGS_READ_ONLY)
1111     + goto err;
1112     + } else if (rw == READA) {
1113     + rw = READ;
1114     + } else if (rw != READ) {
1115 max 21 + printk(KERN_ERR "loop%d: unknown command (%d)\n", lo->lo_number, rw);
1116 max 278 + goto err;
1117     + }
1118     +
1119     + /*
1120     + * file backed, queue for loop_thread to handle
1121     + */
1122     + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1123 max 21 + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que0);
1124 max 254 + return 0;
1125     + }
1126     +
1127     + /*
1128     + * device backed, just remap rdev & rsector for NONE transfer
1129     + */
1130     + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
1131     + rbh->b_rsector += lo->lo_offs_sec;
1132     + rbh->b_rdev = lo->lo_device;
1133     + generic_make_request(rw, rbh);
1134     + if (atomic_dec_and_test(&lo->lo_pending))
1135     + wake_up_interruptible(&lo->lo_bh_wait);
1136 max 278 + return 0;
1137     + }
1138     +
1139     + /*
1140 max 254 + * device backed, start reads and writes now if buffer available
1141 max 278 + */
1142 max 21 + bh = loop_get_buffer(lo, rbh, 0, rw);
1143     + if (!bh) {
1144     + /* just queue request and let thread handle alloc later */
1145     + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2);
1146     + return 0;
1147     + }
1148 max 278 + if (rw == WRITE) {
1149 max 21 + int trv;
1150     + md = bh_kmap(rbh);
1151     + trv = lo_do_transfer(lo, WRITE, bh->b_data, md, bh->b_size, bh->b_rsector - lo->lo_iv_remove);
1152     + bh_kunmap(rbh);
1153     + if (trv) {
1154     + loop_put_buffer(lo, bh);
1155 max 278 + goto err;
1156 max 21 + }
1157 max 278 + }
1158     + generic_make_request(rw, bh);
1159     + return 0;
1160     +
1161     +err:
1162     + if (atomic_dec_and_test(&lo->lo_pending))
1163 max 21 + wake_up_interruptible(&lo->lo_bh_wait);
1164 max 278 +out:
1165     + buffer_IO_error(rbh);
1166     + return 0;
1167     +inactive:
1168     + spin_unlock_irq(&lo->lo_lock);
1169     + goto out;
1170     +}
1171     +
1172     +/*
1173     + * worker thread that handles reads/writes to file backed loop devices,
1174     + * to avoid blocking in our make_request_fn. it also does loop decrypting
1175     + * on reads for block backed loop, as that is too heavy to do from
1176     + * b_end_io context where irqs may be disabled.
         + *
         + * data is the struct loop_device to serve. the thread marks the device
         + * Lo_bound, takes one lo_pending count for itself, ups lo_sem once it is
         + * running and then services the three request queues until lo_pending
         + * drops to zero. it ups lo_sem a second time just before returning 0.
1177     + */
1178     +static int loop_thread(void *data)
1179     +{
1180     + struct loop_device *lo = data;
1181 max 21 + struct buffer_head *bh, *xbh;
1182     + int x, rw, qi = 0, flushcnt = 0;
1183     + wait_queue_t waitq;
1184     + que_look_up_table qt[4] = { /* four scan orders over the three queues; one is picked per pass by ++qi & 3 */
1185     + { &lo->lo_bh_que0, &lo->lo_bh_que1, &lo->lo_bh_que2, 0, 1, 2 },
1186     + { &lo->lo_bh_que2, &lo->lo_bh_que0, &lo->lo_bh_que1, 2, 0, 1 },
1187     + { &lo->lo_bh_que0, &lo->lo_bh_que2, &lo->lo_bh_que1, 0, 2, 1 },
1188     + { &lo->lo_bh_que1, &lo->lo_bh_que0, &lo->lo_bh_que2, 1, 0, 2 }
1189     + };
1190     + char *md;
1191     + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
1192 max 278 +
1193 max 21 + init_waitqueue_entry(&waitq, current);
1194     + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim)); /* overwrite this task's limits with the INIT_RLIMITS table */
1195 max 278 + daemonize();
1196     + exit_files(current);
1197     + reparent_to_init();
1198     +
1199     + sprintf(current->comm, "loop%d", lo->lo_number);
1200     +
1201     + spin_lock_irq(&current->sigmask_lock);
1202     + sigfillset(&current->blocked); /* block every signal */
1203     + flush_signals(current);
1204     + spin_unlock_irq(&current->sigmask_lock);
1205     +
1206 max 21 + if (lo_nice > 0) /* clamp lo_nice to the range -20..0 */
1207     + lo_nice = 0;
1208     + if (lo_nice < -20)
1209     + lo_nice = -20;
1210     +#if defined(DEF_NICE) && defined(DEF_COUNTER)
1211     + /* old scheduler syntax */
1212     + current->policy = SCHED_OTHER;
1213     + current->nice = lo_nice;
1214     +#else
1215     + /* O(1) scheduler syntax */
1216     + set_user_nice(current, lo_nice);
1217     +#endif
1218     +
1219 max 278 + spin_lock_irq(&lo->lo_lock);
1220     + lo->lo_state = Lo_bound;
1221     + atomic_inc(&lo->lo_pending); /* the thread's own pending count */
1222     + spin_unlock_irq(&lo->lo_lock);
1223     +
1224     + current->flags |= PF_NOIO;
1225 max 254 +#if defined(PF_NOFREEZE)
1226     + current->flags |= PF_NOFREEZE;
1227     +#elif defined(PF_IOTHREAD)
1228     + current->flags |= PF_IOTHREAD;
1229     +#endif
1230 max 278 +
1231     + /*
1232     + * up sem, we are running
1233     + */
1234     + up(&lo->lo_sem);
1235     +
1236     + for (;;) {
1237 max 21 + add_wait_queue(&lo->lo_bh_wait, &waitq);
1238     + for (;;) {
1239     + set_current_state(TASK_INTERRUPTIBLE);
1240     + if (!atomic_read(&lo->lo_pending)) /* nothing pending at all: stop waiting, the check after the wait loop ends the thread */
1241     + break;
1242     +
1243     + x = 0;
1244     + spin_lock_irq(&lo->lo_lock);
1245 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
1246     + if(lo->lo_keyscrub_fn) x = 1;
1247     +#endif
1248 max 21 + if (lo->lo_bh_que0) {
1249     + x = 1;
1250     + } else if (lo->lo_bh_que1 || lo->lo_bh_que2) {
1251     + /* file backed works too because lo->lo_bh_need == 0 */
1252     + if (lo->lo_bh_free || !lo->lo_bh_need)
1253     + x = 1;
1254     + }
1255     + spin_unlock_irq(&lo->lo_lock);
1256     + if (x)
1257     + break;
1258     +
1259     + schedule(); /* no runnable work found: sleep until woken on lo_bh_wait */
1260     + }
1261     + set_current_state(TASK_RUNNING);
1262     + remove_wait_queue(&lo->lo_bh_wait, &waitq);
1263     +
1264 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
1265     + if(lo->lo_keyscrub_fn) {
1266     + (*lo->lo_keyscrub_fn)(lo->lo_keyscrub_ptr);
1267     + lo->lo_keyscrub_fn = 0; /* one-shot: cleared after the call */
1268     + }
1269     +#endif
1270 max 278 + /*
1271 max 21 + * could be woken because of tear-down, not because of
1272 max 278 + * pending work
1273     + */
1274     + if (!atomic_read(&lo->lo_pending))
1275     + break;
1276     +
1277 max 21 + /*
1278     + * read queues using alternating order to prevent starvation
1279     + */
1280     + bh = loop_get_bh(lo, &x, &qt[++qi & 3]);
1281     + if (!bh)
1282     + continue;
1283     +
1284     + /*
1285     + * x list tag usage(buffer-allocated)
1286     + * --- -------------- -----------------------
1287     + * 0 lo->lo_bh_que0 dev-read(y) / file-read
1288     + * 1 lo->lo_bh_que1 dev-write(n) / file-write
1289     + * 2 lo->lo_bh_que2 dev-read(n)
1290     + */
1291     + rw = (x == 1) ? WRITE : READ;
1292     + if ((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) {
1293     + /* loop_make_request didn't allocate a buffer, do that now */
1294     + xbh = loop_get_buffer(lo, bh, 1, rw);
1295     + if (!xbh) {
1296     + run_task_queue(&tq_disk);
1297     + flushcnt = 0;
1298     + loop_add_queue_first(lo, bh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2);
1299     + /* lo->lo_bh_need should be 1 now, go back to sleep */
1300     + continue;
1301     + }
1302     + if (rw == WRITE) {
1303     + int trv;
1304     + md = bh_kmap(bh);
1305     + trv = lo_do_transfer(lo, WRITE, xbh->b_data, md, xbh->b_size, xbh->b_rsector - lo->lo_iv_remove);
1306     + bh_kunmap(bh);
1307     + if (trv) {
1308     + loop_put_buffer(lo, xbh);
1309     + buffer_IO_error(bh);
1310     + atomic_dec(&lo->lo_pending); /* request was failed above: drop its pending count */
1311     + continue;
1312     + }
1313     + }
1314     + generic_make_request(rw, xbh);
1315     +
1316     + /* start I/O if there are no more requests lacking buffers */
1317     + x = 0;
1318     + spin_lock_irq(&lo->lo_lock);
1319     + if (!lo->lo_bh_que1 && !lo->lo_bh_que2)
1320     + x = 1;
1321     + spin_unlock_irq(&lo->lo_lock);
1322     + if (x || (++flushcnt >= lo->lo_bh_flsh)) {
1323     + run_task_queue(&tq_disk);
1324     + flushcnt = 0;
1325     + }
1326     +
1327     + /* request not completely processed yet */
1328 max 278 + continue;
1329     + }
1330 max 21 + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1331     + /* request is for file backed device */
1332     + x = do_bh_filebacked(lo, bh, rw);
1333     + bh->b_reqnext = NULL;
1334     + bh->b_end_io(bh, !x); /* x is nonzero on failure, so !x is the uptodate flag */
1335     + } else {
1336     + /* device backed read has completed, do decrypt now */
1337     + xbh = bh->b_private;
1338     + /* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */
1339     + /* instead, recompute IV from original request */
1340     + md = bh_kmap(xbh);
1341     + x = lo_do_transfer(lo, READ, bh->b_data, md, bh->b_size, xbh->b_rsector + lo->lo_offs_sec - lo->lo_iv_remove);
1342 max 348 + flush_dcache_page(xbh->b_page);
1343 max 21 + bh_kunmap(xbh);
1344     + xbh->b_reqnext = NULL;
1345     + xbh->b_end_io(xbh, !x);
1346     + loop_put_buffer(lo, bh);
1347     + }
1348 max 278 +
1349     + /*
1350 max 21 + * woken both for pending work and tear-down, lo_pending
1351 max 278 + * will hit zero then
1352     + */
1353     + if (atomic_dec_and_test(&lo->lo_pending))
1354     + break;
1355     + }
1356     +
1357     + up(&lo->lo_sem);
1358     + return 0;
1359     +}
1360     +
1361 max 21 +static void loop_set_softblksz(struct loop_device *lo, kdev_t dev)
1362     +{
1363     + int bs = 0, x;
1364     +
1365     + if (blksize_size[MAJOR(lo->lo_device)])
1366     + bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
1367     + if (!bs)
1368     + bs = BLOCK_SIZE;
1369     + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1370     + x = loop_sizes[lo->lo_number];
1371     + if ((bs == 8192) && (x & 7))
1372     + bs = 4096;
1373     + if ((bs == 4096) && (x & 3))
1374     + bs = 2048;
1375     + if ((bs == 2048) && (x & 1))
1376     + bs = 1024;
1377     + }
1378     + set_blocksize(dev, bs);
1379     +}
1380     +
1381 max 278 +static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev,
1382     + unsigned int arg)
1383     +{
1384     + struct file *file;
1385     + struct inode *inode;
1386     + kdev_t lo_device;
1387 max 21 + int lo_flags = 0, hardsz = 512;
1388 max 278 + int error;
1389     +
1390     + MOD_INC_USE_COUNT;
1391     +
1392     + error = -EBUSY;
1393     + if (lo->lo_state != Lo_unbound)
1394     + goto out;
1395     +
1396     + error = -EBADF;
1397     + file = fget(arg);
1398     + if (!file)
1399     + goto out;
1400     +
1401     + error = -EINVAL;
1402     + inode = file->f_dentry->d_inode;
1403     +
1404     + if (!(file->f_mode & FMODE_WRITE))
1405     + lo_flags |= LO_FLAGS_READ_ONLY;
1406     +
1407 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
1408     + lo->lo_keyscrub_fn = 0;
1409     +#endif
1410 max 21 + lo->lo_offset = lo->lo_sizelimit = 0;
1411     + lo->lo_offs_sec = lo->lo_iv_remove = 0;
1412     + lo->lo_bh_free = lo->lo_bh_que2 = lo->lo_bh_que1 = lo->lo_bh_que0 = NULL;
1413     + lo->lo_bh_need = lo->lo_bh_flsh = 0;
1414     + init_waitqueue_head(&lo->lo_bh_wait);
1415 max 278 + if (S_ISBLK(inode->i_mode)) {
1416     + lo_device = inode->i_rdev;
1417     + if (lo_device == dev) {
1418     + error = -EBUSY;
1419     + goto out_putf;
1420     + }
1421 max 21 + if (loop_prealloc_init(lo, 0)) {
1422     + error = -ENOMEM;
1423     + goto out_putf;
1424     + }
1425     + hardsz = get_hardsect_size(lo_device);
1426 max 278 + } else if (S_ISREG(inode->i_mode)) {
1427     + /*
1428     + * If we can't read - sorry. If we only can't write - well,
1429     + * it's going to be read-only.
1430     + */
1431 max 21 + if (!file->f_op || !file->f_op->read)
1432 max 278 + goto out_putf;
1433     +
1434 max 21 + if (!file->f_op->write)
1435 max 278 + lo_flags |= LO_FLAGS_READ_ONLY;
1436     +
1437     + lo_device = inode->i_dev;
1438     + lo_flags |= LO_FLAGS_DO_BMAP;
1439 max 21 + if (loop_prealloc_init(lo, 1)) {
1440     + error = -ENOMEM;
1441     + goto out_putf;
1442     + }
1443 max 278 + error = 0;
1444     + } else
1445     + goto out_putf;
1446     +
1447     + get_file(file);
1448     +
1449 max 21 + if ((S_ISREG(inode->i_mode) && IS_RDONLY(inode)) || is_read_only(lo_device)
1450 max 278 + || !(lo_file->f_mode & FMODE_WRITE))
1451     + lo_flags |= LO_FLAGS_READ_ONLY;
1452     +
1453     + set_device_ro(dev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
1454     +
1455     + lo->lo_device = lo_device;
1456     + lo->lo_flags = lo_flags;
1457 max 21 + if(lo_flags & LO_FLAGS_READ_ONLY)
1458     + lo->lo_flags |= 0x200000; /* export to user space */
1459 max 278 + lo->lo_backing_file = file;
1460     + lo->transfer = NULL;
1461     + lo->ioctl = NULL;
1462 max 21 + if (figure_loop_size(lo)) {
1463     + error = -EFBIG;
1464     + goto out_cleanup;
1465     + }
1466 max 278 +
1467 max 21 + if (lo_flags & LO_FLAGS_DO_BMAP) {
1468     + lo->old_gfp_mask = inode->i_mapping->gfp_mask;
1469     + inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS);
1470     + inode->i_mapping->gfp_mask |= __GFP_HIGH;
1471     + } else {
1472     + lo->old_gfp_mask = -1;
1473     + }
1474 max 278 +
1475 max 21 + loop_hardsizes[MINOR(dev)] = hardsz;
1476     + loop_set_softblksz(lo, dev);
1477 max 278 +
1478 max 21 + error = kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
1479     + if(error < 0)
1480     + goto out_mapping;
1481     + down(&lo->lo_sem);
1482 max 278 + fput(file);
1483     + return 0;
1484     +
1485 max 21 + out_mapping:
1486     + if(lo->old_gfp_mask != -1)
1487     + inode->i_mapping->gfp_mask = lo->old_gfp_mask;
1488     + out_cleanup:
1489     + loop_prealloc_cleanup(lo);
1490     + fput(file);
1491 max 278 + out_putf:
1492     + fput(file);
1493     + out:
1494     + MOD_DEC_USE_COUNT;
1495     + return error;
1496     +}
1497     +
1498     +static int loop_release_xfer(struct loop_device *lo)
1499     +{
1500 max 254 + int err = 0;
1501 max 278 + if (lo->lo_encrypt_type) {
1502 max 254 + struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type];
1503 max 21 + lo->transfer = NULL;
1504 max 278 + if (xfer && xfer->release)
1505 max 254 + err = xfer->release(lo);
1506 max 278 + if (xfer && xfer->unlock)
1507 max 254 + xfer->unlock(lo);
1508 max 278 + lo->lo_encrypt_type = 0;
1509     + }
1510     + return err;
1511     +}
1512     +
1513     +static int loop_init_xfer(struct loop_device *lo, int type,struct loop_info *i)
1514     +{
1515 max 254 + int err = 0;
1516 max 278 + if (type) {
1517 max 254 + struct loop_func_table *xfer = xfer_funcs[type];
1518 max 278 + if (xfer->init)
1519     + err = xfer->init(lo, i);
1520 max 254 + if (!err) {
1521 max 278 + lo->lo_encrypt_type = type;
1522     + if (xfer->lock)
1523     + xfer->lock(lo);
1524     + }
1525     + }
1526     + return err;
1527 max 254 +}
1528 max 278 +
1529     +static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1530     +{
1531     + struct file *filp = lo->lo_backing_file;
1532     + int gfp = lo->old_gfp_mask;
1533     +
1534     + if (lo->lo_state != Lo_bound)
1535     + return -ENXIO;
1536     + if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
1537     + return -EBUSY;
1538     + if (filp==NULL)
1539     + return -EINVAL;
1540     +
1541     + spin_lock_irq(&lo->lo_lock);
1542     + lo->lo_state = Lo_rundown;
1543     + if (atomic_dec_and_test(&lo->lo_pending))
1544 max 21 + wake_up_interruptible(&lo->lo_bh_wait);
1545 max 278 + spin_unlock_irq(&lo->lo_lock);
1546     +
1547     + down(&lo->lo_sem);
1548     +
1549 max 21 + loop_prealloc_cleanup(lo);
1550 max 278 + lo->lo_backing_file = NULL;
1551     +
1552     + loop_release_xfer(lo);
1553     + lo->transfer = NULL;
1554     + lo->ioctl = NULL;
1555     + lo->lo_device = 0;
1556     + lo->lo_encrypt_type = 0;
1557 max 376 +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
1558     + lo->lo_keyscrub_fn = 0;
1559     +#endif
1560 max 21 + lo->lo_offset = lo->lo_sizelimit = 0;
1561     + lo->lo_offs_sec = lo->lo_iv_remove = 0;
1562 max 278 + lo->lo_encrypt_key_size = 0;
1563     + lo->lo_flags = 0;
1564     + memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1565     + memset(lo->lo_name, 0, LO_NAME_SIZE);
1566 max 21 + memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1567 max 278 + loop_sizes[lo->lo_number] = 0;
1568     + invalidate_bdev(bdev, 0);
1569 max 21 + if (gfp != -1)
1570     + filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
1571 max 278 + lo->lo_state = Lo_unbound;
1572     + fput(filp);
1573     + MOD_DEC_USE_COUNT;
1574     + return 0;
1575     +}
1576     +
1577 max 21 +static void
1578     +loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
1579     +{
1580     + memset(info64, 0, sizeof(*info64));
1581     + info64->lo_number = info->lo_number;
1582     + info64->lo_device = info->lo_device;
1583     + info64->lo_inode = info->lo_inode;
1584     + info64->lo_rdevice = info->lo_rdevice;
1585     + info64->lo_offset = info->lo_offset;
1586     + info64->lo_encrypt_type = info->lo_encrypt_type;
1587     + info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
1588     + info64->lo_flags = info->lo_flags;
1589     + info64->lo_init[0] = info->lo_init[0];
1590     + info64->lo_init[1] = info->lo_init[1];
1591     + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
1592     + memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
1593     + else
1594     + memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
1595     + memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
1596     +}
1597     +
1598     +static int
1599     +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info)
1600     +{
1601     + memset(info, 0, sizeof(*info));
1602     + info->lo_number = info64->lo_number;
1603     + info->lo_device = info64->lo_device;
1604     + info->lo_inode = info64->lo_inode;
1605     + info->lo_rdevice = info64->lo_rdevice;
1606     + info->lo_offset = info64->lo_offset;
1607     + info->lo_encrypt_type = info64->lo_encrypt_type;
1608     + info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
1609     + info->lo_flags = info64->lo_flags;
1610     + info->lo_init[0] = info64->lo_init[0];
1611     + info->lo_init[1] = info64->lo_init[1];
1612     + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
1613     + memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1614     + else
1615     + memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
1616     + memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1617     +
1618     + /* error in case values were truncated */
1619     + if (info->lo_device != info64->lo_device ||
1620     + info->lo_rdevice != info64->lo_rdevice ||
1621     + info->lo_inode != info64->lo_inode ||
1622     + info->lo_offset != info64->lo_offset ||
1623     + info64->lo_sizelimit)
1624     + return -EOVERFLOW;
1625     +
1626     + return 0;
1627     +}
1628     +
1629     +static int loop_set_status(struct loop_device *lo, kdev_t dev, struct loop_info64 *info, struct loop_info *oldinfo)
1630 max 278 +{
1631     + int err;
1632     + unsigned int type;
1633     +
1634 max 254 + if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
1635 max 278 + !capable(CAP_SYS_ADMIN))
1636     + return -EPERM;
1637     + if (lo->lo_state != Lo_bound)
1638     + return -ENXIO;
1639 max 21 + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
1640 max 278 + return -EINVAL;
1641 max 254 + type = info->lo_encrypt_type;
1642 max 278 + if (type >= MAX_LO_CRYPT || xfer_funcs[type] == NULL)
1643     + return -EINVAL;
1644 max 21 + if (type == LO_CRYPT_XOR && info->lo_encrypt_key_size == 0)
1645 max 278 + return -EINVAL;
1646     + err = loop_release_xfer(lo);
1647     + if (err)
1648     + return err;
1649     +
1650 max 21 + if ((loff_t)info->lo_offset < 0) {
1651     + /* negative offset == remove offset from IV computations */
1652     + lo->lo_offset = -(info->lo_offset);
1653     + lo->lo_iv_remove = lo->lo_offset >> 9;
1654     + } else {
1655     + /* positive offset == include offset in IV computations */
1656     + lo->lo_offset = info->lo_offset;
1657     + lo->lo_iv_remove = 0;
1658     + }
1659     + lo->lo_offs_sec = lo->lo_offset >> 9;
1660     + lo->lo_sizelimit = info->lo_sizelimit;
1661     + err = figure_loop_size(lo);
1662     + if (err)
1663     + return err;
1664     + loop_set_softblksz(lo, dev);
1665 max 278 +
1666 max 21 + /* transfer init function for 2.4 kernels takes old style struct */
1667     + err = loop_init_xfer(lo, type, oldinfo);
1668     + /* copy key -- just in case transfer init func modified it */
1669     + memcpy(info->lo_encrypt_key, oldinfo->lo_encrypt_key, sizeof(info->lo_encrypt_key));
1670 max 254 + if (err)
1671 max 21 + return err;
1672 max 254 +
1673 max 21 + strncpy(lo->lo_name, info->lo_file_name, LO_NAME_SIZE);
1674     + strncpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
1675 max 278 + lo->transfer = xfer_funcs[type]->transfer;
1676     + lo->ioctl = xfer_funcs[type]->ioctl;
1677 max 21 + lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
1678     + lo->lo_init[0] = info->lo_init[0];
1679     + lo->lo_init[1] = info->lo_init[1];
1680     + if (info->lo_encrypt_key_size) {
1681 max 254 + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
1682 max 21 + info->lo_encrypt_key_size);
1683 max 254 + lo->lo_key_owner = current->uid;
1684 max 21 + }
1685     +
1686 max 278 + return 0;
1687     +}
1688     +
1689 max 21 +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1690 max 278 +{
1691     + struct file *file = lo->lo_backing_file;
1692     +
1693     + if (lo->lo_state != Lo_bound)
1694     + return -ENXIO;
1695 max 21 + memset(info, 0, sizeof(*info));
1696     + info->lo_number = lo->lo_number;
1697     + info->lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev);
1698     + info->lo_inode = file->f_dentry->d_inode->i_ino;
1699     + info->lo_rdevice = kdev_t_to_nr(lo->lo_device);
1700     + info->lo_offset = lo->lo_iv_remove ? -(lo->lo_offset) : lo->lo_offset;
1701     + info->lo_sizelimit = lo->lo_sizelimit;
1702     + info->lo_flags = lo->lo_flags;
1703     + strncpy(info->lo_file_name, lo->lo_name, LO_NAME_SIZE);
1704     + strncpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1705     + info->lo_encrypt_type = lo->lo_encrypt_type;
1706 max 278 + if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1707 max 21 + info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1708     + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1709 max 278 + lo->lo_encrypt_key_size);
1710 max 21 + info->lo_init[0] = lo->lo_init[0];
1711     + info->lo_init[1] = lo->lo_init[1];
1712 max 278 + }
1713 max 21 + return 0;
1714     +}
1715     +
1716     +static int
1717     +loop_set_status_n(struct loop_device *lo, kdev_t dev, void *arg, int n)
1718     +{
1719     + struct loop_info info;
1720     + struct loop_info64 info64;
1721     + int err;
1722     +
1723     + if (n) {
1724     + if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
1725     + return -EFAULT;
1726     + /* truncation errors can be ignored here as transfer init func only wants key bits */
1727     + loop_info64_to_old(&info64, &info);
1728     + } else {
1729     + if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1730     + return -EFAULT;
1731     + loop_info64_from_old(&info, &info64);
1732     + }
1733     + err = loop_set_status(lo, dev, &info64, &info);
1734     + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key));
1735     + memset(&info64.lo_encrypt_key[0], 0, sizeof(info64.lo_encrypt_key));
1736     + return err;
1737     +}
1738     +
1739     +static int
1740     +loop_get_status_old(struct loop_device *lo, struct loop_info *arg) {
1741     + struct loop_info info;
1742     + struct loop_info64 info64;
1743     + int err = 0;
1744     +
1745     + if (!arg)
1746     + err = -EINVAL;
1747     + if (!err)
1748     + err = loop_get_status(lo, &info64);
1749     + if (!err)
1750     + err = loop_info64_to_old(&info64, &info);
1751     + if (!err && copy_to_user(arg, &info, sizeof(info)))
1752     + err = -EFAULT;
1753     +
1754     + return err;
1755     +}
1756     +
1757     +static int
1758     +loop_get_status64(struct loop_device *lo, struct loop_info64 *arg) {
1759     + struct loop_info64 info64;
1760     + int err = 0;
1761     +
1762     + if (!arg)
1763     + err = -EINVAL;
1764     + if (!err)
1765     + err = loop_get_status(lo, &info64);
1766     + if (!err && copy_to_user(arg, &info64, sizeof(info64)))
1767     + err = -EFAULT;
1768     +
1769     + return err;
1770 max 278 +}
1771     +
1772     +static int lo_ioctl(struct inode * inode, struct file * file, /* ioctl entry point: validates the device, then dispatches cmd under lo_ctl_mutex */
1773     + unsigned int cmd, unsigned long arg)
1774     +{
1775     + struct loop_device *lo;
1776     + int dev, err;
1777     +
1778     + if (!inode)
1779     + return -EINVAL;
1780     + if (MAJOR(inode->i_rdev) != MAJOR_NR) { /* wrong major: warn and refuse */
1781     + printk(KERN_WARNING "lo_ioctl: pseudo-major != %d\n",
1782     + MAJOR_NR);
1783     + return -ENODEV;
1784     + }
1785     + dev = MINOR(inode->i_rdev); /* the minor number is the index into loop_dev[] */
1786     + if (dev >= max_loop)
1787     + return -ENODEV;
1788     + lo = &loop_dev[dev];
1789     + down(&lo->lo_ctl_mutex); /* held for the whole command; every case leaves via the single up() after the switch */
1790     + switch (cmd) {
1791     + case LOOP_SET_FD:
1792     + err = loop_set_fd(lo, file, inode->i_rdev, arg);
1793     + break;
1794     + case LOOP_CLR_FD:
1795     + err = loop_clr_fd(lo, inode->i_bdev);
1796     + break;
1797     + case LOOP_SET_STATUS:
1798 max 21 + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 0); /* same helper as LOOP_SET_STATUS64; only the last argument differs (0 here, 1 there) */
1799 max 278 + break;
1800     + case LOOP_GET_STATUS:
1801 max 21 + err = loop_get_status_old(lo, (struct loop_info *) arg);
1802     + break;
1803     + case LOOP_SET_STATUS64:
1804     + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 1);
1805     + break;
1806     + case LOOP_GET_STATUS64:
1807     + err = loop_get_status64(lo, (struct loop_info64 *) arg);
1808 max 278 + break;
1809     + case BLKGETSIZE:
1810     + if (lo->lo_state != Lo_bound) { /* size queries are only answered for a bound device */
1811     + err = -ENXIO;
1812     + break;
1813     + }
1814     + err = put_user((unsigned long)loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); /* NOTE(review): <<1 presumably converts 1 KiB units to 512-byte sectors - confirm loop_sizes[] units */
1815     + break;
1816     + case BLKGETSIZE64:
1817     + if (lo->lo_state != Lo_bound) {
1818     + err = -ENXIO;
1819     + break;
1820     + }
1821     + err = put_user((u64)loop_sizes[lo->lo_number] << 10, (u64*)arg); /* widened to u64 before the shift; NOTE(review): <<10 presumably converts 1 KiB units to bytes - confirm */
1822     + break;
1823     + case BLKBSZGET: /* these five commands are passed straight to blk_ioctl() */
1824     + case BLKBSZSET:
1825     + case BLKSSZGET:
1826 max 21 + case BLKROGET:
1827     + case BLKROSET:
1828 max 278 + err = blk_ioctl(inode->i_rdev, cmd, arg);
1829     + break;
1830     + default:
1831     + err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; /* unknown command: use the device's own ioctl hook if one is set, else -EINVAL */
1832     + }
1833     + up(&lo->lo_ctl_mutex);
1834     + return err;
1835     +}
1836     +
1837     +static int lo_open(struct inode *inode, struct file *file) /* open entry point: validates the device number and takes one reference on the device and on the module */
1838     +{
1839     + struct loop_device *lo;
1840 max 21 + int dev;
1841 max 278 +
1842     + if (!inode)
1843     + return -EINVAL;
1844     + if (MAJOR(inode->i_rdev) != MAJOR_NR) {
1845     + printk(KERN_WARNING "lo_open: pseudo-major != %d\n", MAJOR_NR);
1846     + return -ENODEV;
1847     + }
1848     + dev = MINOR(inode->i_rdev); /* the minor number is the index into loop_dev[] */
1849     + if (dev >= max_loop)
1850     + return -ENODEV;
1851     +
1852     + lo = &loop_dev[dev];
1853     + MOD_INC_USE_COUNT; /* matched by MOD_DEC_USE_COUNT in lo_release */
1854     + down(&lo->lo_ctl_mutex);
1855     + lo->lo_refcnt++; /* open count is modified only while lo_ctl_mutex is held */
1856     + up(&lo->lo_ctl_mutex);
1857     + return 0;
1858     +}
1859     +
1860     +static int lo_release(struct inode *inode, struct file *file) /* release entry point: drops the references taken by lo_open; returns 0 on every path */
1861     +{
1862     + struct loop_device *lo;
1863 max 21 + int dev;
1864 max 278 +
1865     + if (!inode) /* unlike lo_open, a failed validation here is not reported as an error */
1866     + return 0;
1867     + if (MAJOR(inode->i_rdev) != MAJOR_NR) {
1868     + printk(KERN_WARNING "lo_release: pseudo-major != %d\n",
1869     + MAJOR_NR);
1870     + return 0;
1871     + }
1872     + dev = MINOR(inode->i_rdev);
1873     + if (dev >= max_loop)
1874     + return 0;
1875     +
1876     + lo = &loop_dev[dev];
1877     + down(&lo->lo_ctl_mutex);
1878     + --lo->lo_refcnt; /* open count is modified only while lo_ctl_mutex is held */
1879     + up(&lo->lo_ctl_mutex);
1880     + MOD_DEC_USE_COUNT; /* undoes the MOD_INC_USE_COUNT done in lo_open */
1881     + return 0;
1882     +}
1883     +
1884     +static struct block_device_operations lo_fops = { /* operations table that loop_init hands to devfs_register_blkdev, register_disk and devfs_register_series */
1885     + owner: THIS_MODULE,
1886     + open: lo_open,
1887     + release: lo_release,
1888     + ioctl: lo_ioctl,
1889     +};
1890     +
1891     +/*
1892     + * And now the modules code and kernel interface.
1893     + */
1894     +MODULE_PARM(max_loop, "i");
1895     +MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)");
1896     +MODULE_LICENSE("GPL");
1897     +
1898     +int loop_register_transfer(struct loop_func_table *funcs) /* exported: installs funcs in xfer_funcs[funcs->number]; returns 0, or -EINVAL if the slot is invalid or taken */
1899     +{
1900 max 21 + if ((unsigned)funcs->number >= MAX_LO_CRYPT || xfer_funcs[funcs->number]) /* the unsigned cast makes a negative number fail the range test as well */
1901 max 278 + return -EINVAL;
1902     + xfer_funcs[funcs->number] = funcs; /* the pointer itself is stored, not a copy of the table */
1903 max 254 + return 0;
1904 max 278 +}
1905     +
1906     +int loop_unregister_transfer(int number) /* exported: detaches transfer type 'number' from every device using it and empties its xfer_funcs[] slot */
1907     +{
1908 max 254 + struct loop_device *lo;
1909 max 278 +
1910     + if ((unsigned)number >= MAX_LO_CRYPT) /* negative or too-large numbers are refused with -EINVAL */
1911 max 254 + return -EINVAL;
1912     + for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) { /* visit all max_loop devices */
1913 max 278 + int type = lo->lo_encrypt_type;
1914 max 254 + if (type == number) {
1915 max 21 + loop_release_xfer(lo);
1916 max 278 + }
1917     + }
1918 max 254 + xfer_funcs[number] = NULL; /* cleared without checking whether the slot was ever registered */
1919     + return 0;
1920 max 278 +}
1921     +
1922     +EXPORT_SYMBOL(loop_register_transfer);
1923     +EXPORT_SYMBOL(loop_unregister_transfer);
1924     +
1925 max 254 +int __init loop_init(void) /* driver setup: registers the block major, allocates and initialises the per-device tables, publishes the devices; returns 0, -EIO or -ENOMEM */
1926 max 278 +{
1927     + int i;
1928     +
1929     + if ((max_loop < 1) || (max_loop > 256)) { /* an out-of-range max_loop is not fatal: warn and use 8 */
1930     + printk(KERN_WARNING "loop: invalid max_loop (must be between"
1931     + " 1 and 256), using default (8)\n");
1932     + max_loop = 8;
1933     + }
1934     +
1935     + if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) { /* nothing has been allocated yet, so this failure returns directly */
1936     + printk(KERN_WARNING "Unable to get major number %d for loop"
1937     + " device\n", MAJOR_NR);
1938     + return -EIO;
1939     + }
1940     +
1941     + loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL); /* four arrays of max_loop entries follow; each failure jumps to the label that frees what was allocated before it */
1942     + if (!loop_dev)
1943 max 21 + goto out_dev;
1944 max 278 +
1945     + loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
1946     + if (!loop_sizes)
1947     + goto out_sizes;
1948     +
1949     + loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
1950     + if (!loop_blksizes)
1951     + goto out_blksizes;
1952     +
1953 max 21 + loop_hardsizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
1954     + if (!loop_hardsizes)
1955     + goto out_hardsizes;
1956     +
1957 max 278 + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); /* loop_make_request becomes the make_request function of this major's default queue */
1958     +
1959     + for (i = 0; i < max_loop; i++) {
1960     + struct loop_device *lo = &loop_dev[i];
1961     + memset(lo, 0, sizeof(struct loop_device)); /* kmalloc memory is not zeroed, so clear each entry before initialising it */
1962     + init_MUTEX(&lo->lo_ctl_mutex);
1963     + init_MUTEX_LOCKED(&lo->lo_sem); /* lo_sem starts out locked */
1964     + lo->lo_number = i;
1965     + spin_lock_init(&lo->lo_lock);
1966     + }
1967     +
1968     + memset(loop_sizes, 0, max_loop * sizeof(int));
1969     + memset(loop_blksizes, 0, max_loop * sizeof(int));
1970 max 21 + memset(loop_hardsizes, 0, max_loop * sizeof(int));
1971 max 278 + blk_size[MAJOR_NR] = loop_sizes; /* publish the three zeroed tables in the global per-major arrays */
1972     + blksize_size[MAJOR_NR] = loop_blksizes;
1973 max 21 + hardsect_size[MAJOR_NR] = loop_hardsizes;
1974 max 278 + for (i = 0; i < max_loop; i++)
1975     + register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
1976     +
1977 max 21 + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) { /* only even indices are clamped; NOTE(review): presumably lo_prealloc[] holds value pairs - confirm at its definition */
1978     + if (!lo_prealloc[i]) /* a zero entry is left as it is */
1979     + continue;
1980     + if (lo_prealloc[i] < LO_PREALLOC_MIN)
1981     + lo_prealloc[i] = LO_PREALLOC_MIN;
1982     + if (lo_prealloc[i] > LO_PREALLOC_MAX)
1983     + lo_prealloc[i] = LO_PREALLOC_MAX;
1984     + }
1985     +
1986 max 278 +#if defined(IOCTL32_COMPATIBLE_PTR)
1987 max 376 + lock_kernel(); /* the three registrations are made with the big kernel lock held */
1988 max 278 + register_ioctl32_conversion(LOOP_SET_STATUS64, IOCTL32_COMPATIBLE_PTR);
1989     + register_ioctl32_conversion(LOOP_GET_STATUS64, IOCTL32_COMPATIBLE_PTR);
1990     + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP, IOCTL32_COMPATIBLE_PTR);
1991 max 376 + unlock_kernel();
1992 max 278 +#endif
1993     +
1994     + devfs_handle = devfs_mk_dir(NULL, "loop", NULL); /* the results of the two devfs calls are not checked */
1995     + devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
1996     + MAJOR_NR, 0,
1997     + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
1998     + &lo_fops, NULL);
1999     +
2000 max 376 +#if CONFIG_BLK_DEV_LOOP_AES
2001     +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
2002     + printk(KERN_INFO "loop: AES key scrubbing enabled\n"); /* printed only when both config options are enabled */
2003     +#endif
2004     +#endif
2005 max 278 + printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
2006     + return 0;
2007     +
2008 max 21 +out_hardsizes: /* error unwind: labels run in reverse allocation order and fall through to the next */
2009     + kfree(loop_blksizes);
2010 max 278 +out_blksizes:
2011     + kfree(loop_sizes);
2012     +out_sizes:
2013     + kfree(loop_dev);
2014 max 21 +out_dev:
2015 max 278 + if (devfs_unregister_blkdev(MAJOR_NR, "loop")) /* give the major back, since registration had succeeded */
2016     + printk(KERN_WARNING "loop: cannot unregister blkdev\n");
2017     + printk(KERN_ERR "loop: ran out of memory\n");
2018     + return -ENOMEM;
2019     +}
2020     +
2021 max 254 +void loop_exit(void) /* driver teardown: removes the devfs entries and block major, frees the tables allocated by loop_init */
2022 max 278 +{
2023     + devfs_unregister(devfs_handle);
2024     + if (devfs_unregister_blkdev(MAJOR_NR, "loop")) /* a failure is only logged; teardown continues */
2025     + printk(KERN_WARNING "loop: cannot unregister blkdev\n");
2026 max 21 +
2027     + blk_size[MAJOR_NR] = 0; /* clear the global table pointers before the tables they point at are freed */
2028     + blksize_size[MAJOR_NR] = 0;
2029     + hardsect_size[MAJOR_NR] = 0;
2030 max 278 + kfree(loop_dev);
2031     + kfree(loop_sizes);
2032     + kfree(loop_blksizes);
2033 max 21 + kfree(loop_hardsizes);
2034 max 278 +
2035     +#if defined(IOCTL32_COMPATIBLE_PTR)
2036 max 376 + lock_kernel(); /* mirrors loop_init: the same three conversions are removed with the big kernel lock held */
2037 max 278 + unregister_ioctl32_conversion(LOOP_SET_STATUS64);
2038     + unregister_ioctl32_conversion(LOOP_GET_STATUS64);
2039     + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP);
2040 max 376 + unlock_kernel();
2041 max 278 +#endif
2042     +}
2043     +
2044     +module_init(loop_init);
2045     +module_exit(loop_exit);
2046     +
2047     +#ifndef MODULE
2048     +static int __init max_loop_setup(char *str) /* handler for the "max_loop=" boot option; compiled only when the driver is not built as a module */
2049     +{
2050     + max_loop = simple_strtol(str, NULL, 0); /* base 0: the number's prefix selects the radix; the value is not range-checked here (loop_init does that) */
2051     + return 1;
2052     +}
2053     +
2054     +__setup("max_loop=", max_loop_setup);
2055     +#endif
2056 max 376 +
2057     +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
2058     +void loop_add_keyscrub_fn(struct loop_device *lo, void (*fn)(void *), void *ptr) /* exported: records a key-scrub callback and its argument on the device, then wakes sleepers on lo_bh_wait */
2059     +{
2060     + lo->lo_keyscrub_ptr = ptr; /* the argument is stored first ... */
2061     + wmb(); /* ... and a write barrier orders that store before the store of fn */
2062     + lo->lo_keyscrub_fn = fn;
2063     + wake_up_interruptible(&lo->lo_bh_wait);
2064     +}
2065     +EXPORT_SYMBOL(loop_add_keyscrub_fn);
2066     +#endif
2067     diff -urN linux-2.4.27-noloop/drivers/misc/Makefile linux-2.4.27-AES/drivers/misc/Makefile
2068     --- linux-2.4.27-noloop/drivers/misc/Makefile Sat Dec 30 00:07:22 2000
2069     +++ linux-2.4.27-AES/drivers/misc/Makefile Sun Aug 15 16:39:41 2004
2070 max 348 @@ -9,7 +9,34 @@
2071 max 21 # parent makes..
2072     #
2073    
2074     +.S.o:
2075 max 278 + $(CC) $(AFLAGS) $(AFLAGS_$@) -c $< -o $*.o
2076 max 21 +
2077     O_TARGET := misc.o
2078     +
2079     +ifeq ($(CONFIG_BLK_DEV_LOOP_AES),y)
2080 max 348 +AES_X86_ASM=n
2081 max 21 +ifeq ($(CONFIG_X86),y)
2082     +ifneq ($(CONFIG_X86_64),y)
2083 max 348 + AES_X86_ASM=y
2084 max 21 +endif
2085     +endif
2086 max 348 +ifeq ($(AES_X86_ASM),y)
2087 max 254 + export-objs += crypto-ksym.o
2088 max 348 + obj-y += aes-x86.o md5-x86.o crypto-ksym.o
2089     + AFLAGS_aes-x86.o := -DUSE_UNDERLINE=1
2090 max 21 +else
2091 max 348 +ifeq ($(CONFIG_X86_64),y)
2092 max 254 + export-objs += crypto-ksym.o
2093 max 348 + obj-y += aes-amd64.o md5-amd64.o crypto-ksym.o
2094     + AFLAGS_aes-amd64.o := -DUSE_UNDERLINE=1
2095     +else
2096     + export-objs += crypto-ksym.o
2097 max 254 + obj-y += aes.o md5.o crypto-ksym.o
2098 max 278 + CFLAGS_aes.o := -DDATA_ALWAYS_ALIGNED=1
2099 max 21 +endif
2100     +endif
2101 max 348 +endif
2102 max 21
2103     include $(TOPDIR)/Rules.make
2104    
2105 max 376 diff -urN linux-2.4.27-noloop/drivers/misc/aes-amd64.S linux-2.4.27-AES/drivers/misc/aes-amd64.S
2106     --- linux-2.4.27-noloop/drivers/misc/aes-amd64.S Thu Jan 1 02:00:00 1970
2107     +++ linux-2.4.27-AES/drivers/misc/aes-amd64.S Sun Aug 15 16:39:41 2004
2108 max 348 @@ -0,0 +1,893 @@
2109 max 21 +//
2110     +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
2111     +// All rights reserved.
2112     +//
2113     +// TERMS
2114     +//
2115     +// Redistribution and use in source and binary forms, with or without
2116     +// modification, are permitted subject to the following conditions:
2117     +//
2118     +// 1. Redistributions of source code must retain the above copyright
2119     +// notice, this list of conditions and the following disclaimer.
2120     +//
2121     +// 2. Redistributions in binary form must reproduce the above copyright
2122     +// notice, this list of conditions and the following disclaimer in the
2123     +// documentation and/or other materials provided with the distribution.
2124     +//
2125     +// 3. The copyright holder's name must not be used to endorse or promote
2126     +// any products derived from this software without his specific prior
2127     +// written permission.
2128     +//
2129     +// This software is provided 'as is' with no express or implied warranties
2130     +// of correctness or fitness for purpose.
2131     +
2132     +// Modified by Jari Ruusu, December 24 2001
2133     +// - Converted syntax to GNU CPP/assembler syntax
2134     +// - C programming interface converted back to "old" API
2135     +// - Minor portability cleanups and speed optimizations
2136     +
2137     +// Modified by Jari Ruusu, April 11 2002
2138     +// - Added above copyright and terms to resulting object code so that
2139     +// binary distributions can avoid legal trouble
2140     +
2141 max 348 +// Modified by Jari Ruusu, June 12 2004
2142     +// - Converted 32 bit x86 code to 64 bit AMD64 code
2143     +// - Re-wrote encrypt and decrypt code from scratch
2144     +
2145     +// An AES (Rijndael) implementation for the AMD64. This version only
2146 max 21 +// implements the standard AES block length (128 bits, 16 bytes). This code
2147 max 348 +// does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 registers or the
2148     +// arithmetic status flags. However, the rbx, rbp and r12-r15 registers are
2149     +// preserved across calls.
2150 max 21 +
2151     +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
2152     +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
2153     +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
2154     +
2155     +#if defined(USE_UNDERLINE)
2156     +# define aes_set_key _aes_set_key
2157     +# define aes_encrypt _aes_encrypt
2158     +# define aes_decrypt _aes_decrypt
2159     +#endif
2160 max 348 +#if !defined(ALIGN64BYTES)
2161     +# define ALIGN64BYTES 64
2162     +#endif
2163     +
2164     + .file "aes-amd64.S"
2165     + .globl aes_set_key
2166     + .globl aes_encrypt
2167     + .globl aes_decrypt
2168     +
2169     + .section .rodata
2170     +copyright:
2171     + .ascii " \000"
2172     + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
2173     + .ascii "All rights reserved.\000"
2174     + .ascii " \000"
2175     + .ascii "TERMS\000"
2176     + .ascii " \000"
2177     + .ascii " Redistribution and use in source and binary forms, with or without\000"
2178     + .ascii " modification, are permitted subject to the following conditions:\000"
2179     + .ascii " \000"
2180     + .ascii " 1. Redistributions of source code must retain the above copyright\000"
2181     + .ascii " notice, this list of conditions and the following disclaimer.\000"
2182     + .ascii " \000"
2183     + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
2184     + .ascii " notice, this list of conditions and the following disclaimer in the\000"
2185     + .ascii " documentation and/or other materials provided with the distribution.\000"
2186     + .ascii " \000"
2187     + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
2188     + .ascii " any products derived from this software without his specific prior\000"
2189     + .ascii " written permission.\000"
2190     + .ascii " \000"
2191     + .ascii " This software is provided 'as is' with no express or implied warranties\000"
2192     + .ascii " of correctness or fitness for purpose.\000"
2193     + .ascii " \000"
2194     +
2195     +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
2196     +
2197     +// offsets in context structure
2198     +
2199     +#define nkey 0 // key length, size 4
2200     +#define nrnd 4 // number of rounds, size 4
2201     +#define ekey 8 // encryption key schedule base address, size 256
2202     +#define dkey 264 // decryption key schedule base address, size 256
2203     +
2204     +// This macro performs a forward encryption cycle. It is entered with
2205     +// the first previous round column values in I1E, I2E, I3E and I4E and
2206     +// exits with the final values OU1, OU2, OU3 and OU4 registers.
2207     +
2208     +#define fwd_rnd(p1,p2,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \
2209     + movl p2(%rbp),OU1 ;\
2210     + movl p2+4(%rbp),OU2 ;\
2211     + movl p2+8(%rbp),OU3 ;\
2212     + movl p2+12(%rbp),OU4 ;\
2213     + movzbl I1B,%edi ;\
2214     + movzbl I2B,%esi ;\
2215     + movzbl I3B,%r8d ;\
2216     + movzbl I4B,%r13d ;\
2217     + shrl $8,I3E ;\
2218     + shrl $8,I4E ;\
2219     + xorl p1(,%rdi,4),OU1 ;\
2220     + xorl p1(,%rsi,4),OU2 ;\
2221     + xorl p1(,%r8,4),OU3 ;\
2222     + xorl p1(,%r13,4),OU4 ;\
2223     + movzbl I2H,%esi ;\
2224     + movzbl I3B,%r8d ;\
2225     + movzbl I4B,%r13d ;\
2226     + movzbl I1H,%edi ;\
2227     + shrl $8,I3E ;\
2228     + shrl $8,I4E ;\
2229     + xorl p1+tlen(,%rsi,4),OU1 ;\
2230     + xorl p1+tlen(,%r8,4),OU2 ;\
2231     + xorl p1+tlen(,%r13,4),OU3 ;\
2232     + xorl p1+tlen(,%rdi,4),OU4 ;\
2233     + shrl $16,I1E ;\
2234     + shrl $16,I2E ;\
2235     + movzbl I3B,%r8d ;\
2236     + movzbl I4B,%r13d ;\
2237     + movzbl I1B,%edi ;\
2238     + movzbl I2B,%esi ;\
2239     + xorl p1+2*tlen(,%r8,4),OU1 ;\
2240     + xorl p1+2*tlen(,%r13,4),OU2 ;\
2241     + xorl p1+2*tlen(,%rdi,4),OU3 ;\
2242     + xorl p1+2*tlen(,%rsi,4),OU4 ;\
2243     + shrl $8,I4E ;\
2244     + movzbl I1H,%edi ;\
2245     + movzbl I2H,%esi ;\
2246     + shrl $8,I3E ;\
2247     + xorl p1+3*tlen(,I4R,4),OU1 ;\
2248     + xorl p1+3*tlen(,%rdi,4),OU2 ;\
2249     + xorl p1+3*tlen(,%rsi,4),OU3 ;\
2250     + xorl p1+3*tlen(,I3R,4),OU4
2251     +
2252     +// This macro performs an inverse encryption cycle. It is entered with
2253     +// the first previous round column values in I1E, I2E, I3E and I4E and
2254     +// exits with the final values OU1, OU2, OU3 and OU4 registers.
2255     +
2256     +#define inv_rnd(p1,p2,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \
2257     + movl p2+12(%rbp),OU4 ;\
2258     + movl p2+8(%rbp),OU3 ;\
2259     + movl p2+4(%rbp),OU2 ;\
2260     + movl p2(%rbp),OU1 ;\
2261     + movzbl I4B,%edi ;\
2262     + movzbl I3B,%esi ;\
2263     + movzbl I2B,%r8d ;\
2264     + movzbl I1B,%r13d ;\
2265     + shrl $8,I2E ;\
2266     + shrl $8,I1E ;\
2267     + xorl p1(,%rdi,4),OU4 ;\
2268     + xorl p1(,%rsi,4),OU3 ;\
2269     + xorl p1(,%r8,4),OU2 ;\
2270     + xorl p1(,%r13,4),OU1 ;\
2271     + movzbl I3H,%esi ;\
2272     + movzbl I2B,%r8d ;\
2273     + movzbl I1B,%r13d ;\
2274     + movzbl I4H,%edi ;\
2275     + shrl $8,I2E ;\
2276     + shrl $8,I1E ;\
2277     + xorl p1+tlen(,%rsi,4),OU4 ;\
2278     + xorl p1+tlen(,%r8,4),OU3 ;\
2279     + xorl p1+tlen(,%r13,4),OU2 ;\
2280     + xorl p1+tlen(,%rdi,4),OU1 ;\
2281     + shrl $16,I4E ;\
2282     + shrl $16,I3E ;\
2283     + movzbl I2B,%r8d ;\
2284     + movzbl I1B,%r13d ;\
2285     + movzbl I4B,%edi ;\
2286     + movzbl I3B,%esi ;\
2287     + xorl p1+2*tlen(,%r8,4),OU4 ;\
2288     + xorl p1+2*tlen(,%r13,4),OU3 ;\
2289     + xorl p1+2*tlen(,%rdi,4),OU2 ;\
2290     + xorl p1+2*tlen(,%rsi,4),OU1 ;\
2291     + shrl $8,I1E ;\
2292     + movzbl I4H,%edi ;\
2293     + movzbl I3H,%esi ;\
2294     + shrl $8,I2E ;\
2295     + xorl p1+3*tlen(,I1R,4),OU4 ;\
2296     + xorl p1+3*tlen(,%rdi,4),OU3 ;\
2297     + xorl p1+3*tlen(,%rsi,4),OU2 ;\
2298     + xorl p1+3*tlen(,I2R,4),OU1
2299     +
2300     +// AES (Rijndael) Encryption Subroutine
2301     +
2302     +// rdi = pointer to AES context
2303     +// rsi = pointer to input plaintext bytes
2304     +// rdx = pointer to output ciphertext bytes
2305     +
2306     + .text
2307     + .align ALIGN64BYTES
2308     +aes_encrypt:
2309     + movl (%rsi),%eax // read in plaintext
2310     + movl 4(%rsi),%ecx
2311     + movl 8(%rsi),%r10d
2312     + movl 12(%rsi),%r11d
2313     +
2314     + pushq %rbp // rbp, rbx and r13-r15 are saved here and restored before the output stores
2315     + leaq ekey+16(%rdi),%rbp // encryption key pointer
2316     + movq %rdx,%r9 // pointer to out block
2317     + movl nrnd(%rdi),%edx // number of rounds
2318     + pushq %rbx
2319     + pushq %r13
2320     + pushq %r14
2321     + pushq %r15
2322     +
2323     + xorl -16(%rbp),%eax // xor in first round key
2324     + xorl -12(%rbp),%ecx
2325     + xorl -8(%rbp),%r10d
2326     + xorl -4(%rbp),%r11d
2327     +
2328     + subl $10,%edx // edx = nrnd - 10
2329     + je aes_15 // nrnd == 10: only the aes_15 sequence runs
2330     + addq $32,%rbp // move the key pointer past two more 16-byte round keys
2331     + subl $2,%edx
2332     + je aes_13 // nrnd == 12: enter at aes_13
2333     + addq $32,%rbp // reached only when nrnd is neither 10 nor 12
2334     +
2335     + fwd_rnd(aes_ft_tab,-64,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2336     + fwd_rnd(aes_ft_tab,-48,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2337     + jmp aes_13 // skips whatever padding the .align below emits
2338     + .align ALIGN64BYTES
2339     +aes_13: fwd_rnd(aes_ft_tab,-32,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2340     + fwd_rnd(aes_ft_tab,-16,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2341     + jmp aes_15 // skips whatever padding the .align below emits
2342     + .align ALIGN64BYTES
2343     +aes_15: fwd_rnd(aes_ft_tab,0, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2344     + fwd_rnd(aes_ft_tab,16, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2345     + fwd_rnd(aes_ft_tab,32, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2346     + fwd_rnd(aes_ft_tab,48, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2347     + fwd_rnd(aes_ft_tab,64, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2348     + fwd_rnd(aes_ft_tab,80, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2349     + fwd_rnd(aes_ft_tab,96, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2350     + fwd_rnd(aes_ft_tab,112,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2351     + fwd_rnd(aes_ft_tab,128,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2352     + fwd_rnd(aes_fl_tab,144,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) // the last round indexes aes_fl_tab instead of aes_ft_tab
2353     +
2354     + popq %r15
2355     + popq %r14
2356     + popq %r13
2357     + popq %rbx
2358     + popq %rbp
2359     +
2360     + movl %eax,(%r9) // move final values to the output array.
2361     + movl %ecx,4(%r9)
2362     + movl %r10d,8(%r9)
2363     + movl %r11d,12(%r9)
2364     + ret
2365     +
2366     +// AES (Rijndael) Decryption Subroutine
2367     +
2368     +// rdi = pointer to AES context
2369     +// rsi = pointer to input ciphertext bytes
2370     +// rdx = pointer to output plaintext bytes
2371     +
2372     + .align ALIGN64BYTES
2373     +aes_decrypt:
2374     + movl 12(%rsi),%eax // read in ciphertext
2375     + movl 8(%rsi),%ecx // note the register order: eax holds the word at offset 12, r11d the word at offset 0
2376     + movl 4(%rsi),%r10d
2377     + movl (%rsi),%r11d
2378     +
2379     + pushq %rbp // rbp, rbx and r13-r15 are saved here and restored before the output stores
2380     + leaq dkey+16(%rdi),%rbp // decryption key pointer
2381     + movq %rdx,%r9 // pointer to out block
2382     + movl nrnd(%rdi),%edx // number of rounds
2383     + pushq %rbx
2384     + pushq %r13
2385     + pushq %r14
2386     + pushq %r15
2387     +
2388     + xorl -4(%rbp),%eax // xor in first round key
2389     + xorl -8(%rbp),%ecx
2390     + xorl -12(%rbp),%r10d
2391     + xorl -16(%rbp),%r11d
2392     +
2393     + subl $10,%edx // edx = nrnd - 10
2394     + je aes_25 // nrnd == 10: only the aes_25 sequence runs
2395     + addq $32,%rbp // move the key pointer past two more 16-byte round keys
2396     + subl $2,%edx
2397     + je aes_23 // nrnd == 12: enter at aes_23
2398     + addq $32,%rbp // reached only when nrnd is neither 10 nor 12
2399     +
2400     + inv_rnd(aes_it_tab,-64,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2401     + inv_rnd(aes_it_tab,-48,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2402     + jmp aes_23 // skips whatever padding the .align below emits
2403     + .align ALIGN64BYTES
2404     +aes_23: inv_rnd(aes_it_tab,-32,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2405     + inv_rnd(aes_it_tab,-16,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2406     + jmp aes_25 // skips whatever padding the .align below emits
2407     + .align ALIGN64BYTES
2408     +aes_25: inv_rnd(aes_it_tab,0, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2409     + inv_rnd(aes_it_tab,16, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2410     + inv_rnd(aes_it_tab,32, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2411     + inv_rnd(aes_it_tab,48, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2412     + inv_rnd(aes_it_tab,64, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2413     + inv_rnd(aes_it_tab,80, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2414     + inv_rnd(aes_it_tab,96, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2415     + inv_rnd(aes_it_tab,112,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2416     + inv_rnd(aes_it_tab,128,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2417     + inv_rnd(aes_il_tab,144,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) // the last round indexes aes_il_tab instead of aes_it_tab
2418     +
2419     + popq %r15
2420     + popq %r14
2421     + popq %r13
2422     + popq %rbx
2423     + popq %rbp
2424     +
2425     + movl %eax,12(%r9) // move final values to the output array.
2426     + movl %ecx,8(%r9)
2427     + movl %r10d,4(%r9)
2428     + movl %r11d,(%r9)
2429     + ret
2430     +
2431     +// AES (Rijndael) Key Schedule Subroutine
2432     +
2433     +// This macro performs a column mixing operation on an input 32-bit
2434     +// word to give a 32-bit result. It uses each of the 4 bytes in the
2435     +// input column to index 4 different tables of 256 32-bit words
2436     +// that are xored together to form the output value.
2437     +
2438     +#define mix_col(p1) \
2439     + movzbl %bl,%ecx ;\
2440     + movl p1(,%rcx,4),%eax ;\
2441     + movzbl %bh,%ecx ;\
2442     + ror $16,%ebx ;\
2443     + xorl p1+tlen(,%rcx,4),%eax ;\
2444     + movzbl %bl,%ecx ;\
2445     + xorl p1+2*tlen(,%rcx,4),%eax ;\
2446     + movzbl %bh,%ecx ;\
2447     + xorl p1+3*tlen(,%rcx,4),%eax
2448     +
2449     +// Key Schedule Macros
2450     +
2451     +#define ksc4(p1) \
2452     + rol $24,%ebx ;\
2453     + mix_col(aes_fl_tab) ;\
2454     + ror $8,%ebx ;\
2455     + xorl 4*p1+aes_rcon_tab,%eax ;\
2456     + xorl %eax,%esi ;\
2457     + xorl %esi,%ebp ;\
2458     + movl %esi,16*p1(%rdi) ;\
2459     + movl %ebp,16*p1+4(%rdi) ;\
2460     + xorl %ebp,%edx ;\
2461     + xorl %edx,%ebx ;\
2462     + movl %edx,16*p1+8(%rdi) ;\
2463     + movl %ebx,16*p1+12(%rdi)
2464     +
2465     +#define ksc6(p1) \
2466     + rol $24,%ebx ;\
2467     + mix_col(aes_fl_tab) ;\
2468     + ror $8,%ebx ;\
2469     + xorl 4*p1+aes_rcon_tab,%eax ;\
2470     + xorl 24*p1-24(%rdi),%eax ;\
2471     + movl %eax,24*p1(%rdi) ;\
2472     + xorl 24*p1-20(%rdi),%eax ;\
2473     + movl %eax,24*p1+4(%rdi) ;\
2474     + xorl %eax,%esi ;\
2475     + xorl %esi,%ebp ;\
2476     + movl %esi,24*p1+8(%rdi) ;\
2477     + movl %ebp,24*p1+12(%rdi) ;\
2478     + xorl %ebp,%edx ;\
2479     + xorl %edx,%ebx ;\
2480     + movl %edx,24*p1+16(%rdi) ;\
2481     + movl %ebx,24*p1+20(%rdi)
2482     +
2483     +#define ksc8(p1) \
2484     + rol $24,%ebx ;\
2485     + mix_col(aes_fl_tab) ;\
2486     + ror $8,%ebx ;\
2487     + xorl 4*p1+aes_rcon_tab,%eax ;\
2488     + xorl 32*p1-32(%rdi),%eax ;\
2489     + movl %eax,32*p1(%rdi) ;\
2490     + xorl 32*p1-28(%rdi),%eax ;\
2491     + movl %eax,32*p1+4(%rdi) ;\
2492     + xorl 32*p1-24(%rdi),%eax ;\
2493     + movl %eax,32*p1+8(%rdi) ;\
2494     + xorl 32*p1-20(%rdi),%eax ;\
2495     + movl %eax,32*p1+12(%rdi) ;\
2496     + pushq %rbx ;\
2497     + movl %eax,%ebx ;\
2498     + mix_col(aes_fl_tab) ;\
2499     + popq %rbx ;\
2500     + xorl %eax,%esi ;\
2501     + xorl %esi,%ebp ;\
2502     + movl %esi,32*p1+16(%rdi) ;\
2503     + movl %ebp,32*p1+20(%rdi) ;\
2504     + xorl %ebp,%edx ;\
2505     + xorl %edx,%ebx ;\
2506     + movl %edx,32*p1+24(%rdi) ;\
2507     + movl %ebx,32*p1+28(%rdi)
2508     +
2509     +// rdi = pointer to AES context
2510     +// rsi = pointer to key bytes
2511     +// rdx = key length, bytes or bits
2512     +// rcx = ed_flag, 1=encrypt only, 0=both encrypt and decrypt
2513     +
2514     + .align ALIGN64BYTES
2515     +aes_set_key:
2516     + pushfq // flags are saved and restored around the routine, which executes cld below
2517     + pushq %rbp
2518     + pushq %rbx
2519     +
2520     + movq %rcx,%r11 // ed_flg
2521     + movq %rdx,%rcx // key length
2522     + movq %rdi,%r10 // AES context
2523     +
2524     + cmpl $128,%ecx // a length of 128 or more is taken as bits ...
2525     + jb aes_30
2526     + shrl $3,%ecx // ... and divided by 8 to give bytes
2527     +aes_30: cmpl $32,%ecx
2528     + je aes_32
2529     + cmpl $24,%ecx
2530     + je aes_32
2531     + movl $16,%ecx // any byte length other than 32 or 24 is forced to 16
2532     +aes_32: shrl $2,%ecx // bytes -> number of 32-bit words (4, 6 or 8)
2533     + movl %ecx,nkey(%r10)
2534     + leaq 6(%rcx),%rax // 10/12/14 for 4/6/8 32-bit key length
2535     + movl %eax,nrnd(%r10)
2536     + leaq ekey(%r10),%rdi // key position in AES context
2537     + cld // forward direction for the string instructions used below
2538     + movl %ecx,%eax // save key length in eax
2539     + rep ; movsl // words in the key schedule
2540     + movl -4(%rsi),%ebx // put some values in registers
2541     + movl -8(%rsi),%edx // to allow faster code
2542     + movl -12(%rsi),%ebp
2543     + movl -16(%rsi),%esi
2544     +
2545     + cmpl $4,%eax // jump on key size
2546     + je aes_36
2547     + cmpl $6,%eax
2548     + je aes_35
2549     +
2550     + ksc8(0) // reached when the key is neither 4 nor 6 words long
2551     + ksc8(1)
2552     + ksc8(2)
2553     + ksc8(3)
2554     + ksc8(4)
2555     + ksc8(5)
2556     + ksc8(6)
2557     + jmp aes_37
2558     +aes_35: ksc6(0)
2559     + ksc6(1)
2560     + ksc6(2)
2561     + ksc6(3)
2562     + ksc6(4)
2563     + ksc6(5)
2564     + ksc6(6)
2565     + ksc6(7)
2566     + jmp aes_37
2567     +aes_36: ksc4(0)
2568     + ksc4(1)
2569     + ksc4(2)
2570     + ksc4(3)
2571     + ksc4(4)
2572     + ksc4(5)
2573     + ksc4(6)
2574     + ksc4(7)
2575     + ksc4(8)
2576     + ksc4(9)
2577     +aes_37: cmpl $0,%r11d // ed_flg
2578     + jne aes_39 // nonzero flag: the decryption schedule is not built
2579     +
2580     +// compile decryption key schedule from encryption schedule - reverse
2581     +// order and do mix_column operation on round keys except first and last
2582     +
2583     + movl nrnd(%r10),%eax // kt = cx->d_key + nc * cx->Nrnd
2584     + shl $2,%rax // rax = 4*nrnd; the lea below scales by 4 again, giving 16*nrnd bytes
2585     + leaq dkey(%r10,%rax,4),%rdi
2586     + leaq ekey(%r10),%rsi // kf = cx->e_key
2587     +
2588     + movsq // copy first round key (unmodified)
2589     + movsq
2590     + subq $32,%rdi // the copy advanced rdi by 16, so this leaves it 16 bytes below where it started
2591     + movl $1,%r9d // round counter; the loop below repeats while r9d < nrnd
2592     +aes_38: // do mix column on each column of
2593     + lodsl // each round key
2594     + movl %eax,%ebx
2595     + mix_col(aes_im_tab)
2596     + stosl
2597     + lodsl
2598     + movl %eax,%ebx
2599     + mix_col(aes_im_tab)
2600     + stosl
2601     + lodsl
2602     + movl %eax,%ebx
2603     + mix_col(aes_im_tab)
2604     + stosl
2605     + lodsl
2606     + movl %eax,%ebx
2607     + mix_col(aes_im_tab)
2608     + stosl
2609     + subq $32,%rdi // four stosl advanced rdi by 16; step down to the next lower 16-byte slot
2610     +
2611     + incl %r9d
2612     + cmpl nrnd(%r10),%r9d
2613     + jb aes_38
2614     +
2615     + movsq // copy last round key (unmodified)
2616     + movsq
2617     +aes_39: popq %rbx
2618     + popq %rbp
2619     + popfq
2620     + ret
2621     +
2622     +
2623     +// finite field multiplies by {02}, {04} and {08}: shift left, then xor in 0x11b (scaled by 1, 2 or 4) for each bit shifted past bit 7
2624     +
2625     +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
2626     +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
2627     +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
2628     +
2629     +// finite field multiplies required in table generation (xor combinations of f2/f4/f8 and x; x is expanded unparenthesized, so pass plain constants)
2630     +
2631     +#define f3(x) (f2(x) ^ x)
2632     +#define f9(x) (f8(x) ^ x)
2633     +#define fb(x) (f8(x) ^ f2(x) ^ x)
2634     +#define fd(x) (f8(x) ^ f4(x) ^ x)
2635     +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
2636     +
2637     +// These defines generate the forward table entries: byte lanes (low to high) of u0 hold {02},{01},{01},{03} multiples of x; u1..u3 rotate them one lane left each
2638     +
2639     +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
2640     +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
2641     +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
2642     +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
2643     +
2644     +// These defines generate the inverse table entries: byte lanes (low to high) of v0 hold {0e},{09},{0d},{0b} multiples of x; v1..v3 rotate them one lane left each
2645     +
2646     +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
2647     +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
2648     +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
2649     +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
2650     +
2651     +// These defines generate entries for the last round tables: the byte itself, placed in lane 0 (w0) through lane 3 (w3) with no field multiply
2652     +
2653     +#define w0(x) (x)
2654     +#define w1(x) (x << 8)
2655     +#define w2(x) (x << 16)
2656     +#define w3(x) (x << 24)
2657     +
2658     +// macro to generate inverse mix column tables (needed for the key schedule)
2659     +
2660     +#define im_data0(p1) \
2661     + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
2662     + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
2663     + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
2664     + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
2665     +#define im_data1(p1) \
2666     + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
2667     + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
2668     + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
2669     + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
2670     +#define im_data2(p1) \
2671     + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
2672     + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
2673     + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
2674     + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
2675     +#define im_data3(p1) \
2676     + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
2677     + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
2678     + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
2679     + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
2680     +#define im_data4(p1) \
2681     + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
2682     + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
2683     + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
2684     + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
2685     +#define im_data5(p1) \
2686     + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
2687     + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
2688     + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
2689     + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
2690     +#define im_data6(p1) \
2691     + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
2692     + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
2693     + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
2694     + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
2695     +#define im_data7(p1) \
2696     + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
2697     + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
2698     + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
2699     + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
2700     +
2701     +// S-box data - 256 entries
2702     +
2703     +#define sb_data0(p1) \
2704     + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
2705     + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
2706     + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
2707     + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
2708     +#define sb_data1(p1) \
2709     + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
2710     + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
2711     + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
2712     + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
2713     +#define sb_data2(p1) \
2714     + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
2715     + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
2716     + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
2717     + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
2718     +#define sb_data3(p1) \
2719     + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
2720     + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
2721     + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
2722     + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
2723     +#define sb_data4(p1) \
2724     + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
2725     + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
2726     + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
2727     + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
2728     +#define sb_data5(p1) \
2729     + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
2730     + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
2731     + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
2732     + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
2733     +#define sb_data6(p1) \
2734     + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
2735     + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
2736     + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
2737     + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
2738     +#define sb_data7(p1) \
2739     + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
2740     + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
2741     + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
2742     + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
2743     +
2744     +// Inverse S-box data - 256 entries
2745     +
2746     +#define ib_data0(p1) \
2747     + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
2748     + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
2749     + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
2750     + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
2751     +#define ib_data1(p1) \
2752     + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
2753     + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
2754     + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
2755     + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
2756     +#define ib_data2(p1) \
2757     + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
2758     + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
2759     + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
2760     + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
2761     +#define ib_data3(p1) \
2762     + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
2763     + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
2764     + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
2765     + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
2766     +#define ib_data4(p1) \
2767     + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
2768     + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
2769     + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
2770     + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
2771     +#define ib_data5(p1) \
2772     + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
2773     + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
2774     + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
2775     + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
2776     +#define ib_data6(p1) \
2777     + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
2778     + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
2779     + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
2780     + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
2781     +#define ib_data7(p1) \
2782     + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
2783     + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
2784     + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
2785     + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
2786     +
2787     +// The rcon_table (needed for the key schedule)
2788     +//
2789     +// Here is original Dr Brian Gladman's source code:
2790     +// _rcon_tab:
2791     +// %assign x 1
2792     +// %rep 29
2793     +// dd x
2794     +// %assign x f2(x)
2795     +// %endrep
2796     +//
2797     +// Here is precomputed output (it's more portable this way):
2798     +
2799     + .section .rodata
2800     + .align ALIGN64BYTES
2801     +aes_rcon_tab:
2802     + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
2803     + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
2804     + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
2805     + .long 0xb3,0x7d,0xfa,0xef,0xc5
2806     +
2807     +// The forward xor tables
2808     +
2809     + .align ALIGN64BYTES
2810     +aes_ft_tab:
2811     + sb_data0(u0)
2812     + sb_data1(u0)
2813     + sb_data2(u0)
2814     + sb_data3(u0)
2815     + sb_data4(u0)
2816     + sb_data5(u0)
2817     + sb_data6(u0)
2818     + sb_data7(u0)
2819     +
2820     + sb_data0(u1)
2821     + sb_data1(u1)
2822     + sb_data2(u1)
2823     + sb_data3(u1)
2824     + sb_data4(u1)
2825     + sb_data5(u1)
2826     + sb_data6(u1)
2827     + sb_data7(u1)
2828     +
2829     + sb_data0(u2)
2830     + sb_data1(u2)
2831     + sb_data2(u2)
2832     + sb_data3(u2)
2833     + sb_data4(u2)
2834     + sb_data5(u2)
2835     + sb_data6(u2)
2836     + sb_data7(u2)
2837     +
2838     + sb_data0(u3)
2839     + sb_data1(u3)
2840     + sb_data2(u3)
2841     + sb_data3(u3)
2842     + sb_data4(u3)
2843     + sb_data5(u3)
2844     + sb_data6(u3)
2845     + sb_data7(u3)
2846     +
2847     + .align ALIGN64BYTES
2848     +aes_fl_tab:
2849     + sb_data0(w0)
2850     + sb_data1(w0)
2851     + sb_data2(w0)
2852     + sb_data3(w0)
2853     + sb_data4(w0)
2854     + sb_data5(w0)
2855     + sb_data6(w0)
2856     + sb_data7(w0)
2857     +
2858     + sb_data0(w1)
2859     + sb_data1(w1)
2860     + sb_data2(w1)
2861     + sb_data3(w1)
2862     + sb_data4(w1)
2863     + sb_data5(w1)
2864     + sb_data6(w1)
2865     + sb_data7(w1)
2866     +
2867     + sb_data0(w2)
2868     + sb_data1(w2)
2869     + sb_data2(w2)
2870     + sb_data3(w2)
2871     + sb_data4(w2)
2872     + sb_data5(w2)
2873     + sb_data6(w2)
2874     + sb_data7(w2)
2875     +
2876     + sb_data0(w3)
2877     + sb_data1(w3)
2878     + sb_data2(w3)
2879     + sb_data3(w3)
2880     + sb_data4(w3)
2881     + sb_data5(w3)
2882     + sb_data6(w3)
2883     + sb_data7(w3)
2884     +
2885     +// The inverse xor tables
2886     +
2887     + .align ALIGN64BYTES
2888     +aes_it_tab:
2889     + ib_data0(v0)
2890     + ib_data1(v0)
2891     + ib_data2(v0)
2892     + ib_data3(v0)
2893     + ib_data4(v0)
2894     + ib_data5(v0)
2895     + ib_data6(v0)
2896     + ib_data7(v0)
2897     +
2898     + ib_data0(v1)
2899     + ib_data1(v1)
2900     + ib_data2(v1)
2901     + ib_data3(v1)
2902     + ib_data4(v1)
2903     + ib_data5(v1)
2904     + ib_data6(v1)
2905     + ib_data7(v1)
2906     +
2907     + ib_data0(v2)
2908     + ib_data1(v2)
2909     + ib_data2(v2)
2910     + ib_data3(v2)
2911     + ib_data4(v2)
2912     + ib_data5(v2)
2913     + ib_data6(v2)
2914     + ib_data7(v2)
2915     +
2916     + ib_data0(v3)
2917     + ib_data1(v3)
2918     + ib_data2(v3)
2919     + ib_data3(v3)
2920     + ib_data4(v3)
2921     + ib_data5(v3)
2922     + ib_data6(v3)
2923     + ib_data7(v3)
2924     +
2925     + .align ALIGN64BYTES
2926     +aes_il_tab:
2927     + ib_data0(w0)
2928     + ib_data1(w0)
2929     + ib_data2(w0)
2930     + ib_data3(w0)
2931     + ib_data4(w0)
2932     + ib_data5(w0)
2933     + ib_data6(w0)
2934     + ib_data7(w0)
2935     +
2936     + ib_data0(w1)
2937     + ib_data1(w1)
2938     + ib_data2(w1)
2939     + ib_data3(w1)
2940     + ib_data4(w1)
2941     + ib_data5(w1)
2942     + ib_data6(w1)
2943     + ib_data7(w1)
2944     +
2945     + ib_data0(w2)
2946     + ib_data1(w2)
2947     + ib_data2(w2)
2948     + ib_data3(w2)
2949     + ib_data4(w2)
2950     + ib_data5(w2)
2951     + ib_data6(w2)
2952     + ib_data7(w2)
2953     +
2954     + ib_data0(w3)
2955     + ib_data1(w3)
2956     + ib_data2(w3)
2957     + ib_data3(w3)
2958     + ib_data4(w3)
2959     + ib_data5(w3)
2960     + ib_data6(w3)
2961     + ib_data7(w3)
2962     +
2963     +// The inverse mix column tables
2964     +
2965     + .align ALIGN64BYTES
2966     +aes_im_tab:
2967     + im_data0(v0)
2968     + im_data1(v0)
2969     + im_data2(v0)
2970     + im_data3(v0)
2971     + im_data4(v0)
2972     + im_data5(v0)
2973     + im_data6(v0)
2974     + im_data7(v0)
2975     +
2976     + im_data0(v1)
2977     + im_data1(v1)
2978     + im_data2(v1)
2979     + im_data3(v1)
2980     + im_data4(v1)
2981     + im_data5(v1)
2982     + im_data6(v1)
2983     + im_data7(v1)
2984     +
2985     + im_data0(v2)
2986     + im_data1(v2)
2987     + im_data2(v2)
2988     + im_data3(v2)
2989     + im_data4(v2)
2990     + im_data5(v2)
2991     + im_data6(v2)
2992     + im_data7(v2)
2993     +
2994     + im_data0(v3)
2995     + im_data1(v3)
2996     + im_data2(v3)
2997     + im_data3(v3)
2998     + im_data4(v3)
2999     + im_data5(v3)
3000     + im_data6(v3)
3001     + im_data7(v3)
3002 max 376 diff -urN linux-2.4.27-noloop/drivers/misc/aes-x86.S linux-2.4.27-AES/drivers/misc/aes-x86.S
3003     --- linux-2.4.27-noloop/drivers/misc/aes-x86.S Thu Jan 1 02:00:00 1970
3004     +++ linux-2.4.27-AES/drivers/misc/aes-x86.S Sun Aug 15 16:39:41 2004
3005 max 348 @@ -0,0 +1,922 @@
3006     +//
3007     +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
3008     +// All rights reserved.
3009     +//
3010     +// TERMS
3011     +//
3012     +// Redistribution and use in source and binary forms, with or without
3013     +// modification, are permitted subject to the following conditions:
3014     +//
3015     +// 1. Redistributions of source code must retain the above copyright
3016     +// notice, this list of conditions and the following disclaimer.
3017     +//
3018     +// 2. Redistributions in binary form must reproduce the above copyright
3019     +// notice, this list of conditions and the following disclaimer in the
3020     +// documentation and/or other materials provided with the distribution.
3021     +//
3022     +// 3. The copyright holder's name must not be used to endorse or promote
3023     +// any products derived from this software without his specific prior
3024     +// written permission.
3025     +//
3026     +// This software is provided 'as is' with no express or implied warranties
3027     +// of correctness or fitness for purpose.
3028     +
3029     +// Modified by Jari Ruusu, December 24 2001
3030     +// - Converted syntax to GNU CPP/assembler syntax
3031     +// - C programming interface converted back to "old" API
3032     +// - Minor portability cleanups and speed optimizations
3033     +
3034     +// Modified by Jari Ruusu, April 11 2002
3035     +// - Added above copyright and terms to resulting object code so that
3036     +// binary distributions can avoid legal trouble
3037     +
3038     +// An AES (Rijndael) implementation for x86 compatible processors. This
3039     +// version uses i386 instruction set but instruction scheduling is optimized
3040     +// for Pentium-2. This version only implements the standard AES block length
3041     +// (128 bits, 16 bytes). This code does not preserve the eax, ecx or edx
3042     +// registers or the arithmetic status flags. However, the ebx, esi, edi, and
3043     +// ebp registers are preserved across calls.
3044     +
3045     +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
3046     +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
3047     +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
3048     +
3049     +#if defined(USE_UNDERLINE)
3050     +# define aes_set_key _aes_set_key
3051     +# define aes_encrypt _aes_encrypt
3052     +# define aes_decrypt _aes_decrypt
3053     +#endif
3054 max 21 +#if !defined(ALIGN32BYTES)
3055     +# define ALIGN32BYTES 32
3056     +#endif
3057     +
3058 max 348 + .file "aes-x86.S"
3059 max 21 + .globl aes_set_key
3060     + .globl aes_encrypt
3061     + .globl aes_decrypt
3062     +
3063     + .text
3064     +copyright:
3065     + .ascii " \000"
3066     + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
3067     + .ascii "All rights reserved.\000"
3068     + .ascii " \000"
3069     + .ascii "TERMS\000"
3070     + .ascii " \000"
3071     + .ascii " Redistribution and use in source and binary forms, with or without\000"
3072     + .ascii " modification, are permitted subject to the following conditions:\000"
3073     + .ascii " \000"
3074     + .ascii " 1. Redistributions of source code must retain the above copyright\000"
3075     + .ascii " notice, this list of conditions and the following disclaimer.\000"
3076     + .ascii " \000"
3077     + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
3078     + .ascii " notice, this list of conditions and the following disclaimer in the\000"
3079     + .ascii " documentation and/or other materials provided with the distribution.\000"
3080     + .ascii " \000"
3081     + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
3082     + .ascii " any products derived from this software without his specific prior\000"
3083     + .ascii " written permission.\000"
3084     + .ascii " \000"
3085     + .ascii " This software is provided 'as is' with no express or implied warranties\000"
3086     + .ascii " of correctness or fitness for purpose.\000"
3087     + .ascii " \000"
3088     +
3089     +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
3090     +
3091     +// offsets to parameters with one register pushed onto stack
3092     +
3093     +#define ctx 8 // AES context structure
3094     +#define in_blk 12 // input byte array address parameter
3095     +#define out_blk 16 // output byte array address parameter
3096     +
3097     +// offsets in context structure
3098     +
3099     +#define nkey 0 // key length, size 4
3100     +#define nrnd 4 // number of rounds, size 4
3101     +#define ekey 8 // encryption key schedule base address, size 256
3102     +#define dkey 264 // decryption key schedule base address, size 256
3103     +
3104     +// This macro performs a forward encryption cycle. It is entered with
3105     +// the first previous round column values in %eax, %ebx, %esi and %edi and
3106     +// exits with the final values in the same registers.
3107     +
3108     +#define fwd_rnd(p1,p2) \
3109     + mov %ebx,(%esp) ;\
3110     + movzbl %al,%edx ;\
3111     + mov %eax,%ecx ;\
3112     + mov p2(%ebp),%eax ;\
3113     + mov %edi,4(%esp) ;\
3114     + mov p2+12(%ebp),%edi ;\
3115     + xor p1(,%edx,4),%eax ;\
3116     + movzbl %ch,%edx ;\
3117     + shr $16,%ecx ;\
3118     + mov p2+4(%ebp),%ebx ;\
3119     + xor p1+tlen(,%edx,4),%edi ;\
3120     + movzbl %cl,%edx ;\
3121     + movzbl %ch,%ecx ;\
3122     + xor p1+3*tlen(,%ecx,4),%ebx ;\
3123     + mov %esi,%ecx ;\
3124     + mov p1+2*tlen(,%edx,4),%esi ;\
3125     + movzbl %cl,%edx ;\
3126     + xor p1(,%edx,4),%esi ;\
3127     + movzbl %ch,%edx ;\
3128     + shr $16,%ecx ;\
3129     + xor p1+tlen(,%edx,4),%ebx ;\
3130     + movzbl %cl,%edx ;\
3131     + movzbl %ch,%ecx ;\
3132     + xor p1+2*tlen(,%edx,4),%eax ;\
3133     + mov (%esp),%edx ;\
3134     + xor p1+3*tlen(,%ecx,4),%edi ;\
3135     + movzbl %dl,%ecx ;\
3136     + xor p2+8(%ebp),%esi ;\
3137     + xor p1(,%ecx,4),%ebx ;\
3138     + movzbl %dh,%ecx ;\
3139     + shr $16,%edx ;\
3140     + xor p1+tlen(,%ecx,4),%eax ;\
3141     + movzbl %dl,%ecx ;\
3142     + movzbl %dh,%edx ;\
3143     + xor p1+2*tlen(,%ecx,4),%edi ;\
3144     + mov 4(%esp),%ecx ;\
3145     + xor p1+3*tlen(,%edx,4),%esi ;\
3146     + movzbl %cl,%edx ;\
3147     + xor p1(,%edx,4),%edi ;\
3148     + movzbl %ch,%edx ;\
3149     + shr $16,%ecx ;\
3150     + xor p1+tlen(,%edx,4),%esi ;\
3151     + movzbl %cl,%edx ;\
3152     + movzbl %ch,%ecx ;\
3153     + xor p1+2*tlen(,%edx,4),%ebx ;\
3154     + xor p1+3*tlen(,%ecx,4),%eax
3155     +
3156     +// This macro performs an inverse encryption cycle. It is entered with
3157     +// the first previous round column values in %eax, %ebx, %esi and %edi and
3158     +// exits with the final values in the same registers.
3159     +
3160     +#define inv_rnd(p1,p2) \
3161     + movzbl %al,%edx ;\
3162     + mov %ebx,(%esp) ;\
3163     + mov %eax,%ecx ;\
3164     + mov p2(%ebp),%eax ;\
3165     + mov %edi,4(%esp) ;\
3166     + mov p2+4(%ebp),%ebx ;\
3167     + xor p1(,%edx,4),%eax ;\
3168     + movzbl %ch,%edx ;\
3169     + shr $16,%ecx ;\
3170     + mov p2+12(%ebp),%edi ;\
3171     + xor p1+tlen(,%edx,4),%ebx ;\
3172     + movzbl %cl,%edx ;\
3173     + movzbl %ch,%ecx ;\
3174     + xor p1+3*tlen(,%ecx,4),%edi ;\
3175     + mov %esi,%ecx ;\
3176     + mov p1+2*tlen(,%edx,4),%esi ;\
3177     + movzbl %cl,%edx ;\
3178     + xor p1(,%edx,4),%esi ;\
3179     + movzbl %ch,%edx ;\
3180     + shr $16,%ecx ;\
3181     + xor p1+tlen(,%edx,4),%edi ;\
3182     + movzbl %cl,%edx ;\
3183     + movzbl %ch,%ecx ;\
3184     + xor p1+2*tlen(,%edx,4),%eax ;\
3185     + mov (%esp),%edx ;\
3186     + xor p1+3*tlen(,%ecx,4),%ebx ;\
3187     + movzbl %dl,%ecx ;\
3188     + xor p2+8(%ebp),%esi ;\
3189     + xor p1(,%ecx,4),%ebx ;\
3190     + movzbl %dh,%ecx ;\
3191     + shr $16,%edx ;\
3192     + xor p1+tlen(,%ecx,4),%esi ;\
3193     + movzbl %dl,%ecx ;\
3194     + movzbl %dh,%edx ;\
3195     + xor p1+2*tlen(,%ecx,4),%edi ;\
3196     + mov 4(%esp),%ecx ;\
3197     + xor p1+3*tlen(,%edx,4),%eax ;\
3198     + movzbl %cl,%edx ;\
3199     + xor p1(,%edx,4),%edi ;\
3200     + movzbl %ch,%edx ;\
3201     + shr $16,%ecx ;\
3202     + xor p1+tlen(,%edx,4),%eax ;\
3203     + movzbl %cl,%edx ;\
3204     + movzbl %ch,%ecx ;\
3205     + xor p1+2*tlen(,%edx,4),%ebx ;\
3206     + xor p1+3*tlen(,%ecx,4),%esi
3207     +
3208     +// AES (Rijndael) Encryption Subroutine: encrypts the 16-byte block at in_blk into out_blk using the ekey schedule of the context
3209     +
3210     + .text
3211     + .align ALIGN32BYTES
3212     +aes_encrypt:
3213     + push %ebp // save ebp; the ctx/in_blk offsets used next assume exactly this one push
3214     + mov ctx(%esp),%ebp // pointer to context
3215     + mov in_blk(%esp),%ecx // ecx -> input block
3216     + push %ebx // save ebx, esi, edi (restored before ret)
3217     + push %esi
3218     + push %edi
3219     + mov nrnd(%ebp),%edx // number of rounds
3220     + lea ekey+16(%ebp),%ebp // key pointer: second round key, the first one is at -16(%ebp)
3221     +
3222     +// input four columns and xor in first round key
3223     +
3224     + mov (%ecx),%eax
3225     + mov 4(%ecx),%ebx
3226     + mov 8(%ecx),%esi
3227     + mov 12(%ecx),%edi
3228     + xor -16(%ebp),%eax
3229     + xor -12(%ebp),%ebx
3230     + xor -8(%ebp),%esi
3231     + xor -4(%ebp),%edi
3232     +
3233     + sub $8,%esp // space for register saves on stack: fwd_rnd spills to (%esp) and 4(%esp)
3234     +
3235     + sub $10,%edx // edx = nrnd - 10, zero means 10 rounds
3236     + je aes_15
3237     + add $32,%ebp // two more rounds: move key pointer forward by two 16-byte round keys
3238     + sub $2,%edx // edx = nrnd - 12, zero means 12 rounds
3239     + je aes_13
3240     + add $32,%ebp // any other round count falls through to the 14-round entry
3241     +
3242     + fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
3243     + fwd_rnd(aes_ft_tab,-48)
3244     +aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
3245     + fwd_rnd(aes_ft_tab,-16)
3246     +aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
3247     + fwd_rnd(aes_ft_tab,16)
3248     + fwd_rnd(aes_ft_tab,32)
3249     + fwd_rnd(aes_ft_tab,48)
3250     + fwd_rnd(aes_ft_tab,64)
3251     + fwd_rnd(aes_ft_tab,80)
3252     + fwd_rnd(aes_ft_tab,96)
3253     + fwd_rnd(aes_ft_tab,112)
3254     + fwd_rnd(aes_ft_tab,128)
3255     + fwd_rnd(aes_fl_tab,144) // last round uses a different table
3256     +
3257     +// move final values to the output array.
3258     +
3259     + mov out_blk+20(%esp),%ebp // ebp -> output block; +20 = 8-byte scratch area + three extra pushes
3260     + add $8,%esp // release the scratch area
3261     + mov %eax,(%ebp)
3262     + mov %ebx,4(%ebp)
3263     + mov %esi,8(%ebp)
3264     + mov %edi,12(%ebp)
3265     + pop %edi // restore saved registers in reverse push order
3266     + pop %esi
3267     + pop %ebx
3268     + pop %ebp
3269     + ret
3270     +
3271     +
3272     +// AES (Rijndael) Decryption Subroutine: decrypts the 16-byte block at in_blk into out_blk using the dkey schedule of the context
3273     +
3274     + .align ALIGN32BYTES
3275     +aes_decrypt:
3276     + push %ebp // save ebp; the ctx/in_blk offsets used next assume exactly this one push
3277     + mov ctx(%esp),%ebp // pointer to context
3278     + mov in_blk(%esp),%ecx // ecx -> input block
3279     + push %ebx // save ebx, esi, edi (restored before ret)
3280     + push %esi
3281     + push %edi
3282     + mov nrnd(%ebp),%edx // number of rounds
3283     + lea dkey+16(%ebp),%ebp // key pointer: second round key, the first one is at -16(%ebp)
3284     +
3285     +// input four columns and xor in first round key
3286     +
3287     + mov (%ecx),%eax
3288     + mov 4(%ecx),%ebx
3289     + mov 8(%ecx),%esi
3290     + mov 12(%ecx),%edi
3291     + xor -16(%ebp),%eax
3292     + xor -12(%ebp),%ebx
3293     + xor -8(%ebp),%esi
3294     + xor -4(%ebp),%edi
3295     +
3296     + sub $8,%esp // space for register saves on stack: inv_rnd spills to (%esp) and 4(%esp)
3297     +
3298     + sub $10,%edx // edx = nrnd - 10, zero means 10 rounds
3299     + je aes_25
3300     + add $32,%ebp // two more rounds: move key pointer forward by two 16-byte round keys
3301     + sub $2,%edx // edx = nrnd - 12, zero means 12 rounds
3302     + je aes_23
3303     + add $32,%ebp // any other round count falls through to the 14-round entry
3304     +
3305     + inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
3306     + inv_rnd(aes_it_tab,-48)
3307     +aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
3308     + inv_rnd(aes_it_tab,-16)
3309     +aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
3310     + inv_rnd(aes_it_tab,16)
3311     + inv_rnd(aes_it_tab,32)
3312     + inv_rnd(aes_it_tab,48)
3313     + inv_rnd(aes_it_tab,64)
3314     + inv_rnd(aes_it_tab,80)
3315     + inv_rnd(aes_it_tab,96)
3316     + inv_rnd(aes_it_tab,112)
3317     + inv_rnd(aes_it_tab,128)
3318     + inv_rnd(aes_il_tab,144) // last round uses a different table
3319     +
3320     +// move final values to the output array.
3321     +
3322     + mov out_blk+20(%esp),%ebp // ebp -> output block; +20 = 8-byte scratch area + three extra pushes
3323     + add $8,%esp // release the scratch area
3324     + mov %eax,(%ebp)
3325     + mov %ebx,4(%ebp)
3326     + mov %esi,8(%ebp)
3327     + mov %edi,12(%ebp)
3328     + pop %edi // restore saved registers in reverse push order
3329     + pop %esi
3330     + pop %ebx
3331     + pop %ebp
3332     + ret
3333     +
3334     +// AES (Rijndael) Key Schedule Subroutine
3335     +
3336     +// input/output parameters
3337     +
3338     +#define aes_cx 12 // AES context
3339     +#define in_key 16 // key input array address
3340     +#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
3341     +#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only
3342     +
3343     +// offsets for locals
3344     +
3345     +#define cnt -4
3346     +#define slen 8
3347     +
3348     +// This macro performs a column mixing operation on an input 32-bit
3349     +// word to give a 32-bit result. It uses each of the 4 bytes in the
3350     +// input column to index 4 different tables of 256 32-bit words
3351     +// that are xored together to form the output value.
3352     +
3353     +#define mix_col(p1) \
3354     + movzbl %bl,%ecx ;\
3355     + mov p1(,%ecx,4),%eax ;\
3356     + movzbl %bh,%ecx ;\
3357     + ror $16,%ebx ;\
3358     + xor p1+tlen(,%ecx,4),%eax ;\
3359     + movzbl %bl,%ecx ;\
3360     + xor p1+2*tlen(,%ecx,4),%eax ;\
3361     + movzbl %bh,%ecx ;\
3362     + xor p1+3*tlen(,%ecx,4),%eax
3363     +
3364     +// Key Schedule Macros
3365     +
3366     +#define ksc4(p1) \
3367     + rol $24,%ebx ;\
3368     + mix_col(aes_fl_tab) ;\
3369     + ror $8,%ebx ;\
3370     + xor 4*p1+aes_rcon_tab,%eax ;\
3371     + xor %eax,%esi ;\
3372     + xor %esi,%ebp ;\
3373     + mov %esi,16*p1(%edi) ;\
3374     + mov %ebp,16*p1+4(%edi) ;\
3375     + xor %ebp,%edx ;\
3376     + xor %edx,%ebx ;\
3377     + mov %edx,16*p1+8(%edi) ;\
3378     + mov %ebx,16*p1+12(%edi)
3379     +
3380     +#define ksc6(p1) \
3381     + rol $24,%ebx ;\
3382     + mix_col(aes_fl_tab) ;\
3383     + ror $8,%ebx ;\
3384     + xor 4*p1+aes_rcon_tab,%eax ;\
3385     + xor 24*p1-24(%edi),%eax ;\
3386     + mov %eax,24*p1(%edi) ;\
3387     + xor 24*p1-20(%edi),%eax ;\
3388     + mov %eax,24*p1+4(%edi) ;\
3389     + xor %eax,%esi ;\
3390     + xor %esi,%ebp ;\
3391     + mov %esi,24*p1+8(%edi) ;\
3392     + mov %ebp,24*p1+12(%edi) ;\
3393     + xor %ebp,%edx ;\
3394     + xor %edx,%ebx ;\
3395     + mov %edx,24*p1+16(%edi) ;\
3396     + mov %ebx,24*p1+20(%edi)
3397     +
3398     +#define ksc8(p1) \
3399     + rol $24,%ebx ;\
3400     + mix_col(aes_fl_tab) ;\
3401     + ror $8,%ebx ;\
3402     + xor 4*p1+aes_rcon_tab,%eax ;\
3403     + xor 32*p1-32(%edi),%eax ;\
3404     + mov %eax,32*p1(%edi) ;\
3405     + xor 32*p1-28(%edi),%eax ;\
3406     + mov %eax,32*p1+4(%edi) ;\
3407     + xor 32*p1-24(%edi),%eax ;\
3408     + mov %eax,32*p1+8(%edi) ;\
3409     + xor 32*p1-20(%edi),%eax ;\
3410     + mov %eax,32*p1+12(%edi) ;\
3411     + push %ebx ;\
3412     + mov %eax,%ebx ;\
3413     + mix_col(aes_fl_tab) ;\
3414     + pop %ebx ;\
3415     + xor %eax,%esi ;\
3416     + xor %esi,%ebp ;\
3417     + mov %esi,32*p1+16(%edi) ;\
3418     + mov %ebp,32*p1+20(%edi) ;\
3419     + xor %ebp,%edx ;\
3420     + xor %edx,%ebx ;\
3421     + mov %edx,32*p1+24(%edi) ;\
3422     + mov %ebx,32*p1+28(%edi)
3423     +
3424     + .align ALIGN32BYTES
3425     +aes_set_key: // builds the encryption key schedule in the AES context and, when ed_flg is 0, the decryption schedule too
3426     + pushfl // flags are saved because cld below changes the direction flag; popfl restores them
3427     + push %ebp
3428     + mov %esp,%ebp
3429     + sub $slen,%esp // reserve slen bytes of stack (cnt(%ebp) below is presumably inside this area)
3430     + push %ebx
3431     + push %esi
3432     + push %edi
3433     +
3434     + mov aes_cx(%ebp),%edx // edx -> AES context
3435     +
3436     + mov key_ln(%ebp),%ecx // key length
3437     + cmpl $128,%ecx // values >= 128 are divided by 8 (presumably a length given in bits)
3438     + jb aes_30
3439     + shr $3,%ecx
3440     +aes_30: cmpl $32,%ecx // 32 and 24 are accepted as is; every other value falls back to 16
3441     + je aes_32
3442     + cmpl $24,%ecx
3443     + je aes_32
3444     + mov $16,%ecx
3445     +aes_32: shr $2,%ecx // byte count -> count of 32-bit words (4, 6 or 8)
3446     + mov %ecx,nkey(%edx)
3447     +
3448     + lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length
3449     + mov %eax,nrnd(%edx)
3450     +
3451     + mov in_key(%ebp),%esi // key input array
3452     + lea ekey(%edx),%edi // key position in AES context
3453     + cld
3454     + push %ebp // %ebp is used as a data register by the ksc macros until the pop at aes_37
3455     + mov %ecx,%eax // save key length in eax
3456     + rep ; movsl // words in the key schedule
3457     + mov -4(%esi),%ebx // put some values in registers
3458     + mov -8(%esi),%edx // to allow faster code
3459     + mov -12(%esi),%ebp // (these are the last four words of the input key; %edi now points just past the copy in ekey)
3460     + mov -16(%esi),%esi
3461     +
3462     + cmpl $4,%eax // jump on key size
3463     + je aes_36
3464     + cmpl $6,%eax
3465     + je aes_35
3466     + // 8-word key: 7 steps of 8 words; with the 8 copied words this writes 64 words into ekey - NOTE(review): confirm ekey has room for 64 words
3467     + ksc8(0)
3468     + ksc8(1)
3469     + ksc8(2)
3470     + ksc8(3)
3471     + ksc8(4)
3472     + ksc8(5)
3473     + ksc8(6)
3474     + jmp aes_37
3475     +aes_35: ksc6(0)
3476     + ksc6(1)
3477     + ksc6(2)
3478     + ksc6(3)
3479     + ksc6(4)
3480     + ksc6(5)
3481     + ksc6(6)
3482     + ksc6(7)
3483     + jmp aes_37
3484     +aes_36: ksc4(0)
3485     + ksc4(1)
3486     + ksc4(2)
3487     + ksc4(3)
3488     + ksc4(4)
3489     + ksc4(5)
3490     + ksc4(6)
3491     + ksc4(7)
3492     + ksc4(8)
3493     + ksc4(9)
3494     +aes_37: pop %ebp // frame pointer is valid again from here on
3495     + mov aes_cx(%ebp),%edx // edx -> AES context
3496     + cmpl $0,ed_flg(%ebp) // nonzero ed_flg: skip building the decryption schedule
3497     + jne aes_39
3498     +
3499     +// compile decryption key schedule from encryption schedule - reverse
3500     +// order and do mix_column operation on round keys except first and last
3501     +
3502     + mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
3503     + shl $2,%eax
3504     + lea dkey(%edx,%eax,4),%edi // edi = dkey + 16*nrnd bytes, i.e. the last 4-word slot of dkey
3505     + lea ekey(%edx),%esi // kf = cx->e_key
3506     +
3507     + movsl // copy first round key (unmodified)
3508     + movsl
3509     + movsl
3510     + movsl
3511     + sub $32,%edi // the copy advanced edi by 16; step back to the preceding 4-word slot
3512     + movl $1,cnt(%ebp) // loop covers round keys 1 .. nrnd-1
3513     +aes_38: // do mix column on each column of
3514     + lodsl // each round key
3515     + mov %eax,%ebx // mix_col (defined outside this view) is used as input %ebx, result %eax
3516     + mix_col(aes_im_tab)
3517     + stosl
3518     + lodsl
3519     + mov %eax,%ebx
3520     + mix_col(aes_im_tab)
3521     + stosl
3522     + lodsl
3523     + mov %eax,%ebx
3524     + mix_col(aes_im_tab)
3525     + stosl
3526     + lodsl
3527     + mov %eax,%ebx
3528     + mix_col(aes_im_tab)
3529     + stosl
3530     + sub $32,%edi // net effect per iteration: source moves forward 16 bytes, destination moves back 16 bytes
3531     +
3532     + incl cnt(%ebp)
3533     + mov cnt(%ebp),%eax
3534     + cmp nrnd(%edx),%eax
3535     + jb aes_38
3536     +
3537     + movsl // copy last round key (unmodified)
3538     + movsl
3539     + movsl
3540     + movsl
3541     +aes_39: pop %edi
3542     + pop %esi
3543     + pop %ebx
3544     + mov %ebp,%esp
3545     + pop %ebp
3546     + popfl
3547     + ret
3548     +
3549     +
3550     +// finite field multiplies by {02}, {04} and {08}: shift left, then xor 0x11b (scaled by 1, 2 or 4) once for each bit shifted out of the low byte
3551     +// NOTE: macro arguments are not parenthesised, so pass plain constants only (the im_data macros below pass hex byte literals)
3552     +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
3553     +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
3554     +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
3555     +
3556     +// finite field multiplies required in table generation ({03}, {09}, {0b}, {0d}, {0e}, built by xoring f2/f4/f8 and x)
3557     +
3558     +#define f3(x) (f2(x) ^ x)
3559     +#define f9(x) (f8(x) ^ x)
3560     +#define fb(x) (f8(x) ^ f2(x) ^ x)
3561     +#define fd(x) (f8(x) ^ f4(x) ^ x)
3562     +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
3563     +
3564     +// These defines generate the forward table entries: four byte products packed into one 32-bit word, u0..u3 differ only in byte order
3565     +
3566     +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
3567     +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
3568     +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
3569     +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
3570     +
3571     +// These defines generate the inverse table entries: same packing as above, using the fb/fd/f9/fe products
3572     +
3573     +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
3574     +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
3575     +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
3576     +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
3577     +
3578     +// These defines generate entries for the last round tables: x placed unchanged in byte 0, 1, 2 or 3 of the word
3579     +
3580     +#define w0(x) (x)
3581     +#define w1(x) (x << 8)
3582     +#define w2(x) (x << 16)
3583     +#define w3(x) (x << 24)
3584     +
3585     +// macros to generate inverse mix column tables (needed for the key schedule); each im_dataN(p1) emits 32 .long words, p1(v) for 32 consecutive byte values v
3586     +
3587     +#define im_data0(p1) /* p1(0x00) .. p1(0x1f) */ \
3588     + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
3589     + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
3590     + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
3591     + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
3592     +#define im_data1(p1) /* p1(0x20) .. p1(0x3f) */ \
3593     + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
3594     + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
3595     + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
3596     + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
3597     +#define im_data2(p1) /* p1(0x40) .. p1(0x5f) */ \
3598     + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
3599     + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
3600     + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
3601     + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
3602     +#define im_data3(p1) /* p1(0x60) .. p1(0x7f) */ \
3603     + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
3604     + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
3605     + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
3606     + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
3607     +#define im_data4(p1) \
3608     + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
3609     + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
3610     + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
3611     +