/[pkg-loop-aes]/upstream/loop-aes/current/kernel-2.4.36.diff
ViewVC logotype

Contents of /upstream/loop-aes/current/kernel-2.4.36.diff

Parent Directory Parent Directory | Revision Log Revision Log


Revision 21 - (hide annotations) (download)
Sat Nov 29 14:52:18 2003 UTC (9 years, 5 months ago) by max
Original Path: import/loop-aes/vendor/current/kernel-2.4.23.diff
File size: 154560 byte(s)
Load . into loop-aes/vendor/current.
1 max 21 diff -urN linux-2.4.23/Documentation/Configure.help linux-2.4.23-AES/Documentation/Configure.help
2     --- linux-2.4.23/Documentation/Configure.help Sat Nov 29 10:54:00 2003
3     +++ linux-2.4.23-AES/Documentation/Configure.help Sat Nov 29 11:08:29 2003
4     @@ -593,6 +593,11 @@
5    
6     If unsure, say N.
7    
8     +AES encrypted loop device support
9     +CONFIG_BLK_DEV_LOOP_AES
10     + If you want to use the AES encryption algorithm to encrypt loop devices,
11     + say Y here. If you don't know what to do here, say N.
12     +
13     ATA/IDE/MFM/RLL support
14     CONFIG_IDE
15     If you say Y here, your kernel will be able to manage low cost mass
16     diff -urN linux-2.4.23/drivers/block/Config.in linux-2.4.23-AES/drivers/block/Config.in
17     --- linux-2.4.23/drivers/block/Config.in Sat Nov 29 10:54:08 2003
18     +++ linux-2.4.23-AES/drivers/block/Config.in Sat Nov 29 11:08:29 2003
19     @@ -41,6 +41,9 @@
20     dep_tristate 'Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)' CONFIG_BLK_DEV_UMEM $CONFIG_PCI $CONFIG_EXPERIMENTAL
21    
22     tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
23     +if [ "$CONFIG_BLK_DEV_LOOP" != "n" ]; then
24     + bool ' AES encrypted loop device support' CONFIG_BLK_DEV_LOOP_AES
25     +fi
26     dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
27    
28     tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
29     diff -urN linux-2.4.23/drivers/block/loop.c linux-2.4.23-AES/drivers/block/loop.c
30     --- linux-2.4.23/drivers/block/loop.c Mon Aug 25 20:48:03 2003
31     +++ linux-2.4.23-AES/drivers/block/loop.c Sat Nov 29 11:08:29 2003
32     @@ -39,19 +39,29 @@
33     * Support up to 256 loop devices
34     * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
35     *
36     + * AES transfer added. IV is now passed as (512 byte) sector number.
37     + * Jari Ruusu, May 18 2001
38     + *
39     + * External encryption module locking bug fixed.
40     + * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
41     + *
42     + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
43     + * Jari Ruusu, September 2 2001
44     + *
45     + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
46     + * Jari Ruusu, May 23 2002
47     + *
48     + * Backported struct loop_info64 ioctls from 2.6 kernels (64 bit offsets and
49     + * 64 bit sizelimits). Added support for removing offset from IV computations.
50     + * Jari Ruusu, September 21 2003
51     + *
52     + * Added support for MD5 IV computation and multi-key operation.
53     + * Jari Ruusu, October 8 2003
54     + *
55     + *
56     * Still To Fix:
57     * - Advisory locking is ignored here.
58     * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
59     - *
60     - * WARNING/FIXME:
61     - * - The block number as IV passing to low level transfer functions is broken:
62     - * it passes the underlying device's block number instead of the
63     - * offset. This makes it change for a given block when the file is
64     - * moved/restored/copied and also doesn't work over NFS.
65     - * AV, Feb 12, 2000: we pass the logical block number now. It fixes the
66     - * problem above. Encryption modules that used to rely on the old scheme
67     - * should just call ->i_mapping->bmap() to calculate the physical block
68     - * number.
69     */
70    
71     #include <linux/config.h>
72     @@ -73,8 +83,11 @@
73     #include <linux/slab.h>
74    
75     #include <asm/uaccess.h>
76     +#include <asm/byteorder.h>
77    
78     #include <linux/loop.h>
79     +#include "../misc/aes.h"
80     +#include "../misc/md5.h"
81    
82     #define MAJOR_NR LOOP_MAJOR
83    
84     @@ -82,6 +95,7 @@
85     static struct loop_device *loop_dev;
86     static int *loop_sizes;
87     static int *loop_blksizes;
88     +static int *loop_hardsizes;
89     static devfs_handle_t devfs_handle; /* For the directory */
90    
91     /*
92     @@ -90,13 +104,11 @@
93     static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
94     char *loop_buf, int size, int real_block)
95     {
96     - if (raw_buf != loop_buf) {
97     - if (cmd == READ)
98     - memcpy(loop_buf, raw_buf, size);
99     - else
100     - memcpy(raw_buf, loop_buf, size);
101     - }
102     + /* this code is only called from file backed loop */
103     + /* and that code expects this function to be no-op */
104    
105     + if (current->need_resched)
106     + {set_current_state(TASK_RUNNING);schedule();}
107     return 0;
108     }
109    
110     @@ -118,12 +130,13 @@
111     keysize = lo->lo_encrypt_key_size;
112     for (i = 0; i < size; i++)
113     *out++ = *in++ ^ key[(i & 511) % keysize];
114     + if (current->need_resched)
115     + {set_current_state(TASK_RUNNING);schedule();}
116     return 0;
117     }
118    
119     static int none_status(struct loop_device *lo, struct loop_info *info)
120     {
121     - lo->lo_flags |= LO_FLAGS_BH_REMAP;
122     return 0;
123     }
124    
125     @@ -146,324 +159,694 @@
126     init: xor_status
127     };
128    
129     +#if CONFIG_BLK_DEV_LOOP_AES
130     +typedef struct { /* per-loop-device AES key state; keySetup_aes() stores a pointer to it in lo->key_data */
131     + aes_context *keyPtr[64]; /* key contexts; transfer_aes() selects one with (sector number & keyMask) */
132     + unsigned keyMask; /* 0 after allocMultiKey() (only keyPtr[0] is used); multiKeySetup() sets it to 0x3F */
133     +} AESmultiKey;
134     +
135     +static AESmultiKey *allocMultiKey(void) /* allocate a zeroed AESmultiKey plus its first chunk of key contexts; returns 0 if either allocation fails */
136     +{
137     + AESmultiKey *m;
138     + aes_context *a;
139     + int x, n;
140     +
141     + m = (AESmultiKey *) kmalloc(sizeof(AESmultiKey), GFP_KERNEL);
142     + if(!m) return 0;
143     + memset(m, 0, sizeof(AESmultiKey)); /* every keyPtr[] slot NULL, keyMask 0 */
144     +
145     + n = PAGE_SIZE / sizeof(aes_context); /* contexts per chunk: as many as fit in PAGE_SIZE bytes... */
146     + if(!n) n = 1; /* ...but never fewer than one */
147     +
148     + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
149     + if(!a) {
150     + kfree(m); /* do not leak the table when the chunk allocation fails */
151     + return 0;
152     + }
153     +
154     + x = 0;
155     + while((x < 64) && n) { /* point the first min(n, 64) slots at consecutive contexts of the chunk */
156     + m->keyPtr[x] = a;
157     + a++;
158     + x++;
159     + n--;
160     + }
161     + return m; /* slots beyond the first chunk stay NULL; multiKeySetup() allocates them on demand */
162     +}
163     +
164     +static void clearAndFreeMultiKey(AESmultiKey *m) /* zero and free every context chunk, then free the table; m must be non-NULL (it is dereferenced) */
165     +{
166     + aes_context *a;
167     + int x, n;
168     +
169     + n = PAGE_SIZE / sizeof(aes_context); /* same chunk size computation as allocMultiKey() and multiKeySetup() */
170     + if(!n) n = 1;
171     +
172     + x = 0;
173     + while(x < 64) {
174     + a = m->keyPtr[x]; /* slot x holds the start address of a chunk */
175     + if(!a) break; /* slots are filled in ascending order, so the first NULL ends the walk */
176     + memset(a, 0, sizeof(aes_context) * n); /* wipe key material before the memory is released */
177     + kfree(a);
178     + x += n; /* jump to the first slot of the next chunk */
179     + }
180     +
181     + kfree(m);
182     +}
183     +
184     +static int multiKeySetup(struct loop_device *lo, unsigned char *k) /* load 64 keys of 32 bytes each from user pointer k and switch the device to multi-key mode */
185     +{ /* returns 0, or -EPERM / -ENXIO / -ENOMEM / -EFAULT; on a mid-loop failure earlier slots already hold new keys and keyMask is left unchanged */
186     + AESmultiKey *m;
187     + aes_context *a;
188     + int x, y, n;
189     + unsigned char b[32]; /* kernel-side copy of one user key; wiped on every exit path that may have filled it */
190     +
191     + if(lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN)) /* only the key owner or a CAP_SYS_ADMIN holder may change keys */
192     + return -EPERM;
193     +
194     + m = (AESmultiKey *)lo->key_data;
195     + if(!m) return -ENXIO; /* no key table has been set up for this device */
196     +
197     + n = PAGE_SIZE / sizeof(aes_context); /* contexts per chunk, same computation as allocMultiKey() */
198     + if(!n) n = 1;
199     +
200     + x = 0;
201     + while(x < 64) {
202     + if(!m->keyPtr[x]) { /* slot not backed yet: allocate another chunk of n contexts */
203     + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
204     + if(!a) { memset(&b[0], 0, 32); return -ENOMEM; } /* b may still hold the previous key: clear it before bailing out */
205     + y = x;
206     + while((y < (x + n)) && (y < 64)) { /* hand out the chunk to slots x .. x+n-1, capped at 64 */
207     + m->keyPtr[y] = a;
208     + a++;
209     + y++;
210     + }
211     + }
212     + if(copy_from_user(&b[0], k, 32)) { memset(&b[0], 0, 32); return -EFAULT; } /* clear partial/previous key bytes before bailing out */
213     + aes_set_key(m->keyPtr[x], &b[0], lo->lo_encrypt_key_size, 0); /* key length is the one configured on the device */
214     + k += 32; /* user buffer holds the keys back to back, 32 bytes apart */
215     + x++;
216     + }
217     + m->keyMask = 0x3F; /* range 0...63 */
218     + lo->lo_flags |= 0x100000; /* multi-key (info exported to user space) */
219     + memset(&b[0], 0, 32); /* do not leave key material on the stack */
220     + return 0;
221     +}
222     +
223     +void loop_compute_sector_iv(int devSect, u_int32_t *ivout) /* fill the four 32-bit words at ivout with an IV built from the sector number */
224     +{
225     + ivout[0] = cpu_to_le32(devSect); /* word 0: sector number stored in little-endian byte order */
226     + ivout[3] = ivout[2] = ivout[1] = 0; /* words 1..3: zero */
227     +}
228     +
229     +void loop_compute_md5_iv(int devSect, u_int32_t *ivout, u_int32_t *data) /* compute a 4-word IV into ivout from 124 words (496 bytes) at data plus the sector number */
230     +{
231     + int x, y, e;
232     + u_int32_t buf[16]; /* one 16-word input block for md5_transform_CPUbyteorder() */
233     +
234     + ivout[0] = 0x67452301; /* the four MD5 initial chaining values from RFC 1321 */
235     + ivout[1] = 0xefcdab89;
236     + ivout[2] = 0x98badcfe;
237     + ivout[3] = 0x10325476;
238     +
239     + y = 7; /* eight passes in total (y = 7 ... 0) */
240     + e = 16; /* number of words copied from data in the current pass */
241     + do {
242     + x = 0;
243     + if (!y)
244     + e = 12; /* last pass: only 12 data words; buf[12..15] are filled in below */
245     + do {
246     + buf[x ] = cpu_to_le32(data[0]); /* copy four words at a time, converting between little-endian and CPU byte order */
247     + buf[x + 1] = cpu_to_le32(data[1]);
248     + buf[x + 2] = cpu_to_le32(data[2]);
249     + buf[x + 3] = cpu_to_le32(data[3]);
250     + x += 4;
251     + data += 4;
252     + } while (x < e);
253     + if (!y) {
254     + /* md5_transform_CPUbyteorder wants data in CPU byte order */
255     + /* devSect is already in CPU byte order -- no need to convert */
256     + /* 32 bits of sector number + 24 zero bits */
257     + buf[12] = devSect;
258     + buf[13] = 0x80000000;
259     + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
260     + buf[14] = 4024;
261     + buf[15] = 0;
262     + }
263     + md5_transform_CPUbyteorder(&ivout[0], &buf[0]); /* fold this block into the running state held in ivout */
264     + } while (--y >= 0);
265     +
266     +#if defined(__BIG_ENDIAN) /* on big-endian builds convert the four result words with cpu_to_le32 */
267     + ivout[0] = cpu_to_le32(ivout[0]);
268     + ivout[1] = cpu_to_le32(ivout[1]);
269     + ivout[2] = cpu_to_le32(ivout[2]);
270     + ivout[3] = cpu_to_le32(ivout[3]);
271     +#endif
272     +}
273     +
274     +static int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf, /* AES-CBC transfer in 512-byte sectors: cmd == READ decrypts raw_buf into loop_buf, any other cmd encrypts loop_buf into raw_buf */
275     + char *loop_buf, int size, int devSect) /* devSect: number of the first sector, incremented per sector; returns 0, or -EINVAL for a bad size */
276     +{
277     + aes_context *a;
278     + AESmultiKey *m;
279     + int x;
280     + unsigned y;
281     + u_int32_t iv[8]; /* two 16-byte chaining slots: iv[0..3] and iv[4..7] */
282     +
283     + if(!size || (size & 511)) { /* size must be a non-zero multiple of 512 */
284     + return -EINVAL;
285     + }
286     + m = (AESmultiKey *)lo->key_data; /* dereferenced without a NULL check */
287     + y = m->keyMask; /* non-zero selects multi-key mode with MD5-derived IVs */
288     + if(cmd == READ) {
289     + while(size) {
290     + a = m->keyPtr[((unsigned)devSect) & y]; /* key for this sector */
291     + if(y) {
292     + memcpy(&iv[0], raw_buf, 16); /* ciphertext block 0 is the chaining value for block 1 */
293     + raw_buf += 16; /* start at block 1; block 0 is decrypted last (x == 0 pass below) */
294     + loop_buf += 16;
295     + } else {
296     + loop_compute_sector_iv(devSect, &iv[0]); /* single-key mode: IV comes from the sector number */
297     + }
298     + x = 15; /* 16 passes, two 16-byte blocks per pass */
299     + do {
300     + memcpy(&iv[4], raw_buf, 16); /* save this ciphertext block before decrypting; it chains into the next block */
301     + aes_decrypt(a, raw_buf, loop_buf);
302     + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[0]; /* CBC: XOR the decrypted block with iv[0..3] */
303     + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[1];
304     + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[2];
305     + *((u_int32_t *)(&loop_buf[12])) ^= iv[3];
306     + if(y && !x) { /* multi-key, last pass: blocks 1..31 of the sector are now plaintext */
307     + raw_buf -= 496; /* rewind both pointers to block 0 of this sector */
308     + loop_buf -= 496;
309     + loop_compute_md5_iv(devSect, &iv[4], (u_int32_t *)(&loop_buf[16])); /* block 0's IV is computed from plaintext blocks 1..31 and the sector number */
310     + } else {
311     + raw_buf += 16;
312     + loop_buf += 16;
313     + memcpy(&iv[0], raw_buf, 16); /* save ciphertext for the first block of the next pass */
314     + }
315     + aes_decrypt(a, raw_buf, loop_buf);
316     + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[4]; /* CBC: XOR the decrypted block with iv[4..7] */
317     + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[5];
318     + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[6];
319     + *((u_int32_t *)(&loop_buf[12])) ^= iv[7];
320     + if(y && !x) {
321     + raw_buf += 512; /* block 0 done: step from the sector start to the next sector */
322     + loop_buf += 512;
323     + } else {
324     + raw_buf += 16;
325     + loop_buf += 16;
326     + }
327     + } while(--x >= 0);
328     + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();} /* yield between sectors when a reschedule is pending */
329     + size -= 512;
330     + devSect++;
331     + }
332     + } else {
333     + while(size) {
334     + a = m->keyPtr[((unsigned)devSect) & y]; /* key for this sector */
335     + if(y) {
336     + loop_compute_md5_iv(devSect, &iv[0], (u_int32_t *)(&loop_buf[16])); /* multi-key mode: IV from plaintext bytes 16..511 and the sector number */
337     + } else {
338     + loop_compute_sector_iv(devSect, &iv[0]); /* single-key mode: IV comes from the sector number */
339     + }
340     + x = 15; /* 16 passes, two 16-byte blocks per pass */
341     + do {
342     + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0])); /* CBC: XOR the plaintext block into the chaining value */
343     + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
344     + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
345     + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
346     + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf); /* encrypt iv[0..3] into the output buffer */
347     + memcpy(&iv[0], raw_buf, 16); /* the ciphertext becomes the next chaining value */
348     + loop_buf += 16;
349     + raw_buf += 16;
350     + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0])); /* second block of the pass, same steps */
351     + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
352     + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
353     + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
354     + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
355     + memcpy(&iv[0], raw_buf, 16);
356     + loop_buf += 16;
357     + raw_buf += 16;
358     + } while(--x >= 0);
359     + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();} /* yield between sectors when a reschedule is pending */
360     + size -= 512;
361     + devSect++;
362     + }
363     + }
364     + return(0);
365     +}
366     +
367     +static int keySetup_aes(struct loop_device *lo, struct loop_info *info) /* 'init' hook of funcs_aes: allocate the key table and install the key from info as key 0 */
368     +{
369     + AESmultiKey *m;
370     +
371     + lo->key_data = m = allocMultiKey(); /* lo->key_data is set even on failure (to 0) */
372     + if(!m) return(-ENOMEM);
373     + aes_set_key(m->keyPtr[0], &info->lo_encrypt_key[0], info->lo_encrypt_key_size, 0); /* keyMask is 0 here, so transfer_aes() always uses keyPtr[0] */
374     + memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key)); /* wipe the caller's copy of the key */
375     + return(0);
376     +}
377     +
378     +static int keyClean_aes(struct loop_device *lo) /* 'release' hook of funcs_aes: wipe and free the key table if one exists; always returns 0 */
379     +{
380     + if(lo->key_data) {
381     + clearAndFreeMultiKey((AESmultiKey *)lo->key_data);
382     + lo->key_data = 0; /* makes a repeated call a no-op and lets multiKeySetup() return -ENXIO afterwards */
383     + }
384     + return(0);
385     +}
386     +
387     +static int handleIoctl_aes(struct loop_device *lo, int cmd, unsigned long arg) /* 'ioctl' hook of funcs_aes: dispatch AES-specific ioctl commands */
388     +{
389     + int err;
390     +
391     + switch (cmd) {
392     + case LOOP_MULTI_KEY_SETUP:
393     + err = multiKeySetup(lo, (unsigned char *)arg); /* arg is passed on as the user-space pointer to the key data */
394     + break;
395     + default:
396     + err = -EINVAL; /* every other command is rejected */
397     + }
398     + return err;
399     +}
400     +
401     +static struct loop_func_table funcs_aes = { /* hook table for the AES transfer; placed in xfer_funcs[LO_CRYPT_AES] below */
402     + number: 16, /* 16 == AES */
403     + transfer: transfer_aes, /* per-request encrypt/decrypt */
404     + init: keySetup_aes, /* allocate key table, install first key */
405     + release: keyClean_aes, /* wipe and free key table */
406     + ioctl: handleIoctl_aes /* LOOP_MULTI_KEY_SETUP handling */
407     +};
408     +
409     +EXPORT_SYMBOL(loop_compute_sector_iv);
410     +EXPORT_SYMBOL(loop_compute_md5_iv);
411     +#endif /* CONFIG_BLK_DEV_LOOP_AES */
412     +
413     /* xfer_funcs[0] is special - its release function is never called */
414     struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
415     &none_funcs,
416     - &xor_funcs
417     + &xor_funcs,
418     +#if CONFIG_BLK_DEV_LOOP_AES
419     + [LO_CRYPT_AES] = &funcs_aes,
420     +#endif
421     };
422    
423     -#define MAX_DISK_SIZE 1024*1024*1024
424     -
425     -static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev)
426     -{
427     - if (S_ISREG(lo_dentry->d_inode->i_mode))
428     - return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS;
429     - if (blk_size[MAJOR(lodev)])
430     - return blk_size[MAJOR(lodev)][MINOR(lodev)] -
431     - (lo->lo_offset >> BLOCK_SIZE_BITS);
432     - return MAX_DISK_SIZE;
433     +/*
434     + * First number of 'lo_prealloc' is the default number of RAM pages
435     + * to pre-allocate for each device backed loop. Every (configured)
436     + * device backed loop pre-allocates this amount of RAM pages unless
437     + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
438     + * overrides are defined in pairs: loop_index,number_of_pages
439     + */
440     +static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 };
441     +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */
442     +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */
443     +
444     +#ifdef MODULE
445     +MODULE_PARM(lo_prealloc, "1-9i");
446     +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
447     +#else
448     +static int __init lo_prealloc_setup(char *str) /* handler registered with __setup("lo_prealloc=") in non-module builds: parse integers from str into lo_prealloc[] */
449     +{
450     + int x, y, z;
451     +
452     + for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) { /* at most one value per lo_prealloc[] element */
453     + z = get_option(&str, &y); /* y receives the parsed value, z the parser's status */
454     + if (z > 0)
455     + lo_prealloc[x] = y; /* a value was parsed: override the default */
456     + if (z < 2)
457     + break; /* NOTE(review): presumably z == 2 means a comma follows, so anything less ends the list -- confirm against get_option() */
458     + }
459     + return 1;
460     }
461     +__setup("lo_prealloc=", lo_prealloc_setup);
462     +#endif
463    
464     -static void figure_loop_size(struct loop_device *lo)
465     -{
466     - loop_sizes[lo->lo_number] = compute_loop_size(lo,
467     - lo->lo_backing_file->f_dentry,
468     - lo->lo_device);
469     -}
470     +/*
471     + * This is loop helper thread nice value in range
472     + * from 0 (low priority) to -20 (high priority).
473     + */
474     +#if defined(DEF_NICE) && defined(DEF_COUNTER)
475     +static int lo_nice = -20; /* old scheduler default */
476     +#else
477     +static int lo_nice = -1; /* O(1) scheduler default */
478     +#endif
479     +
480     +#ifdef MODULE
481     +MODULE_PARM(lo_nice, "1i");
482     +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
483     +#else
484     +static int __init lo_nice_setup(char *str) /* handler registered with __setup("lo_nice=") in non-module builds: parse one integer from str into lo_nice */
485     +{
486     + int y;
487     +
488     + if (get_option(&str, &y) == 1) /* accept only the status value 1; NOTE(review): presumably "one integer, nothing following" -- confirm against get_option() */
489     + lo_nice = y; /* no range check is applied here */
490     + return 1;
491     +}
492     +__setup("lo_nice=", lo_nice_setup);
493     +#endif
494     +
495     +typedef struct { /* queue polling order used by loop_get_bh(): q0 is tried first, then q1, then q2 */
496     + struct buffer_head **q0; /* address of the first queue pointer to check */
497     + struct buffer_head **q1; /* address of the second queue pointer to check */
498     + struct buffer_head **q2; /* address of the third queue pointer to check */
499     + int x0; /* value loop_get_bh() stores in *list_nr when the buffer came from q0 */
500     + int x1; /* same, for q1 */
501     + int x2; /* same, for q2 */
502     +} que_look_up_table;
503    
504     -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize,
505     - loff_t pos)
506     +static void loop_prealloc_cleanup(struct loop_device *lo)
507     {
508     - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
509     - struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
510     - struct address_space_operations *aops = mapping->a_ops;
511     - struct page *page;
512     - char *kaddr, *data;
513     - unsigned long index;
514     - unsigned size, offset;
515     - int len;
516     -
517     - down(&mapping->host->i_sem);
518     - index = pos >> PAGE_CACHE_SHIFT;
519     - offset = pos & (PAGE_CACHE_SIZE - 1);
520     - len = bh->b_size;
521     - data = bh->b_data;
522     - while (len > 0) {
523     - int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize;
524     - int transfer_result;
525     -
526     - size = PAGE_CACHE_SIZE - offset;
527     - if (size > len)
528     - size = len;
529     + struct buffer_head *bh;
530    
531     - page = grab_cache_page(mapping, index);
532     - if (!page)
533     - goto fail;
534     - kaddr = kmap(page);
535     - if (aops->prepare_write(file, page, offset, offset+size))
536     - goto unlock;
537     - flush_dcache_page(page);
538     - transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV);
539     - if (transfer_result) {
540     - /*
541     - * The transfer failed, but we still write the data to
542     - * keep prepare/commit calls balanced.
543     - */
544     - printk(KERN_ERR "loop: transfer error block %ld\n", index);
545     - memset(kaddr + offset, 0, size);
546     - }
547     - if (aops->commit_write(file, page, offset, offset+size))
548     - goto unlock;
549     - if (transfer_result)
550     - goto unlock;
551     - kunmap(page);
552     - data += size;
553     - len -= size;
554     - offset = 0;
555     - index++;
556     - pos += size;
557     - UnlockPage(page);
558     - page_cache_release(page);
559     + while ((bh = lo->lo_bh_free)) {
560     + __free_page(bh->b_page);
561     + lo->lo_bh_free = bh->b_reqnext;
562     + bh->b_reqnext = NULL;
563     + kmem_cache_free(bh_cachep, bh);
564     }
565     - up(&mapping->host->i_sem);
566     - return 0;
567     -
568     -unlock:
569     - kunmap(page);
570     - UnlockPage(page);
571     - page_cache_release(page);
572     -fail:
573     - up(&mapping->host->i_sem);
574     - return -1;
575     }
576    
577     -struct lo_read_data {
578     - struct loop_device *lo;
579     - char *data;
580     - int bsize;
581     -};
582     -
583     -static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
584     +static int loop_prealloc_init(struct loop_device *lo, int y)
585     {
586     - char *kaddr;
587     - unsigned long count = desc->count;
588     - struct lo_read_data *p = (struct lo_read_data*)desc->buf;
589     - struct loop_device *lo = p->lo;
590     - int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize;
591     + struct buffer_head *bh;
592     + int x;
593    
594     - if (size > count)
595     - size = count;
596     + if(!y) {
597     + y = lo_prealloc[0];
598     + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) {
599     + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) {
600     + y = lo_prealloc[x + 1];
601     + break;
602     + }
603     + }
604     + }
605     + lo->lo_bh_flsh = (y * 3) / 4;
606    
607     - kaddr = kmap(page);
608     - if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
609     - size = 0;
610     - printk(KERN_ERR "loop: transfer error block %ld\n",page->index);
611     - desc->error = -EINVAL;
612     + for (x = 0; x < y; x++) {
613     + bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
614     + if (!bh) {
615     + loop_prealloc_cleanup(lo);
616     + return 1;
617     + }
618     + bh->b_page = alloc_page(GFP_KERNEL);
619     + if (!bh->b_page) {
620     + bh->b_reqnext = NULL;
621     + kmem_cache_free(bh_cachep, bh);
622     + loop_prealloc_cleanup(lo);
623     + return 1;
624     + }
625     + bh->b_reqnext = lo->lo_bh_free;
626     + lo->lo_bh_free = bh;
627     }
628     - kunmap(page);
629     -
630     - desc->count = count - size;
631     - desc->written += size;
632     - p->data += size;
633     - return size;
634     -}
635     -
636     -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize,
637     - loff_t pos)
638     -{
639     - struct lo_read_data cookie;
640     - read_descriptor_t desc;
641     - struct file *file;
642     -
643     - cookie.lo = lo;
644     - cookie.data = bh->b_data;
645     - cookie.bsize = bsize;
646     - desc.written = 0;
647     - desc.count = bh->b_size;
648     - desc.buf = (char*)&cookie;
649     - desc.error = 0;
650     - spin_lock_irq(&lo->lo_lock);
651     - file = lo->lo_backing_file;
652     - spin_unlock_irq(&lo->lo_lock);
653     - do_generic_file_read(file, &pos, &desc, lo_read_actor);
654     - return desc.error;
655     + return 0;
656     }
657    
658     -static inline int loop_get_bs(struct loop_device *lo)
659     +static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
660     {
661     - int bs = 0;
662     + unsigned long flags;
663    
664     - if (blksize_size[MAJOR(lo->lo_device)])
665     - bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
666     - if (!bs)
667     - bs = BLOCK_SIZE;
668     + spin_lock_irqsave(&lo->lo_lock, flags);
669     + if (*q) {
670     + bh->b_reqnext = (*q)->b_reqnext;
671     + (*q)->b_reqnext = bh;
672     + } else {
673     + bh->b_reqnext = bh;
674     + }
675     + *q = bh;
676     + spin_unlock_irqrestore(&lo->lo_lock, flags);
677    
678     - return bs;
679     + if (waitqueue_active(&lo->lo_bh_wait))
680     + wake_up_interruptible(&lo->lo_bh_wait);
681     }
682    
683     -static inline unsigned long loop_get_iv(struct loop_device *lo,
684     - unsigned long sector)
685     +static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
686     {
687     - int bs = loop_get_bs(lo);
688     - unsigned long offset, IV;
689     -
690     - IV = sector / (bs >> 9) + lo->lo_offset / bs;
691     - offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs;
692     - if (offset >= bs)
693     - IV++;
694     -
695     - return IV;
696     + spin_lock_irq(&lo->lo_lock);
697     + if (*q) {
698     + bh->b_reqnext = (*q)->b_reqnext;
699     + (*q)->b_reqnext = bh;
700     + } else {
701     + bh->b_reqnext = bh;
702     + *q = bh;
703     + }
704     + spin_unlock_irq(&lo->lo_lock);
705     }
706    
707     -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
708     +static struct buffer_head *loop_get_bh(struct loop_device *lo, int *list_nr,
709     + que_look_up_table *qt)
710     {
711     - loff_t pos;
712     - int ret;
713     -
714     - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
715     -
716     - if (rw == WRITE)
717     - ret = lo_send(lo, bh, loop_get_bs(lo), pos);
718     - else
719     - ret = lo_receive(lo, bh, loop_get_bs(lo), pos);
720     + struct buffer_head *bh = NULL, *last;
721    
722     - return ret;
723     -}
724     -
725     -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate);
726     -static void loop_put_buffer(struct buffer_head *bh)
727     -{
728     - /*
729     - * check b_end_io, may just be a remapped bh and not an allocated one
730     - */
731     - if (bh && bh->b_end_io == loop_end_io_transfer) {
732     - __free_page(bh->b_page);
733     - kmem_cache_free(bh_cachep, bh);
734     + spin_lock_irq(&lo->lo_lock);
735     + if ((last = *qt->q0)) {
736     + bh = last->b_reqnext;
737     + if (bh == last)
738     + *qt->q0 = NULL;
739     + else
740     + last->b_reqnext = bh->b_reqnext;
741     + bh->b_reqnext = NULL;
742     + *list_nr = qt->x0;
743     + } else if ((last = *qt->q1)) {
744     + bh = last->b_reqnext;
745     + if (bh == last)
746     + *qt->q1 = NULL;
747     + else
748     + last->b_reqnext = bh->b_reqnext;
749     + bh->b_reqnext = NULL;
750     + *list_nr = qt->x1;
751     + } else if ((last = *qt->q2)) {
752     + bh = last->b_reqnext;
753     + if (bh == last)
754     + *qt->q2 = NULL;
755     + else
756     + last->b_reqnext = bh->b_reqnext;
757     + bh->b_reqnext = NULL;
758     + *list_nr = qt->x2;
759     }
760     + spin_unlock_irq(&lo->lo_lock);
761     + return bh;
762     }
763    
764     -/*
765     - * Add buffer_head to back of pending list
766     - */
767     -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh)
768     +static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b)
769     {
770     unsigned long flags;
771     + int wk;
772    
773     spin_lock_irqsave(&lo->lo_lock, flags);
774     - if (lo->lo_bhtail) {
775     - lo->lo_bhtail->b_reqnext = bh;
776     - lo->lo_bhtail = bh;
777     - } else
778     - lo->lo_bh = lo->lo_bhtail = bh;
779     + b->b_reqnext = lo->lo_bh_free;
780     + lo->lo_bh_free = b;
781     + wk = lo->lo_bh_need;
782     spin_unlock_irqrestore(&lo->lo_lock, flags);
783    
784     - up(&lo->lo_bh_mutex);
785     + if (wk && waitqueue_active(&lo->lo_bh_wait))
786     + wake_up_interruptible(&lo->lo_bh_wait);
787     }
788    
789     -/*
790     - * Grab first pending buffer
791     - */
792     -static struct buffer_head *loop_get_bh(struct loop_device *lo)
793     +static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate)
794     {
795     - struct buffer_head *bh;
796     -
797     - spin_lock_irq(&lo->lo_lock);
798     - if ((bh = lo->lo_bh)) {
799     - if (bh == lo->lo_bhtail)
800     - lo->lo_bhtail = NULL;
801     - lo->lo_bh = bh->b_reqnext;
802     - bh->b_reqnext = NULL;
803     - }
804     - spin_unlock_irq(&lo->lo_lock);
805     + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
806     + struct buffer_head *rbh = bh->b_private;
807    
808     - return bh;
809     + rbh->b_reqnext = NULL;
810     + rbh->b_end_io(rbh, uptodate);
811     + loop_put_buffer(lo, bh);
812     + if (atomic_dec_and_test(&lo->lo_pending))
813     + wake_up_interruptible(&lo->lo_bh_wait);
814     }
815    
816     -/*
817     - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE
818     - * and lo->transfer stuff has already been done. if not, it was a READ
819     - * so queue it for the loop thread and let it do the transfer out of
820     - * b_end_io context (we don't want to do decrypt of a page with irqs
821     - * disabled)
822     - */
823     -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate)
824     +static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate)
825     {
826     struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
827    
828     - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) {
829     - struct buffer_head *rbh = bh->b_private;
830     -
831     - rbh->b_end_io(rbh, uptodate);
832     - if (atomic_dec_and_test(&lo->lo_pending))
833     - up(&lo->lo_bh_mutex);
834     - loop_put_buffer(bh);
835     - } else
836     - loop_add_bh(lo, bh);
837     + if (!uptodate)
838     + loop_end_io_transfer_wr(bh, uptodate);
839     + else
840     + loop_add_queue_last(lo, bh, &lo->lo_bh_que0);
841     }
842    
843     static struct buffer_head *loop_get_buffer(struct loop_device *lo,
844     - struct buffer_head *rbh)
845     + struct buffer_head *rbh, int from_thread, int rw)
846     {
847     struct buffer_head *bh;
848     + struct page *p;
849     + unsigned long flags;
850    
851     - /*
852     - * for xfer_funcs that can operate on the same bh, do that
853     - */
854     - if (lo->lo_flags & LO_FLAGS_BH_REMAP) {
855     - bh = rbh;
856     - goto out_bh;
857     + spin_lock_irqsave(&lo->lo_lock, flags);
858     + bh = lo->lo_bh_free;
859     + if (bh) {
860     + lo->lo_bh_free = bh->b_reqnext;
861     + if (from_thread)
862     + lo->lo_bh_need = 0;
863     + } else {
864     + if (from_thread)
865     + lo->lo_bh_need = 1;
866     }
867     + spin_unlock_irqrestore(&lo->lo_lock, flags);
868     + if (!bh)
869     + return (struct buffer_head *)0;
870    
871     - do {
872     - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
873     - if (bh)
874     - break;
875     -
876     - run_task_queue(&tq_disk);
877     - set_current_state(TASK_INTERRUPTIBLE);
878     - schedule_timeout(HZ);
879     - } while (1);
880     - memset(bh, 0, sizeof(*bh));
881     + p = bh->b_page;
882     + memset(bh, 0, sizeof(struct buffer_head));
883     + bh->b_page = p;
884    
885     + bh->b_private = rbh;
886     bh->b_size = rbh->b_size;
887     bh->b_dev = rbh->b_rdev;
888     + bh->b_rdev = lo->lo_device;
889     bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
890     + bh->b_data = page_address(bh->b_page);
891     + bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd;
892     + bh->b_rsector = rbh->b_rsector + lo->lo_offs_sec;
893     + init_waitqueue_head(&bh->b_wait);
894     +
895     + return bh;
896     +}
897     +
898     +static int figure_loop_size(struct loop_device *lo)
899     +{
900     + loff_t size, offs;
901     + unsigned int x;
902     + int err = 0;
903     + kdev_t lodev = lo->lo_device;
904     +
905     + offs = lo->lo_offset;
906     + if (S_ISREG(lo->lo_backing_file->f_dentry->d_inode->i_mode)) {
907     + size = lo->lo_backing_file->f_dentry->d_inode->i_size;
908     + } else {
909     + offs &= ~((loff_t)511);
910     + if (blk_size[MAJOR(lodev)])
911     + size = (loff_t)(blk_size[MAJOR(lodev)][MINOR(lodev)]) << BLOCK_SIZE_BITS;
912     + else
913     + size = 1024*1024*1024; /* unknown size */
914     + }
915     + if ((offs > 0) && (offs < size)) {
916     + size -= offs;
917     + } else {
918     + if (offs)
919     + err = -EINVAL;
920     + lo->lo_offset = 0;
921     + lo->lo_offs_sec = lo->lo_iv_remove = 0;
922     + }
923     + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) {
924     + size = lo->lo_sizelimit;
925     + } else {
926     + if (lo->lo_sizelimit)
927     + err = -EINVAL;
928     + lo->lo_sizelimit = 0;
929     + }
930     + size >>= BLOCK_SIZE_BITS;
931    
932     /*
933     - * easy way out, although it does waste some memory for < PAGE_SIZE
934     - * blocks... if highmem bounce buffering can get away with it,
935     - * so can we :-)
936     + * Unfortunately, if we want to do I/O on the device,
937     + * the number of 1024-byte blocks has to fit into unsigned int
938     */
939     - do {
940     - bh->b_page = alloc_page(GFP_NOIO);
941     - if (bh->b_page)
942     - break;
943     + x = (unsigned int)size;
944     + if ((loff_t)x != size) {
945     + err = -EFBIG;
946     + size = 0;
947     + }
948    
949     - run_task_queue(&tq_disk);
950     - set_current_state(TASK_INTERRUPTIBLE);
951     - schedule_timeout(HZ);
952     - } while (1);
953     + loop_sizes[lo->lo_number] = size;
954     + return err;
955     +}
956    
957     - bh->b_data = page_address(bh->b_page);
958     - bh->b_end_io = loop_end_io_transfer;
959     - bh->b_private = rbh;
960     - init_waitqueue_head(&bh->b_wait);
961     +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w)
962     +{
963     + mm_segment_t fs;
964     + int x, y, z;
965    
966     -out_bh:
967     - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
968     - spin_lock_irq(&lo->lo_lock);
969     - bh->b_rdev = lo->lo_device;
970     - spin_unlock_irq(&lo->lo_lock);
971     + y = 0;
972     + do {
973     + z = size - y;
974     + fs = get_fs();
975     + set_fs(get_ds());
976     + if (w) {
977     + x = file->f_op->write(file, buf + y, z, ppos);
978     + set_fs(fs);
979     + } else {
980     + x = file->f_op->read(file, buf + y, z, ppos);
981     + set_fs(fs);
982     + if (!x)
983     + return 1;
984     + }
985     + if (x < 0) {
986     + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) {
987     + run_task_queue(&tq_disk);
988     + set_current_state(TASK_INTERRUPTIBLE);
989     + schedule_timeout(HZ / 2);
990     + continue;
991     + }
992     + return 1;
993     + }
994     + y += x;
995     + } while (y < size);
996     + return 0;
997     +}
998    
999     - return bh;
1000     +static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
1001     +{
1002     + loff_t pos;
1003     + struct file *file = lo->lo_backing_file;
1004     + char *data, *buf;
1005     + unsigned int size, len;
1006     + unsigned long IV;
1007     +
1008     + pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
1009     + buf = page_address(lo->lo_bh_free->b_page);
1010     + len = bh->b_size;
1011     + data = bh_kmap(bh);
1012     + IV = bh->b_rsector;
1013     + if (!lo->lo_iv_remove)
1014     + IV += lo->lo_offs_sec;
1015     + while (len > 0) {
1016     + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
1017     + /* this code relies on the NONE transfer being a no-op */
1018     + buf = data;
1019     + }
1020     + size = PAGE_SIZE;
1021     + if (size > len)
1022     + size = len;
1023     + if (rw == WRITE) {
1024     + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) {
1025     + printk(KERN_ERR "loop%d: write transfer error, sector %lu\n", lo->lo_number, IV);
1026     + goto kunmap_and_out;
1027     + }
1028     + if (loop_file_io(file, buf, size, &pos, 1)) {
1029     + printk(KERN_ERR "loop%d: write i/o error, sector %lu\n", lo->lo_number, IV);
1030     + goto kunmap_and_out;
1031     + }
1032     + } else {
1033     + if (loop_file_io(file, buf, size, &pos, 0)) {
1034     + printk(KERN_ERR "loop%d: read i/o error, sector %lu\n", lo->lo_number, IV);
1035     + goto kunmap_and_out;
1036     + }
1037     + if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
1038     + printk(KERN_ERR "loop%d: read transfer error, sector %lu\n", lo->lo_number, IV);
1039     + goto kunmap_and_out;
1040     + }
1041     + }
1042     + data += size;
1043     + len -= size;
1044     + IV += size >> 9;
1045     + }
1046     + bh_kunmap(bh);
1047     + return 0;
1048     +
1049     +kunmap_and_out:
1050     + bh_kunmap(bh);
1051     + return 1;
1052     }
1053    
1054     static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
1055     {
1056     - struct buffer_head *bh = NULL;
1057     + struct buffer_head *bh;
1058     struct loop_device *lo;
1059     - unsigned long IV;
1060     + char *md;
1061    
1062     + set_current_state(TASK_RUNNING);
1063     if (!buffer_locked(rbh))
1064     BUG();
1065    
1066     @@ -483,45 +866,55 @@
1067     } else if (rw == READA) {
1068     rw = READ;
1069     } else if (rw != READ) {
1070     - printk(KERN_ERR "loop: unknown command (%d)\n", rw);
1071     + printk(KERN_ERR "loop%d: unknown command (%d)\n", lo->lo_number, rw);
1072     goto err;
1073     }
1074    
1075     - rbh = blk_queue_bounce(q, rw, rbh);
1076     -
1077     /*
1078     * file backed, queue for loop_thread to handle
1079     */
1080     if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1081     - /*
1082     - * rbh locked at this point, noone else should clear
1083     - * the dirty flag
1084     - */
1085     - if (rw == WRITE)
1086     - set_bit(BH_Dirty, &rbh->b_state);
1087     - loop_add_bh(lo, rbh);
1088     + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que0);
1089     return 0;
1090     }
1091    
1092     /*
1093     - * piggy old buffer on original, and submit for I/O
1094     + * device backed, just remap rdev & rsector for NONE transfer
1095     */
1096     - bh = loop_get_buffer(lo, rbh);
1097     - IV = loop_get_iv(lo, rbh->b_rsector);
1098     + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
1099     + rbh->b_rsector += lo->lo_offs_sec;
1100     + rbh->b_rdev = lo->lo_device;
1101     + generic_make_request(rw, rbh);
1102     + if (atomic_dec_and_test(&lo->lo_pending))
1103     + wake_up_interruptible(&lo->lo_bh_wait);
1104     + return 0;
1105     + }
1106     +
1107     + /*
1108     + * device backed, start reads and writes now if buffer available
1109     + */
1110     + bh = loop_get_buffer(lo, rbh, 0, rw);
1111     + if (!bh) {
1112     + /* just queue request and let thread handle alloc later */
1113     + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2);
1114     + return 0;
1115     + }
1116     if (rw == WRITE) {
1117     - set_bit(BH_Dirty, &bh->b_state);
1118     - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data,
1119     - bh->b_size, IV))
1120     + int trv;
1121     + md = bh_kmap(rbh);
1122     + trv = lo_do_transfer(lo, WRITE, bh->b_data, md, bh->b_size, bh->b_rsector - lo->lo_iv_remove);
1123     + bh_kunmap(rbh);
1124     + if (trv) {
1125     + loop_put_buffer(lo, bh);
1126     goto err;
1127     + }
1128     }
1129     -
1130     generic_make_request(rw, bh);
1131     return 0;
1132    
1133     err:
1134     if (atomic_dec_and_test(&lo->lo_pending))
1135     - up(&lo->lo_bh_mutex);
1136     - loop_put_buffer(bh);
1137     + wake_up_interruptible(&lo->lo_bh_wait);
1138     out:
1139     buffer_IO_error(rbh);
1140     return 0;
1141     @@ -530,30 +923,6 @@
1142     goto out;
1143     }
1144    
1145     -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh)
1146     -{
1147     - int ret;
1148     -
1149     - /*
1150     - * For block backed loop, we know this is a READ
1151     - */
1152     - if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1153     - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state);
1154     -
1155     - ret = do_bh_filebacked(lo, bh, rw);
1156     - bh->b_end_io(bh, !ret);
1157     - } else {
1158     - struct buffer_head *rbh = bh->b_private;
1159     - unsigned long IV = loop_get_iv(lo, rbh->b_rsector);
1160     -
1161     - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data,
1162     - bh->b_size, IV);
1163     -
1164     - rbh->b_end_io(rbh, !ret);
1165     - loop_put_buffer(bh);
1166     - }
1167     -}
1168     -
1169     /*
1170     * worker thread that handles reads/writes to file backed loop devices,
1171     * to avoid blocking in our make_request_fn. it also does loop decrypting
1172     @@ -563,8 +932,20 @@
1173     static int loop_thread(void *data)
1174     {
1175     struct loop_device *lo = data;
1176     - struct buffer_head *bh;
1177     + struct buffer_head *bh, *xbh;
1178     + int x, rw, qi = 0, flushcnt = 0;
1179     + wait_queue_t waitq;
1180     + que_look_up_table qt[4] = {
1181     + { &lo->lo_bh_que0, &lo->lo_bh_que1, &lo->lo_bh_que2, 0, 1, 2 },
1182     + { &lo->lo_bh_que2, &lo->lo_bh_que0, &lo->lo_bh_que1, 2, 0, 1 },
1183     + { &lo->lo_bh_que0, &lo->lo_bh_que2, &lo->lo_bh_que1, 0, 2, 1 },
1184     + { &lo->lo_bh_que1, &lo->lo_bh_que0, &lo->lo_bh_que2, 1, 0, 2 }
1185     + };
1186     + char *md;
1187     + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
1188    
1189     + init_waitqueue_entry(&waitq, current);
1190     + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim));
1191     daemonize();
1192     exit_files(current);
1193     reparent_to_init();
1194     @@ -576,6 +957,19 @@
1195     flush_signals(current);
1196     spin_unlock_irq(&current->sigmask_lock);
1197    
1198     + if (lo_nice > 0)
1199     + lo_nice = 0;
1200     + if (lo_nice < -20)
1201     + lo_nice = -20;
1202     +#if defined(DEF_NICE) && defined(DEF_COUNTER)
1203     + /* old scheduler syntax */
1204     + current->policy = SCHED_OTHER;
1205     + current->nice = lo_nice;
1206     +#else
1207     + /* O(1) scheduler syntax */
1208     + set_user_nice(current, lo_nice);
1209     +#endif
1210     +
1211     spin_lock_irq(&lo->lo_lock);
1212     lo->lo_state = Lo_bound;
1213     atomic_inc(&lo->lo_pending);
1214     @@ -589,23 +983,110 @@
1215     up(&lo->lo_sem);
1216    
1217     for (;;) {
1218     - down_interruptible(&lo->lo_bh_mutex);
1219     + add_wait_queue(&lo->lo_bh_wait, &waitq);
1220     + for (;;) {
1221     + set_current_state(TASK_INTERRUPTIBLE);
1222     + if (!atomic_read(&lo->lo_pending))
1223     + break;
1224     +
1225     + x = 0;
1226     + spin_lock_irq(&lo->lo_lock);
1227     + if (lo->lo_bh_que0) {
1228     + x = 1;
1229     + } else if (lo->lo_bh_que1 || lo->lo_bh_que2) {
1230     + /* file backed works too because lo->lo_bh_need == 0 */
1231     + if (lo->lo_bh_free || !lo->lo_bh_need)
1232     + x = 1;
1233     + }
1234     + spin_unlock_irq(&lo->lo_lock);
1235     + if (x)
1236     + break;
1237     +
1238     + schedule();
1239     + }
1240     + set_current_state(TASK_RUNNING);
1241     + remove_wait_queue(&lo->lo_bh_wait, &waitq);
1242     +
1243     /*
1244     - * could be upped because of tear-down, not because of
1245     + * could be woken because of tear-down, not because of
1246     * pending work
1247     */
1248     if (!atomic_read(&lo->lo_pending))
1249     break;
1250    
1251     - bh = loop_get_bh(lo);
1252     - if (!bh) {
1253     - printk("loop: missing bh\n");
1254     + /*
1255     + * read queues using alternating order to prevent starvation
1256     + */
1257     + bh = loop_get_bh(lo, &x, &qt[++qi & 3]);
1258     + if (!bh)
1259     + continue;
1260     +
1261     + /*
1262     + *  x  list tag       usage(buffer-allocated)
1263     + * --- -------------- -----------------------
1264     + *  0  lo->lo_bh_que0 dev-read(y) / file-read
1265     + *  1  lo->lo_bh_que1 dev-write(n) / file-write
1266     + *  2  lo->lo_bh_que2 dev-read(n)
1267     + */
1268     + rw = (x == 1) ? WRITE : READ;
1269     + if ((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) {
1270     + /* loop_make_request didn't allocate a buffer, do that now */
1271     + xbh = loop_get_buffer(lo, bh, 1, rw);
1272     + if (!xbh) {
1273     + run_task_queue(&tq_disk);
1274     + flushcnt = 0;
1275     + loop_add_queue_first(lo, bh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2);
1276     + /* lo->lo_bh_need should be 1 now, go back to sleep */
1277     + continue;
1278     + }
1279     + if (rw == WRITE) {
1280     + int trv;
1281     + md = bh_kmap(bh);
1282     + trv = lo_do_transfer(lo, WRITE, xbh->b_data, md, xbh->b_size, xbh->b_rsector - lo->lo_iv_remove);
1283     + bh_kunmap(bh);
1284     + if (trv) {
1285     + loop_put_buffer(lo, xbh);
1286     + buffer_IO_error(bh);
1287     + atomic_dec(&lo->lo_pending);
1288     + continue;
1289     + }
1290     + }
1291     + generic_make_request(rw, xbh);
1292     +
1293     + /* start I/O if there are no more requests lacking buffers */
1294     + x = 0;
1295     + spin_lock_irq(&lo->lo_lock);
1296     + if (!lo->lo_bh_que1 && !lo->lo_bh_que2)
1297     + x = 1;
1298     + spin_unlock_irq(&lo->lo_lock);
1299     + if (x || (++flushcnt >= lo->lo_bh_flsh)) {
1300     + run_task_queue(&tq_disk);
1301     + flushcnt = 0;
1302     + }
1303     +
1304     + /* request not completely processed yet */
1305     continue;
1306     }
1307     - loop_handle_bh(lo, bh);
1308     + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1309     + /* request is for file backed device */
1310     + x = do_bh_filebacked(lo, bh, rw);
1311     + bh->b_reqnext = NULL;
1312     + bh->b_end_io(bh, !x);
1313     + } else {
1314     + /* device backed read has completed, do decrypt now */
1315     + xbh = bh->b_private;
1316     + /* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */
1317     + /* instead, recompute IV from original request */
1318     + md = bh_kmap(xbh);
1319     + x = lo_do_transfer(lo, READ, bh->b_data, md, bh->b_size, xbh->b_rsector + lo->lo_offs_sec - lo->lo_iv_remove);
1320     + bh_kunmap(xbh);
1321     + xbh->b_reqnext = NULL;
1322     + xbh->b_end_io(xbh, !x);
1323     + loop_put_buffer(lo, bh);
1324     + }
1325    
1326     /*
1327     - * upped both for pending work and tear-down, lo_pending
1328     + * woken both for pending work and tear-down, lo_pending
1329     * will hit zero then
1330     */
1331     if (atomic_dec_and_test(&lo->lo_pending))
1332     @@ -616,15 +1097,34 @@
1333     return 0;
1334     }
1335    
1336     +static void loop_set_softblksz(struct loop_device *lo, kdev_t dev)
1337     +{
1338     + int bs = 0, x;
1339     +
1340     + if (blksize_size[MAJOR(lo->lo_device)])
1341     + bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
1342     + if (!bs)
1343     + bs = BLOCK_SIZE;
1344     + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1345     + x = loop_sizes[lo->lo_number];
1346     + if ((bs == 8192) && (x & 7))
1347     + bs = 4096;
1348     + if ((bs == 4096) && (x & 3))
1349     + bs = 2048;
1350     + if ((bs == 2048) && (x & 1))
1351     + bs = 1024;
1352     + }
1353     + set_blocksize(dev, bs);
1354     +}
1355     +
1356     static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev,
1357     unsigned int arg)
1358     {
1359     struct file *file;
1360     struct inode *inode;
1361     kdev_t lo_device;
1362     - int lo_flags = 0;
1363     + int lo_flags = 0, hardsz = 512;
1364     int error;
1365     - int bs;
1366    
1367     MOD_INC_USE_COUNT;
1368    
1369     @@ -643,33 +1143,46 @@
1370     if (!(file->f_mode & FMODE_WRITE))
1371     lo_flags |= LO_FLAGS_READ_ONLY;
1372    
1373     + lo->lo_offset = lo->lo_sizelimit = 0;
1374     + lo->lo_offs_sec = lo->lo_iv_remove = 0;
1375     + lo->lo_bh_free = lo->lo_bh_que2 = lo->lo_bh_que1 = lo->lo_bh_que0 = NULL;
1376     + lo->lo_bh_need = lo->lo_bh_flsh = 0;
1377     + init_waitqueue_head(&lo->lo_bh_wait);
1378     if (S_ISBLK(inode->i_mode)) {
1379     lo_device = inode->i_rdev;
1380     if (lo_device == dev) {
1381     error = -EBUSY;
1382     goto out_putf;
1383     }
1384     + if (loop_prealloc_init(lo, 0)) {
1385     + error = -ENOMEM;
1386     + goto out_putf;
1387     + }
1388     + hardsz = get_hardsect_size(lo_device);
1389     } else if (S_ISREG(inode->i_mode)) {
1390     - struct address_space_operations *aops = inode->i_mapping->a_ops;
1391     /*
1392     * If we can't read - sorry. If we only can't write - well,
1393     * it's going to be read-only.
1394     */
1395     - if (!aops->readpage)
1396     + if (!file->f_op || !file->f_op->read)
1397     goto out_putf;
1398    
1399     - if (!aops->prepare_write || !aops->commit_write)
1400     + if (!file->f_op->write)
1401     lo_flags |= LO_FLAGS_READ_ONLY;
1402    
1403     lo_device = inode->i_dev;
1404     lo_flags |= LO_FLAGS_DO_BMAP;
1405     + if (loop_prealloc_init(lo, 1)) {
1406     + error = -ENOMEM;
1407     + goto out_putf;
1408     + }
1409     error = 0;
1410     } else
1411     goto out_putf;
1412    
1413     get_file(file);
1414    
1415     - if (IS_RDONLY (inode) || is_read_only(lo_device)
1416     + if ((S_ISREG(inode->i_mode) && IS_RDONLY(inode)) || is_read_only(lo_device)
1417     || !(lo_file->f_mode & FMODE_WRITE))
1418     lo_flags |= LO_FLAGS_READ_ONLY;
1419    
1420     @@ -677,28 +1190,40 @@
1421    
1422     lo->lo_device = lo_device;
1423     lo->lo_flags = lo_flags;
1424     + if(lo_flags & LO_FLAGS_READ_ONLY)
1425     + lo->lo_flags |= 0x200000; /* export to user space */
1426     lo->lo_backing_file = file;
1427     lo->transfer = NULL;
1428     lo->ioctl = NULL;
1429     - figure_loop_size(lo);
1430     - lo->old_gfp_mask = inode->i_mapping->gfp_mask;
1431     - inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS);
1432     -
1433     - bs = 0;
1434     - if (blksize_size[MAJOR(lo_device)])
1435     - bs = blksize_size[MAJOR(lo_device)][MINOR(lo_device)];
1436     - if (!bs)
1437     - bs = BLOCK_SIZE;
1438     + if (figure_loop_size(lo)) {
1439     + error = -EFBIG;
1440     + goto out_cleanup;
1441     + }
1442    
1443     - set_blocksize(dev, bs);
1444     + if (lo_flags & LO_FLAGS_DO_BMAP) {
1445     + lo->old_gfp_mask = inode->i_mapping->gfp_mask;
1446     + inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS);
1447     + inode->i_mapping->gfp_mask |= __GFP_HIGH;
1448     + } else {
1449     + lo->old_gfp_mask = -1;
1450     + }
1451    
1452     - lo->lo_bh = lo->lo_bhtail = NULL;
1453     - kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
1454     - down(&lo->lo_sem);
1455     + loop_hardsizes[MINOR(dev)] = hardsz;
1456     + loop_set_softblksz(lo, dev);
1457    
1458     + error = kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
1459     + if(error < 0)
1460     + goto out_mapping;
1461     + down(&lo->lo_sem);
1462     fput(file);
1463     return 0;
1464    
1465     + out_mapping:
1466     + if(lo->old_gfp_mask != -1)
1467     + inode->i_mapping->gfp_mask = lo->old_gfp_mask;
1468     + out_cleanup:
1469     + loop_prealloc_cleanup(lo);
1470     + fput(file);
1471     out_putf:
1472     fput(file);
1473     out:
1474     @@ -711,6 +1236,7 @@
1475     int err = 0;
1476     if (lo->lo_encrypt_type) {
1477     struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type];
1478     + lo->transfer = NULL;
1479     if (xfer && xfer->release)
1480     err = xfer->release(lo);
1481     if (xfer && xfer->unlock)
1482     @@ -751,11 +1277,12 @@
1483     spin_lock_irq(&lo->lo_lock);
1484     lo->lo_state = Lo_rundown;
1485     if (atomic_dec_and_test(&lo->lo_pending))
1486     - up(&lo->lo_bh_mutex);
1487     + wake_up_interruptible(&lo->lo_bh_wait);
1488     spin_unlock_irq(&lo->lo_lock);
1489    
1490     down(&lo->lo_sem);
1491    
1492     + loop_prealloc_cleanup(lo);
1493     lo->lo_backing_file = NULL;
1494    
1495     loop_release_xfer(lo);
1496     @@ -763,23 +1290,77 @@
1497     lo->ioctl = NULL;
1498     lo->lo_device = 0;
1499     lo->lo_encrypt_type = 0;
1500     - lo->lo_offset = 0;
1501     + lo->lo_offset = lo->lo_sizelimit = 0;
1502     + lo->lo_offs_sec = lo->lo_iv_remove = 0;
1503     lo->lo_encrypt_key_size = 0;
1504     lo->lo_flags = 0;
1505     memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1506     memset(lo->lo_name, 0, LO_NAME_SIZE);
1507     + memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1508     loop_sizes[lo->lo_number] = 0;
1509     invalidate_bdev(bdev, 0);
1510     - filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
1511     + if (gfp != -1)
1512     + filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
1513     lo->lo_state = Lo_unbound;
1514     fput(filp);
1515     MOD_DEC_USE_COUNT;
1516     return 0;
1517     }
1518    
1519     -static int loop_set_status(struct loop_device *lo, struct loop_info *arg)
1520     +static void
1521     +loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
1522     +{
1523     + memset(info64, 0, sizeof(*info64));
1524     + info64->lo_number = info->lo_number;
1525     + info64->lo_device = info->lo_device;
1526     + info64->lo_inode = info->lo_inode;
1527     + info64->lo_rdevice = info->lo_rdevice;
1528     + info64->lo_offset = info->lo_offset;
1529     + info64->lo_encrypt_type = info->lo_encrypt_type;
1530     + info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
1531     + info64->lo_flags = info->lo_flags;
1532     + info64->lo_init[0] = info->lo_init[0];
1533     + info64->lo_init[1] = info->lo_init[1];
1534     + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
1535     + memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
1536     + else
1537     + memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
1538     + memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
1539     +}
1540     +
1541     +static int
1542     +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info)
1543     +{
1544     + memset(info, 0, sizeof(*info));
1545     + info->lo_number = info64->lo_number;
1546     + info->lo_device = info64->lo_device;
1547     + info->lo_inode = info64->lo_inode;
1548     + info->lo_rdevice = info64->lo_rdevice;
1549     + info->lo_offset = info64->lo_offset;
1550     + info->lo_encrypt_type = info64->lo_encrypt_type;
1551     + info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
1552     + info->lo_flags = info64->lo_flags;
1553     + info->lo_init[0] = info64->lo_init[0];
1554     + info->lo_init[1] = info64->lo_init[1];
1555     + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
1556     + memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1557     + else
1558     + memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
1559     + memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1560     +
1561     + /* error in case values were truncated */
1562     + if (info->lo_device != info64->lo_device ||
1563     + info->lo_rdevice != info64->lo_rdevice ||
1564     + info->lo_inode != info64->lo_inode ||
1565     + info->lo_offset != info64->lo_offset ||
1566     + info64->lo_sizelimit)
1567     + return -EOVERFLOW;
1568     +
1569     + return 0;
1570     +}
1571     +
1572     +static int loop_set_status(struct loop_device *lo, kdev_t dev, struct loop_info64 *info, struct loop_info *oldinfo)
1573     {
1574     - struct loop_info info;
1575     int err;
1576     unsigned int type;
1577    
1578     @@ -788,62 +1369,137 @@
1579     return -EPERM;
1580     if (lo->lo_state != Lo_bound)
1581     return -ENXIO;
1582     - if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1583     - return -EFAULT;
1584     - if ((unsigned int) info.lo_encrypt_key_size > LO_KEY_SIZE)
1585     + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
1586     return -EINVAL;
1587     - type = info.lo_encrypt_type;
1588     + type = info->lo_encrypt_type;
1589     if (type >= MAX_LO_CRYPT || xfer_funcs[type] == NULL)
1590     return -EINVAL;
1591     - if (type == LO_CRYPT_XOR && info.lo_encrypt_key_size == 0)
1592     + if (type == LO_CRYPT_XOR && info->lo_encrypt_key_size == 0)
1593     return -EINVAL;
1594     err = loop_release_xfer(lo);
1595     - if (!err)
1596     - err = loop_init_xfer(lo, type, &info);
1597     if (err)
1598     return err;
1599    
1600     - lo->lo_offset = info.lo_offset;
1601     - strncpy(lo->lo_name, info.lo_name, LO_NAME_SIZE);
1602     + if ((loff_t)info->lo_offset < 0) {
1603     + /* negative offset == remove offset from IV computations */
1604     + lo->lo_offset = -(info->lo_offset);
1605     + lo->lo_iv_remove = lo->lo_offset >> 9;
1606     + } else {
1607     + /* positive offset == include offset in IV computations */
1608     + lo->lo_offset = info->lo_offset;
1609     + lo->lo_iv_remove = 0;
1610     + }
1611     + lo->lo_offs_sec = lo->lo_offset >> 9;
1612     + lo->lo_sizelimit = info->lo_sizelimit;
1613     + err = figure_loop_size(lo);
1614     + if (err)
1615     + return err;
1616     + loop_set_softblksz(lo, dev);
1617     +
1618     + /* transfer init function for 2.4 kernels takes old style struct */
1619     + err = loop_init_xfer(lo, type, oldinfo);
1620     + /* copy key -- just in case transfer init func modified it */
1621     + memcpy(info->lo_encrypt_key, oldinfo->lo_encrypt_key, sizeof(info->lo_encrypt_key));
1622     + if (err)
1623     + return err;
1624    
1625     + strncpy(lo->lo_name, info->lo_file_name, LO_NAME_SIZE);
1626     + strncpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
1627     lo->transfer = xfer_funcs[type]->transfer;
1628     lo->ioctl = xfer_funcs[type]->ioctl;
1629     - lo->lo_encrypt_key_size = info.lo_encrypt_key_size;
1630     - lo->lo_init[0] = info.lo_init[0];
1631     - lo->lo_init[1] = info.lo_init[1];
1632     - if (info.lo_encrypt_key_size) {
1633     - memcpy(lo->lo_encrypt_key, info.lo_encrypt_key,
1634     - info.lo_encrypt_key_size);
1635     + lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
1636     + lo->lo_init[0] = info->lo_init[0];
1637     + lo->lo_init[1] = info->lo_init[1];
1638     + if (info->lo_encrypt_key_size) {
1639     + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
1640     + info->lo_encrypt_key_size);
1641     lo->lo_key_owner = current->uid;
1642     - }
1643     - figure_loop_size(lo);
1644     + }
1645     +
1646     return 0;
1647     }
1648    
1649     -static int loop_get_status(struct loop_device *lo, struct loop_info *arg)
1650     +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1651     {
1652     - struct loop_info info;
1653     struct file *file = lo->lo_backing_file;
1654    
1655     if (lo->lo_state != Lo_bound)
1656     return -ENXIO;
1657     - if (!arg)
1658     - return -EINVAL;
1659     - memset(&info, 0, sizeof(info));
1660     - info.lo_number = lo->lo_number;
1661     - info.lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev);
1662     - info.lo_inode = file->f_dentry->d_inode->i_ino;
1663     - info.lo_rdevice = kdev_t_to_nr(lo->lo_device);
1664     - info.lo_offset = lo->lo_offset;
1665     - info.lo_flags = lo->lo_flags;
1666     - strncpy(info.lo_name, lo->lo_name, LO_NAME_SIZE);
1667     - info.lo_encrypt_type = lo->lo_encrypt_type;
1668     + memset(info, 0, sizeof(*info));
1669     + info->lo_number = lo->lo_number;
1670     + info->lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev);
1671     + info->lo_inode = file->f_dentry->d_inode->i_ino;
1672     + info->lo_rdevice = kdev_t_to_nr(lo->lo_device);
1673     + info->lo_offset = lo->lo_iv_remove ? -(lo->lo_offset) : lo->lo_offset;
1674     + info->lo_sizelimit = lo->lo_sizelimit;
1675     + info->lo_flags = lo->lo_flags;
1676     + strncpy(info->lo_file_name, lo->lo_name, LO_NAME_SIZE);
1677     + strncpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1678     + info->lo_encrypt_type = lo->lo_encrypt_type;
1679     if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1680     - info.lo_encrypt_key_size = lo->lo_encrypt_key_size;
1681     - memcpy(info.lo_encrypt_key, lo->lo_encrypt_key,
1682     + info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1683     + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1684     lo->lo_encrypt_key_size);
1685     + info->lo_init[0] = lo->lo_init[0];
1686     + info->lo_init[1] = lo->lo_init[1];
1687     }
1688     - return copy_to_user(arg, &info, sizeof(info)) ? -EFAULT : 0;
1689     + return 0;
1690     +}
1691     +
1692     +static int
1693     +loop_set_status_n(struct loop_device *lo, kdev_t dev, void *arg, int n)
1694     +{
1695     + struct loop_info info;
1696     + struct loop_info64 info64;
1697     + int err;
1698     +
1699     + if (n) {
1700     + if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
1701     + return -EFAULT;
1702     + /* truncation errors can be ignored here as transfer init func only wants key bits */
1703     + loop_info64_to_old(&info64, &info);
1704     + } else {
1705     + if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1706     + return -EFAULT;
1707     + loop_info64_from_old(&info, &info64);
1708     + }
1709     + err = loop_set_status(lo, dev, &info64, &info);
1710     + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key));
1711     + memset(&info64.lo_encrypt_key[0], 0, sizeof(info64.lo_encrypt_key));
1712     + return err;
1713     +}
1714     +
1715     +static int
1716     +loop_get_status_old(struct loop_device *lo, struct loop_info *arg) {
1717     + struct loop_info info;
1718     + struct loop_info64 info64;
1719     + int err = 0;
1720     +
1721     + if (!arg)
1722     + err = -EINVAL;
1723     + if (!err)
1724     + err = loop_get_status(lo, &info64);
1725     + if (!err)
1726     + err = loop_info64_to_old(&info64, &info);
1727     + if (!err && copy_to_user(arg, &info, sizeof(info)))
1728     + err = -EFAULT;
1729     +
1730     + return err;
1731     +}
1732     +
1733     +static int
1734     +loop_get_status64(struct loop_device *lo, struct loop_info64 *arg) {
1735     + struct loop_info64 info64;
1736     + int err = 0;
1737     +
1738     + if (!arg)
1739     + err = -EINVAL;
1740     + if (!err)
1741     + err = loop_get_status(lo, &info64);
1742     + if (!err && copy_to_user(arg, &info64, sizeof(info64)))
1743     + err = -EFAULT;
1744     +
1745     + return err;
1746     }
1747    
1748     static int lo_ioctl(struct inode * inode, struct file * file,
1749     @@ -872,10 +1528,16 @@
1750     err = loop_clr_fd(lo, inode->i_bdev);
1751     break;
1752     case LOOP_SET_STATUS:
1753     - err = loop_set_status(lo, (struct loop_info *) arg);
1754     + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 0);
1755     break;
1756     case LOOP_GET_STATUS:
1757     - err = loop_get_status(lo, (struct loop_info *) arg);
1758     + err = loop_get_status_old(lo, (struct loop_info *) arg);
1759     + break;
1760     + case LOOP_SET_STATUS64:
1761     + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 1);
1762     + break;
1763     + case LOOP_GET_STATUS64:
1764     + err = loop_get_status64(lo, (struct loop_info64 *) arg);
1765     break;
1766     case BLKGETSIZE:
1767     if (lo->lo_state != Lo_bound) {
1768     @@ -894,6 +1556,8 @@
1769     case BLKBSZGET:
1770     case BLKBSZSET:
1771     case BLKSSZGET:
1772     + case BLKROGET:
1773     + case BLKROSET:
1774     err = blk_ioctl(inode->i_rdev, cmd, arg);
1775     break;
1776     default:
1777     @@ -906,7 +1570,7 @@
1778     static int lo_open(struct inode *inode, struct file *file)
1779     {
1780     struct loop_device *lo;
1781     - int dev, type;
1782     + int dev;
1783    
1784     if (!inode)
1785     return -EINVAL;
1786     @@ -921,10 +1585,6 @@
1787     lo = &loop_dev[dev];
1788     MOD_INC_USE_COUNT;
1789     down(&lo->lo_ctl_mutex);
1790     -
1791     - type = lo->lo_encrypt_type;
1792     - if (type && xfer_funcs[type] && xfer_funcs[type]->lock)
1793     - xfer_funcs[type]->lock(lo);
1794     lo->lo_refcnt++;
1795     up(&lo->lo_ctl_mutex);
1796     return 0;
1797     @@ -933,7 +1593,7 @@
1798     static int lo_release(struct inode *inode, struct file *file)
1799     {
1800     struct loop_device *lo;
1801     - int dev, type;
1802     + int dev;
1803    
1804     if (!inode)
1805     return 0;
1806     @@ -948,11 +1608,7 @@
1807    
1808     lo = &loop_dev[dev];
1809     down(&lo->lo_ctl_mutex);
1810     - type = lo->lo_encrypt_type;
1811     --lo->lo_refcnt;
1812     - if (xfer_funcs[type] && xfer_funcs[type]->unlock)
1813     - xfer_funcs[type]->unlock(lo);
1814     -
1815     up(&lo->lo_ctl_mutex);
1816     MOD_DEC_USE_COUNT;
1817     return 0;
1818     @@ -974,7 +1630,7 @@
1819    
1820     int loop_register_transfer(struct loop_func_table *funcs)
1821     {
1822     - if ((unsigned)funcs->number > MAX_LO_CRYPT || xfer_funcs[funcs->number])
1823     + if ((unsigned)funcs->number >= MAX_LO_CRYPT || xfer_funcs[funcs->number])
1824     return -EINVAL;
1825     xfer_funcs[funcs->number] = funcs;
1826     return 0;
1827     @@ -989,9 +1645,7 @@
1828     for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
1829     int type = lo->lo_encrypt_type;
1830     if (type == number) {
1831     - xfer_funcs[type]->release(lo);
1832     - lo->transfer = NULL;
1833     - lo->lo_encrypt_type = 0;
1834     + loop_release_xfer(lo);
1835     }
1836     }
1837     xfer_funcs[number] = NULL;
1838     @@ -1017,10 +1671,9 @@
1839     return -EIO;
1840     }
1841    
1842     -
1843     loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
1844     if (!loop_dev)
1845     - return -ENOMEM;
1846     + goto out_dev;
1847    
1848     loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
1849     if (!loop_sizes)
1850     @@ -1030,6 +1683,10 @@
1851     if (!loop_blksizes)
1852     goto out_blksizes;
1853    
1854     + loop_hardsizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
1855     + if (!loop_hardsizes)
1856     + goto out_hardsizes;
1857     +
1858     blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
1859    
1860     for (i = 0; i < max_loop; i++) {
1861     @@ -1037,18 +1694,28 @@
1862     memset(lo, 0, sizeof(struct loop_device));
1863     init_MUTEX(&lo->lo_ctl_mutex);
1864     init_MUTEX_LOCKED(&lo->lo_sem);
1865     - init_MUTEX_LOCKED(&lo->lo_bh_mutex);
1866     lo->lo_number = i;
1867     spin_lock_init(&lo->lo_lock);
1868     }
1869    
1870     memset(loop_sizes, 0, max_loop * sizeof(int));
1871     memset(loop_blksizes, 0, max_loop * sizeof(int));
1872     + memset(loop_hardsizes, 0, max_loop * sizeof(int));
1873     blk_size[MAJOR_NR] = loop_sizes;
1874     blksize_size[MAJOR_NR] = loop_blksizes;
1875     + hardsect_size[MAJOR_NR] = loop_hardsizes;
1876     for (i = 0; i < max_loop; i++)
1877     register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
1878    
1879     + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
1880     + if (!lo_prealloc[i])
1881     + continue;
1882     + if (lo_prealloc[i] < LO_PREALLOC_MIN)
1883     + lo_prealloc[i] = LO_PREALLOC_MIN;
1884     + if (lo_prealloc[i] > LO_PREALLOC_MAX)
1885     + lo_prealloc[i] = LO_PREALLOC_MAX;
1886     + }
1887     +
1888     devfs_handle = devfs_mk_dir(NULL, "loop", NULL);
1889     devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
1890     MAJOR_NR, 0,
1891     @@ -1058,10 +1725,13 @@
1892     printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
1893     return 0;
1894    
1895     +out_hardsizes:
1896     + kfree(loop_blksizes);
1897     out_blksizes:
1898     kfree(loop_sizes);
1899     out_sizes:
1900     kfree(loop_dev);
1901     +out_dev:
1902     if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
1903     printk(KERN_WARNING "loop: cannot unregister blkdev\n");
1904     printk(KERN_ERR "loop: ran out of memory\n");
1905     @@ -1073,9 +1743,14 @@
1906     devfs_unregister(devfs_handle);
1907     if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
1908     printk(KERN_WARNING "loop: cannot unregister blkdev\n");
1909     +
1910     + blk_size[MAJOR_NR] = 0;
1911     + blksize_size[MAJOR_NR] = 0;
1912     + hardsect_size[MAJOR_NR] = 0;
1913     kfree(loop_dev);
1914     kfree(loop_sizes);
1915     kfree(loop_blksizes);
1916     + kfree(loop_hardsizes);
1917     }
1918    
1919     module_init(loop_init);
1920     diff -urN linux-2.4.23/drivers/misc/Makefile linux-2.4.23-AES/drivers/misc/Makefile
1921     --- linux-2.4.23/drivers/misc/Makefile Sat Dec 30 00:07:22 2000
1922     +++ linux-2.4.23-AES/drivers/misc/Makefile Sat Nov 29 11:08:29 2003
1923     @@ -9,7 +9,30 @@
1924     # parent makes..
1925     #
1926    
1927     +.S.o:
1928     + $(CC) $(AFLAGS) -c $< -o $*.o
1929     +
1930     O_TARGET := misc.o
1931     +
1932     +ifeq ($(CONFIG_BLK_DEV_LOOP_AES),y)
1933     +AES_PENTIUM_ASM=n
1934     +ifeq ($(CONFIG_X86),y)
1935     +ifneq ($(CONFIG_X86_64),y)
1936     +ifneq ($(CONFIG_M386),y)
1937     +ifneq ($(CONFIG_M486),y)
1938     + AES_PENTIUM_ASM=y
1939     +endif
1940     +endif
1941     +endif
1942     +endif
1943     +ifeq ($(AES_PENTIUM_ASM),y)
1944     + export-objs += i586-ksym.o
1945     + obj-y += aes-i586.o md5-i586.o i586-ksym.o
1946     +else
1947     + export-objs += aes.o md5.o
1948     + obj-y += aes.o md5.o
1949     +endif
1950     +endif
1951    
1952     include $(TOPDIR)/Rules.make
1953    
1954     diff -urN linux-2.4.23/drivers/misc/aes-i586.S linux-2.4.23-AES/drivers/misc/aes-i586.S
1955     --- linux-2.4.23/drivers/misc/aes-i586.S Thu Jan 1 02:00:00 1970
1956     +++ linux-2.4.23-AES/drivers/misc/aes-i586.S Sat Nov 29 11:08:29 2003
1957     @@ -0,0 +1,922 @@
1958     +//
1959     +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
1960     +// All rights reserved.
1961     +//
1962     +// TERMS
1963     +//
1964     +// Redistribution and use in source and binary forms, with or without
1965     +// modification, are permitted subject to the following conditions:
1966     +//
1967     +// 1. Redistributions of source code must retain the above copyright
1968     +// notice, this list of conditions and the following disclaimer.
1969     +//
1970     +// 2. Redistributions in binary form must reproduce the above copyright
1971     +// notice, this list of conditions and the following disclaimer in the
1972     +// documentation and/or other materials provided with the distribution.
1973     +//
1974     +// 3. The copyright holder's name must not be used to endorse or promote
1975     +// any products derived from this software without his specific prior
1976     +// written permission.
1977     +//
1978     +// This software is provided 'as is' with no express or implied warranties
1979     +// of correctness or fitness for purpose.
1980     +
1981     +// Modified by Jari Ruusu, December 24 2001
1982     +// - Converted syntax to GNU CPP/assembler syntax
1983     +// - C programming interface converted back to "old" API
1984     +// - Minor portability cleanups and speed optimizations
1985     +
1986     +// Modified by Jari Ruusu, April 11 2002
1987     +// - Added above copyright and terms to resulting object code so that
1988     +// binary distributions can avoid legal trouble
1989     +
1990     +// An AES (Rijndael) implementation for the Pentium. This version only
1991     +// implements the standard AES block length (128 bits, 16 bytes). This code
1992     +// does not preserve the eax, ecx or edx registers or the arithmetic status
1993     +// flags. However, the ebx, esi, edi, and ebp registers are preserved across
1994     +// calls.
1995     +
1996     +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
1997     +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
1998     +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
1999     +
2000     +#if defined(USE_UNDERLINE)
2001     +# define aes_set_key _aes_set_key
2002     +# define aes_encrypt _aes_encrypt
2003     +# define aes_decrypt _aes_decrypt
2004     +#endif
2005     +#if !defined(ALIGN32BYTES)
2006     +# define ALIGN32BYTES 32
2007     +#endif
2008     +
2009     + .file "aes-i586.S"
2010     + .globl aes_set_key
2011     + .globl aes_encrypt
2012     + .globl aes_decrypt
2013     +
2014     + .text
2015     +copyright:
2016     + .ascii " \000"
2017     + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
2018     + .ascii "All rights reserved.\000"
2019     + .ascii " \000"
2020     + .ascii "TERMS\000"
2021     + .ascii " \000"
2022     + .ascii " Redistribution and use in source and binary forms, with or without\000"
2023     + .ascii " modification, are permitted subject to the following conditions:\000"
2024     + .ascii " \000"
2025     + .ascii " 1. Redistributions of source code must retain the above copyright\000"
2026     + .ascii " notice, this list of conditions and the following disclaimer.\000"
2027     + .ascii " \000"
2028     + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
2029     + .ascii " notice, this list of conditions and the following disclaimer in the\000"
2030     + .ascii " documentation and/or other materials provided with the distribution.\000"
2031     + .ascii " \000"
2032     + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
2033     + .ascii " any products derived from this software without his specific prior\000"
2034     + .ascii " written permission.\000"
2035     + .ascii " \000"
2036     + .ascii " This software is provided 'as is' with no express or implied warranties\000"
2037     + .ascii " of correctness or fitness for purpose.\000"
2038     + .ascii " \000"
2039     +
2040     +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
2041     +
2042     +// offsets to parameters with one register pushed onto stack
2043     +
2044     +#define ctx 8 // AES context structure
2045     +#define in_blk 12 // input byte array address parameter
2046     +#define out_blk 16 // output byte array address parameter
2047     +
2048     +// offsets in context structure
2049     +
2050     +#define nkey 0 // key length, size 4
2051     +#define nrnd 4 // number of rounds, size 4
2052     +#define ekey 8 // encryption key schedule base address, size 256
2053     +#define dkey 264 // decryption key schedule base address, size 256
2054     +
2055     +// This macro performs a forward encryption cycle. It is entered with
2056     +// the first previous round column values in %eax, %ebx, %esi and %edi and
2057     +// exits with the final values in the same registers.
2058     +
2059     +#define fwd_rnd(p1,p2) \
2060     + mov %ebx,(%esp) ;\
2061     + movzbl %al,%edx ;\
2062     + mov %eax,%ecx ;\
2063     + mov p2(%ebp),%eax ;\
2064     + mov %edi,4(%esp) ;\
2065     + mov p2+12(%ebp),%edi ;\
2066     + xor p1(,%edx,4),%eax ;\
2067     + movzbl %ch,%edx ;\
2068     + shr $16,%ecx ;\
2069     + mov p2+4(%ebp),%ebx ;\
2070     + xor p1+tlen(,%edx,4),%edi ;\
2071     + movzbl %cl,%edx ;\
2072     + movzbl %ch,%ecx ;\
2073     + xor p1+3*tlen(,%ecx,4),%ebx ;\
2074     + mov %esi,%ecx ;\
2075     + mov p1+2*tlen(,%edx,4),%esi ;\
2076     + movzbl %cl,%edx ;\
2077     + xor p1(,%edx,4),%esi ;\
2078     + movzbl %ch,%edx ;\
2079     + shr $16,%ecx ;\
2080     + xor p1+tlen(,%edx,4),%ebx ;\
2081     + movzbl %cl,%edx ;\
2082     + movzbl %ch,%ecx ;\
2083     + xor p1+2*tlen(,%edx,4),%eax ;\
2084     + mov (%esp),%edx ;\
2085     + xor p1+3*tlen(,%ecx,4),%edi ;\
2086     + movzbl %dl,%ecx ;\
2087     + xor p2+8(%ebp),%esi ;\
2088     + xor p1(,%ecx,4),%ebx ;\
2089     + movzbl %dh,%ecx ;\
2090     + shr $16,%edx ;\
2091     + xor p1+tlen(,%ecx,4),%eax ;\
2092     + movzbl %dl,%ecx ;\
2093     + movzbl %dh,%edx ;\
2094     + xor p1+2*tlen(,%ecx,4),%edi ;\
2095     + mov 4(%esp),%ecx ;\
2096     + xor p1+3*tlen(,%edx,4),%esi ;\
2097     + movzbl %cl,%edx ;\
2098     + xor p1(,%edx,4),%edi ;\
2099     + movzbl %ch,%edx ;\
2100     + shr $16,%ecx ;\
2101     + xor p1+tlen(,%edx,4),%esi ;\
2102     + movzbl %cl,%edx ;\
2103     + movzbl %ch,%ecx ;\
2104     + xor p1+2*tlen(,%edx,4),%ebx ;\
2105     + xor p1+3*tlen(,%ecx,4),%eax
2106     +
2107     +// This macro performs an inverse encryption cycle. It is entered with
2108     +// the first previous round column values in %eax, %ebx, %esi and %edi and
2109     +// exits with the final values in the same registers.
2110     +
2111     +#define inv_rnd(p1,p2) \
2112     + movzbl %al,%edx ;\
2113     + mov %ebx,(%esp) ;\
2114     + mov %eax,%ecx ;\
2115     + mov p2(%ebp),%eax ;\
2116     + mov %edi,4(%esp) ;\
2117     + mov p2+4(%ebp),%ebx ;\
2118     + xor p1(,%edx,4),%eax ;\
2119     + movzbl %ch,%edx ;\
2120     + shr $16,%ecx ;\
2121     + mov p2+12(%ebp),%edi ;\
2122     + xor p1+tlen(,%edx,4),%ebx ;\
2123     + movzbl %cl,%edx ;\
2124     + movzbl %ch,%ecx ;\
2125     + xor p1+3*tlen(,%ecx,4),%edi ;\
2126     + mov %esi,%ecx ;\
2127     + mov p1+2*tlen(,%edx,4),%esi ;\
2128     + movzbl %cl,%edx ;\
2129     + xor p1(,%edx,4),%esi ;\
2130     + movzbl %ch,%edx ;\
2131     + shr $16,%ecx ;\
2132     + xor p1+tlen(,%edx,4),%edi ;\
2133     + movzbl %cl,%edx ;\
2134     + movzbl %ch,%ecx ;\
2135     + xor p1+2*tlen(,%edx,4),%eax ;\
2136     + mov (%esp),%edx ;\
2137     + xor p1+3*tlen(,%ecx,4),%ebx ;\
2138     + movzbl %dl,%ecx ;\
2139     + xor p2+8(%ebp),%esi ;\
2140     + xor p1(,%ecx,4),%ebx ;\
2141     + movzbl %dh,%ecx ;\
2142     + shr $16,%edx ;\
2143     + xor p1+tlen(,%ecx,4),%esi ;\
2144     + movzbl %dl,%ecx ;\
2145     + movzbl %dh,%edx ;\
2146     + xor p1+2*tlen(,%ecx,4),%edi ;\
2147     + mov 4(%esp),%ecx ;\
2148     + xor p1+3*tlen(,%edx,4),%eax ;\
2149     + movzbl %cl,%edx ;\
2150     + xor p1(,%edx,4),%edi ;\
2151     + movzbl %ch,%edx ;\
2152     + shr $16,%ecx ;\
2153     + xor p1+tlen(,%edx,4),%eax ;\
2154     + movzbl %cl,%edx ;\
2155     + movzbl %ch,%ecx ;\
2156     + xor p1+2*tlen(,%edx,4),%ebx ;\
2157     + xor p1+3*tlen(,%ecx,4),%esi
2158     +
2159     +// AES (Rijndael) Encryption Subroutine
2160     +
2161     + .text
2162     + .align ALIGN32BYTES
2163     +aes_encrypt: // encrypt one 16-byte block: in_blk -> out_blk with the ekey schedule in ctx
2164     + push %ebp // with this one push the ctx/in_blk/out_blk offsets are valid
2165     + mov ctx(%esp),%ebp // pointer to context
2166     + mov in_blk(%esp),%ecx // %ecx -> input block
2167     + push %ebx // %ebx, %esi and %edi are preserved for the caller
2168     + push %esi
2169     + push %edi
2170     + mov nrnd(%ebp),%edx // number of rounds
2171     + lea ekey+16(%ebp),%ebp // key pointer
2172     +
2173     +// input four columns and xor in first round key
2174     +
2175     + mov (%ecx),%eax
2176     + mov 4(%ecx),%ebx
2177     + mov 8(%ecx),%esi
2178     + mov 12(%ecx),%edi
2179     + xor -16(%ebp),%eax // first round key sits 16 bytes below the key pointer
2180     + xor -12(%ebp),%ebx
2181     + xor -8(%ebp),%esi
2182     + xor -4(%ebp),%edi
2183     +
2184     + sub $8,%esp // space for register saves on stack
2185     +
2186     + sub $10,%edx // %edx = nrnd - 10
2187     + je aes_15 // exactly 10 rounds: start at the 10-round entry
2188     + add $32,%ebp // two more rounds: advance key pointer by two round keys
2189     + sub $2,%edx // %edx = nrnd - 12
2190     + je aes_13 // exactly 12 rounds: start at the 12-round entry
2191     + add $32,%ebp // any other count falls through to the 14-round sequence
2192     +
2193     + fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
2194     + fwd_rnd(aes_ft_tab,-48)
2195     +aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
2196     + fwd_rnd(aes_ft_tab,-16)
2197     +aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
2198     + fwd_rnd(aes_ft_tab,16)
2199     + fwd_rnd(aes_ft_tab,32)
2200     + fwd_rnd(aes_ft_tab,48)
2201     + fwd_rnd(aes_ft_tab,64)
2202     + fwd_rnd(aes_ft_tab,80)
2203     + fwd_rnd(aes_ft_tab,96)
2204     + fwd_rnd(aes_ft_tab,112)
2205     + fwd_rnd(aes_ft_tab,128)
2206     + fwd_rnd(aes_fl_tab,144) // last round uses a different table
2207     +
2208     +// move final values to the output array.
2209     +
2210     + mov out_blk+20(%esp),%ebp // +20: three more pushes (12 bytes) plus the 8 scratch bytes
2211     + add $8,%esp // release the scratch space
2212     + mov %eax,(%ebp)
2213     + mov %ebx,4(%ebp)
2214     + mov %esi,8(%ebp)
2215     + mov %edi,12(%ebp)
2216     + pop %edi // restore preserved registers in reverse push order
2217     + pop %esi
2218     + pop %ebx
2219     + pop %ebp
2220     + ret
2221     +
2222     +
2223     +// AES (Rijndael) Decryption Subroutine
2224     +
2225     + .align ALIGN32BYTES
2226     +aes_decrypt: // decrypt one 16-byte block: in_blk -> out_blk with the dkey schedule in ctx
2227     + push %ebp // with this one push the ctx/in_blk/out_blk offsets are valid
2228     + mov ctx(%esp),%ebp // pointer to context
2229     + mov in_blk(%esp),%ecx // %ecx -> input block
2230     + push %ebx // %ebx, %esi and %edi are preserved for the caller
2231     + push %esi
2232     + push %edi
2233     + mov nrnd(%ebp),%edx // number of rounds
2234     + lea dkey+16(%ebp),%ebp // key pointer
2235     +
2236     +// input four columns and xor in first round key
2237     +
2238     + mov (%ecx),%eax
2239     + mov 4(%ecx),%ebx
2240     + mov 8(%ecx),%esi
2241     + mov 12(%ecx),%edi
2242     + xor -16(%ebp),%eax // first round key sits 16 bytes below the key pointer
2243     + xor -12(%ebp),%ebx
2244     + xor -8(%ebp),%esi
2245     + xor -4(%ebp),%edi
2246     +
2247     + sub $8,%esp // space for register saves on stack
2248     +
2249     + sub $10,%edx // %edx = nrnd - 10
2250     + je aes_25 // exactly 10 rounds: start at the 10-round entry
2251     + add $32,%ebp // two more rounds: advance key pointer by two round keys
2252     + sub $2,%edx // %edx = nrnd - 12
2253     + je aes_23 // exactly 12 rounds: start at the 12-round entry
2254     + add $32,%ebp // any other count falls through to the 14-round sequence
2255     +
2256     + inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
2257     + inv_rnd(aes_it_tab,-48)
2258     +aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
2259     + inv_rnd(aes_it_tab,-16)
2260     +aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
2261     + inv_rnd(aes_it_tab,16)
2262     + inv_rnd(aes_it_tab,32)
2263     + inv_rnd(aes_it_tab,48)
2264     + inv_rnd(aes_it_tab,64)
2265     + inv_rnd(aes_it_tab,80)
2266     + inv_rnd(aes_it_tab,96)
2267     + inv_rnd(aes_it_tab,112)
2268     + inv_rnd(aes_it_tab,128)
2269     + inv_rnd(aes_il_tab,144) // last round uses a different table
2270     +
2271     +// move final values to the output array.
2272     +
2273     + mov out_blk+20(%esp),%ebp // +20: three more pushes (12 bytes) plus the 8 scratch bytes
2274     + add $8,%esp // release the scratch space
2275     + mov %eax,(%ebp)
2276     + mov %ebx,4(%ebp)
2277     + mov %esi,8(%ebp)
2278     + mov %edi,12(%ebp)
2279     + pop %edi // restore preserved registers in reverse push order
2280     + pop %esi
2281     + pop %ebx
2282     + pop %ebp
2283     + ret
2284     +
2285     +// AES (Rijndael) Key Schedule Subroutine
2286     +
2287     +// input/output parameters
2288     +
2289     +#define aes_cx 12 // AES context
2290     +#define in_key 16 // key input array address
2291     +#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
2292     +#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only
2293     +
2294     +// offsets for locals
2295     +
2296     +#define cnt -4
2297     +#define kpf -8
2298     +#define slen 8
2299     +
2300     +// This macro performs a column mixing operation on an input 32-bit
2301     +// word to give a 32-bit result. It uses each of the 4 bytes in the
2302     +// input column to index 4 different tables of 256 32-bit words
2303     +// that are xored together to form the output value.
2304     +
2305     +#define mix_col(p1) \
2306     + movzbl %bl,%ecx ;\
2307     + mov p1(,%ecx,4),%eax ;\
2308     + movzbl %bh,%ecx ;\
2309     + ror $16,%ebx ;\
2310     + xor p1+tlen(,%ecx,4),%eax ;\
2311     + movzbl %bl,%ecx ;\
2312     + xor p1+2*tlen(,%ecx,4),%eax ;\
2313     + movzbl %bh,%ecx ;\
2314     + xor p1+3*tlen(,%ecx,4),%eax
2315     +
2316     +// Key Schedule Macros
2317     +
2318     +#define ksc4(p1) \
2319     + rol $24,%ebx ;\
2320     + mix_col(aes_fl_tab) ;\
2321     + ror $8,%ebx ;\
2322     + xor 4*p1+aes_rcon_tab,%eax ;\
2323     + xor %eax,%esi ;\
2324     + xor %esi,%ebp ;\
2325     + mov %esi,16*p1(%edi) ;\
2326     + mov %ebp,16*p1+4(%edi) ;\
2327     + xor %ebp,%edx ;\
2328     + xor %edx,%ebx ;\
2329     + mov %edx,16*p1+8(%edi) ;\
2330     + mov %ebx,16*p1+12(%edi)
2331     +
2332     +#define ksc6(p1) \
2333     + rol $24,%ebx ;\
2334     + mix_col(aes_fl_tab) ;\
2335     + ror $8,%ebx ;\
2336     + xor 4*p1+aes_rcon_tab,%eax ;\
2337     + xor 24*p1-24(%edi),%eax ;\
2338     + mov %eax,24*p1(%edi) ;\
2339     + xor 24*p1-20(%edi),%eax ;\
2340     + mov %eax,24*p1+4(%edi) ;\
2341     + xor %eax,%esi ;\
2342     + xor %esi,%ebp ;\
2343     + mov %esi,24*p1+8(%edi) ;\
2344     + mov %ebp,24*p1+12(%edi) ;\
2345     + xor %ebp,%edx ;\
2346     + xor %edx,%ebx ;\
2347     + mov %edx,24*p1+16(%edi) ;\
2348     + mov %ebx,24*p1+20(%edi)
2349     +
2350     +#define ksc8(p1) \
2351     + rol $24,%ebx ;\
2352     + mix_col(aes_fl_tab) ;\
2353     + ror $8,%ebx ;\
2354     + xor 4*p1+aes_rcon_tab,%eax ;\
2355     + xor 32*p1-32(%edi),%eax ;\
2356     + mov %eax,32*p1(%edi) ;\
2357     + xor 32*p1-28(%edi),%eax ;\
2358     + mov %eax,32*p1+4(%edi) ;\
2359     + xor 32*p1-24(%edi),%eax ;\
2360     + mov %eax,32*p1+8(%edi) ;\
2361     + xor 32*p1-20(%edi),%eax ;\
2362     + mov %eax,32*p1+12(%edi) ;\
2363     + push %ebx ;\
2364     + mov %eax,%ebx ;\
2365     + mix_col(aes_fl_tab) ;\
2366     + pop %ebx ;\
2367     + xor %eax,%esi ;\
2368     + xor %esi,%ebp ;\
2369     + mov %esi,32*p1+16(%edi) ;\
2370     + mov %ebp,32*p1+20(%edi) ;\
2371     + xor %ebp,%edx ;\
2372     + xor %edx,%ebx ;\
2373     + mov %edx,32*p1+24(%edi) ;\
2374     + mov %ebx,32*p1+28(%edi)
2375     +
2376     + .align ALIGN32BYTES
2377     +aes_set_key: // build the encryption (and, if ed_flg is 0, decryption) key schedule in the context
2378     + pushfl // caller flags saved; cld below changes the direction flag
2379     + push %ebp
2380     + mov %esp,%ebp // frame: saved ebp, saved flags, return address, then arguments from 12(%ebp)
2381     + sub $slen,%esp // room for the locals addressed below %ebp (cnt, kpf)
2382     + push %ebx
2383     + push %esi
2384     + push %edi
2385     +
2386     + mov aes_cx(%ebp),%edx // edx -> AES context
2387     +
2388     + mov key_ln(%ebp),%ecx // key length
2389     + cmpl $128,%ecx
2390     + jb aes_30 // below 128: value is already a byte count
2391     + shr $3,%ecx // 128 or more: value is in bits, convert to bytes
2392     +aes_30: cmpl $32,%ecx
2393     + je aes_32
2394     + cmpl $24,%ecx
2395     + je aes_32
2396     + mov $16,%ecx // anything other than 24 or 32 bytes is treated as 16
2397     +aes_32: shr $2,%ecx // bytes -> 32-bit words (4, 6 or 8)
2398     + mov %ecx,nkey(%edx)
2399     +
2400     + lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length
2401     + mov %eax,nrnd(%edx)
2402     +
2403     + mov in_key(%ebp),%esi // key input array
2404     + lea ekey(%edx),%edi // key position in AES context
2405     + cld // string instructions below count upwards
2406     + push %ebp // %ebp is reused as a data register until aes_37
2407     + mov %ecx,%eax // save key length in eax
2408     + rep ; movsl // words in the key schedule
2409     + mov -4(%esi),%ebx // put some values in registers
2410     + mov -8(%esi),%edx // to allow faster code
2411     + mov -12(%esi),%ebp // i.e. the last four key words, newest in %ebx
2412     + mov -16(%esi),%esi
2413     +
2414     + cmpl $4,%eax // jump on key size
2415     + je aes_36
2416     + cmpl $6,%eax
2417     + je aes_35
2418     +
2419     + ksc8(0) // neither 4 nor 6 words: 8-word key
2420     + ksc8(1)
2421     + ksc8(2)
2422     + ksc8(3)
2423     + ksc8(4)
2424     + ksc8(5)
2425     + ksc8(6)
2426     + jmp aes_37
2427     +aes_35: ksc6(0) // 6-word key
2428     + ksc6(1)
2429     + ksc6(2)
2430     + ksc6(3)
2431     + ksc6(4)
2432     + ksc6(5)
2433     + ksc6(6)
2434     + ksc6(7)
2435     + jmp aes_37
2436     +aes_36: ksc4(0) // 4-word key
2437     + ksc4(1)
2438     + ksc4(2)
2439     + ksc4(3)
2440     + ksc4(4)
2441     + ksc4(5)
2442     + ksc4(6)
2443     + ksc4(7)
2444     + ksc4(8)
2445     + ksc4(9)
2446     +aes_37: pop %ebp // frame pointer back; arguments reachable again
2447     + mov aes_cx(%ebp),%edx // edx -> AES context
2448     + cmpl $0,ed_flg(%ebp)
2449     + jne aes_39 // non-zero flag: encryption schedule only, skip the rest
2450     +
2451     +// compile decryption key schedule from encryption schedule - reverse
2452     +// order and do mix_column operation on round keys except first and last
2453     +
2454     + mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
2455     + shl $2,%eax
2456     + lea dkey(%edx,%eax,4),%edi // %edi = dkey + 16 * nrnd, the last round-key slot
2457     + lea ekey(%edx),%esi // kf = cx->e_key
2458     +
2459     + movsl // copy first round key (unmodified)
2460     + movsl
2461     + movsl
2462     + movsl
2463     + sub $32,%edi // undo the 16-byte advance and step one slot down
2464     + movl $1,cnt(%ebp) // round counter starts at 1
2465     +aes_38: // do mix column on each column of
2466     + lodsl // each round key
2467     + mov %eax,%ebx
2468     + mix_col(aes_im_tab)
2469     + stosl
2470     + lodsl
2471     + mov %eax,%ebx
2472     + mix_col(aes_im_tab)
2473     + stosl
2474     + lodsl
2475     + mov %eax,%ebx
2476     + mix_col(aes_im_tab)
2477     + stosl
2478     + lodsl
2479     + mov %eax,%ebx
2480     + mix_col(aes_im_tab)
2481     + stosl
2482     + sub $32,%edi // source walks up, destination walks down one slot per pass
2483     +
2484     + incl cnt(%ebp)
2485     + mov cnt(%ebp),%eax
2486     + cmp nrnd(%edx),%eax
2487     + jb aes_38 // repeat while cnt < nrnd
2488     +
2489     + movsl // copy last round key (unmodified)
2490     + movsl
2491     + movsl
2492     + movsl
2493     +aes_39: pop %edi // common exit: restore preserved registers
2494     + pop %esi
2495     + pop %ebx
2496     + mov %ebp,%esp // drop the locals
2497     + pop %ebp
2498     + popfl // restore the flags saved on entry
2499     + ret
2500     +
2501     +
2502     +// finite field multiplies by {02}, {04} and {08}
2503     +
2504     +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
2505     +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
2506     +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
2507     +
2508     +// finite field multiplies required in table generation
2509     +
2510     +#define f3(x) (f2(x) ^ x)
2511     +#define f9(x) (f8(x) ^ x)
2512     +#define fb(x) (f8(x) ^ f2(x) ^ x)
2513     +#define fd(x) (f8(x) ^ f4(x) ^ x)
2514     +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
2515     +
2516     +// These defines generate the forward table entries
2517     +
2518     +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
2519     +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
2520     +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
2521     +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
2522     +
2523     +// These defines generate the inverse table entries
2524     +
2525     +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
2526     +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
2527     +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
2528     +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
2529     +
2530     +// These defines generate entries for the last round tables
2531     +
2532     +#define w0(x) (x)
2533     +#define w1(x) (x << 8)
2534     +#define w2(x) (x << 16)
2535     +#define w3(x) (x << 24)
2536     +
2537     +// macro to generate inverse mix column tables (needed for the key schedule)
2538     +
2539     +#define im_data0(p1) \
2540     + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
2541     + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
2542     + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
2543     + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
2544     +#define im_data1(p1) \
2545     + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
2546     + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
2547     + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
2548     + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
2549     +#define im_data2(p1) \
2550     + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
2551     + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
2552     + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
2553     + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
2554     +#define im_data3(p1) \
2555     + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
2556     + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
2557     + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
2558     + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
2559     +#define im_data4(p1) \
2560     + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
2561     + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
2562     + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
2563     + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
2564     +#define im_data5(p1) \
2565     + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
2566     + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
2567     + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
2568     + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
2569     +#define im_data6(p1) \
2570     + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
2571     + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
2572     + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
2573     + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
2574     +#define im_data7(p1) \
2575     + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
2576     + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
2577     + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
2578     + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
2579     +
2580     +// S-box data - 256 entries
2581     +
2582     +#define sb_data0(p1) \
2583     + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
2584     + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
2585     + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
2586     + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
2587     +#define sb_data1(p1) \
2588     + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
2589     + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
2590     + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
2591     + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
2592     +#define sb_data2(p1) \
2593     + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
2594     + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
2595     + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
2596     + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
2597     +#define sb_data3(p1) \
2598     + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
2599     + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
2600     + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
2601     + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
2602     +#define sb_data4(p1) \
2603     + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
2604     + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
2605     + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
2606     + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
2607     +#define sb_data5(p1) \
2608     + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
2609     + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
2610     + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
2611     + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
2612     +#define sb_data6(p1) \
2613     + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
2614     + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
2615     + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
2616     + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
2617     +#define sb_data7(p1) \
2618     + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
2619     + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
2620     + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
2621     + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
2622     +
2623     +// Inverse S-box data - 256 entries
2624     +
2625     +#define ib_data0(p1) \
2626     + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
2627     + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
2628     + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
2629     + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
2630     +#define ib_data1(p1) \
2631     + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
2632     + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
2633     + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
2634     + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
2635     +#define ib_data2(p1) \
2636     + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
2637     + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
2638     + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
2639     + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
2640     +#define ib_data3(p1) \
2641     + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
2642     + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
2643     + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
2644     + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
2645     +#define ib_data4(p1) \
2646     + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
2647     + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
2648     + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
2649     + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
2650     +#define ib_data5(p1) \
2651     + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
2652     + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
2653     + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
2654     + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
2655     +#define ib_data6(p1) \
2656     + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
2657     + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
2658     + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
2659     + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
2660     +#define ib_data7(p1) \
2661     + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
2662     + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
2663     + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
2664     + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
2665     +
2666     +// The rcon_table (needed for the key schedule)
2667     +//
2668     +// Here is Dr Brian Gladman's original source code:
2669     +// _rcon_tab:
2670     +// %assign x 1
2671     +// %rep 29
2672     +// dd x
2673     +// %assign x f2(x)
2674     +// %endrep
2675     +//
2676     +// Here is precomputed output (it's more portable this way):
2677     +
2678     + .align ALIGN32BYTES
2679     +aes_rcon_tab:
2680     + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
2681     + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
2682     + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
2683     + .long 0xb3,0x7d,0xfa,0xef,0xc5
2684     +
2685     +// The forward xor tables
2686     +
2687     + .align ALIGN32BYTES
2688     +aes_ft_tab:
2689     + sb_data0(u0)
2690     + sb_data1(u0)
2691     + sb_data2(u0)
2692     + sb_data3(u0)
2693     + sb_data4(u0)
2694     + sb_data5(u0)
2695     + sb_data6(u0)
2696     + sb_data7(u0)
2697     +
2698     + sb_data0(u1)
2699     + sb_data1(u1)
2700     + sb_data2(u1)
2701     + sb_data3(u1)
2702     + sb_data4(u1)
2703     + sb_data5(u1)
2704     + sb_data6(u1)
2705     + sb_data7(u1)
2706     +
2707     + sb_data0(u2)
2708     + sb_data1(u2)
2709     + sb_data2(u2)
2710     + sb_data3(u2)
2711     + sb_data4(u2)
2712     + sb_data5(u2)
2713     + sb_data6(u2)
2714     + sb_data7(u2)
2715     +
2716     + sb_data0(u3)
2717     + sb_data1(u3)
2718     + sb_data2(u3)
2719     + sb_data3(u3)
2720     + sb_data4(u3)
2721     + sb_data5(u3)
2722     + sb_data6(u3)
2723     + sb_data7(u3)
2724     +
2725     + .align ALIGN32BYTES
2726     +aes_fl_tab:
2727     + sb_data0(w0)
2728     + sb_data1(w0)
2729     + sb_data2(w0)
2730     + sb_data3(w0)
2731     + sb_data4(w0)
2732     + sb_data5(w0)
2733     + sb_data6(w0)
2734     + sb_data7(w0)
2735     +
2736     + sb_data0(w1)
2737     + sb_data1(w1)
2738     + sb_data2(w1)
2739     + sb_data3(w1)
2740     + sb_data4(w1)
2741     + sb_data5(w1)
2742     + sb_data6(w1)
2743     + sb_data7(w1)
2744     +
2745     + sb_data0(w2)
2746     + sb_data1(w2)
2747     + sb_data2(w2)
2748     + sb_data3(w2)
2749     + sb_data4(w2)
2750     + sb_data5(w2)
2751     + sb_data6(w2)
2752     + sb_data7(w2)
2753     +
2754     + sb_data0(w3)
2755     + sb_data1(w3)
2756     + sb_data2(w3)
2757     + sb_data3(w3)
2758     + sb_data4(w3)
2759     + sb_data5(w3)
2760     + sb_data6(w3)
2761     + sb_data7(w3)
2762     +
2763     +// The inverse xor tables
2764     +
2765     + .align ALIGN32BYTES
2766     +aes_it_tab:
2767     + ib_data0(v0)
2768     + ib_data1(v0)
2769     + ib_data2(v0)
2770     + ib_data3(v0)
2771     + ib_data4(v0)
2772     + ib_data5(v0)
2773     + ib_data6(v0)
2774     + ib_data7(v0)
2775     +
2776     + ib_data0(v1)
2777     + ib_data1(v1)
2778     + ib_data2(v1)
2779     + ib_data3(v1)
2780     + ib_data4(v1)
2781     + ib_data5(v1)
2782     + ib_data6(v1)
2783     + ib_data7(v1)
2784     +
2785     + ib_data0(v2)
2786     + ib_data1(v2)
2787     + ib_data2(v2)
2788     + ib_data3(v2)
2789     + ib_data4(v2)
2790     + ib_data5(v2)
2791     + ib_data6(v2)
2792     + ib_data7(v2)
2793     +
2794     + ib_data0(v3)
2795     + ib_data1(v3)
2796     + ib_data2(v3)
2797     + ib_data3(v3)
2798     + ib_data4(v3)
2799     + ib_data5(v3)
2800     + ib_data6(v3)
2801     + ib_data7(v3)
2802     +
2803     + .align ALIGN32BYTES
2804     +aes_il_tab:
2805     + ib_data0(w0)
2806     + ib_data1(w0)
2807     + ib_data2(w0)
2808     + ib_data3(w0)
2809     + ib_data4(w0)
2810     + ib_data5(w0)
2811     + ib_data6(w0)
2812     + ib_data7(w0)
2813     +
2814     + ib_data0(w1)
2815     + ib_data1(w1)
2816     + ib_data2(w1)
2817     + ib_data3(w1)
2818     + ib_data4(w1)
2819     + ib_data5(w1)
2820     + ib_data6(w1)
2821     + ib_data7(w1)
2822     +
2823     + ib_data0(w2)
2824     + ib_data1(w2)
2825     + ib_data2(w2)
2826     + ib_data3(w2)
2827     + ib_data4(w2)
2828     + ib_data5(w2)
2829     + ib_data6(w2)
2830     + ib_data7(w2)
2831     +
2832     + ib_data0(w3)
2833     + ib_data1(w3)
2834     + ib_data2(w3)
2835     + ib_data3(w3)
2836     + ib_data4(w3)
2837     + ib_data5(w3)
2838     + ib_data6(w3)
2839     + ib_data7(w3)
2840     +
2841     +// The inverse mix column tables
2842     +
2843     + .align ALIGN32BYTES
2844     +aes_im_tab:
2845     + im_data0(v0)
2846     + im_data1(v0)
2847     + im_data2(v0)
2848     + im_data3(v0)
2849     + im_data4(v0)
2850     + im_data5(v0)
2851     + im_data6(v0)
2852     + im_data7(v0)
2853     +
2854     + im_data0(v1)
2855     + im_data1(v1)
2856     + im_data2(v1)
2857     + im_data3(v1)
2858     + im_data4(v1)
2859     + im_data5(v1)
2860     + im_data6(v1)
2861     + im_data7(v1)
2862     +
2863     + im_data0(v2)
2864     + im_data1(v2)
2865     + im_data2(v2)
2866     + im_data3(v2)
2867     + im_data4(v2)
2868     + im_data5(v2)
2869     + im_data6(v2)
2870     + im_data7(v2)
2871     +
2872     + im_data0(v3)
2873     + im_data1(v3)
2874     + im_data2(v3)
2875     + im_data3(v3)
2876     + im_data4(v3)
2877     + im_data5(v3)
2878     + im_data6(v3)
2879     + im_data7(v3)
2880     diff -urN linux-2.4.23/drivers/misc/aes.c linux-2.4.23-AES/drivers/misc/aes.c
2881     --- linux-2.4.23/drivers/misc/aes.c Thu Jan 1 02:00:00 1970
2882     +++ linux-2.4.23-AES/drivers/misc/aes.c Sat Nov 29 11:08:29 2003
2883     @@ -0,0 +1,1402 @@
2884     +// I retain copyright in this code but I encourage its free use provided
2885     +// that I don't carry any responsibility for the results. I am especially
2886     +// happy to see it used in free and open source software. If you do use
2887     +// it I would appreciate an acknowledgement of its origin in the code or
2888     +// the product that results and I would also appreciate knowing a little
2889     +// about the use to which it is being put. I am grateful to Frank Yellin
2890     +// for some ideas that are used in this implementation.
2891     +//
2892     +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
2893     +//
2894     +// This is an implementation of the AES encryption algorithm (Rijndael)
2895     +// designed by Joan Daemen and Vincent Rijmen. This version is designed
2896     +// to provide both fixed and dynamic block and key lengths and can also
2897     +// run with either big or little endian internal byte order (see aes.h).
2898     +// It inputs block and key lengths in bytes with the legal values being
2899     +// 16, 24 and 32.
2900     +
2901     +/*
2902     + * Modified by Jari Ruusu, May 1 2001
2903     + * - Fixed some compile warnings, code was ok but gcc warned anyway.
2904     + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
2905     + * - Major name space cleanup: Names visible to outside now begin
2906     + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
2907     + * - Removed C++ and DLL support as part of name space cleanup.
2908     + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
2909     + * - Merged precomputed constant tables to aes.c file.
2910     + * - Removed data alignment restrictions for portability reasons.
2911     + * - Made block and key lengths accept bit count (128/192/256)
2912     + * as well as byte count (16/24/32).
2913     + * - Removed all error checks. This change also eliminated the need
2914     + * to preinitialize the context struct to zero.
2915     + * - Removed some totally unused constants.
2916     + */
2917     +
2918     +#include <linux/module.h>
2919     +#include "aes.h"
2920     +
2921     +// CONFIGURATION OPTIONS (see also aes.h)
2922     +//
2923     +// 1. Define UNROLL for full loop unrolling in encryption and decryption.
2924     +// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
2925     +// 3. Define FIXED_TABLES for compiled rather than dynamic tables.
2926     +// 4. Define FF_TABLES to use tables for field multiplies and inverses.
2927     +// Do not enable this without understanding stack space requirements.
2928     +// 5. Define ARRAYS to use arrays to hold the local state block. If this
2929     +// is not defined, individually declared 32-bit words are used.
2930     +// 6. Define FAST_VARIABLE if a high speed variable block implementation
2931     +// is needed (essentially three separate fixed block size code sequences)
2932     +// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
2933     +// version using 1 table (2 kbytes of table space) or 4 tables (8
2934     +// kbytes of table space) for higher speed.
2935     +// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
2936     +// increase by using tables for the last rounds but with more table
2937     +// space (2 or 8 kbytes extra).
2938     +// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
2939     +// slower version is provided.
2940     +// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
2941     +// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
2942     +
2943     +#define UNROLL
2944     +//#define PARTIAL_UNROLL
2945     +
2946     +#define FIXED_TABLES
2947     +//#define FF_TABLES
2948     +//#define ARRAYS
2949     +#define FAST_VARIABLE
2950     +
2951     +//#define ONE_TABLE
2952     +#define FOUR_TABLES
2953     +
2954     +//#define ONE_LR_TABLE
2955     +#define FOUR_LR_TABLES
2956     +
2957     +//#define ONE_IM_TABLE
2958     +#define FOUR_IM_TABLES
2959     +
2960     +#if defined(UNROLL) && defined (PARTIAL_UNROLL)
2961     +#error both UNROLL and PARTIAL_UNROLL are defined
2962     +#endif
2963     +
2964     +#if defined(ONE_TABLE) && defined (FOUR_TABLES)
2965     +#error both ONE_TABLE and FOUR_TABLES are defined
2966     +#endif
2967     +
2968     +#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
2969     +#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
2970     +#endif
2971     +
2972     +#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
2973     +#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
2974     +#endif
2975     +
2976     +#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
2977     +#error an illegal block size has been specified
2978     +#endif
2979     +
2980     +// upr(x,n): rotates the bytes within word x by n positions, moving bytes
2981     +// to higher index positions with wrap around into low positions (n = 1..3; n = 0 would shift by 32 bits)
2982     +// ups(x,n): moves the bytes of word x by n positions to higher index
2983     +// positions but without wrap around (vacated low bytes become zero)
2984     +// bval(x,n): extracts byte n from word x, byte 0 being the least significant
2985     +
2986     +#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n))))
2987     +#define ups(x,n) ((x) << 8 * (n))
2988     +#define bval(x,n) ((unsigned char)((x) >> 8 * (n)))
2989     +#define bytes2word(b0, b1, b2, b3) \
2990     + ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0))
2991     +
2992     +#if defined(i386) || defined(_I386) || defined(__i386__) || defined(__i386)
2993     +/* little endian processor without data alignment restrictions: access the 32-bit word in place */
2994     +#define word_in(x) (*(u_int32_t*)(x))
2995     +#define word_out(x,v) (*(u_int32_t*)(x) = (v))
2996     +#else
2997     +/* slower but generic (big endian or with data alignment restrictions): one byte at a time, least significant byte at offset 0; v is evaluated four times */
2998     +#define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24))
2999     +#define word_out(x,v) (((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24))
3000     +#endif
3001     +
3002     +// Disable at least some poor combinations of options
3003     +
3004     +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
3005     +#define FIXED_TABLES
3006     +#undef UNROLL
3007     +#undef ONE_LR_TABLE
3008     +#undef FOUR_LR_TABLES
3009     +#undef ONE_IM_TABLE
3010     +#undef FOUR_IM_TABLES
3011     +#elif !defined(FOUR_TABLES)
3012     +#ifdef FOUR_LR_TABLES
3013     +#undef FOUR_LR_TABLES
3014     +#define ONE_LR_TABLE
3015     +#endif
3016     +#ifdef FOUR_IM_TABLES
3017     +#undef FOUR_IM_TABLES
3018     +#define ONE_IM_TABLE
3019     +#endif
3020     +#elif !defined(AES_BLOCK_SIZE)
3021     +#if defined(UNROLL)
3022     +#define PARTIAL_UNROLL
3023     +#undef UNROLL
3024     +#endif
3025     +#endif
3026     +
3027     +// the finite field modular polynomial and elements
3028     +
3029     +#define ff_poly 0x011b
3030     +#define ff_hi 0x80
3031     +
3032     +// multiply four bytes in GF(2^8) by 'x' {02} in parallel
3033     +
3034     +#define m1 0x80808080
3035     +#define m2 0x7f7f7f7f
3036     +#define m3 0x0000001b
3037     +#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))
3038     +
3039     +// The following defines provide alternative definitions of FFmulX that might
3040     +// give improved performance if a fast 32-bit multiply is not available. Note
3041     +// that a temporary variable u needs to be defined where FFmulX is used.
3042     +
3043     +// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
3044     +// #define m4 0x1b1b1b1b
3045     +// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
3046     +
3047     +// perform column mix operation on four bytes in parallel (x is evaluated several times; a temporary named f2 must be in scope where fwd_mcol is used)
3048     +
3049     +#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr((x) ^ f2,3) ^ upr(x,2) ^ upr(x,1))
3050     +
3051     +#if defined(FIXED_TABLES)
3052     +
3053     +// the S-Box table
3054     +
3055     +static const unsigned char s_box[256] =
3056     +{
3057     + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
3058     + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
3059     + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
3060     + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
3061     + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
3062     + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
3063     + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
3064     + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
3065     + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
3066     + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
3067     + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
3068     + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
3069     + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
3070     + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
3071     + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
3072     + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
3073     + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
3074     + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
3075     + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
3076     + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
3077     + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
3078     + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
3079     + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
3080     + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
3081     + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
3082     + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
3083     + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
3084     + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
3085     + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
3086     + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
3087     + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
3088     + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
3089     +};
3090     +
3091     +// the inverse S-Box table
3092     +
3093     +static const unsigned char inv_s_box[256] =
3094     +{
3095     + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
3096     + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
3097     + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
3098     + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
3099     + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
3100     + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
3101     + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
3102     + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
3103     + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
3104     + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
3105     + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
3106     + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
3107     + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
3108     + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
3109     + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
3110     + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
3111     + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
3112     + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
3113     + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
3114     + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
3115     + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
3116     + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
3117     + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
3118     + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
3119     + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
3120     + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
3121     + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
3122     + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
3123     + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
3124     + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
3125     + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
3126     + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
3127     +};
3128     +
3129     +#define w0(p) 0x000000##p
3130     +
3131     +// The number of elements required in this table for different
3132     +// block lengths (Nb) and key lengths (Nk), both in 32-bit words, is:
3133     +//
3134     +//        Nk =   4   6   8
3135     +//             -----------
3136     +// Nb = 4  |    10   8   7
3137     +//      6  |    19  12  11
3138     +//      8  |    29  19  14
3139     +//
3140     +// This table can be a table of bytes if the key schedule
3141     +// code is adjusted accordingly.
3142     +
3143     +static const u_int32_t rcon_tab[29] =
3144     +{
3145     + w0(01), w0(02), w0(04), w0(08),
3146     + w0(10), w0(20), w0(40), w0(80),
3147     + w0(1b), w0(36), w0(6c), w0(d8),
3148     + w0(ab), w0(4d), w0(9a), w0(2f),
3149     + w0(5e), w0(bc), w0(63), w0(c6),
3150     + w0(97), w0(35), w0(6a), w0(d4),
3151     + w0(b3), w0(7d), w0(fa), w0(ef),
3152     + w0(c5)
3153     +};
3154     +
3155     +#undef w0
3156     +
3157     +#define r0(p,q,r,s) 0x##p##q##r##s
3158     +#define r1(p,q,r,s) 0x##q##r##s##p
3159     +#define r2(p,q,r,s) 0x##r##s##p##q
3160     +#define r3(p,q,r,s) 0x##s##p##q##r
3161     +#define w0(p) 0x000000##p
3162     +#define w1(p) 0x0000##p##00
3163     +#define w2(p) 0x00##p##0000
3164     +#define w3(p) 0x##p##000000
3165     +
3166     +#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
3167     +
3168     +// data for forward tables (other than last round)
3169     +
3170     +#define f_table \
3171     + r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
3172     + r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
3173     + r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
3174     + r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
3175     + r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
3176     + r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
3177     + r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
3178     + r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
3179     + r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
3180     + r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
3181     + r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
3182     + r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
3183     + r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
3184     + r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
3185     + r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
3186     + r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
3187     + r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
3188     + r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
3189     + r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
3190     + r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
3191     + r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
3192     + r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
3193     + r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
3194     + r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
3195     + r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
3196     + r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
3197     + r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
3198     + r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
3199     + r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
3200     + r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
3201     + r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
3202     + r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
3203     + r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
3204     + r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
3205     + r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
3206     + r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
3207     + r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
3208     + r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
3209     + r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
3210     + r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
3211     + r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
3212     + r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
3213     + r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
3214     + r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
3215     + r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
3216     + r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
3217     + r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
3218     + r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
3219     + r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
3220     + r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
3221     + r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
3222     + r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
3223     + r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
3224     + r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
3225     + r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
3226     + r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
3227     + r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
3228     + r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
3229     + r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
3230     + r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
3231     + r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
3232     + r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
3233     + r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
3234     + r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
3235     +
3236     +// data for inverse tables (other than last round)
3237     +
3238     +#define i_table \
3239     + r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
3240     + r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
3241     + r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
3242     + r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
3243     + r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
3244     + r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
3245     + r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
3246     + r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
3247     + r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
3248     + r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
3249     + r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
3250     + r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
3251     + r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
3252     + r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
3253     + r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
3254     + r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
3255     + r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
3256     + r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
3257     + r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
3258     + r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
3259     + r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
3260     + r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
3261     + r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
3262     + r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
3263     + r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
3264     + r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
3265     + r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
3266     + r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
3267     + r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
3268     + r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
3269     + r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
3270     + r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
3271     + r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
3272     + r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
3273     + r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
3274     + r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
3275     + r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
3276     + r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
3277     + r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
3278     + r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
3279     + r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
3280     + r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
3281     + r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
3282     + r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
3283     + r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
3284     + r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
3285     + r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
3286     + r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
3287     + r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
3288     + r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
3289     + r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
3290     + r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
3291     + r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
3292     + r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
3293     + r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
3294     + r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
3295     + r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
3296     + r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
3297     + r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
3298     + r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
3299     + r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
3300     + r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
3301     + r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
3302     + r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
3303     +
3304     +// generate the required tables in the desired endian format
3305     +
3306     +#undef r
3307     +#define r r0
3308     +
3309     +#if defined(ONE_TABLE)
3310     +static const u_int32_t ft_tab[256] =
3311     + { f_table };
3312     +#elif defined(FOUR_TABLES)
3313     +static const u_int32_t ft_tab[4][256] =
3314     +{ { f_table },
3315     +#undef r
3316     +#define r r1
3317     + { f_table },
3318     +#undef r
3319     +#define r r2
3320     + { f_table },
3321     +#undef r
3322     +#define r r3
3323     + { f_table }
3324     +};
3325     +#endif
3326     +
3327     +#undef r
3328     +#define r r0
3329     +#if defined(ONE_TABLE)
3330     +static const u_int32_t it_tab[256] =
3331     + { i_table };
3332     +#elif defined(FOUR_TABLES)
3333     +static const u_int32_t it_tab[4][256] =
3334     +{ { i_table },
3335     +#undef r
3336     +#define r r1
3337     + { i_table },
3338     +#undef r
3339     +#define r r2
3340     + { i_table },
3341     +#undef r
3342     +#define r r3
3343     + { i_table }
3344     +};
3345     +#endif
3346     +
3347     +#endif
3348     +
3349     +#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
3350     +
3351     +// data for inverse tables (last round)
3352     +
3353     +#define li_table \
3354     + w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
3355     + w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
3356     + w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
3357     + w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
3358     + w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
3359     + w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
3360     + w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
3361     + w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
3362     + w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
3363     + w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
3364     + w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
3365     + w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
3366     + w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
3367     + w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
3368     + w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
3369     + w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
3370     + w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
3371     + w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
3372     + w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
3373     + w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
3374     + w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
3375     + w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
3376     + w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
3377     + w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
3378     + w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
3379     + w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
3380     + w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
3381     + w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
3382     + w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
3383     + w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
3384     + w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
3385     + w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
3386     +
3387     +// generate the required tables in the desired endian format
3388     +
3389     +#undef r
3390     +#define r(p,q,r,s) w0(q)
3391     +#if defined(ONE_LR_TABLE)
3392     +static const u_int32_t fl_tab[256] =
3393     + { f_table };
3394     +#elif defined(FOUR_LR_TABLES)
3395     +static const u_int32_t fl_tab[4][256] =
3396     +{ { f_table },
3397     +#undef r
3398     +#define r(p,q,r,s) w1(q)
3399     + { f_table },
3400     +#undef r
3401     +#define r(p,q,r,s) w2(q)
3402     + { f_table },
3403     +#undef r
3404     +#define r(p,q,r,s) w3(q)
3405     + { f_table }
3406     +};
3407     +#endif
3408     +
3409     +#undef w
3410     +#define w w0
3411     +#if defined(ONE_LR_TABLE)
3412     +static const u_int32_t il_tab[256] =
3413     + { li_table };
3414     +#elif defined(FOUR_LR_TABLES)
3415     +static const u_int32_t il_tab[4][256] =
3416     +{ { li_table },
3417     +#undef w
3418     +#define w w1
3419     + { li_table },
3420     +#undef w
3421     +#define w w2
3422     + { li_table },
3423     +#undef w
3424     +#define w w3
3425     + { li_table }
3426     +};
3427     +#endif
3428     +
3429     +#endif
3430     +
3431     +#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
3432     +
3433     +#define m_table \
3434     + r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
3435     + r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
3436     + r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
3437     + r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
3438     + r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
3439     + r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
3440     + r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
3441     + r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
3442     + r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
3443     + r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
3444     + r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
3445     + r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
3446     + r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
3447     + r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
3448     + r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
3449     + r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
3450     + r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
3451     + r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
3452     + r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
3453     + r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
3454     + r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
3455     + r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
3456     + r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
3457     + r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
3458     + r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
3459     + r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
3460     + r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
3461     + r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
3462     + r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
3463     + r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
3464     + r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
3465     + r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
3466     + r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
3467     + r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
3468     + r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
3469     + r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
3470     + r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
3471     + r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
3472     + r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
3473     + r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
3474     + r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
3475     + r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
3476     + r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
3477     + r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
3478     + r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
3479     + r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
3480     + r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
3481     + r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
3482     + r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
3483     + r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
3484     + r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
3485     + r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
3486     + r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
3487     + r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
3488     + r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
3489     + r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
3490     + r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
3491     + r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
3492     + r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
3493     + r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
3494     + r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
3495     + r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
3496     + r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
3497     + r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
3498     +
3499     +#undef r
3500     +#define r r0
3501     +
3502     +#if defined(ONE_IM_TABLE)
3503     +static const u_int32_t im_tab[256] =
3504     + { m_table };
3505     +#elif defined(FOUR_IM_TABLES)
3506     +static const u_int32_t im_tab[4][256] =
3507     +{ { m_table },
3508     +#undef r
3509     +#define r r1
3510     + { m_table },
3511     +#undef r
3512     +#define r r2
3513     + { m_table },
3514     +#undef r
3515     +#define r r3
3516     + { m_table }
3517     +};
3518     +#endif
3519     +
3520     +#endif
3521     +
3522     +#else
3523     +
3524     +static int tab_gen = 0;
3525     +
3526     +static unsigned char s_box[256]; // the S box
3527     +static unsigned char inv_s_box[256]; // the inverse S box
3528     +static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
3529     +
3530     +#if defined(ONE_TABLE)
3531     +static u_int32_t ft_tab[256];
3532     +static u_int32_t it_tab[256];
3533     +#elif defined(FOUR_TABLES)
3534     +static u_int32_t ft_tab[4][256];
3535     +static u_int32_t it_tab[4][256];
3536     +#endif
3537     +
3538     +#if defined(ONE_LR_TABLE)
3539     +static u_int32_t fl_tab[256];
3540     +static u_int32_t il_tab[256];
3541     +#elif defined(FOUR_LR_TABLES)
3542     +static u_int32_t fl_tab[4][256];
3543     +static u_int32_t il_tab[4][256];
3544     +#endif
3545     +
3546     +#if defined(ONE_IM_TABLE)
3547     +static u_int32_t im_tab[256];
3548     +#elif defined(FOUR_IM_TABLES)
3549     +static u_int32_t im_tab[4][256];
3550     +#endif
3551     +
3552     +// Generate the tables for the dynamic table option
3553     +
3554     +#if !defined(FF_TABLES)
3555     +
3556     +// It will generally be sensible to use tables to compute finite
3557     +// field multiplies and inverses but where memory is scarce this
3558     +// code might sometimes be better.
3559     +
// Return 2 to the power (n - 1), where n is the zero-based index of the
// highest bit set in x; in other words the top set bit of x moved one
// place to the right.  Intended for 1 < x < 0x00000200; for x < 2 the
// result is 0.  Handing back the halved bit is what allows the locals
// within FFinv to be bytes rather than words.

static unsigned char hibit(const u_int32_t x)
{
    // begin with x / 2, its top bit already smeared one place downwards
    unsigned char mask = (unsigned char)((x >> 1) | (x >> 2));

    // carry on smearing until every bit below the top one is set as well
    mask |= (mask >> 2);
    mask |= (mask >> 4);

    // mask + 1 is twice the top bit of x / 2, so halve it to get that bit
    return (mask + 1) >> 1;
}
3571     +
// Return the inverse of the finite field element x.  The inputs 0 and 1
// are handed back unchanged.
//
// Two (polynomial, top bit, multiplier) triples are reduced against each
// other in turn until one of the top bits reaches zero; the multiplier
// belonging to the other triple is then the result.  The second triple
// starts at 0x1b with top bit 0x80: hibit reports top bits halved, which
// lets a polynomial whose leading bit lies just above a byte be tracked
// in byte-sized locals.

static unsigned char FFinv(const unsigned char x)
{
    unsigned char poly_a = x;
    unsigned char poly_b = 0x1b;
    unsigned char top_a = hibit(x);
    unsigned char top_b = 0x80;
    unsigned char mult_a = 1;
    unsigned char mult_b = 0;

    if (x < 2)
        return x;

    while (1) {
        if (top_a == 0)
            return mult_a;

        // reduce the b triple by shifted copies of the a triple
        while (top_b >= top_a) {
            top_b /= top_a;             // ratio of top bits = shift factor
            poly_b ^= poly_a * top_b;
            mult_b ^= mult_a * top_b;
            top_b = hibit(poly_b);
        }

        if (top_b == 0)
            return mult_b;

        // and the other way round
        while (top_a >= top_b) {
            top_a /= top_b;
            poly_a ^= poly_b * top_a;
            mult_a ^= mult_b * top_a;
            top_a = hibit(poly_a);
        }
    }
}
3596     +
// Finite field multiplies required for Rijndael, computed without tables.
// FFmul02 doubles its argument: the low seven bits move up one place and
// 0x1b is folded in whenever bit 7 was set.  The other multipliers are
// assembled from doublings and XORs of the argument.  Each macro
// evaluates x more than once, so x must be free of side effects.

#define FFmul02(x) ((((x) & 0x80) ? 0x1b : 0) ^ (((x) & 0x7f) << 1))
#define FFmul03(x) (FFmul02(x) ^ (x))
#define FFmul09(x) (FFmul02(FFmul02(FFmul02(x))) ^ (x))
#define FFmul0b(x) (FFmul02(FFmul02(FFmul02(x)) ^ (x)) ^ (x))
#define FFmul0d(x) (FFmul02(FFmul02(FFmul02(x) ^ (x))) ^ (x))
#define FFmul0e(x) FFmul02(FFmul02(FFmul02(x) ^ (x)) ^ (x))
3605     +
3606     +#else
3607     +
// Table driven versions of the finite field inverse and multiplies.
// They index arrays called pow[] and log[], which must be in scope at the
// point of expansion (gen_tabs declares them when FF_TABLES is defined).
// A multiply adds the logarithm of the constant multiplier to log[x] and
// looks the sum up in pow[]; zero has no logarithm and is handled apart.
// The argument is parenthesised at every use so that compound
// expressions, such as a conditional, expand correctly.  x is evaluated
// more than once, so it must be free of side effects.

#define FFinv(x)   ((x) ? pow[255 - log[(x)]] : 0)

#define FFmul02(x) ((x) ? pow[log[(x)] + 0x19] : 0)
#define FFmul03(x) ((x) ? pow[log[(x)] + 0x01] : 0)
#define FFmul09(x) ((x) ? pow[log[(x)] + 0xc7] : 0)
#define FFmul0b(x) ((x) ? pow[log[(x)] + 0x68] : 0)
#define FFmul0d(x) ((x) ? pow[log[(x)] + 0xee] : 0)
#define FFmul0e(x) ((x) ? pow[log[(x)] + 0xdf] : 0)
3616     +
3617     +#endif
3618     +
// The forward and inverse affine transformations used in the S-box.
//
// Both macros use a u_int32_t variable named w, which must be in scope
// where they are expanded, as scratch space: the argument is spread into
// w by shifts and XORs, the bits that overflowed past bit 7 are folded
// back with w ^ (w >> 8), and the low byte is XORed with the constant
// (0x63 forward, 0x05 inverse).  The value of the comma expression is
// that final byte.  The argument is parenthesised so that the cast
// covers the whole of a compound expression.

#define fwd_affine(x) \
    (w = (u_int32_t)(x), w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))

#define inv_affine(x) \
    (w = (u_int32_t)(x), w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
3626     +
3627     +static void gen_tabs(void)
3628     +{ u_int32_t i, w;
3629     +
3630     +#if defined(FF_TABLES)
3631     +
3632     + unsigned char pow[512], log[256];
3633     +
3634     + // log and power tables for GF(2^8) finite field with
3635     + // 0x011b as modular polynomial - the simplest primitive
3636     + // root is 0x03, used here to generate the tables
3637     +
3638     + i = 0; w = 1;
3639     + do
3640     + {
3641     + pow[i] = (unsigned char)w;
3642     + pow[i + 255] = (unsigned char)w;
3643     + log[w] = (unsigned char)i++;
3644     + w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0);
3645     + }
3646     + while (w != 1);
3647     +
3648     +#endif
3649     +
3650     + for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
3651     + {
3652     + rcon_tab[i] = bytes2word(w, 0, 0, 0);
3653     + w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
3654     + }
3655     +
3656     + for(i = 0; i < 256; ++i)
3657     + { unsigned char b;
3658     +
3659     + s_box[i] = b = fwd_affine(FFinv((unsigned char)i));
3660     +
3661     + w = bytes2word(b, 0, 0, 0);
3662     +#if defined(ONE_LR_TABLE)
3663     + fl_tab[i] = w;
3664     +#elif defined(FOUR_LR_TABLES)
3665     + fl_tab[0][i] = w;
3666     + fl_tab[1][i] = upr(w,1);
3667     + fl_tab[2][i] = upr(w,2);
3668     + fl_tab[3][i] = upr(w,3);
3669     +#endif
3670     + w = bytes2word(FFmul02(b), b, b, FFmul03(b));
3671     +#if defined(ONE_TABLE)
3672     + ft_tab[i] = w;
3673     +#elif defined(FOUR_TABLES)
3674     + ft_tab[0][i] = w;
3675     + ft_tab[1][i] = upr(w,1);
3676