/[pkg-loop-aes]/upstream/loop-aes/current/kernel-2.4.36.diff
ViewVC logotype

Contents of /upstream/loop-aes/current/kernel-2.4.36.diff

Parent Directory Parent Directory | Revision Log Revision Log


Revision 877 - (show annotations) (download)
Thu Sep 29 14:03:21 2005 UTC (7 years, 8 months ago) by max
Original Path: loop-aes-source/vendor/current/kernel-2.4.31.diff
File size: 211222 byte(s)
Load /tmp/tmp.vI0lgI/loop-aes-source-3.1d into
loop-aes-source/vendor/current.
1 Before this patch can be applied to the kernel, the drivers/block/loop.c and
2 include/linux/loop.h source files must be removed:
3
4 rm -f drivers/block/loop.c include/linux/loop.h
5
6 diff -urN linux-2.4.31-noloop/Documentation/Configure.help linux-2.4.31-AES/Documentation/Configure.help
7 --- linux-2.4.31-noloop/Documentation/Configure.help 2005-04-04 19:15:13.000000000 +0300
8 +++ linux-2.4.31-AES/Documentation/Configure.help 2005-08-31 18:32:29.000000000 +0300
9 @@ -622,6 +622,27 @@
10
11 If unsure, say N.
12
13 +AES encrypted loop device support
14 +CONFIG_BLK_DEV_LOOP_AES
15 + If you want to use the AES encryption algorithm to encrypt loop devices,
16 + say Y here. If you don't know what to do here, say N.
17 +
18 +loop encryption key scrubbing support
19 +CONFIG_BLK_DEV_LOOP_KEYSCRUB
20 + Loop encryption key scrubbing moves and inverts key bits in
21 + kernel RAM so that the thin oxide which forms the storage
22 + capacitor dielectric of DRAM cells is not permitted to develop a
23 + detectable property. For more info, see Peter Gutmann's paper:
24 + http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
25 +
26 + Paranoid tinfoil hat crowd say Y here, everyone else say N.
27 +
28 +VIA padlock hardware AES support
29 +CONFIG_BLK_DEV_LOOP_PADLOCK
30 + If you have a VIA processor that supports padlock xcrypt instructions,
31 + say Y here. If enabled, presence of VIA padlock instructions is detected
32 + at run time, so the code still works on non-padlock processors too.
33 +
34 ATA/IDE/MFM/RLL support
35 CONFIG_IDE
36 If you say Y here, your kernel will be able to manage low cost mass
37 diff -urN linux-2.4.31-noloop/drivers/block/Config.in linux-2.4.31-AES/drivers/block/Config.in
38 --- linux-2.4.31-noloop/drivers/block/Config.in 2004-08-08 14:15:13.000000000 +0300
39 +++ linux-2.4.31-AES/drivers/block/Config.in 2005-08-31 18:36:34.000000000 +0300
40 @@ -42,6 +42,15 @@
41 dep_tristate 'Promise SATA SX8 support' CONFIG_BLK_DEV_SX8 $CONFIG_PCI
42
43 tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
44 +if [ "$CONFIG_BLK_DEV_LOOP" != "n" ]; then
45 + bool ' AES encrypted loop device support' CONFIG_BLK_DEV_LOOP_AES
46 + bool ' loop encryption key scrubbing support' CONFIG_BLK_DEV_LOOP_KEYSCRUB
47 + if [ "$CONFIG_BLK_DEV_LOOP_AES" = "y" ]; then
48 + if [ "$CONFIG_X86" = "y" -o "$CONFIG_X86_64" = "y" ]; then
49 + bool ' VIA padlock hardware AES support' CONFIG_BLK_DEV_LOOP_PADLOCK
50 + fi
51 + fi
52 +fi
53 dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
54
55 tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
56 diff -urN linux-2.4.31-noloop/drivers/block/loop.c linux-2.4.31-AES/drivers/block/loop.c
57 --- linux-2.4.31-noloop/drivers/block/loop.c 1970-01-01 02:00:00.000000000 +0200
58 +++ linux-2.4.31-AES/drivers/block/loop.c 2005-09-20 19:39:29.000000000 +0300
59 @@ -0,0 +1,2271 @@
60 +/*
61 + * linux/drivers/block/loop.c
62 + *
63 + * Written by Theodore Ts'o, 3/29/93
64 + *
65 + * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
66 + * permitted under the GNU General Public License.
67 + *
68 + * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
69 + * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
70 + *
71 + * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
72 + * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
73 + *
74 + * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
75 + *
76 + * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
77 + *
78 + * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
79 + *
80 + * Loadable modules and other fixes by AK, 1998
81 + *
82 + * Make real block number available to downstream transfer functions, enables
83 + * CBC (and relatives) mode encryption requiring unique IVs per data block.
84 + * Reed H. Petty, rhp@draper.net
85 + *
86 + * Maximum number of loop devices now dynamic via max_loop module parameter.
87 + * Russell Kroll <rkroll@exploits.org> 19990701
88 + *
89 + * Maximum number of loop devices when compiled-in now selectable by passing
90 + * max_loop=<1-255> to the kernel on boot.
91 + * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
92 + *
93 + * Completely rewrite request handling to be make_request_fn style and
94 + * non blocking, pushing work to a helper thread. Lots of fixes from
95 + * Al Viro too.
96 + * Jens Axboe <axboe@suse.de>, Nov 2000
97 + *
98 + * Support up to 256 loop devices
99 + * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
100 + *
101 + * AES transfer added. IV is now passed as (512 byte) sector number.
102 + * Jari Ruusu, May 18 2001
103 + *
104 + * External encryption module locking bug fixed.
105 + * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
106 + *
107 + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
108 + * Jari Ruusu, September 2 2001
109 + *
110 + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
111 + * Jari Ruusu, May 23 2002
112 + *
113 + * Backported struct loop_info64 ioctls from 2.6 kernels (64 bit offsets and
114 + * 64 bit sizelimits). Added support for removing offset from IV computations.
115 + * Jari Ruusu, September 21 2003
116 + *
117 + * Added support for MD5 IV computation and multi-key operation.
118 + * Jari Ruusu, October 8 2003
119 + *
120 + *
121 + * Still To Fix:
122 + * - Advisory locking is ignored here.
123 + * - Should use its own CAP_* category instead of CAP_SYS_ADMIN
124 + */
125 +
126 +#include <linux/config.h>
127 +#include <linux/module.h>
128 +
129 +#include <linux/sched.h>
130 +#include <linux/fs.h>
131 +#include <linux/file.h>
132 +#include <linux/stat.h>
133 +#include <linux/errno.h>
134 +#include <linux/major.h>
135 +#include <linux/wait.h>
136 +#include <linux/blk.h>
137 +#include <linux/blkpg.h>
138 +#include <linux/init.h>
139 +#include <linux/devfs_fs_kernel.h>
140 +#include <linux/smp_lock.h>
141 +#include <linux/swap.h>
142 +#include <linux/slab.h>
143 +#include <linux/spinlock.h>
144 +
145 +#include <asm/uaccess.h>
146 +#include <asm/byteorder.h>
147 +#if defined(CONFIG_BLK_DEV_LOOP_PADLOCK) && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
148 +# include <asm/processor.h>
149 +#endif
150 +
151 +#include <linux/loop.h>
152 +#include "../misc/aes.h"
153 +#include "../misc/md5.h"
154 +
155 +#define MAJOR_NR LOOP_MAJOR
156 +
157 +static int max_loop = 8;
158 +static struct loop_device *loop_dev;
159 +static int *loop_sizes;
160 +static int *loop_blksizes;
161 +static int *loop_hardsizes;
162 +static devfs_handle_t devfs_handle; /* For the directory */
163 +
164 +#if defined(__x86_64__) && defined(CONFIG_IA32_EMULATION)
165 +# include <asm/ioctl32.h>
166 +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl)
167 +#endif
168 +#if (defined(__sparc__) || defined(__sparc64__)) && defined(CONFIG_SPARC32_COMPAT)
169 + extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *));
170 + extern int unregister_ioctl32_conversion(unsigned int cmd);
171 + extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
172 +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl)
173 +#endif
174 +
175 +/*
176 + * Transfer functions
177 + */
178 +static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
179 + char *loop_buf, int size, int real_block)
180 +{
181 + /* This transfer is only called from the file backed loop code, and */
182 + /* that code expects it to be a no-op: no buffer is read or written. */
183 +
184 + if (current->need_resched) /* give up the CPU when a reschedule is pending */
185 + {set_current_state(TASK_RUNNING);schedule();}
186 + return 0; /* always succeeds */
187 +}
188 +
189 +static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
190 + char *loop_buf, int size, int real_block)
191 +{ /* XOR 'size' bytes with the device key; copy direction depends on 'cmd'. Returns 0. */
192 + char *in, *out, *key;
193 + int i, keysize;
194 +
195 + if (cmd == READ) { /* READ: raw_buf is the source, loop_buf the destination */
196 + in = raw_buf;
197 + out = loop_buf;
198 + } else { /* any other cmd: loop_buf is the source, raw_buf the destination */
199 + in = loop_buf;
200 + out = raw_buf;
201 + }
202 +
203 + key = lo->lo_encrypt_key;
204 + keysize = lo->lo_encrypt_key_size;
205 + for (i = 0; i < size; i++)
206 + *out++ = *in++ ^ key[(i & 511) % keysize]; /* key position restarts every 512 bytes; keysize must be non-zero */
207 + if (current->need_resched) /* give up the CPU when a reschedule is pending */
208 + {set_current_state(TASK_RUNNING);schedule();}
209 + return 0;
210 +}
211 +
212 +static int none_status(struct loop_device *lo, struct loop_info *info)
213 +{ /* 'init' hook of none_funcs: nothing to validate, always returns 0 */
214 + return 0;
215 +}
216 +
217 +static int xor_status(struct loop_device *lo, struct loop_info *info)
218 +{ /* 'init' hook of xor_funcs: a zero or negative key size is rejected */
219 + if (info->lo_encrypt_key_size <= 0)
220 + return -EINVAL;
221 + return 0;
222 +}
223 +
224 +struct loop_func_table none_funcs = { /* transfer table for LO_CRYPT_NONE */
225 + number: LO_CRYPT_NONE,
226 + transfer: transfer_none,
227 + init: none_status,
228 +};
229 +
230 +struct loop_func_table xor_funcs = { /* transfer table for LO_CRYPT_XOR */
231 + number: LO_CRYPT_XOR,
232 + transfer: transfer_xor,
233 + init: xor_status,
234 +};
235 +
236 +#ifdef CONFIG_BLK_DEV_LOOP_AES
237 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
238 +# define KEY_ALLOC_COUNT 128
239 +#else
240 +# define KEY_ALLOC_COUNT 64
241 +#endif
242 +
243 +typedef struct {
244 + aes_context *keyPtr[KEY_ALLOC_COUNT]; /* AES contexts; transfers pick keyPtr[sector & keyMask] */
245 + unsigned keyMask; /* 0 after allocation, 0x3F once multiKeySetup() succeeded */
246 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
247 + u_int32_t *partialMD5; /* points at the live 4-word slot inside partialMD5buf */
248 + u_int32_t partialMD5buf[8]; /* two 4-word slots; keyScrubWork() alternates between them */
249 + rwlock_t rwlock; /* read-held by transfers, write-held while keys are moved */
250 + unsigned reversed; /* 0 or 4: word index of the live partialMD5buf slot */
251 + unsigned blocked; /* non-zero while multiKeySetup() forbids scrubbing */
252 + struct timer_list timer; /* scrub timer, re-armed from its own handler */
253 +#else
254 + u_int32_t partialMD5[4]; /* MD5 state that seeds the per-sector IV computation */
255 +#endif
256 +#if defined(CONFIG_BLK_DEV_LOOP_PADLOCK) && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
257 + u_int32_t padlock_cw_e; /* padlock control word used for encryption */
258 + u_int32_t padlock_cw_d; /* padlock control word used for decryption */
259 +#endif
260 +} AESmultiKey;
261 +
262 +#if defined(CONFIG_BLK_DEV_LOOP_PADLOCK) && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
263 +/* This function allocates AES context structures at special address such */
264 +/* that returned address % 16 == 8 . That way expanded encryption and */
265 +/* decryption keys in AES context structure are always 16 byte aligned */
266 +static void *specialAligned_kmalloc(size_t size, unsigned int flags)
267 +{ /* returns a pointer with (address % 16) == 8, or a null pointer if kmalloc fails */
268 + void *pn, **ps;
269 + pn = kmalloc(size + (16 + 8), flags); /* extra room for the alignment shift and the saved pointer */
270 + if(!pn) return (void *)0;
271 + ps = (void **)((((unsigned long)pn + 15) & ~((unsigned long)15)) + 8); /* round up to 16, then add 8 */
272 + *(ps - 1) = pn; /* remember the real kmalloc pointer just below the returned address */
273 + return (void *)ps;
274 +}
275 +static void specialAligned_kfree(void *ps)
276 +{ /* frees the original kmalloc pointer stored just below ps; a null ps is ignored */
277 + if(ps) kfree(*((void **)ps - 1));
278 +}
279 +# define specialAligned_ctxSize ((sizeof(aes_context) + 15) & ~15)
280 +#else
281 +# define specialAligned_kmalloc kmalloc
282 +# define specialAligned_kfree kfree
283 +# define specialAligned_ctxSize sizeof(aes_context)
284 +#endif
285 +
286 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
287 +static void keyScrubWork(AESmultiKey *m)
288 +{ /* swaps every live key with its spare slot and bit-inverts the copy left behind */
289 + aes_context *a0, *a1;
290 + u_int32_t *p;
291 + int x, y, z;
292 +
293 + z = m->keyMask + 1; /* number of keys in use; spare of key x is slot x + z */
294 + for(x = 0; x < z; x++) {
295 + a0 = m->keyPtr[x];
296 + a1 = m->keyPtr[x + z];
297 + memcpy(a1, a0, sizeof(aes_context)); /* copy live context into the spare slot */
298 + m->keyPtr[x] = a1; /* the spare becomes the live context */
299 + m->keyPtr[x + z] = a0;
300 + p = (u_int32_t *) a0;
301 + y = sizeof(aes_context) / sizeof(u_int32_t);
302 + while(y > 0) { /* invert every word of the old copy */
303 + *p ^= 0xFFFFFFFF;
304 + p++;
305 + y--;
306 + }
307 + }
308 +
309 + x = m->reversed; /* x is 0 or 4 */
310 + m->reversed ^= 4;
311 + y = m->reversed; /* y is 4 or 0 */
312 + p = &m->partialMD5buf[x];
313 + memcpy(&m->partialMD5buf[y], p, 16); /* move the MD5 state to the other slot */
314 + m->partialMD5 = &m->partialMD5buf[y];
315 + p[0] ^= 0xFFFFFFFF; /* invert the four words of the old slot */
316 + p[1] ^= 0xFFFFFFFF;
317 + p[2] ^= 0xFFFFFFFF;
318 + p[3] ^= 0xFFFFFFFF;
319 +
320 + /* try to flush dirty cache data to RAM */
321 +#if defined(CONFIG_X86_64) || (defined(CONFIG_X86) && !defined(CONFIG_M386) && !defined(CONFIG_CPU_386))
322 + __asm__ __volatile__ ("wbinvd": : :"memory");
323 +#else
324 + mb();
325 +#endif
326 +}
327 +
328 +/* called only from loop thread process context; scrubs unless multiKeySetup() has set m->blocked */
329 +static void keyScrubThreadFn(AESmultiKey *m)
330 +{
331 + write_lock(&m->rwlock); /* excludes transfers, which take the read side */
332 + if(!m->blocked) keyScrubWork(m);
333 + write_unlock(&m->rwlock);
334 +}
335 +
336 +static void keyScrubTimerFn(unsigned long);
337 +
338 +static void keyScrubTimerInit(struct loop_device *lo)
339 +{ /* arms the scrub timer of lo->key_data to fire keyScrubTimerFn(lo) HZ jiffies from now */
340 + AESmultiKey *m;
341 + unsigned long expire;
342 +
343 + m = (AESmultiKey *)lo->key_data;
344 + expire = jiffies + HZ;
345 + init_timer(&m->timer);
346 + m->timer.expires = expire;
347 + m->timer.data = (unsigned long)lo; /* handed back to keyScrubTimerFn as its argument */
348 + m->timer.function = keyScrubTimerFn;
349 + add_timer(&m->timer);
350 +}
351 +
352 +/* called only from timer handler context; 'd' is the struct loop_device pointer stored by keyScrubTimerInit() */
353 +static void keyScrubTimerFn(unsigned long d)
354 +{
355 + struct loop_device *lo = (struct loop_device *)d;
356 + extern void loop_add_keyscrub_fn(struct loop_device *, void (*)(void *), void *);
357 +
358 + /* rw lock needs process context, so make loop thread do scrubbing */
359 + loop_add_keyscrub_fn(lo, (void (*)(void*))keyScrubThreadFn, lo->key_data);
360 + /* start timer again */
361 + keyScrubTimerInit(lo);
362 +}
363 +#endif
364 +
365 +static AESmultiKey *allocMultiKey(void)
366 +{ /* allocates a zeroed AESmultiKey plus its first chunk of AES contexts; returns 0 on failure */
367 + AESmultiKey *m;
368 + aes_context *a;
369 + int x = 0, n;
370 +
371 + m = (AESmultiKey *) kmalloc(sizeof(AESmultiKey), GFP_KERNEL);
372 + if(!m) return 0;
373 + memset(m, 0, sizeof(AESmultiKey));
374 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
375 + m->partialMD5 = &m->partialMD5buf[0];
376 + rwlock_init(&m->rwlock);
377 + init_timer(&m->timer);
378 + again:
379 +#endif
380 +
381 + n = PAGE_SIZE / specialAligned_ctxSize; /* contexts per allocation chunk */
382 + if(!n) n = 1; /* a context larger than a page still gets one slot */
383 +
384 + a = (aes_context *) specialAligned_kmalloc(specialAligned_ctxSize * n, GFP_KERNEL);
385 + if(!a) {
386 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
387 + if(x) specialAligned_kfree(m->keyPtr[0]); /* release the chunk from the previous pass */
388 +#endif
389 + kfree(m);
390 + return 0;
391 + }
392 +
393 + while((x < KEY_ALLOC_COUNT) && n) { /* hand out consecutive contexts of this chunk */
394 + m->keyPtr[x] = a;
395 + a = (aes_context *)((unsigned char *)a + specialAligned_ctxSize);
396 + x++;
397 + n--;
398 + }
399 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
400 + if(x < 2) goto again; /* scrubbing needs a spare slot next to key 0 */
401 +#endif
402 + return m;
403 +}
404 +
405 +static void clearAndFreeMultiKey(AESmultiKey *m)
406 +{ /* zeroes and frees every context chunk, then zeroes and frees m itself */
407 + aes_context *a;
408 + int x, n;
409 +
410 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
411 + /* stop scrub timer. loop thread was killed earlier */
412 + del_timer_sync(&m->timer);
413 + /* make sure allocated keys are in original order */
414 + if(m->reversed) keyScrubWork(m);
415 +#endif
416 + n = PAGE_SIZE / specialAligned_ctxSize; /* same chunk size as used when allocating */
417 + if(!n) n = 1;
418 +
419 + x = 0;
420 + while(x < KEY_ALLOC_COUNT) {
421 + a = m->keyPtr[x]; /* first context of a chunk is the chunk's allocation pointer */
422 + if(!a) break; /* first unallocated slot ends the walk */
423 + memset(a, 0, specialAligned_ctxSize * n); /* wipe key material before freeing */
424 + specialAligned_kfree(a);
425 + x += n; /* step to the first context of the next chunk */
426 + }
427 +
428 + memset(m, 0, sizeof(AESmultiKey));
429 + kfree(m);
430 +}
431 +
432 +static int multiKeySetup(struct loop_device *lo, unsigned char *k, int version3)
433 +{ /* Loads 64 AES keys of 32 bytes each (plus a 16 byte IV seed when version3 is set) from user pointer k. Returns 0 or -errno. */
434 + AESmultiKey *m;
435 + aes_context *a;
436 + int x, y, n, err = 0;
437 + union { /* staging buffer for user supplied key material */
438 + u_int32_t w[16];
439 + unsigned char b[64];
440 + } un;
441 +
442 + if(lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN))
443 + return -EPERM;
444 +
445 + m = (AESmultiKey *)lo->key_data;
446 + if(!m) return -ENXIO;
447 +
448 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
449 + /* temporarily prevent loop thread from messing with keys */
450 + write_lock(&m->rwlock);
451 + m->blocked = 1;
452 + /* make sure allocated keys are in original order */
453 + if(m->reversed) keyScrubWork(m);
454 + write_unlock(&m->rwlock);
455 +#endif
456 + n = PAGE_SIZE / specialAligned_ctxSize; /* contexts per allocation chunk */
457 + if(!n) n = 1;
458 +
459 + x = 0;
460 + while(x < KEY_ALLOC_COUNT) {
461 + if(!m->keyPtr[x]) { /* slot not backed yet: allocate the next chunk */
462 + a = (aes_context *) specialAligned_kmalloc(specialAligned_ctxSize * n, GFP_KERNEL);
463 + if(!a) {
464 + err = -ENOMEM;
465 + goto error_out;
466 + }
467 + y = x;
468 + while((y < (x + n)) && (y < KEY_ALLOC_COUNT)) {
469 + m->keyPtr[y] = a;
470 + a = (aes_context *)((unsigned char *)a + specialAligned_ctxSize);
471 + y++;
472 + }
473 + }
474 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
475 + if(x >= 64) { /* slots 64 and up are scrub spares: allocated but not keyed */
476 + x++;
477 + continue;
478 + }
479 +#endif
480 + if(copy_from_user(&un.b[0], k, 32)) {
481 + err = -EFAULT;
482 + goto error_out;
483 + }
484 + aes_set_key(m->keyPtr[x], &un.b[0], lo->lo_encrypt_key_size, 0);
485 + k += 32; /* next 32 byte key in the user buffer */
486 + x++;
487 + }
488 +
489 + m->partialMD5[0] = 0x67452301; /* same four start words that loop_compute_md5_iv() uses */
490 + m->partialMD5[1] = 0xefcdab89;
491 + m->partialMD5[2] = 0x98badcfe;
492 + m->partialMD5[3] = 0x10325476;
493 + if(version3) {
494 + /* only first 128 bits of iv-key is used */
495 + if(copy_from_user(&un.b[0], k, 16)) {
496 + err = -EFAULT;
497 + goto error_out;
498 + }
499 +#if defined(__BIG_ENDIAN)
500 + un.w[0] = cpu_to_le32(un.w[0]);
501 + un.w[1] = cpu_to_le32(un.w[1]);
502 + un.w[2] = cpu_to_le32(un.w[2]);
503 + un.w[3] = cpu_to_le32(un.w[3]);
504 +#endif
505 + memset(&un.b[16], 0, 48);
506 + md5_transform_CPUbyteorder(&m->partialMD5[0], &un.w[0]);
507 + lo->lo_flags |= 0x080000; /* multi-key-v3 (info exported to user space) */
508 + }
509 +
510 + m->keyMask = 0x3F; /* range 0...63 */
511 + lo->lo_flags |= 0x100000; /* multi-key (info exported to user space) */
512 +error_out:
513 + memset(&un, 0, sizeof(un)); /* scrub staged key material on every exit path, failures included */
514 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
515 + /* re-enable loop thread key scrubbing */
516 + write_lock(&m->rwlock);
517 + m->blocked = 0;
518 + write_unlock(&m->rwlock);
519 +#endif
520 + return err;
521 +}
522 +
523 +void loop_compute_sector_iv(int devSect, u_int32_t *ivout)
524 +{ /* writes a 4-word IV: little endian sector number followed by three zero words */
525 + ivout[0] = cpu_to_le32(devSect);
526 + ivout[3] = ivout[2] = ivout[1] = 0;
527 +}
528 +
529 +void loop_compute_md5_iv_v3(int devSect, u_int32_t *ivout, u_int32_t *data)
530 +{ /* continues the MD5 state in ivout[0..3] over 496 bytes at 'data' plus devSect; result stays in ivout */
531 + int x;
532 +#if defined(__BIG_ENDIAN)
533 + int y, e;
534 +#endif
535 + u_int32_t buf[16];
536 +
537 +#if defined(__BIG_ENDIAN)
538 + y = 7; /* 8 passes: seven full 64 byte blocks, then the final partial one */
539 + e = 16;
540 + do {
541 + if (!y) {
542 + e = 12; /* last pass: only 12 data words, the rest is set below */
543 + /* md5_transform_CPUbyteorder wants data in CPU byte order */
544 + /* devSect is already in CPU byte order -- no need to convert */
545 + /* 32 bits of sector number + 24 zero bits */
546 + buf[12] = devSect;
547 + buf[13] = 0x80000000;
548 + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
549 + /* For version 3 on-disk format this really should be 4536 bits, but can't be */
550 + /* changed without breaking compatibility. V3 uses MD5-with-wrong-length IV */
551 + buf[14] = 4024;
552 + buf[15] = 0;
553 + }
554 + x = 0;
555 + do { /* byte swap four data words at a time into buf */
556 + buf[x ] = cpu_to_le32(data[0]);
557 + buf[x + 1] = cpu_to_le32(data[1]);
558 + buf[x + 2] = cpu_to_le32(data[2]);
559 + buf[x + 3] = cpu_to_le32(data[3]);
560 + x += 4;
561 + data += 4;
562 + } while (x < e);
563 + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
564 + } while (--y >= 0);
565 + ivout[0] = cpu_to_le32(ivout[0]); /* byte swap the resulting four words */
566 + ivout[1] = cpu_to_le32(ivout[1]);
567 + ivout[2] = cpu_to_le32(ivout[2]);
568 + ivout[3] = cpu_to_le32(ivout[3]);
569 +#else
570 + x = 6; /* 7 passes over full 64 byte blocks, hashed in place */
571 + do {
572 + md5_transform_CPUbyteorder(&ivout[0], data);
573 + data += 16;
574 + } while (--x >= 0);
575 + memcpy(buf, data, 48); /* remaining 48 data bytes go into the final block */
576 + /* md5_transform_CPUbyteorder wants data in CPU byte order */
577 + /* devSect is already in CPU byte order -- no need to convert */
578 + /* 32 bits of sector number + 24 zero bits */
579 + buf[12] = devSect;
580 + buf[13] = 0x80000000;
581 + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
582 + /* For version 3 on-disk format this really should be 4536 bits, but can't be */
583 + /* changed without breaking compatibility. V3 uses MD5-with-wrong-length IV */
584 + buf[14] = 4024;
585 + buf[15] = 0;
586 + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
587 +#endif
588 +}
589 +
590 +/* this function exists for compatibility with old external cipher modules: it seeds ivout with fixed start words first */
591 +void loop_compute_md5_iv(int devSect, u_int32_t *ivout, u_int32_t *data)
592 +{
593 + ivout[0] = 0x67452301;
594 + ivout[1] = 0xefcdab89;
595 + ivout[2] = 0x98badcfe;
596 + ivout[3] = 0x10325476;
597 + loop_compute_md5_iv_v3(devSect, ivout, data); /* then hashes exactly like the v3 variant */
598 +}
599 +
600 +/* Some external modules do not know if md5_transform_CPUbyteorder() */
601 +/* is asmlinkage or not, so here is a C language wrapper for them. */
602 +void md5_transform_CPUbyteorder_C(u_int32_t *hash, u_int32_t const *in)
603 +{
604 + md5_transform_CPUbyteorder(hash, in); /* plain forwarding call, arguments unchanged */
605 +}
606 +
607 +static int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf,
608 + char *loop_buf, int size, int devSect)
609 +{ /* AES transfer, one 512 byte sector per outer loop pass; returns 0, or -EINVAL for a bad size */
610 + aes_context *a;
611 + AESmultiKey *m;
612 + int x;
613 + unsigned y;
614 + u_int32_t iv[8]; /* iv[0..3] and iv[4..7] take turns as the chaining value */
615 +
616 + if(!size || (size & 511)) { /* size must be a non-zero multiple of 512 */
617 + return -EINVAL;
618 + }
619 + m = (AESmultiKey *)lo->key_data;
620 + y = m->keyMask; /* non-zero selects multi-key mode with MD5 based IV */
621 + if(cmd == READ) {
622 + while(size) {
623 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
624 + read_lock(&m->rwlock);
625 +#endif
626 + a = m->keyPtr[((unsigned)devSect) & y];
627 + if(y) {
628 + memcpy(&iv[0], raw_buf, 16); /* ciphertext block 0 chains into block 1 */
629 + raw_buf += 16; /* block 0 itself is decrypted last */
630 + loop_buf += 16;
631 + } else {
632 + loop_compute_sector_iv(devSect, &iv[0]);
633 + }
634 + x = 15; /* 16 passes, two 16 byte blocks per pass */
635 + do {
636 + memcpy(&iv[4], raw_buf, 16); /* save ciphertext before it may be overwritten */
637 + aes_decrypt(a, raw_buf, loop_buf);
638 + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[0];
639 + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[1];
640 + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[2];
641 + *((u_int32_t *)(&loop_buf[12])) ^= iv[3];
642 + if(y && !x) { /* multi-key, last pass: go back to block 0 */
643 + raw_buf -= 496;
644 + loop_buf -= 496;
645 + memcpy(&iv[4], &m->partialMD5[0], 16);
646 + loop_compute_md5_iv_v3(devSect, &iv[4], (u_int32_t *)(&loop_buf[16])); /* IV from the decrypted blocks 1..31 */
647 + } else {
648 + raw_buf += 16;
649 + loop_buf += 16;
650 + memcpy(&iv[0], raw_buf, 16);
651 + }
652 + aes_decrypt(a, raw_buf, loop_buf);
653 + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[4];
654 + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[5];
655 + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[6];
656 + *((u_int32_t *)(&loop_buf[12])) ^= iv[7];
657 + if(y && !x) { /* from block 0 straight to the next sector */
658 + raw_buf += 512;
659 + loop_buf += 512;
660 + } else {
661 + raw_buf += 16;
662 + loop_buf += 16;
663 + }
664 + } while(--x >= 0);
665 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
666 + read_unlock(&m->rwlock);
667 +#endif
668 + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
669 + size -= 512;
670 + devSect++;
671 + }
672 + } else {
673 + while(size) {
674 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
675 + read_lock(&m->rwlock);
676 +#endif
677 + a = m->keyPtr[((unsigned)devSect) & y];
678 + if(y) {
679 + /* on 2.4 and later kernels, real raw_buf is not doing */
680 + /* any writes now so it can be used as temp buffer */
681 + memcpy(raw_buf, loop_buf, 512);
682 + memcpy(&iv[0], &m->partialMD5[0], 16);
683 + loop_compute_md5_iv_v3(devSect, &iv[0], (u_int32_t *)(&raw_buf[16])); /* IV from plaintext blocks 1..31 */
684 + x = 15; /* 16 passes, two blocks per pass, encrypted in place */
685 + do {
686 + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
687 + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
688 + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
689 + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
690 + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
691 + memcpy(&iv[0], raw_buf, 16); /* ciphertext chains into the next block */
692 + raw_buf += 16;
693 + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
694 + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
695 + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
696 + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
697 + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
698 + memcpy(&iv[0], raw_buf, 16);
699 + raw_buf += 16;
700 + } while(--x >= 0);
701 + loop_buf += 512; /* raw_buf already advanced inside the loop */
702 + } else {
703 + loop_compute_sector_iv(devSect, &iv[0]);
704 + x = 15; /* 16 passes, two blocks per pass, loop_buf to raw_buf */
705 + do {
706 + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
707 + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
708 + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
709 + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
710 + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
711 + memcpy(&iv[0], raw_buf, 16);
712 + loop_buf += 16;
713 + raw_buf += 16;
714 + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
715 + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
716 + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
717 + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
718 + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
719 + memcpy(&iv[0], raw_buf, 16);
720 + loop_buf += 16;
721 + raw_buf += 16;
722 + } while(--x >= 0);
723 + }
724 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
725 + read_unlock(&m->rwlock);
726 +#endif
727 + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
728 + size -= 512;
729 + devSect++;
730 + }
731 + }
732 + return(0);
733 +}
734 +
735 +static int keySetup_aes(struct loop_device *lo, struct loop_info *info)
736 +{ /* 'init' hook: allocates key storage, installs the single key from info, wipes the copies. Returns 0 or -ENOMEM. */
737 + AESmultiKey *m;
738 + union {
739 + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
740 + unsigned char b[32];
741 + } un;
742 +
743 + lo->key_data = m = allocMultiKey();
744 + if(!m) return(-ENOMEM);
745 + memcpy(&un.b[0], &info->lo_encrypt_key[0], 32);
746 + aes_set_key(m->keyPtr[0], &un.b[0], info->lo_encrypt_key_size, 0);
747 + memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key)); /* wipe the caller's copy of the key */
748 + memset(&un.b[0], 0, 32); /* wipe the aligned stack copy */
749 +#if defined(CONFIG_BLK_DEV_LOOP_PADLOCK) && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
750 + switch(info->lo_encrypt_key_size) { /* key size is accepted in bits or in bytes */
751 + case 256: /* bits */
752 + case 32: /* bytes */
753 + /* 14 rounds, AES, software key gen, normal oper, encrypt, 256-bit key */
754 + m->padlock_cw_e = 14 | (1<<7) | (2<<10);
755 + /* 14 rounds, AES, software key gen, normal oper, decrypt, 256-bit key */
756 + m->padlock_cw_d = 14 | (1<<7) | (1<<9) | (2<<10);
757 + break;
758 + case 192: /* bits */
759 + case 24: /* bytes */
760 + /* 12 rounds, AES, software key gen, normal oper, encrypt, 192-bit key */
761 + m->padlock_cw_e = 12 | (1<<7) | (1<<10);
762 + /* 12 rounds, AES, software key gen, normal oper, decrypt, 192-bit key */
763 + m->padlock_cw_d = 12 | (1<<7) | (1<<9) | (1<<10);
764 + break;
765 + default: /* every other size value gets the 128-bit control words */
766 + /* 10 rounds, AES, software key gen, normal oper, encrypt, 128-bit key */
767 + m->padlock_cw_e = 10 | (1<<7);
768 + /* 10 rounds, AES, software key gen, normal oper, decrypt, 128-bit key */
769 + m->padlock_cw_d = 10 | (1<<7) | (1<<9);
770 + break;
771 + }
772 +#endif
773 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
774 + keyScrubTimerInit(lo); /* start periodic key scrubbing */
775 +#endif
776 + return(0);
777 +}
778 +
779 +static int keyClean_aes(struct loop_device *lo)
780 +{ /* 'release' hook: wipes and frees lo->key_data if present; always returns 0 */
781 + if(lo->key_data) {
782 + clearAndFreeMultiKey((AESmultiKey *)lo->key_data);
783 + lo->key_data = 0; /* prevents a second free on a repeated call */
784 + }
785 + return(0);
786 +}
787 +
788 +#if defined(CONFIG_BLK_DEV_LOOP_PADLOCK) && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
789 +static __inline__ void padlock_flush_key_context(void)
790 +{ /* executes a pushf/popf pair; NOTE(review): presumably this makes the padlock unit reload its key -- confirm against VIA documentation */
791 + __asm__ __volatile__("pushf; popf" : : : "cc");
792 +}
793 +
794 +static __inline__ void padlock_rep_xcryptcbc(void *cw, void *k, void *s, void *d, void *iv, unsigned long cnt)
795 +{ /* emits the raw opcode bytes of the padlock CBC instruction with operands pinned to fixed registers */
796 + __asm__ __volatile__(".byte 0xF3,0x0F,0xA7,0xD0"
797 + : "+a" (iv), "+c" (cnt), "+S" (s), "+D" (d) /*output*/
798 + : "b" (k), "d" (cw) /*input*/
799 + : "cc", "memory" /*modified*/ );
800 +}
801 +
802 +typedef struct {
803 + u_int32_t iv[4]; /* 16 byte IV handed to the padlock instruction */
804 + u_int32_t cw[4]; /* control word block; only cw[0] is non-zero in this file */
805 + u_int32_t dummy1[4]; /* slack so a 16 byte aligned view fits inside an unaligned instance */
806 +} Padlock_IV_CW;
807 +
808 +static int transfer_padlock_aes(struct loop_device *lo, int cmd, char *raw_buf,
809 + char *loop_buf, int size, int devSect)
810 +{ /* padlock counterpart of transfer_aes(); returns 0, or -EINVAL for a bad size or misaligned buffers */
811 + aes_context *a;
812 + AESmultiKey *m;
813 + unsigned y;
814 + Padlock_IV_CW ivcwua; /* unaligned backing storage */
815 + Padlock_IV_CW *ivcw; /* 16 byte aligned view into ivcwua */
816 +
817 + /* ivcw->iv and ivcw->cw must have 16 byte alignment */
818 + ivcw = (Padlock_IV_CW *)(((unsigned long)&ivcwua + 15) & ~((unsigned long)15));
819 +
820 + if(!size || (size & 511) || (((unsigned long)raw_buf | (unsigned long)loop_buf) & 15)) { /* both buffers must be 16 byte aligned */
821 + return -EINVAL;
822 + }
823 + m = (AESmultiKey *)lo->key_data;
824 + y = m->keyMask; /* non-zero selects multi-key mode with MD5 based IV */
825 + if(cmd == READ) {
826 + while(size) {
827 + padlock_flush_key_context();
828 + ivcw->cw[0] = m->padlock_cw_d;
829 + ivcw->cw[3] = ivcw->cw[2] = ivcw->cw[1] = 0;
830 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
831 + read_lock(&m->rwlock);
832 +#endif
833 + a = m->keyPtr[((unsigned)devSect) & y];
834 + if(y) {
835 + memcpy(&ivcw->iv[0], raw_buf, 16); /* ciphertext block 0 chains into block 1 */
836 + padlock_rep_xcryptcbc(&ivcw->cw[0], &a->aes_d_key[0], raw_buf + 16, loop_buf + 16, &ivcw->iv[0], 31); /* blocks 1..31 first */
837 + memcpy(&ivcw->iv[0], &m->partialMD5[0], 16);
838 + loop_compute_md5_iv_v3(devSect, &ivcw->iv[0], (u_int32_t *)(&loop_buf[16])); /* IV from the decrypted blocks 1..31 */
839 + padlock_rep_xcryptcbc(&ivcw->cw[0], &a->aes_d_key[0], raw_buf, loop_buf, &ivcw->iv[0], 1); /* block 0 last */
840 + } else {
841 + loop_compute_sector_iv(devSect, &ivcw->iv[0]);
842 + padlock_rep_xcryptcbc(&ivcw->cw[0], &a->aes_d_key[0], raw_buf, loop_buf, &ivcw->iv[0], 32); /* whole sector in one go */
843 + }
844 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
845 + read_unlock(&m->rwlock);
846 +#endif
847 +#if LINUX_VERSION_CODE >= 0x20600
848 + cond_resched();
849 +#else
850 + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
851 +#endif
852 + size -= 512;
853 + raw_buf += 512;
854 + loop_buf += 512;
855 + devSect++;
856 + }
857 + } else {
858 + while(size) {
859 + padlock_flush_key_context();
860 + ivcw->cw[0] = m->padlock_cw_e;
861 + ivcw->cw[3] = ivcw->cw[2] = ivcw->cw[1] = 0;
862 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
863 + read_lock(&m->rwlock);
864 +#endif
865 + a = m->keyPtr[((unsigned)devSect) & y];
866 + if(y) {
867 + memcpy(raw_buf, loop_buf, 512); /* stage plaintext in raw_buf, then encrypt it in place */
868 + memcpy(&ivcw->iv[0], &m->partialMD5[0], 16);
869 + loop_compute_md5_iv_v3(devSect, &ivcw->iv[0], (u_int32_t *)(&raw_buf[16])); /* IV from plaintext blocks 1..31 */
870 + padlock_rep_xcryptcbc(&ivcw->cw[0], &a->aes_e_key[0], raw_buf, raw_buf, &ivcw->iv[0], 32);
871 + } else {
872 + loop_compute_sector_iv(devSect, &ivcw->iv[0]);
873 + padlock_rep_xcryptcbc(&ivcw->cw[0], &a->aes_e_key[0], loop_buf, raw_buf, &ivcw->iv[0], 32);
874 + }
875 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
876 + read_unlock(&m->rwlock);
877 +#endif
878 +#if LINUX_VERSION_CODE >= 0x20600
879 + cond_resched();
880 +#else
881 + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
882 +#endif
883 + size -= 512;
884 + raw_buf += 512;
885 + loop_buf += 512;
886 + devSect++;
887 + }
888 + }
889 + return(0);
890 +}
891 +#endif
892 +
893 +static int handleIoctl_aes(struct loop_device *lo, int cmd, unsigned long arg)
894 +{ /* 'ioctl' hook: routes the two multi-key commands to multiKeySetup(); any other cmd yields -EINVAL */
895 + int err;
896 +
897 + switch (cmd) {
898 + case LOOP_MULTI_KEY_SETUP:
899 + err = multiKeySetup(lo, (unsigned char *)arg, 0); /* arg is the user space key buffer */
900 + break;
901 + case LOOP_MULTI_KEY_SETUP_V3:
902 + err = multiKeySetup(lo, (unsigned char *)arg, 1); /* same, with the version3 IV seed */
903 + break;
904 + default:
905 + err = -EINVAL;
906 + }
907 + return err;
908 +}
909 +
910 +static struct loop_func_table funcs_aes = { /* software AES transfer table */
911 + number: 16, /* 16 == AES */
912 + transfer: transfer_aes,
913 + init: keySetup_aes,
914 + release: keyClean_aes,
915 + ioctl: handleIoctl_aes
916 +};
917 +
918 +#if defined(CONFIG_BLK_DEV_LOOP_PADLOCK) && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
919 +static struct loop_func_table funcs_padlock_aes = { /* same hooks as funcs_aes except for the padlock transfer */
920 + number: 16, /* 16 == AES */
921 + transfer: transfer_padlock_aes,
922 + init: keySetup_aes,
923 + release: keyClean_aes,
924 + ioctl: handleIoctl_aes
925 +};
926 +#endif
927 +
928 +EXPORT_SYMBOL(loop_compute_sector_iv);
929 +EXPORT_SYMBOL(loop_compute_md5_iv_v3);
930 +EXPORT_SYMBOL(loop_compute_md5_iv);
931 +EXPORT_SYMBOL(md5_transform_CPUbyteorder_C);
932 +#endif /* CONFIG_BLK_DEV_LOOP_AES */
933 +
934 +/* xfer_funcs[0] is special - its release function is never called */
935 +struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { /* entries not listed here stay null */
936 + &none_funcs,
937 + &xor_funcs,
938 +#ifdef CONFIG_BLK_DEV_LOOP_AES
939 + [LO_CRYPT_AES] = &funcs_aes,
940 +#endif
941 +};
942 +
943 +/*
944 + * First number of 'lo_prealloc' is the default number of RAM pages
945 + * to pre-allocate for each device backed loop. Every (configured)
946 + * device backed loop pre-allocates this amount of RAM pages unless
947 + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
948 + * overrides are defined in pairs: loop_index,number_of_pages
949 + */
950 +static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 };
951 +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */
952 +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */
953 +/* lo_prealloc[] is filled from a module parameter, or from "lo_prealloc=" on the kernel command line when built in. */
954 +#ifdef MODULE
955 +MODULE_PARM(lo_prealloc, "1-9i");
956 +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
957 +#else
958 +static int __init lo_prealloc_setup(char *str)
959 +{
960 + int x, y, z;
961 +
962 + for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) {
963 + z = get_option(&str, &y);
964 + if (z > 0) /* NOTE(review): presumably z > 0 means a number was parsed into y - confirm */
965 + lo_prealloc[x] = y;
966 + if (z < 2) /* NOTE(review): presumably 2 means a comma (more values) follows - confirm against get_option() */
967 + break;
968 + }
969 + return 1;
970 +}
971 +__setup("lo_prealloc=", lo_prealloc_setup);
972 +#endif
973 +
974 +/*
975 + * This is loop helper thread nice value in range
976 + * from 0 (low priority) to -20 (high priority).
977 + */
978 +#if defined(DEF_NICE) && defined(DEF_COUNTER)
979 +static int lo_nice = -20; /* old scheduler default */
980 +#else
981 +static int lo_nice = -1; /* O(1) scheduler default */
982 +#endif
983 +/* lo_nice is set from a module parameter, or from "lo_nice=" on the kernel command line when built in. */
984 +#ifdef MODULE
985 +MODULE_PARM(lo_nice, "1i");
986 +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
987 +#else
988 +static int __init lo_nice_setup(char *str)
989 +{
990 + int y;
991 +
992 + if (get_option(&str, &y) == 1) /* any other get_option() result leaves the default untouched */
993 + lo_nice = y;
994 + return 1;
995 +}
996 +__setup("lo_nice=", lo_nice_setup);
997 +#endif
998 +/* One polling order for loop_get_bh(): queues q0, q1, q2 are tried in that order; x0..x2 are the tags reported for them. */
999 +typedef struct {
1000 + struct buffer_head **q0;
1001 + struct buffer_head **q1;
1002 + struct buffer_head **q2;
1003 + int x0;
1004 + int x1;
1005 + int x2;
1006 +} que_look_up_table;
1007 +/* Free every buffer head on the lo->lo_bh_free list together with the page attached to it. */
1008 +static void loop_prealloc_cleanup(struct loop_device *lo)
1009 +{
1010 + struct buffer_head *bh;
1011 +
1012 + while ((bh = lo->lo_bh_free)) {
1013 + __free_page(bh->b_page);
1014 + lo->lo_bh_free = bh->b_reqnext; /* unlink before the buffer head is freed */
1015 + bh->b_reqnext = NULL;
1016 + kmem_cache_free(bh_cachep, bh);
1017 + }
1018 +}
1019 +/* Put y buffer heads (one page each) on lo->lo_bh_free; y == 0 selects the lo_prealloc[] setting. Returns 0, or 1 if an allocation failed. */
1020 +static int loop_prealloc_init(struct loop_device *lo, int y)
1021 +{
1022 + struct buffer_head *bh;
1023 + int x;
1024 +
1025 + if(!y) {
1026 + y = lo_prealloc[0];
1027 + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) { /* walk the (loop_index, pages) override pairs */
1028 + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) {
1029 + y = lo_prealloc[x + 1];
1030 + break;
1031 + }
1032 + }
1033 + }
1034 + lo->lo_bh_flsh = (y * 3) / 4; /* loop_thread runs tq_disk once this many submissions are unflushed */
1035 +
1036 + for (x = 0; x < y; x++) {
1037 + bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
1038 + if (!bh) {
1039 + loop_prealloc_cleanup(lo); /* undo the buffers allocated so far */
1040 + return 1;
1041 + }
1042 + bh->b_page = alloc_page(GFP_KERNEL);
1043 + if (!bh->b_page) {
1044 + bh->b_reqnext = NULL;
1045 + kmem_cache_free(bh_cachep, bh); /* this one is not on the free list yet */
1046 + loop_prealloc_cleanup(lo);
1047 + return 1;
1048 + }
1049 + bh->b_reqnext = lo->lo_bh_free;
1050 + lo->lo_bh_free = bh;
1051 + }
1052 + return 0;
1053 +}
1054 +/* Append bh to circular queue *q (*q is the last element, its b_reqnext the first) and wake any lo_bh_wait sleeper. */
1055 +static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
1056 +{
1057 + unsigned long flags;
1058 +
1059 + spin_lock_irqsave(&lo->lo_lock, flags);
1060 + if (*q) {
1061 + bh->b_reqnext = (*q)->b_reqnext; /* new last element points at the first */
1062 + (*q)->b_reqnext = bh;
1063 + } else {
1064 + bh->b_reqnext = bh; /* single element points at itself */
1065 + }
1066 + *q = bh;
1067 + spin_unlock_irqrestore(&lo->lo_lock, flags);
1068 +
1069 + if (waitqueue_active(&lo->lo_bh_wait))
1070 + wake_up_interruptible(&lo->lo_bh_wait);
1071 +}
1072 +/* Insert bh at the head of circular queue *q: it is linked after the last element and *q moves only if the queue was empty. */
1073 +static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
1074 +{
1075 + spin_lock_irq(&lo->lo_lock);
1076 + if (*q) {
1077 + bh->b_reqnext = (*q)->b_reqnext;
1078 + (*q)->b_reqnext = bh; /* bh becomes the element dequeued next */
1079 + } else {
1080 + bh->b_reqnext = bh;
1081 + *q = bh;
1082 + }
1083 + spin_unlock_irq(&lo->lo_lock);
1084 +}
1085 +/* Dequeue the first element of the first non-empty queue in qt's order and store that queue's tag in *list_nr; NULL if all are empty. */
1086 +static struct buffer_head *loop_get_bh(struct loop_device *lo, int *list_nr,
1087 + que_look_up_table *qt)
1088 +{
1089 + struct buffer_head *bh = NULL, *last;
1090 +
1091 + spin_lock_irq(&lo->lo_lock);
1092 + if ((last = *qt->q0)) {
1093 + bh = last->b_reqnext; /* first element follows the last one */
1094 + if (bh == last)
1095 + *qt->q0 = NULL; /* that was the only element */
1096 + else
1097 + last->b_reqnext = bh->b_reqnext;
1098 + bh->b_reqnext = NULL;
1099 + *list_nr = qt->x0;
1100 + } else if ((last = *qt->q1)) {
1101 + bh = last->b_reqnext;
1102 + if (bh == last)
1103 + *qt->q1 = NULL;
1104 + else
1105 + last->b_reqnext = bh->b_reqnext;
1106 + bh->b_reqnext = NULL;
1107 + *list_nr = qt->x1;
1108 + } else if ((last = *qt->q2)) {
1109 + bh = last->b_reqnext;
1110 + if (bh == last)
1111 + *qt->q2 = NULL;
1112 + else
1113 + last->b_reqnext = bh->b_reqnext;
1114 + bh->b_reqnext = NULL;
1115 + *list_nr = qt->x2;
1116 + }
1117 + spin_unlock_irq(&lo->lo_lock);
1118 + return bh;
1119 +}
1120 +/* Push b back onto the lo_bh_free list; wake lo_bh_wait only when lo_bh_need is set. */
1121 +static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b)
1122 +{
1123 + unsigned long flags;
1124 + int wk;
1125 +
1126 + spin_lock_irqsave(&lo->lo_lock, flags);
1127 + b->b_reqnext = lo->lo_bh_free;
1128 + lo->lo_bh_free = b;
1129 + wk = lo->lo_bh_need; /* sampled under the lock, used after it is dropped */
1130 + spin_unlock_irqrestore(&lo->lo_lock, flags);
1131 +
1132 + if (wk && waitqueue_active(&lo->lo_bh_wait))
1133 + wake_up_interruptible(&lo->lo_bh_wait);
1134 +}
1135 +/* Write completion for a pre-allocated buffer: finish the original request (bh->b_private), recycle bh, drop one lo_pending count. */
1136 +static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate)
1137 +{
1138 + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; /* loop_get_buffer() stored the loop device number in b_dev */
1139 + struct buffer_head *rbh = bh->b_private;
1140 +
1141 + rbh->b_reqnext = NULL;
1142 + rbh->b_end_io(rbh, uptodate);
1143 + loop_put_buffer(lo, bh);
1144 + if (atomic_dec_and_test(&lo->lo_pending))
1145 + wake_up_interruptible(&lo->lo_bh_wait);
1146 +}
1147 +/* Read completion for a pre-allocated buffer: failures complete at once, successes are queued on lo_bh_que0 for loop_thread. */
1148 +static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate)
1149 +{
1150 + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
1151 +
1152 + if (!uptodate)
1153 + loop_end_io_transfer_wr(bh, uptodate); /* same completion steps as a write, reporting the failure */
1154 + else
1155 + loop_add_queue_last(lo, bh, &lo->lo_bh_que0);
1156 +}
1157 +/* Take a buffer head off lo_bh_free and set it up to carry rbh's I/O to lo->lo_device; returns NULL if none is free. */
1158 +static struct buffer_head *loop_get_buffer(struct loop_device *lo,
1159 + struct buffer_head *rbh, int from_thread, int rw)
1160 +{
1161 + struct buffer_head *bh;
1162 + struct page *p;
1163 + unsigned long flags;
1164 +
1165 + spin_lock_irqsave(&lo->lo_lock, flags);
1166 + bh = lo->lo_bh_free;
1167 + if (bh) {
1168 + lo->lo_bh_free = bh->b_reqnext;
1169 + if (from_thread)
1170 + lo->lo_bh_need = 0;
1171 + } else {
1172 + if (from_thread)
1173 + lo->lo_bh_need = 1; /* makes loop_put_buffer() wake the thread when a buffer returns */
1174 + }
1175 + spin_unlock_irqrestore(&lo->lo_lock, flags);
1176 + if (!bh)
1177 + return (struct buffer_head *)0;
1178 +
1179 + p = bh->b_page; /* the pre-allocated page survives the memset below */
1180 + memset(bh, 0, sizeof(struct buffer_head));
1181 + bh->b_page = p;
1182 +
1183 + bh->b_private = rbh; /* completion handlers find the original request here */
1184 + bh->b_size = rbh->b_size;
1185 + bh->b_dev = rbh->b_rdev; /* completion handlers index loop_dev[] with MINOR(b_dev) */
1186 + bh->b_rdev = lo->lo_device;
1187 + bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
1188 + bh->b_data = page_address(bh->b_page);
1189 + bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd;
1190 + bh->b_rsector = rbh->b_rsector + lo->lo_offs_sec;
1191 + init_waitqueue_head(&bh->b_wait);
1192 +
1193 + return bh;
1194 +}
1195 +/* Recompute loop_sizes[] from backing size, lo_offset and lo_sizelimit; an unusable offset or limit is reset to 0 and reported as -EINVAL. */
1196 +static int figure_loop_size(struct loop_device *lo)
1197 +{
1198 + loff_t size, offs;
1199 + unsigned int x;
1200 + int err = 0;
1201 + kdev_t lodev = lo->lo_device;
1202 +
1203 + offs = lo->lo_offset;
1204 + if (S_ISREG(lo->lo_backing_file->f_dentry->d_inode->i_mode)) {
1205 + size = lo->lo_backing_file->f_dentry->d_inode->i_size;
1206 + } else {
1207 + offs &= ~((loff_t)511); /* round the offset down to a multiple of 512 */
1208 + if (blk_size[MAJOR(lodev)])
1209 + size = (loff_t)(blk_size[MAJOR(lodev)][MINOR(lodev)]) << BLOCK_SIZE_BITS;
1210 + else
1211 + size = 1024*1024*1024; /* unknown size */
1212 + }
1213 + if ((offs > 0) && (offs < size)) {
1214 + size -= offs;
1215 + } else {
1216 + if (offs) /* a zero offset is not an error */
1217 + err = -EINVAL;
1218 + lo->lo_offset = 0;
1219 + lo->lo_offs_sec = lo->lo_iv_remove = 0;
1220 + }
1221 + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) {
1222 + size = lo->lo_sizelimit;
1223 + } else {
1224 + if (lo->lo_sizelimit) /* a zero limit means no limit, not an error */
1225 + err = -EINVAL;
1226 + lo->lo_sizelimit = 0;
1227 + }
1228 + size >>= BLOCK_SIZE_BITS; /* bytes to blocks */
1229 +
1230 + /*
1231 + * Unfortunately, if we want to do I/O on the device,
1232 + * the number of 1024-byte blocks has to fit into unsigned int
1233 + */
1234 + x = (unsigned int)size;
1235 + if ((loff_t)x != size) { /* value did not survive the narrowing */
1236 + err = -EFBIG;
1237 + size = 0;
1238 + }
1239 +
1240 + loop_sizes[lo->lo_number] = size;
1241 + return err;
1242 +}
1243 +
1244 +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w)
1245 +{
1246 + mm_segment_t fs;
1247 + int x, y, z;
1248 +
1249 + y = 0;
1250 + do {
1251 + z = size - y;
1252 + fs = get_fs();
1253 + set_fs(get_ds());
1254 + if (w) {
1255 + x = file->f_op->write(file, buf + y, z, ppos);
1256 + set_fs(fs);
1257 + } else {
1258 + x = file->f_op->read(file, buf + y, z, ppos);
1259 + set_fs(fs);
1260 + if (!x)
1261 + return 1;
1262 + }
1263 + if (x < 0) {
1264 + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) {
1265 + run_task_queue(&tq_disk);
1266 + set_current_state(TASK_INTERRUPTIBLE);
1267 + schedule_timeout(HZ / 2);
1268 + continue;
1269 + }
1270 + return 1;
1271 + }
1272 + y += x;
1273 + } while (y < size);
1274 + return 0;
1275 +}
1276 +/* Serve one request of a file backed loop, at most PAGE_SIZE bytes per step, through the page of lo->lo_bh_free. Returns 0 on success, 1 on error. */
1277 +static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
1278 +{
1279 + loff_t pos;
1280 + struct file *file = lo->lo_backing_file;
1281 + char *data, *buf;
1282 + unsigned int size, len;
1283 + unsigned long IV;
1284 +
1285 + pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
1286 + buf = page_address(lo->lo_bh_free->b_page); /* bounce page; loop_set_fd() pre-allocates one buffer for file backing */
1287 + len = bh->b_size;
1288 + data = bh_kmap(bh);
1289 + IV = bh->b_rsector;
1290 + if (!lo->lo_iv_remove)
1291 + IV += lo->lo_offs_sec;
1292 + while (len > 0) {
1293 + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
1294 + /* this code relies that NONE transfer is a no-op */
1295 + buf = data;
1296 + }
1297 + size = PAGE_SIZE;
1298 + if (size > len)
1299 + size = len;
1300 + if (rw == WRITE) {
1301 + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) { /* transfer data into buf, then write buf out */
1302 + printk(KERN_ERR "loop%d: write transfer error, sector %lu\n", lo->lo_number, IV);
1303 + goto kunmap_and_out;
1304 + }
1305 + if (loop_file_io(file, buf, size, &pos, 1)) {
1306 + printk(KERN_ERR "loop%d: write i/o error, sector %lu\n", lo->lo_number, IV);
1307 + goto kunmap_and_out;
1308 + }
1309 + } else {
1310 + if (loop_file_io(file, buf, size, &pos, 0)) { /* read into buf, then transfer buf into data */
1311 + printk(KERN_ERR "loop%d: read i/o error, sector %lu\n", lo->lo_number, IV);
1312 + goto kunmap_and_out;
1313 + }
1314 + if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
1315 + printk(KERN_ERR "loop%d: read transfer error, sector %lu\n", lo->lo_number, IV);
1316 + goto kunmap_and_out;
1317 + }
1318 + flush_dcache_page(bh->b_page);
1319 + }
1320 + data += size;
1321 + len -= size;
1322 + IV += size >> 9; /* advance by the number of 512-byte sectors handled */
1323 + }
1324 + bh_kunmap(bh);
1325 + return 0;
1326 +
1327 +kunmap_and_out:
1328 + bh_kunmap(bh);
1329 + return 1;
1330 +}
1331 +/* Entry point for requests to a loop device: remaps, transfers or queues rbh depending on the backing type; always returns 0. */
1332 +static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
1333 +{
1334 + struct buffer_head *bh;
1335 + struct loop_device *lo;
1336 + char *md;
1337 +
1338 + set_current_state(TASK_RUNNING);
1339 + if (!buffer_locked(rbh))
1340 + BUG();
1341 +
1342 + if (MINOR(rbh->b_rdev) >= max_loop)
1343 + goto out;
1344 +
1345 + lo = &loop_dev[MINOR(rbh->b_rdev)];
1346 + spin_lock_irq(&lo->lo_lock);
1347 + if (lo->lo_state != Lo_bound)
1348 + goto inactive;
1349 + atomic_inc(&lo->lo_pending); /* one count per request in flight */
1350 + spin_unlock_irq(&lo->lo_lock);
1351 +
1352 + if (rw == WRITE) {
1353 + if (lo->lo_flags & LO_FLAGS_READ_ONLY)
1354 + goto err;
1355 + } else if (rw == READA) {
1356 + rw = READ; /* read-ahead is handled as a plain read */
1357 + } else if (rw != READ) {
1358 + printk(KERN_ERR "loop%d: unknown command (%d)\n", lo->lo_number, rw);
1359 + goto err;
1360 + }
1361 +
1362 + /*
1363 + * file backed, queue for loop_thread to handle
1364 + */
1365 + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1366 + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que0);
1367 + return 0;
1368 + }
1369 +
1370 + /*
1371 + * device backed, just remap rdev & rsector for NONE transfer
1372 + */
1373 + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
1374 + rbh->b_rsector += lo->lo_offs_sec;
1375 + rbh->b_rdev = lo->lo_device;
1376 + generic_make_request(rw, rbh);
1377 + if (atomic_dec_and_test(&lo->lo_pending)) /* request handed on; the loop no longer tracks it */
1378 + wake_up_interruptible(&lo->lo_bh_wait);
1379 + return 0;
1380 + }
1381 +
1382 + /*
1383 + * device backed, start reads and writes now if buffer available
1384 + */
1385 + bh = loop_get_buffer(lo, rbh, 0, rw);
1386 + if (!bh) {
1387 + /* just queue request and let thread handle alloc later */
1388 + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2);
1389 + return 0;
1390 + }
1391 + if (rw == WRITE) {
1392 + int trv;
1393 + md = bh_kmap(rbh);
1394 + trv = lo_do_transfer(lo, WRITE, bh->b_data, md, bh->b_size, bh->b_rsector - lo->lo_iv_remove);
1395 + bh_kunmap(rbh);
1396 + if (trv) {
1397 + loop_put_buffer(lo, bh); /* transfer failed: give the buffer back before failing rbh */
1398 + goto err;
1399 + }
1400 + }
1401 + generic_make_request(rw, bh);
1402 + return 0;
1403 +
1404 +err:
1405 + if (atomic_dec_and_test(&lo->lo_pending))
1406 + wake_up_interruptible(&lo->lo_bh_wait);
1407 +out:
1408 + buffer_IO_error(rbh);
1409 + return 0;
1410 +inactive:
1411 + spin_unlock_irq(&lo->lo_lock);
1412 + goto out;
1413 +}
1414 +
1415 +/*
1416 + * worker thread that handles reads/writes to file backed loop devices,
1417 + * to avoid blocking in our make_request_fn. it also does loop decrypting
1418 + * on reads for block backed loop, as that is too heavy to do from
1419 + * b_end_io context where irqs may be disabled.
1420 + */
1421 +static int loop_thread(void *data)
1422 +{
1423 + struct loop_device *lo = data;
1424 + struct buffer_head *bh, *xbh;
1425 + int x, rw, qi = 0, flushcnt = 0;
1426 + wait_queue_t waitq;
1427 + que_look_up_table qt[4] = { /* four polling orders, cycled through by ++qi & 3 below */
1428 + { &lo->lo_bh_que0, &lo->lo_bh_que1, &lo->lo_bh_que2, 0, 1, 2 },
1429 + { &lo->lo_bh_que2, &lo->lo_bh_que0, &lo->lo_bh_que1, 2, 0, 1 },
1430 + { &lo->lo_bh_que0, &lo->lo_bh_que2, &lo->lo_bh_que1, 0, 2, 1 },
1431 + { &lo->lo_bh_que1, &lo->lo_bh_que0, &lo->lo_bh_que2, 1, 0, 2 }
1432 + };
1433 + char *md;
1434 + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
1435 +
1436 + init_waitqueue_entry(&waitq, current);
1437 + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim)); /* replace the rlimits this thread started with by INIT_RLIMITS */
1438 + daemonize();
1439 + exit_files(current);
1440 + reparent_to_init();
1441 +
1442 + sprintf(current->comm, "loop%d", lo->lo_number);
1443 +
1444 + spin_lock_irq(&current->sigmask_lock);
1445 + sigfillset(&current->blocked); /* block every signal */
1446 + flush_signals(current);
1447 + spin_unlock_irq(&current->sigmask_lock);
1448 +
1449 + if (lo_nice > 0) /* clamp the user supplied value to 0 ... -20 */
1450 + lo_nice = 0;
1451 + if (lo_nice < -20)
1452 + lo_nice = -20;
1453 +#if defined(DEF_NICE) && defined(DEF_COUNTER)
1454 + /* old scheduler syntax */
1455 + current->policy = SCHED_OTHER;
1456 + current->nice = lo_nice;
1457 +#else
1458 + /* O(1) scheduler syntax */
1459 + set_user_nice(current, lo_nice);
1460 +#endif
1461 +
1462 + spin_lock_irq(&lo->lo_lock);
1463 + lo->lo_state = Lo_bound;
1464 + atomic_inc(&lo->lo_pending); /* the thread's own count; loop_clr_fd() drops it to request tear-down */
1465 + spin_unlock_irq(&lo->lo_lock);
1466 +
1467 + current->flags |= PF_NOIO;
1468 +#if defined(PF_NOFREEZE)
1469 + current->flags |= PF_NOFREEZE;
1470 +#elif defined(PF_IOTHREAD)
1471 + current->flags |= PF_IOTHREAD;
1472 +#endif
1473 +
1474 + /*
1475 + * up sem, we are running
1476 + */
1477 + up(&lo->lo_sem);
1478 +
1479 + for (;;) {
1480 + add_wait_queue(&lo->lo_bh_wait, &waitq);
1481 + for (;;) {
1482 + set_current_state(TASK_INTERRUPTIBLE);
1483 + if (!atomic_read(&lo->lo_pending))
1484 + break;
1485 +
1486 + x = 0; /* becomes 1 when there is something to do right now */
1487 + spin_lock_irq(&lo->lo_lock);
1488 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
1489 + if(lo->lo_keyscrub_fn) x = 1;
1490 +#endif
1491 + if (lo->lo_bh_que0) {
1492 + x = 1;
1493 + } else if (lo->lo_bh_que1 || lo->lo_bh_que2) {
1494 + /* file backed works too because lo->lo_bh_need == 0 */
1495 + if (lo->lo_bh_free || !lo->lo_bh_need)
1496 + x = 1;
1497 + }
1498 + spin_unlock_irq(&lo->lo_lock);
1499 + if (x)
1500 + break;
1501 +
1502 + schedule();
1503 + }
1504 + set_current_state(TASK_RUNNING);
1505 + remove_wait_queue(&lo->lo_bh_wait, &waitq);
1506 +
1507 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
1508 + if(lo->lo_keyscrub_fn) {
1509 + (*lo->lo_keyscrub_fn)(lo->lo_keyscrub_ptr);
1510 + lo->lo_keyscrub_fn = 0; /* one call per request; cleared until set again */
1511 + }
1512 +#endif
1513 + /*
1514 + * could be woken because of tear-down, not because of
1515 + * pending work
1516 + */
1517 + if (!atomic_read(&lo->lo_pending))
1518 + break;
1519 +
1520 + /*
1521 + * read queues using alternating order to prevent starvation
1522 + */
1523 + bh = loop_get_bh(lo, &x, &qt[++qi & 3]);
1524 + if (!bh)
1525 + continue;
1526 +
1527 + /*
1528 + * x list tag usage(buffer-allocated)
1529 + * --- -------------- -----------------------
1530 + * 0 lo->lo_bh_que0 dev-read(y) / file-read
1531 + * 1 lo->lo_bh_que1 dev-write(n) / file-write
1532 + * 2 lo->lo_bh_que2 dev-read(n)
1533 + */
1534 + rw = (x == 1) ? WRITE : READ;
1535 + if ((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) {
1536 + /* loop_make_request didn't allocate a buffer, do that now */
1537 + xbh = loop_get_buffer(lo, bh, 1, rw);
1538 + if (!xbh) {
1539 + run_task_queue(&tq_disk);
1540 + flushcnt = 0;
1541 + loop_add_queue_first(lo, bh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2); /* back to the head of the queue for this kind of request */
1542 + /* lo->lo_bh_need should be 1 now, go back to sleep */
1543 + continue;
1544 + }
1545 + if (rw == WRITE) {
1546 + int trv;
1547 + md = bh_kmap(bh);
1548 + trv = lo_do_transfer(lo, WRITE, xbh->b_data, md, xbh->b_size, xbh->b_rsector - lo->lo_iv_remove);
1549 + bh_kunmap(bh);
1550 + if (trv) {
1551 + loop_put_buffer(lo, xbh);
1552 + buffer_IO_error(bh);
1553 + atomic_dec(&lo->lo_pending); /* request is finished (failed): drop its count */
1554 + continue;
1555 + }
1556 + }
1557 + generic_make_request(rw, xbh);
1558 +
1559 + /* start I/O if there are no more requests lacking buffers */
1560 + x = 0;
1561 + spin_lock_irq(&lo->lo_lock);
1562 + if (!lo->lo_bh_que1 && !lo->lo_bh_que2)
1563 + x = 1;
1564 + spin_unlock_irq(&lo->lo_lock);
1565 + if (x || (++flushcnt >= lo->lo_bh_flsh)) {
1566 + run_task_queue(&tq_disk);
1567 + flushcnt = 0;
1568 + }
1569 +
1570 + /* request not completely processed yet */
1571 + continue;
1572 + }
1573 + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1574 + /* request is for file backed device */
1575 + x = do_bh_filebacked(lo, bh, rw);
1576 + bh->b_reqnext = NULL;
1577 + bh->b_end_io(bh, !x); /* do_bh_filebacked() returns 0 on success */
1578 + } else {
1579 + /* device backed read has completed, do decrypt now */
1580 + xbh = bh->b_private;
1581 + /* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */
1582 + /* instead, recompute IV from original request */
1583 + md = bh_kmap(xbh);
1584 + x = lo_do_transfer(lo, READ, bh->b_data, md, bh->b_size, xbh->b_rsector + lo->lo_offs_sec - lo->lo_iv_remove);
1585 + flush_dcache_page(xbh->b_page);
1586 + bh_kunmap(xbh);
1587 + xbh->b_reqnext = NULL;
1588 + xbh->b_end_io(xbh, !x);
1589 + loop_put_buffer(lo, bh);
1590 + }
1591 +
1592 + /*
1593 + * woken both for pending work and tear-down, lo_pending
1594 + * will hit zero then
1595 + */
1596 + if (atomic_dec_and_test(&lo->lo_pending))
1597 + break;
1598 + }
1599 +
1600 + up(&lo->lo_sem); /* lets the down() in loop_clr_fd() proceed */
1601 + return 0;
1602 +}
1603 +/* Set dev's soft block size from the backing device's; file backed loops halve it until the loop size is a whole number of blocks. */
1604 +static void loop_set_softblksz(struct loop_device *lo, kdev_t dev)
1605 +{
1606 + int bs = 0, x;
1607 +
1608 + if (blksize_size[MAJOR(lo->lo_device)])
1609 + bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
1610 + if (!bs)
1611 + bs = BLOCK_SIZE;
1612 + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
1613 + x = loop_sizes[lo->lo_number]; /* loop size as stored by figure_loop_size() */
1614 + if ((bs == 8192) && (x & 7))
1615 + bs = 4096;
1616 + if ((bs == 4096) && (x & 3))
1617 + bs = 2048;
1618 + if ((bs == 2048) && (x & 1))
1619 + bs = 1024;
1620 + }
1621 + set_blocksize(dev, bs);
1622 +}
1623 +/* LOOP_SET_FD: bind file descriptor arg as backing store, pre-allocate buffers and start loop_thread. Returns 0 or a negative errno. */
1624 +static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev,
1625 + unsigned int arg)
1626 +{
1627 + struct file *file;
1628 + struct inode *inode;
1629 + kdev_t lo_device;
1630 + int lo_flags = 0, hardsz = 512;
1631 + int error;
1632 +
1633 + MOD_INC_USE_COUNT;
1634 +
1635 + error = -EBUSY;
1636 + if (lo->lo_state != Lo_unbound)
1637 + goto out;
1638 +
1639 + error = -EBADF;
1640 + file = fget(arg);
1641 + if (!file)
1642 + goto out;
1643 +
1644 + error = -EINVAL;
1645 + inode = file->f_dentry->d_inode;
1646 +
1647 + if (!(file->f_mode & FMODE_WRITE))
1648 + lo_flags |= LO_FLAGS_READ_ONLY;
1649 +
1650 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
1651 + lo->lo_keyscrub_fn = 0;
1652 +#endif
1653 + lo->lo_offset = lo->lo_sizelimit = 0;
1654 + lo->lo_offs_sec = lo->lo_iv_remove = 0;
1655 + lo->lo_bh_free = lo->lo_bh_que2 = lo->lo_bh_que1 = lo->lo_bh_que0 = NULL;
1656 + lo->lo_bh_need = lo->lo_bh_flsh = 0;
1657 + init_waitqueue_head(&lo->lo_bh_wait);
1658 + if (S_ISBLK(inode->i_mode)) {
1659 + lo_device = inode->i_rdev;
1660 + if (lo_device == dev) { /* refuse to back a loop device with itself */
1661 + error = -EBUSY;
1662 + goto out_putf;
1663 + }
1664 + if (loop_prealloc_init(lo, 0)) { /* 0: use the lo_prealloc[] page count */
1665 + error = -ENOMEM;
1666 + goto out_putf;
1667 + }
1668 + hardsz = get_hardsect_size(lo_device);
1669 + } else if (S_ISREG(inode->i_mode)) {
1670 + /*
1671 + * If we can't read - sorry. If we only can't write - well,
1672 + * it's going to be read-only.
1673 + */
1674 + if (!file->f_op || !file->f_op->read)
1675 + goto out_putf;
1676 +
1677 + if (!file->f_op->write)
1678 + lo_flags |= LO_FLAGS_READ_ONLY;
1679 +
1680 + lo_device = inode->i_dev;
1681 + lo_flags |= LO_FLAGS_DO_BMAP;
1682 + if (loop_prealloc_init(lo, 1)) { /* file backing needs a single buffer */
1683 + error = -ENOMEM;
1684 + goto out_putf;
1685 + }
1686 + error = 0;
1687 + } else
1688 + goto out_putf;
1689 +
1690 + get_file(file); /* second reference, held while bound; the fget() one is dropped before returning */
1691 +
1692 + if ((S_ISREG(inode->i_mode) && IS_RDONLY(inode)) || is_read_only(lo_device)
1693 + || !(lo_file->f_mode & FMODE_WRITE))
1694 + lo_flags |= LO_FLAGS_READ_ONLY;
1695 +
1696 + set_device_ro(dev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
1697 +
1698 + lo->lo_device = lo_device;
1699 + lo->lo_flags = lo_flags;
1700 + if(lo_flags & LO_FLAGS_READ_ONLY)
1701 + lo->lo_flags |= 0x200000; /* export to user space */
1702 + lo->lo_backing_file = file;
1703 + lo->transfer = NULL;
1704 + lo->ioctl = NULL;
1705 + if (figure_loop_size(lo)) {
1706 + error = -EFBIG;
1707 + goto out_cleanup;
1708 + }
1709 +
1710 + if (lo_flags & LO_FLAGS_DO_BMAP) {
1711 + lo->old_gfp_mask = inode->i_mapping->gfp_mask; /* saved so loop_clr_fd() can restore it */
1712 + inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS);
1713 + inode->i_mapping->gfp_mask |= __GFP_HIGH;
1714 + } else {
1715 + lo->old_gfp_mask = -1; /* -1 marks "nothing to restore" */
1716 + }
1717 +
1718 + loop_hardsizes[MINOR(dev)] = hardsz;
1719 + loop_set_softblksz(lo, dev);
1720 +
1721 + error = kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
1722 + if(error < 0)
1723 + goto out_mapping;
1724 + down(&lo->lo_sem); /* wait until loop_thread signals that it is running */
1725 + fput(file);
1726 + return 0;
1727 +
1728 + out_mapping:
1729 + if(lo->old_gfp_mask != -1)
1730 + inode->i_mapping->gfp_mask = lo->old_gfp_mask;
1731 + out_cleanup:
1732 + loop_prealloc_cleanup(lo);
1733 + fput(file); /* drops the get_file() reference; falls through to drop the fget() one */
1734 + out_putf:
1735 + fput(file);
1736 + out:
1737 + MOD_DEC_USE_COUNT;
1738 + return error;
1739 +}
1740 +/* Detach the current transfer module, if any: call its release and unlock hooks and reset lo_encrypt_type to 0. Returns release()'s result. */
1741 +static int loop_release_xfer(struct loop_device *lo)
1742 +{
1743 + int err = 0;
1744 + if (lo->lo_encrypt_type) { /* type 0 has nothing to release */
1745 + struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type];
1746 + lo->transfer = NULL;
1747 + if (xfer && xfer->release)
1748 + err = xfer->release(lo);
1749 + if (xfer && xfer->unlock)
1750 + xfer->unlock(lo);
1751 + lo->lo_encrypt_type = 0;
1752 + }
1753 + return err;
1754 +}
1755 +/* Attach transfer module 'type': run its init hook and, on success, record the type and call its lock hook. Type 0 is a no-op. */
1756 +static int loop_init_xfer(struct loop_device *lo, int type,struct loop_info *i)
1757 +{
1758 + int err = 0;
1759 + if (type) {
1760 + struct loop_func_table *xfer = xfer_funcs[type]; /* loop_set_status() has checked that this entry is not NULL */
1761 + if (xfer->init)
1762 + err = xfer->init(lo, i);
1763 + if (!err) {
1764 + lo->lo_encrypt_type = type;
1765 + if (xfer->lock)
1766 + xfer->lock(lo);
1767 + }
1768 + }
1769 + return err;
1770 +}
1771 +/* LOOP_CLR_FD: stop loop_thread, free buffers, wipe key and settings, and release the backing file. Returns 0 or a negative errno. */
1772 +static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1773 +{
1774 + struct file *filp = lo->lo_backing_file;
1775 + int gfp = lo->old_gfp_mask;
1776 +
1777 + if (lo->lo_state != Lo_bound)
1778 + return -ENXIO;
1779 + if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
1780 + return -EBUSY;
1781 + if (filp==NULL)
1782 + return -EINVAL;
1783 +
1784 + spin_lock_irq(&lo->lo_lock);
1785 + lo->lo_state = Lo_rundown;
1786 + if (atomic_dec_and_test(&lo->lo_pending)) /* drops the count loop_thread took at startup */
1787 + wake_up_interruptible(&lo->lo_bh_wait);
1788 + spin_unlock_irq(&lo->lo_lock);
1789 +
1790 + down(&lo->lo_sem); /* wait for loop_thread's final up() */
1791 +
1792 + loop_prealloc_cleanup(lo);
1793 + lo->lo_backing_file = NULL;
1794 +
1795 + loop_release_xfer(lo);
1796 + lo->transfer = NULL;
1797 + lo->ioctl = NULL;
1798 + lo->lo_device = 0;
1799 + lo->lo_encrypt_type = 0;
1800 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
1801 + lo->lo_keyscrub_fn = 0;
1802 +#endif
1803 + lo->lo_offset = lo->lo_sizelimit = 0;
1804 + lo->lo_offs_sec = lo->lo_iv_remove = 0;
1805 + lo->lo_encrypt_key_size = 0;
1806 + lo->lo_flags = 0;
1807 + memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); /* do not leave key material behind */
1808 + memset(lo->lo_name, 0, LO_NAME_SIZE);
1809 + memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1810 + loop_sizes[lo->lo_number] = 0;
1811 + invalidate_bdev(bdev, 0);
1812 + if (gfp != -1) /* -1: loop_set_fd() did not change the mapping's mask */
1813 + filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
1814 + lo->lo_state = Lo_unbound;
1815 + fput(filp); /* releases the reference loop_set_fd() kept */
1816 + MOD_DEC_USE_COUNT;
1817 + return 0;
1818 +}
1819 +/* Widen an old-style struct loop_info into a zeroed struct loop_info64. */
1820 +static void
1821 +loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
1822 +{
1823 + memset(info64, 0, sizeof(*info64));
1824 + info64->lo_number = info->lo_number;
1825 + info64->lo_device = info->lo_device;
1826 + info64->lo_inode = info->lo_inode;
1827 + info64->lo_rdevice = info->lo_rdevice;
1828 + info64->lo_offset = info->lo_offset;
1829 + info64->lo_encrypt_type = info->lo_encrypt_type;
1830 + info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
1831 + info64->lo_flags = info->lo_flags;
1832 + info64->lo_init[0] = info->lo_init[0];
1833 + info64->lo_init[1] = info->lo_init[1];
1834 + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
1835 + memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); /* for this type the single old name field is the cipher name */
1836 + else
1837 + memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
1838 + memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
1839 +}
1840 +/* Narrow a struct loop_info64 into an old-style struct loop_info; -EOVERFLOW if a value did not fit or lo_sizelimit is set, else 0. */
1841 +static int
1842 +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info)
1843 +{
1844 + memset(info, 0, sizeof(*info));
1845 + info->lo_number = info64->lo_number;
1846 + info->lo_device = info64->lo_device;
1847 + info->lo_inode = info64->lo_inode;
1848 + info->lo_rdevice = info64->lo_rdevice;
1849 + info->lo_offset = info64->lo_offset;
1850 + info->lo_encrypt_type = info64->lo_encrypt_type;
1851 + info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
1852 + info->lo_flags = info64->lo_flags;
1853 + info->lo_init[0] = info64->lo_init[0];
1854 + info->lo_init[1] = info64->lo_init[1];
1855 + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
1856 + memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1857 + else
1858 + memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
1859 + memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1860 +
1861 + /* error in case values were truncated */
1862 + if (info->lo_device != info64->lo_device ||
1863 + info->lo_rdevice != info64->lo_rdevice ||
1864 + info->lo_inode != info64->lo_inode ||
1865 + info->lo_offset != info64->lo_offset ||
1866 + info64->lo_sizelimit) /* no lo_sizelimit is copied into the old struct */
1867 + return -EOVERFLOW;
1868 +
1869 + return 0;
1870 +}
1871 +/* Apply offset, size limit, transfer type, names and key from info to a bound loop device. Returns 0 or a negative errno. */
1872 +static int loop_set_status(struct loop_device *lo, kdev_t dev, struct loop_info64 *info, struct loop_info *oldinfo)
1873 +{
1874 + int err;
1875 + unsigned int type;
1876 +
1877 + if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
1878 + !capable(CAP_SYS_ADMIN)) /* a keyed device may be changed only by the key owner or CAP_SYS_ADMIN */
1879 + return -EPERM;
1880 + if (lo->lo_state != Lo_bound)
1881 + return -ENXIO;
1882 + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
1883 + return -EINVAL;
1884 + type = info->lo_encrypt_type;
1885 + if (type >= MAX_LO_CRYPT || xfer_funcs[type] == NULL)
1886 + return -EINVAL;
1887 + if (type == LO_CRYPT_XOR && info->lo_encrypt_key_size == 0)
1888 + return -EINVAL;
1889 + err = loop_release_xfer(lo); /* the old transfer is gone from here on, even if a later step fails */
1890 + if (err)
1891 + return err;
1892 +
1893 + if ((loff_t)info->lo_offset < 0) {
1894 + /* negative offset == remove offset from IV computations */
1895 + lo->lo_offset = -(info->lo_offset);
1896 + lo->lo_iv_remove = lo->lo_offset >> 9;
1897 + } else {
1898 + /* positive offset == include offset in IV computations */
1899 + lo->lo_offset = info->lo_offset;
1900 + lo->lo_iv_remove = 0;
1901 + }
1902 + lo->lo_offs_sec = lo->lo_offset >> 9; /* offset in 512-byte sectors */
1903 + lo->lo_sizelimit = info->lo_sizelimit;
1904 + err = figure_loop_size(lo);
1905 + if (err)
1906 + return err;
1907 + loop_set_softblksz(lo, dev);
1908 +
1909 + /* transfer init function for 2.4 kernels takes old style struct */
1910 + err = loop_init_xfer(lo, type, oldinfo);
1911 + /* copy key -- just in case transfer init func modified it */
1912 + memcpy(info->lo_encrypt_key, oldinfo->lo_encrypt_key, sizeof(info->lo_encrypt_key));
1913 + if (err)
1914 + return err;
1915 +
1916 + strncpy(lo->lo_name, info->lo_file_name, LO_NAME_SIZE); /* NOTE(review): not NUL-terminated when the source fills all LO_NAME_SIZE bytes */
1917 + strncpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
1918 + lo->transfer = xfer_funcs[type]->transfer;
1919 + lo->ioctl = xfer_funcs[type]->ioctl;
1920 + lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
1921 + lo->lo_init[0] = info->lo_init[0];
1922 + lo->lo_init[1] = info->lo_init[1];
1923 + if (info->lo_encrypt_key_size) {
1924 + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
1925 + info->lo_encrypt_key_size);
1926 + lo->lo_key_owner = current->uid; /* remembered for the permission check at the top */
1927 + }
1928 +
1929 + return 0;
1930 +}
1931 +/* Fill *info with the state of a bound loop device; key data is included only for CAP_SYS_ADMIN. Returns 0 or -ENXIO. */
1932 +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1933 +{
1934 + struct file *file = lo->lo_backing_file;
1935 +
1936 + if (lo->lo_state != Lo_bound)
1937 + return -ENXIO;
1938 + memset(info, 0, sizeof(*info)); /* fields not set below are returned as zero */
1939 + info->lo_number = lo->lo_number;
1940 + info->lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev);
1941 + info->lo_inode = file->f_dentry->d_inode->i_ino;
1942 + info->lo_rdevice = kdev_t_to_nr(lo->lo_device);
1943 + info->lo_offset = lo->lo_iv_remove ? -(lo->lo_offset) : lo->lo_offset; /* negative offset mirrors the loop_set_status() convention */
1944 + info->lo_sizelimit = lo->lo_sizelimit;
1945 + info->lo_flags = lo->lo_flags;
1946 + strncpy(info->lo_file_name, lo->lo_name, LO_NAME_SIZE);
1947 + strncpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1948 + info->lo_encrypt_type = lo->lo_encrypt_type;
1949 + if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1950 + info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1951 + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1952 + lo->lo_encrypt_key_size);
1953 + info->lo_init[0] = lo->lo_init[0];
1954 + info->lo_init[1] = lo->lo_init[1];
1955 + }
1956 + return 0;
1957 +}
1958 +/* Copy a loop_info64 (n != 0) or old loop_info (n == 0) from user space, build the other form, and call loop_set_status(). */
1959 +static int
1960 +loop_set_status_n(struct loop_device *lo, kdev_t dev, void *arg, int n)
1961 +{
1962 + struct loop_info info;
1963 + struct loop_info64 info64;
1964 + int err;
1965 +
1966 + if (n) {
1967 + if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
1968 + return -EFAULT;
1969 + /* truncation errors can be ignored here as transfer init func only wants key bits */
1970 + loop_info64_to_old(&info64, &info);
1971 + } else {
1972 + if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1973 + return -EFAULT;
1974 + loop_info64_from_old(&info, &info64);
1975 + }
1976 + err = loop_set_status(lo, dev, &info64, &info);
1977 + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key)); /* wipe both stack copies of the key */
1978 + memset(&info64.lo_encrypt_key[0], 0, sizeof(info64.lo_encrypt_key));
1979 + return err;
1980 +}
1981 +/* LOOP_GET_STATUS: report the device state to user space as an old-style struct loop_info. Returns 0 or a negative errno. */
1982 +static int
1983 +loop_get_status_old(struct loop_device *lo, struct loop_info *arg) {
1984 + struct loop_info info;
1985 + struct loop_info64 info64;
1986 + int err = 0;
1987 +
1988 + if (!arg)
1989 + err = -EINVAL;
1990 + if (!err)
1991 + err = loop_get_status(lo, &info64);
1992 + if (!err)
1993 + err = loop_info64_to_old(&info64, &info); /* -EOVERFLOW here means nothing is copied out */
1994 + if (!err && copy_to_user(arg, &info, sizeof(info)))
1995 + err = -EFAULT;
1996 +
1997 + return err;
1998 +}
1999 +/* LOOP_GET_STATUS64: copy the device status to user space as a struct loop_info64. */
2000 +static int
2001 +loop_get_status64(struct loop_device *lo, struct loop_info64 *arg) {
2002 + struct loop_info64 info64;
2003 + int err = 0;
2004 +
2005 + if (!arg)
2006 + err = -EINVAL;
2007 + if (!err)
2008 + err = loop_get_status(lo, &info64);
2009 + if (!err && copy_to_user(arg, &info64, sizeof(info64)))
2010 + err = -EFAULT;
2011 + memset(&info64.lo_encrypt_key[0], 0, sizeof(info64.lo_encrypt_key)); /* do not leave a key copy on the stack */
2012 + return err; /* 0, -EINVAL, -EFAULT, or the error from loop_get_status() */
2013 +}
2014 +/* Block-device ioctl entry point: validate major/minor, then dispatch cmd while holding lo_ctl_mutex. */
2015 +static int lo_ioctl(struct inode * inode, struct file * file,
2016 + unsigned int cmd, unsigned long arg)
2017 +{
2018 + struct loop_device *lo;
2019 + int dev, err;
2020 +
2021 + if (!inode)
2022 + return -EINVAL;
2023 + if (MAJOR(inode->i_rdev) != MAJOR_NR) {
2024 + printk(KERN_WARNING "lo_ioctl: pseudo-major != %d\n",
2025 + MAJOR_NR);
2026 + return -ENODEV;
2027 + }
2028 + dev = MINOR(inode->i_rdev);
2029 + if (dev >= max_loop) /* minor beyond the allocated loop_dev[] array */
2030 + return -ENODEV;
2031 + lo = &loop_dev[dev];
2032 + down(&lo->lo_ctl_mutex); /* held for the whole dispatch; released before returning */
2033 + switch (cmd) {
2034 + case LOOP_SET_FD:
2035 + err = loop_set_fd(lo, file, inode->i_rdev, arg);
2036 + break;
2037 + case LOOP_CLR_FD:
2038 + err = loop_clr_fd(lo, inode->i_bdev);
2039 + break;
2040 + case LOOP_SET_STATUS:
2041 + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 0); /* 0: arg points to struct loop_info */
2042 + break;
2043 + case LOOP_GET_STATUS:
2044 + err = loop_get_status_old(lo, (struct loop_info *) arg);
2045 + break;
2046 + case LOOP_SET_STATUS64:
2047 + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 1); /* 1: arg points to struct loop_info64 */
2048 + break;
2049 + case LOOP_GET_STATUS64:
2050 + err = loop_get_status64(lo, (struct loop_info64 *) arg);
2051 + break;
2052 + case LOOP_RECOMPUTE_DEV_SIZE:
2053 + if (lo->lo_state != Lo_bound) { /* only meaningful for a bound device */
2054 + err = -ENXIO;
2055 + break;
2056 + }
2057 + err = figure_loop_size(lo);
2058 + break;
2059 + case BLKGETSIZE:
2060 + if (lo->lo_state != Lo_bound) {
2061 + err = -ENXIO;
2062 + break;
2063 + }
2064 + err = put_user((unsigned long)loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); /* presumably 1 KiB units -> 512-byte sectors; confirm */
2065 + break;
2066 + case BLKGETSIZE64:
2067 + if (lo->lo_state != Lo_bound) {
2068 + err = -ENXIO;
2069 + break;
2070 + }
2071 + err = put_user((u64)loop_sizes[lo->lo_number] << 10, (u64*)arg); /* presumably 1 KiB units -> bytes; confirm */
2072 + break;
2073 + case BLKBSZGET:
2074 + case BLKBSZSET:
2075 + case BLKSSZGET:
2076 + case BLKROGET:
2077 + case BLKROSET: /* these five are forwarded unchanged to blk_ioctl() */
2078 + err = blk_ioctl(inode->i_rdev, cmd, arg);
2079 + break;
2080 + default: /* anything else goes to the device's ioctl hook when one is set */
2081 + err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
2082 + }
2083 + up(&lo->lo_ctl_mutex);
2084 + return err;
2085 +}
2086 +/* open(): check major/minor, bump the module use count and the device's lo_refcnt. */
2087 +static int lo_open(struct inode *inode, struct file *file)
2088 +{
2089 + struct loop_device *lo;
2090 + int dev;
2091 +
2092 + if (!inode)
2093 + return -EINVAL;
2094 + if (MAJOR(inode->i_rdev) != MAJOR_NR) {
2095 + printk(KERN_WARNING "lo_open: pseudo-major != %d\n", MAJOR_NR);
2096 + return -ENODEV;
2097 + }
2098 + dev = MINOR(inode->i_rdev);
2099 + if (dev >= max_loop) /* minor beyond the allocated loop_dev[] array */
2100 + return -ENODEV;
2101 +
2102 + lo = &loop_dev[dev];
2103 + MOD_INC_USE_COUNT; /* undone by MOD_DEC_USE_COUNT in lo_release() */
2104 + down(&lo->lo_ctl_mutex); /* lo_refcnt is only changed under lo_ctl_mutex */
2105 + lo->lo_refcnt++;
2106 + up(&lo->lo_ctl_mutex);
2107 + return 0;
2108 +}
2109 +/* release(): drop the reference taken in lo_open(); always returns 0, even when the checks fail. */
2110 +static int lo_release(struct inode *inode, struct file *file)
2111 +{
2112 + struct loop_device *lo;
2113 + int dev;
2114 +
2115 + if (!inode)
2116 + return 0;
2117 + if (MAJOR(inode->i_rdev) != MAJOR_NR) {
2118 + printk(KERN_WARNING "lo_release: pseudo-major != %d\n",
2119 + MAJOR_NR);
2120 + return 0;
2121 + }
2122 + dev = MINOR(inode->i_rdev);
2123 + if (dev >= max_loop) /* minor beyond the allocated loop_dev[] array: nothing to undo */
2124 + return 0;
2125 +
2126 + lo = &loop_dev[dev];
2127 + down(&lo->lo_ctl_mutex); /* lo_refcnt is only changed under lo_ctl_mutex */
2128 + --lo->lo_refcnt;
2129 + up(&lo->lo_ctl_mutex);
2130 + MOD_DEC_USE_COUNT; /* pairs with MOD_INC_USE_COUNT in lo_open() */
2131 + return 0;
2132 +}
2133 +/* Operations table passed to devfs_register_blkdev(), register_disk() and devfs_register_series() in loop_init(). */
2134 +static struct block_device_operations lo_fops = {
2135 + owner: THIS_MODULE,
2136 + open: lo_open,
2137 + release: lo_release,
2138 + ioctl: lo_ioctl,
2139 +};
2140 +
2141 +/*
2142 + * And now the modules code and kernel interface.
2143 + */
2144 +MODULE_PARM(max_loop, "i");
2145 +MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)");
2146 +MODULE_LICENSE("GPL");
2147 +/* Install funcs in xfer_funcs[funcs->number]; returns 0, or -EINVAL if the number is out of range or the slot is taken. */
2148 +int loop_register_transfer(struct loop_func_table *funcs)
2149 +{
2150 + if ((unsigned)funcs->number >= MAX_LO_CRYPT || xfer_funcs[funcs->number]) /* unsigned cast also rejects negative numbers */
2151 + return -EINVAL;
2152 + xfer_funcs[funcs->number] = funcs;
2153 + return 0;
2154 +}
2155 +/* Remove transfer type `number`: release it on every loop device that uses it, then clear its xfer_funcs[] slot. */
2156 +int loop_unregister_transfer(int number)
2157 +{
2158 + struct loop_device *lo;
2159 +
2160 + if ((unsigned)number >= MAX_LO_CRYPT) /* unsigned cast also rejects negative numbers */
2161 + return -EINVAL;
2162 + for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) { /* walk all max_loop devices */
2163 + int type = lo->lo_encrypt_type;
2164 + if (type == number) {
2165 + loop_release_xfer(lo);
2166 + }
2167 + }
2168 + xfer_funcs[number] = NULL; /* slot is cleared whether or not anything was registered */
2169 + return 0;
2170 +}
2171 +
2172 +EXPORT_SYMBOL(loop_register_transfer);
2173 +EXPORT_SYMBOL(loop_unregister_transfer);
2174 +/* Driver init: register the block major, allocate per-device tables, create devfs nodes. Returns 0, -EIO or -ENOMEM. */
2175 +int __init loop_init(void)
2176 +{
2177 + int i;
2178 +
2179 +#ifdef CONFIG_BLK_DEV_LOOP_AES
2180 +#if defined(CONFIG_BLK_DEV_LOOP_PADLOCK) && (defined(CONFIG_X86) || defined(CONFIG_X86_64))
2181 + if((boot_cpu_data.x86 >= 6) && (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
2182 + && (cpuid_eax(0xC0000000) >= 0xC0000001) && ((cpuid_edx(0xC0000001) & 0xC0) == 0xC0)) { /* EDX bits 6 and 7 must both be set */
2183 + xfer_funcs[LO_CRYPT_AES] = &funcs_padlock_aes; /* run-time switch of the AES slot to the padlock table */
2184 + printk(KERN_INFO "loop: padlock hardware AES enabled\n");
2185 + }
2186 +#endif
2187 +#endif
2188 +
2189 + if ((max_loop < 1) || (max_loop > 256)) {
2190 + printk(KERN_WARNING "loop: invalid max_loop (must be between"
2191 + " 1 and 256), using default (8)\n");
2192 + max_loop = 8;
2193 + }
2194 +
2195 + if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) {
2196 + printk(KERN_WARNING "Unable to get major number %d for loop"
2197 + " device\n", MAJOR_NR);
2198 + return -EIO;
2199 + }
2200 +/* four per-device arrays, max_loop entries each; a failure unwinds through the labels at the end */
2201 + loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
2202 + if (!loop_dev)
2203 + goto out_dev;
2204 +
2205 + loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
2206 + if (!loop_sizes)
2207 + goto out_sizes;
2208 +
2209 + loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
2210 + if (!loop_blksizes)
2211 + goto out_blksizes;
2212 +
2213 + loop_hardsizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
2214 + if (!loop_hardsizes)
2215 + goto out_hardsizes;
2216 +
2217 + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
2218 +
2219 + for (i = 0; i < max_loop; i++) { /* kmalloc() does not zero: clear and set up each device */
2220 + struct loop_device *lo = &loop_dev[i];
2221 + memset(lo, 0, sizeof(struct loop_device));
2222 + init_MUTEX(&lo->lo_ctl_mutex);
2223 + init_MUTEX_LOCKED(&lo->lo_sem);
2224 + lo->lo_number = i;
2225 + spin_lock_init(&lo->lo_lock);
2226 + }
2227 +
2228 + memset(loop_sizes, 0, max_loop * sizeof(int));
2229 + memset(loop_blksizes, 0, max_loop * sizeof(int));
2230 + memset(loop_hardsizes, 0, max_loop * sizeof(int));
2231 + blk_size[MAJOR_NR] = loop_sizes; /* publish the zeroed tables in the per-major global arrays */
2232 + blksize_size[MAJOR_NR] = loop_blksizes;
2233 + hardsect_size[MAJOR_NR] = loop_hardsizes;
2234 + for (i = 0; i < max_loop; i++)
2235 + register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
2236 +
2237 + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) { /* only even-indexed entries are examined */
2238 + if (!lo_prealloc[i]) /* zero entries are left untouched */
2239 + continue;
2240 + if (lo_prealloc[i] < LO_PREALLOC_MIN) /* clamp into [LO_PREALLOC_MIN, LO_PREALLOC_MAX] */
2241 + lo_prealloc[i] = LO_PREALLOC_MIN;
2242 + if (lo_prealloc[i] > LO_PREALLOC_MAX)
2243 + lo_prealloc[i] = LO_PREALLOC_MAX;
2244 + }
2245 +
2246 +#if defined(IOCTL32_COMPATIBLE_PTR)
2247 + lock_kernel(); /* the registrations below are made under the big kernel lock; loop_exit() undoes them */
2248 + register_ioctl32_conversion(LOOP_SET_STATUS64, IOCTL32_COMPATIBLE_PTR);
2249 + register_ioctl32_conversion(LOOP_GET_STATUS64, IOCTL32_COMPATIBLE_PTR);
2250 + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP, IOCTL32_COMPATIBLE_PTR);
2251 + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP_V3, IOCTL32_COMPATIBLE_PTR);
2252 + register_ioctl32_conversion(LOOP_RECOMPUTE_DEV_SIZE, IOCTL32_COMPATIBLE_PTR);
2253 + unlock_kernel();
2254 +#endif
2255 +
2256 + devfs_handle = devfs_mk_dir(NULL, "loop", NULL); /* return value is not checked here */
2257 + devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
2258 + MAJOR_NR, 0,
2259 + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
2260 + &lo_fops, NULL);
2261 +
2262 +#ifdef CONFIG_BLK_DEV_LOOP_AES
2263 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
2264 + printk(KERN_INFO "loop: AES key scrubbing enabled\n");
2265 +#endif
2266 +#endif
2267 + printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
2268 + return 0;
2269 +/* error unwinding: each label frees what was allocated before the failing step */
2270 +out_hardsizes:
2271 + kfree(loop_blksizes);
2272 +out_blksizes:
2273 + kfree(loop_sizes);
2274 +out_sizes:
2275 + kfree(loop_dev);
2276 +out_dev:
2277 + if (devfs_unregister_blkdev(MAJOR_NR, "loop")) /* give the major back */
2278 + printk(KERN_WARNING "loop: cannot unregister blkdev\n");
2279 + printk(KERN_ERR "loop: ran out of memory\n");
2280 + return -ENOMEM;
2281 +}
2282 +/* Module teardown: undo the devfs, block-major, table and ioctl32 registrations made by loop_init(). */
2283 +void loop_exit(void)
2284 +{
2285 + devfs_unregister(devfs_handle);
2286 + if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
2287 + printk(KERN_WARNING "loop: cannot unregister blkdev\n");
2288 +
2289 + blk_size[MAJOR_NR] = 0; /* clear the global table pointers before the arrays are freed */
2290 + blksize_size[MAJOR_NR] = 0;
2291 + hardsect_size[MAJOR_NR] = 0;
2292 + kfree(loop_dev);
2293 + kfree(loop_sizes);
2294 + kfree(loop_blksizes);
2295 + kfree(loop_hardsizes);
2296 +
2297 +#if defined(IOCTL32_COMPATIBLE_PTR)
2298 + lock_kernel(); /* same five commands that loop_init() registered */
2299 + unregister_ioctl32_conversion(LOOP_SET_STATUS64);
2300 + unregister_ioctl32_conversion(LOOP_GET_STATUS64);
2301 + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP);
2302 + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP_V3);
2303 + unregister_ioctl32_conversion(LOOP_RECOMPUTE_DEV_SIZE);
2304 + unlock_kernel();
2305 +#endif
2306 +}
2307 +
2308 +module_init(loop_init);
2309 +module_exit(loop_exit);
2310 +/* Built-in (non-module) case: handler for the "max_loop=" setup string; range checking is left to loop_init(). */
2311 +#ifndef MODULE
2312 +static int __init max_loop_setup(char *str)
2313 +{
2314 + max_loop = simple_strtol(str, NULL, 0); /* base 0: radix is taken from the string's prefix */
2315 + return 1;
2316 +}
2317 +
2318 +__setup("max_loop=", max_loop_setup);
2319 +#endif
2320 +/* Record a key-scrub callback and its argument on the device, then wake sleepers on lo_bh_wait. */
2321 +#ifdef CONFIG_BLK_DEV_LOOP_KEYSCRUB
2322 +void loop_add_keyscrub_fn(struct loop_device *lo, void (*fn)(void *), void *ptr)
2323 +{
2324 + lo->lo_keyscrub_ptr = ptr;
2325 + wmb(); /* order the ptr store before the fn store */
2326 + lo->lo_keyscrub_fn = fn;
2327 + wake_up_interruptible(&lo->lo_bh_wait);
2328 +}
2329 +EXPORT_SYMBOL(loop_add_keyscrub_fn);
2330 +#endif
2331 diff -urN linux-2.4.31-noloop/drivers/misc/Makefile linux-2.4.31-AES/drivers/misc/Makefile
2332 --- linux-2.4.31-noloop/drivers/misc/Makefile 2000-12-30 00:07:22.000000000 +0200
2333 +++ linux-2.4.31-AES/drivers/misc/Makefile 2005-06-01 20:59:27.000000000 +0300
2334 @@ -9,8 +9,35 @@
2335 # parent makes..
2336 #
2337
2338 +.S.o:
2339 + $(CC) $(AFLAGS) $(AFLAGS_$@) -c $< -o $*.o
2340 +
2341 O_TARGET := misc.o
2342
2343 +ifeq ($(CONFIG_BLK_DEV_LOOP_AES),y)
2344 +AES_X86_ASM=n
2345 +ifeq ($(CONFIG_X86),y)
2346 +ifneq ($(CONFIG_X86_64),y)
2347 + AES_X86_ASM=y
2348 +endif
2349 +endif
2350 +ifeq ($(AES_X86_ASM),y)
2351 + export-objs += crypto-ksym.o
2352 + obj-y += aes-x86.o md5-x86.o crypto-ksym.o
2353 + AFLAGS_aes-x86.o := -DUSE_UNDERLINE=1
2354 +else
2355 +ifeq ($(CONFIG_X86_64),y)
2356 + export-objs += crypto-ksym.o
2357 + obj-y += aes-amd64.o md5-amd64.o crypto-ksym.o
2358 + AFLAGS_aes-amd64.o := -DUSE_UNDERLINE=1
2359 +else
2360 + export-objs += crypto-ksym.o
2361 + obj-y += aes.o md5.o crypto-ksym.o
2362 + CFLAGS_aes.o := -DDATA_ALWAYS_ALIGNED=1
2363 +endif
2364 +endif
2365 +endif
2366 +
2367 include $(TOPDIR)/Rules.make
2368
2369 fastdep:
2370 diff -urN linux-2.4.31-noloop/drivers/misc/aes-amd64.S linux-2.4.31-AES/drivers/misc/aes-amd64.S
2371 --- linux-2.4.31-noloop/drivers/misc/aes-amd64.S 1970-01-01 02:00:00.000000000 +0200
2372 +++ linux-2.4.31-AES/drivers/misc/aes-amd64.S 2005-06-01 20:59:27.000000000 +0300
2373 @@ -0,0 +1,893 @@
2374 +//
2375 +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
2376 +// All rights reserved.
2377 +//
2378 +// TERMS
2379 +//
2380 +// Redistribution and use in source and binary forms, with or without
2381 +// modification, are permitted subject to the following conditions:
2382 +//
2383 +// 1. Redistributions of source code must retain the above copyright
2384 +// notice, this list of conditions and the following disclaimer.
2385 +//
2386 +// 2. Redistributions in binary form must reproduce the above copyright
2387 +// notice, this list of conditions and the following disclaimer in the
2388 +// documentation and/or other materials provided with the distribution.
2389 +//
2390 +// 3. The copyright holder's name must not be used to endorse or promote
2391 +// any products derived from this software without his specific prior
2392 +// written permission.
2393 +//
2394 +// This software is provided 'as is' with no express or implied warranties
2395 +// of correctness or fitness for purpose.
2396 +
2397 +// Modified by Jari Ruusu, December 24 2001
2398 +// - Converted syntax to GNU CPP/assembler syntax
2399 +// - C programming interface converted back to "old" API
2400 +// - Minor portability cleanups and speed optimizations
2401 +
2402 +// Modified by Jari Ruusu, April 11 2002
2403 +// - Added above copyright and terms to resulting object code so that
2404 +// binary distributions can avoid legal trouble
2405 +
2406 +// Modified by Jari Ruusu, June 12 2004
2407 +// - Converted 32 bit x86 code to 64 bit AMD64 code
2408 +// - Re-wrote encrypt and decrypt code from scratch
2409 +
2410 +// An AES (Rijndael) implementation for the AMD64. This version only
2411 +// implements the standard AES block length (128 bits, 16 bytes). This code
2412 +// does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 registers or the
2413 +// arithmetic status flags. However, the rbx, rbp and r12-r15 registers are
2414 +// preserved across calls.
2415 +
2416 +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
2417 +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
2418 +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
2419 +
2420 +#if defined(USE_UNDERLINE)
2421 +# define aes_set_key _aes_set_key
2422 +# define aes_encrypt _aes_encrypt
2423 +# define aes_decrypt _aes_decrypt
2424 +#endif
2425 +#if !defined(ALIGN64BYTES)
2426 +# define ALIGN64BYTES 64
2427 +#endif
2428 +
2429 + .file "aes-amd64.S"
2430 + .globl aes_set_key
2431 + .globl aes_encrypt
2432 + .globl aes_decrypt
2433 +
2434 + .section .rodata
2435 +copyright:
2436 + .ascii " \000"
2437 + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
2438 + .ascii "All rights reserved.\000"
2439 + .ascii " \000"
2440 + .ascii "TERMS\000"
2441 + .ascii " \000"
2442 + .ascii " Redistribution and use in source and binary forms, with or without\000"
2443 + .ascii " modification, are permitted subject to the following conditions:\000"
2444 + .ascii " \000"
2445 + .ascii " 1. Redistributions of source code must retain the above copyright\000"
2446 + .ascii " notice, this list of conditions and the following disclaimer.\000"
2447 + .ascii " \000"
2448 + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
2449 + .ascii " notice, this list of conditions and the following disclaimer in the\000"
2450 + .ascii " documentation and/or other materials provided with the distribution.\000"
2451 + .ascii " \000"
2452 + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
2453 + .ascii " any products derived from this software without his specific prior\000"
2454 + .ascii " written permission.\000"
2455 + .ascii " \000"
2456 + .ascii " This software is provided 'as is' with no express or implied warranties\000"
2457 + .ascii " of correctness or fitness for purpose.\000"
2458 + .ascii " \000"
2459 +
2460 +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
2461 +
2462 +// offsets in context structure
2463 +
2464 +#define nkey 0 // key length, size 4
2465 +#define nrnd 4 // number of rounds, size 4
2466 +#define ekey 8 // encryption key schedule base address, size 256
2467 +#define dkey 264 // decryption key schedule base address, size 256
2468 +
2469 +// This macro performs a forward encryption cycle. It is entered with
2470 +// the previous round column values in I1E, I2E, I3E and I4E and
2471 +// exits with the final values in the OU1, OU2, OU3 and OU4 registers.
2472 +
2473 +#define fwd_rnd(p1,p2,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \
2474 + movl p2(%rbp),OU1 ;\
2475 + movl p2+4(%rbp),OU2 ;\
2476 + movl p2+8(%rbp),OU3 ;\
2477 + movl p2+12(%rbp),OU4 ;\
2478 + movzbl I1B,%edi ;\
2479 + movzbl I2B,%esi ;\
2480 + movzbl I3B,%r8d ;\
2481 + movzbl I4B,%r13d ;\
2482 + shrl $8,I3E ;\
2483 + shrl $8,I4E ;\
2484 + xorl p1(,%rdi,4),OU1 ;\
2485 + xorl p1(,%rsi,4),OU2 ;\
2486 + xorl p1(,%r8,4),OU3 ;\
2487 + xorl p1(,%r13,4),OU4 ;\
2488 + movzbl I2H,%esi ;\
2489 + movzbl I3B,%r8d ;\
2490 + movzbl I4B,%r13d ;\
2491 + movzbl I1H,%edi ;\
2492 + shrl $8,I3E ;\
2493 + shrl $8,I4E ;\
2494 + xorl p1+tlen(,%rsi,4),OU1 ;\
2495 + xorl p1+tlen(,%r8,4),OU2 ;\
2496 + xorl p1+tlen(,%r13,4),OU3 ;\
2497 + xorl p1+tlen(,%rdi,4),OU4 ;\
2498 + shrl $16,I1E ;\
2499 + shrl $16,I2E ;\
2500 + movzbl I3B,%r8d ;\
2501 + movzbl I4B,%r13d ;\
2502 + movzbl I1B,%edi ;\
2503 + movzbl I2B,%esi ;\
2504 + xorl p1+2*tlen(,%r8,4),OU1 ;\
2505 + xorl p1+2*tlen(,%r13,4),OU2 ;\
2506 + xorl p1+2*tlen(,%rdi,4),OU3 ;\
2507 + xorl p1+2*tlen(,%rsi,4),OU4 ;\
2508 + shrl $8,I4E ;\
2509 + movzbl I1H,%edi ;\
2510 + movzbl I2H,%esi ;\
2511 + shrl $8,I3E ;\
2512 + xorl p1+3*tlen(,I4R,4),OU1 ;\
2513 + xorl p1+3*tlen(,%rdi,4),OU2 ;\
2514 + xorl p1+3*tlen(,%rsi,4),OU3 ;\
2515 + xorl p1+3*tlen(,I3R,4),OU4
2516 +
2517 +// This macro performs an inverse encryption cycle. It is entered with
2518 +// the previous round column values in I1E, I2E, I3E and I4E and
2519 +// exits with the final values in the OU1, OU2, OU3 and OU4 registers.
2520 +
2521 +#define inv_rnd(p1,p2,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \
2522 + movl p2+12(%rbp),OU4 ;\
2523 + movl p2+8(%rbp),OU3 ;\
2524 + movl p2+4(%rbp),OU2 ;\
2525 + movl p2(%rbp),OU1 ;\
2526 + movzbl I4B,%edi ;\
2527 + movzbl I3B,%esi ;\
2528 + movzbl I2B,%r8d ;\
2529 + movzbl I1B,%r13d ;\
2530 + shrl $8,I2E ;\
2531 + shrl $8,I1E ;\
2532 + xorl p1(,%rdi,4),OU4 ;\
2533 + xorl p1(,%rsi,4),OU3 ;\
2534 + xorl p1(,%r8,4),OU2 ;\
2535 + xorl p1(,%r13,4),OU1 ;\
2536 + movzbl I3H,%esi ;\
2537 + movzbl I2B,%r8d ;\
2538 + movzbl I1B,%r13d ;\
2539 + movzbl I4H,%edi ;\
2540 + shrl $8,I2E ;\
2541 + shrl $8,I1E ;\
2542 + xorl p1+tlen(,%rsi,4),OU4 ;\
2543 + xorl p1+tlen(,%r8,4),OU3 ;\
2544 + xorl p1+tlen(,%r13,4),OU2 ;\
2545 + xorl p1+tlen(,%rdi,4),OU1 ;\
2546 + shrl $16,I4E ;\
2547 + shrl $16,I3E ;\
2548 + movzbl I2B,%r8d ;\
2549 + movzbl I1B,%r13d ;\
2550 + movzbl I4B,%edi ;\
2551 + movzbl I3B,%esi ;\
2552 + xorl p1+2*tlen(,%r8,4),OU4 ;\
2553 + xorl p1+2*tlen(,%r13,4),OU3 ;\
2554 + xorl p1+2*tlen(,%rdi,4),OU2 ;\
2555 + xorl p1+2*tlen(,%rsi,4),OU1 ;\
2556 + shrl $8,I1E ;\
2557 + movzbl I4H,%edi ;\
2558 + movzbl I3H,%esi ;\
2559 + shrl $8,I2E ;\
2560 + xorl p1+3*tlen(,I1R,4),OU4 ;\
2561 + xorl p1+3*tlen(,%rdi,4),OU3 ;\
2562 + xorl p1+3*tlen(,%rsi,4),OU2 ;\
2563 + xorl p1+3*tlen(,I2R,4),OU1
2564 +
2565 +// AES (Rijndael) Encryption Subroutine
2566 +
2567 +// rdi = pointer to AES context
2568 +// rsi = pointer to input plaintext bytes
2569 +// rdx = pointer to output ciphertext bytes
2570 +
2571 + .text
2572 + .align ALIGN64BYTES
2573 +aes_encrypt:
2574 + movl (%rsi),%eax // read in plaintext
2575 + movl 4(%rsi),%ecx
2576 + movl 8(%rsi),%r10d
2577 + movl 12(%rsi),%r11d
2578 +
2579 + pushq %rbp // save the registers this routine promises to preserve
2580 + leaq ekey+16(%rdi),%rbp // encryption key pointer
2581 + movq %rdx,%r9 // pointer to out block
2582 + movl nrnd(%rdi),%edx // number of rounds
2583 + pushq %rbx
2584 + pushq %r13
2585 + pushq %r14
2586 + pushq %r15
2587 +
2588 + xorl -16(%rbp),%eax // xor in first round key
2589 + xorl -12(%rbp),%ecx
2590 + xorl -8(%rbp),%r10d
2591 + xorl -4(%rbp),%r11d
2592 +
2593 + subl $10,%edx // 10 rounds: only the common tail at aes_15 is run
2594 + je aes_15
2595 + addq $32,%rbp // more rounds: advance the key pointer by two round keys
2596 + subl $2,%edx
2597 + je aes_13 // 12 rounds: two extra rounds, then the common tail
2598 + addq $32,%rbp // otherwise four extra rounds precede the common tail
2599 +// every round uses aes_ft_tab except the closing one, which uses aes_fl_tab
2600 + fwd_rnd(aes_ft_tab,-64,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2601 + fwd_rnd(aes_ft_tab,-48,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2602 + jmp aes_13 // jump over the alignment padding
2603 + .align ALIGN64BYTES
2604 +aes_13: fwd_rnd(aes_ft_tab,-32,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2605 + fwd_rnd(aes_ft_tab,-16,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2606 + jmp aes_15 // jump over the alignment padding
2607 + .align ALIGN64BYTES
2608 +aes_15: fwd_rnd(aes_ft_tab,0, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2609 + fwd_rnd(aes_ft_tab,16, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2610 + fwd_rnd(aes_ft_tab,32, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2611 + fwd_rnd(aes_ft_tab,48, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2612 + fwd_rnd(aes_ft_tab,64, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2613 + fwd_rnd(aes_ft_tab,80, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2614 + fwd_rnd(aes_ft_tab,96, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2615 + fwd_rnd(aes_ft_tab,112,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2616 + fwd_rnd(aes_ft_tab,128,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
2617 + fwd_rnd(aes_fl_tab,144,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
2618 +
2619 + popq %r15 // restore saved registers in reverse push order
2620 + popq %r14
2621 + popq %r13
2622 + popq %rbx
2623 + popq %rbp
2624 +
2625 + movl %eax,(%r9) // move final values to the output array.
2626 + movl %ecx,4(%r9)
2627 + movl %r10d,8(%r9)
2628 + movl %r11d,12(%r9)
2629 + ret
2630 +
2631 +// AES (Rijndael) Decryption Subroutine
2632 +
2633 +// rdi = pointer to AES context
2634 +// rsi = pointer to input ciphertext bytes
2635 +// rdx = pointer to output plaintext bytes
2636 +
2637 + .align ALIGN64BYTES
2638 +aes_decrypt:
2639 + movl 12(%rsi),%eax // read in ciphertext
2640 + movl 8(%rsi),%ecx
2641 + movl 4(%rsi),%r10d
2642 + movl (%rsi),%r11d
2643 +
2644 + pushq %rbp // save the registers this routine promises to preserve
2645 + leaq dkey+16(%rdi),%rbp // decryption key pointer
2646 + movq %rdx,%r9 // pointer to out block
2647 + movl nrnd(%rdi),%edx // number of rounds
2648 + pushq %rbx
2649 + pushq %r13
2650 + pushq %r14
2651 + pushq %r15
2652 +
2653 + xorl -4(%rbp),%eax // xor in first round key
2654 + xorl -8(%rbp),%ecx
2655 + xorl -12(%rbp),%r10d
2656 + xorl -16(%rbp),%r11d
2657 +
2658 + subl $10,%edx // 10 rounds: only the common tail at aes_25 is run
2659 + je aes_25
2660 + addq $32,%rbp // more rounds: advance the key pointer by two round keys
2661 + subl $2,%edx
2662 + je aes_23 // 12 rounds: two extra rounds, then the common tail
2663 + addq $32,%rbp // otherwise four extra rounds precede the common tail
2664 +// every round uses aes_it_tab except the closing one, which uses aes_il_tab
2665 + inv_rnd(aes_it_tab,-64,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2666 + inv_rnd(aes_it_tab,-48,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2667 + jmp aes_23 // jump over the alignment padding
2668 + .align ALIGN64BYTES
2669 +aes_23: inv_rnd(aes_it_tab,-32,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2670 + inv_rnd(aes_it_tab,-16,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2671 + jmp aes_25 // jump over the alignment padding
2672 + .align ALIGN64BYTES
2673 +aes_25: inv_rnd(aes_it_tab,0, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2674 + inv_rnd(aes_it_tab,16, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2675 + inv_rnd(aes_it_tab,32, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2676 + inv_rnd(aes_it_tab,48, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2677 + inv_rnd(aes_it_tab,64, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2678 + inv_rnd(aes_it_tab,80, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2679 + inv_rnd(aes_it_tab,96, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2680 + inv_rnd(aes_it_tab,112,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2681 + inv_rnd(aes_it_tab,128,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
2682 + inv_rnd(aes_il_tab,144,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
2683 +
2684 + popq %r15 // restore saved registers in reverse push order
2685 + popq %r14
2686 + popq %r13
2687 + popq %rbx
2688 + popq %rbp
2689 +
2690 + movl %eax,12(%r9) // move final values to the output array.
2691 + movl %ecx,8(%r9)
2692 + movl %r10d,4(%r9)
2693 + movl %r11d,(%r9)
2694 + ret
2695 +
2696 +// AES (Rijndael) Key Schedule Subroutine
2697 +
2698 +// This macro performs a column mixing operation on an input 32-bit
2699 +// word to give a 32-bit result. It uses each of the 4 bytes in the
2700 +// input column to index 4 different tables of 256 32-bit words
2701 +// that are xored together to form the output value.
2702 +
2703 +#define mix_col(p1) \
2704 + movzbl %bl,%ecx ;\
2705 + movl p1(,%rcx,4),%eax ;\
2706 + movzbl %bh,%ecx ;\
2707 + ror $16,%ebx ;\
2708 + xorl p1+tlen(,%rcx,4),%eax ;\
2709 + movzbl %bl,%ecx ;\
2710 + xorl p1+2*tlen(,%rcx,4),%eax ;\
2711 + movzbl %bh,%ecx ;\
2712 + xorl p1+3*tlen(,%rcx,4),%eax
2713 +
2714 +// Key Schedule Macros
2715 +
2716 +#define ksc4(p1) \
2717 + rol $24,%ebx ;\
2718 + mix_col(aes_fl_tab) ;\
2719 + ror $8,%ebx ;\
2720 + xorl 4*p1+aes_rcon_tab,%eax ;\
2721 + xorl %eax,%esi ;\
2722 + xorl %esi,%ebp ;\
2723 + movl %esi,16*p1(%rdi) ;\
2724 + movl %ebp,16*p1+4(%rdi) ;\
2725 + xorl %ebp,%edx ;\
2726 + xorl %edx,%ebx ;\
2727 + movl %edx,16*p1+8(%rdi) ;\
2728 + movl %ebx,16*p1+12(%rdi)
2729 +
2730 +#define ksc6(p1) \
2731 + rol $24,%ebx ;\
2732 + mix_col(aes_fl_tab) ;\
2733 + ror $8,%ebx ;\
2734 + xorl 4*p1+aes_rcon_tab,%eax ;\
2735 + xorl 24*p1-24(%rdi),%eax ;\
2736 + movl %eax,24*p1(%rdi) ;\
2737 + xorl 24*p1-20(%rdi),%eax ;\
2738 + movl %eax,24*p1+4(%rdi) ;\
2739 + xorl %eax,%esi ;\
2740 + xorl %esi,%ebp ;\
2741 + movl %esi,24*p1+8(%rdi) ;\
2742 + movl %ebp,24*p1+12(%rdi) ;\
2743 + xorl %ebp,%edx ;\
2744 + xorl %edx,%ebx ;\
2745 + movl %edx,24*p1+16(%rdi) ;\
2746 + movl %ebx,24*p1+20(%rdi)
2747 +
2748 +#define ksc8(p1) \
2749 + rol $24,%ebx ;\
2750 + mix_col(aes_fl_tab) ;\
2751 + ror $8,%ebx ;\
2752 + xorl 4*p1+aes_rcon_tab,%eax ;\
2753 + xorl 32*p1-32(%rdi),%eax ;\
2754 + movl %eax,32*p1(%rdi) ;\
2755 + xorl 32*p1-28(%rdi),%eax ;\
2756 + movl %eax,32*p1+4(%rdi) ;\
2757 + xorl 32*p1-24(%rdi),%eax ;\
2758 + movl %eax,32*p1+8(%rdi) ;\
2759 + xorl 32*p1-20(%rdi),%eax ;\
2760 + movl %eax,32*p1+12(%rdi) ;\
2761 + pushq %rbx ;\
2762 + movl %eax,%ebx ;\
2763 + mix_col(aes_fl_tab) ;\
2764 + popq %rbx ;\
2765 + xorl %eax,%esi ;\
2766 + xorl %esi,%ebp ;\
2767 + movl %esi,32*p1+16(%rdi) ;\
2768 + movl %ebp,32*p1+20(%rdi) ;\
2769 + xorl %ebp,%edx ;\
2770 + xorl %edx,%ebx ;\
2771 + movl %edx,32*p1+24(%rdi) ;\
2772 + movl %ebx,32*p1+28(%rdi)
2773 +
2774 +// rdi = pointer to AES context
2775 +// rsi = pointer to key bytes
2776 +// rdx = key length, bytes or bits
2777 +// rcx = ed_flag, 1=encrypt only, 0=both encrypt and decrypt
2778 +
2779 + .align ALIGN64BYTES
2780 +aes_set_key:
2781 + pushfq // save flags: cld below changes the direction flag
2782 + pushq %rbp
2783 + pushq %rbx
2784 +
2785 + movq %rcx,%r11 // ed_flg
2786 + movq %rdx,%rcx // key length
2787 + movq %rdi,%r10 // AES context
2788 +
2789 + cmpl $128,%ecx // a length of 128 or more is taken to be in bits
2790 + jb aes_30
2791 + shrl $3,%ecx // bits -> bytes
2792 +aes_30: cmpl $32,%ecx
2793 + je aes_32
2794 + cmpl $24,%ecx
2795 + je aes_32
2796 + movl $16,%ecx // anything other than 24 or 32 bytes is treated as 16
2797 +aes_32: shrl $2,%ecx // bytes -> 32-bit words
2798 + movl %ecx,nkey(%r10)
2799 + leaq 6(%rcx),%rax // 10/12/14 for 4/6/8 32-bit key length
2800 + movl %eax,nrnd(%r10)
2801 + leaq ekey(%r10),%rdi // key position in AES context
2802 + cld
2803 + movl %ecx,%eax // save key length in eax
2804 + rep ; movsl // words in the key schedule
2805 + movl -4(%rsi),%ebx // put some values in registers
2806 + movl -8(%rsi),%edx // to allow faster code
2807 + movl -12(%rsi),%ebp
2808 + movl -16(%rsi),%esi
2809 +
2810 + cmpl $4,%eax // jump on key size
2811 + je aes_36
2812 + cmpl $6,%eax
2813 + je aes_35
2814 +// fall through: 8-word key, expanded with seven ksc8 steps
2815 + ksc8(0)
2816 + ksc8(1)
2817 + ksc8(2)
2818 + ksc8(3)
2819 + ksc8(4)
2820 + ksc8(5)
2821 + ksc8(6)
2822 + jmp aes_37
2823 +aes_35: ksc6(0)
2824 + ksc6(1)
2825 + ksc6(2)
2826 + ksc6(3)
2827 + ksc6(4)
2828 + ksc6(5)
2829 + ksc6(6)
2830 + ksc6(7)
2831 + jmp aes_37
2832 +aes_36: ksc4(0)
2833 + ksc4(1)
2834 + ksc4(2)
2835 + ksc4(3)
2836 + ksc4(4)
2837 + ksc4(5)
2838 + ksc4(6)
2839 + ksc4(7)
2840 + ksc4(8)
2841 + ksc4(9)
2842 +aes_37: cmpl $0,%r11d // ed_flg
2843 + jne aes_39 // non-zero flag: encrypt only, no decryption schedule is built
2844 +
2845 +// compile decryption key schedule from encryption schedule - reverse
2846 +// order and do mix_column operation on round keys except first and last
2847 +
2848 + movl nrnd(%r10),%eax // kt = cx->d_key + nc * cx->Nrnd
2849 + shl $2,%rax
2850 + leaq dkey(%r10,%rax,4),%rdi // rdi = dkey + 16 * nrnd, the highest 16-byte slot written
2851 + leaq ekey(%r10),%rsi // kf = cx->e_key
2852 +
2853 + movsq // copy first round key (unmodified)
2854 + movsq
2855 + subq $32,%rdi // 16 bytes were just written: step back to the preceding slot
2856 + movl $1,%r9d // r9d counts round keys, starting at 1
2857 +aes_38: // do mix column on each column of
2858 + lodsl // each round key
2859 + movl %eax,%ebx
2860 + mix_col(aes_im_tab)
2861 + stosl
2862 + lodsl
2863 + movl %eax,%ebx
2864 + mix_col(aes_im_tab)
2865 + stosl
2866 + lodsl
2867 + movl %eax,%ebx
2868 + mix_col(aes_im_tab)
2869 + stosl
2870 + lodsl
2871 + movl %eax,%ebx
2872 + mix_col(aes_im_tab)
2873 + stosl
2874 + subq $32,%rdi // step back to the preceding 16-byte slot
2875 +
2876 + incl %r9d
2877 + cmpl nrnd(%r10),%r9d
2878 + jb aes_38 // repeat while r9d < nrnd
2879 +
2880 + movsq // copy last round key (unmodified)
2881 + movsq
2882 +aes_39: popq %rbx
2883 + popq %rbp
2884 + popfq // restores the direction flag saved on entry
2885 + ret
2886 +
2887 +
2888 +// finite field multiplies by {02}, {04} and {08}
2889 +
2890 +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
2891 +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
2892 +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
2893 +
2894 +// finite field multiplies required in table generation
2895 +
2896 +#define f3(x) (f2(x) ^ x)
2897 +#define f9(x) (f8(x) ^ x)
2898 +#define fb(x) (f8(x) ^ f2(x) ^ x)
2899 +#define fd(x) (f8(x) ^ f4(x) ^ x)
2900 +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
2901 +
2902 +// These defines generate the forward table entries
2903 +
2904 +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
2905 +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
2906 +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
2907 +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
2908 +
2909 +// These defines generate the inverse table entries
2910 +
2911 +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
2912 +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
2913 +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
2914 +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
2915 +
2916 +// These defines generate entries for the last round tables
2917 +
2918 +#define w0(x) (x)
2919 +#define w1(x) (x << 8)
2920 +#define w2(x) (x << 16)
2921 +#define w3(x) (x << 24)
2922 +
2923 +// macro to generate inverse mix column tables (needed for the key schedule)
2924 +
2925 +#define im_data0(p1) \
2926 + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
2927 + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
2928 + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
2929 + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
2930 +#define im_data1(p1) \
2931 + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
2932 + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
2933 + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
2934 + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
2935 +#define im_data2(p1) \
2936 + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
2937 + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
2938 + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
2939 + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
2940 +#define im_data3(p1) \
2941 + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
2942 + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
2943 + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
2944 + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
2945 +#define im_data4(p1) \
2946 + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
2947 + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
2948 + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
2949 + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
2950 +#define im_data5(p1) \
2951 + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
2952 + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
2953 + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
2954 + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
2955 +#define im_data6(p1) \
2956 + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
2957 + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
2958 + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
2959 + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
2960 +#define im_data7(p1) \
2961 + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
2962 + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
2963 + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
2964 + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
2965 +
2966 +// S-box data - 256 entries (8 macros of 32 entries, each entry passed through p1)
2967 +
2968 +#define sb_data0(p1) \
2969 + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
2970 + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
2971 + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
2972 + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
2973 +#define sb_data1(p1) \
2974 + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
2975 + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
2976 + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
2977 + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
2978 +#define sb_data2(p1) \
2979 + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
2980 + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
2981 + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
2982 + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
2983 +#define sb_data3(p1) \
2984 + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
2985 + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
2986 + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
2987 + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
2988 +#define sb_data4(p1) \
2989 + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
2990 + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
2991 + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
2992 + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
2993 +#define sb_data5(p1) \
2994 + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
2995 + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
2996 + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
2997 + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
2998 +#define sb_data6(p1) \
2999 + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
3000 + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
3001 + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
3002 + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
3003 +#define sb_data7(p1) \
3004 + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
3005 + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
3006 + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
3007 + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
3008 +
3009 +// Inverse S-box data - 256 entries (8 macros of 32 entries, each entry passed through p1)
3010 +
3011 +#define ib_data0(p1) \
3012 + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
3013 + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
3014 + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
3015 + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
3016 +#define ib_data1(p1) \
3017 + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
3018 + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
3019 + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
3020 + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
3021 +#define ib_data2(p1) \
3022 + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
3023 + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
3024 + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
3025 + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
3026 +#define ib_data3(p1) \
3027 + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
3028 + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
3029 + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
3030 + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
3031 +#define ib_data4(p1) \
3032 + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
3033 + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
3034 + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
3035 + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
3036 +#define ib_data5(p1) \
3037 + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
3038 + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
3039 + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
3040 + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
3041 +#define ib_data6(p1) \
3042 + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
3043 + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
3044 + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
3045 + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
3046 +#define ib_data7(p1) \
3047 + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
3048 + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
3049 + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
3050 + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
3051 +
3052 +// The rcon_table (needed for the key schedule)
3053 +//
3054 +// Here is original Dr Brian Gladman's source code:
3055 +// _rcon_tab:
3056 +// %assign x 1
3057 +// %rep 29
3058 +// dd x
3059 +// %assign x f2(x)
3060 +// %endrep
3061 +//
3062 +// Here is precomputed output (it's more portable this way):
3063 +// (29 words: 0x01, then f2() of the previous entry each time)
3064 + .section .rodata
3065 + .align ALIGN64BYTES
3066 +aes_rcon_tab:
3067 + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
3068 + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
3069 + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
3070 + .long 0xb3,0x7d,0xfa,0xef,0xc5
3071 +
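3072 +// The forward xor tables: aes_ft_tab (u0..u3) and the last round table aes_fl_tab (w0..w3), each 4 x 256 words built from the S-box data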
3073 +
3074 + .align ALIGN64BYTES
3075 +aes_ft_tab:
3076 + sb_data0(u0)
3077 + sb_data1(u0)
3078 + sb_data2(u0)
3079 + sb_data3(u0)
3080 + sb_data4(u0)
3081 + sb_data5(u0)
3082 + sb_data6(u0)
3083 + sb_data7(u0)
3084 +
3085 + sb_data0(u1)
3086 + sb_data1(u1)
3087 + sb_data2(u1)
3088 + sb_data3(u1)
3089 + sb_data4(u1)
3090 + sb_data5(u1)
3091 + sb_data6(u1)
3092 + sb_data7(u1)
3093 +
3094 + sb_data0(u2)
3095 + sb_data1(u2)
3096 + sb_data2(u2)
3097 + sb_data3(u2)
3098 + sb_data4(u2)
3099 + sb_data5(u2)
3100 + sb_data6(u2)
3101 + sb_data7(u2)
3102 +
3103 + sb_data0(u3)
3104 + sb_data1(u3)
3105 + sb_data2(u3)
3106 + sb_data3(u3)
3107 + sb_data4(u3)
3108 + sb_data5(u3)
3109 + sb_data6(u3)
3110 + sb_data7(u3)
3111 +
3112 + .align ALIGN64BYTES
3113 +aes_fl_tab:
3114 + sb_data0(w0)
3115 + sb_data1(w0)
3116 + sb_data2(w0)
3117 + sb_data3(w0)
3118 + sb_data4(w0)
3119 + sb_data5(w0)
3120 + sb_data6(w0)
3121 + sb_data7(w0)
3122 +
3123 + sb_data0(w1)
3124 + sb_data1(w1)
3125 + sb_data2(w1)
3126 + sb_data3(w1)
3127 + sb_data4(w1)
3128 + sb_data5(w1)
3129 + sb_data6(w1)
3130 + sb_data7(w1)
3131 +
3132 + sb_data0(w2)
3133 + sb_data1(w2)
3134 + sb_data2(w2)
3135 + sb_data3(w2)
3136 + sb_data4(w2)
3137 + sb_data5(w2)
3138 + sb_data6(w2)
3139 + sb_data7(w2)
3140 +
3141 + sb_data0(w3)
3142 + sb_data1(w3)
3143 + sb_data2(w3)
3144 + sb_data3(w3)
3145 + sb_data4(w3)
3146 + sb_data5(w3)
3147 + sb_data6(w3)
3148 + sb_data7(w3)
3149 +
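3150 +// The inverse xor tables: aes_it_tab (v0..v3) and the last round table aes_il_tab (w0..w3), each 4 x 256 words built from the inverse S-box data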
3151 +
3152 + .align ALIGN64BYTES
3153 +aes_it_tab:
3154 + ib_data0(v0)
3155 + ib_data1(v0)
3156 + ib_data2(v0)
3157 + ib_data3(v0)
3158 + ib_data4(v0)
3159 + ib_data5(v0)
3160 + ib_data6(v0)
3161 + ib_data7(v0)
3162 +
3163 + ib_data0(v1)
3164 + ib_data1(v1)
3165 + ib_data2(v1)
3166 + ib_data3(v1)
3167 + ib_data4(v1)
3168 + ib_data5(v1)
3169 + ib_data6(v1)
3170 + ib_data7(v1)
3171 +
3172 + ib_data0(v2)
3173 + ib_data1(v2)
3174 + ib_data2(v2)
3175 + ib_data3(v2)
3176 + ib_data4(v2)
3177 + ib_data5(v2)
3178 + ib_data6(v2)
3179 + ib_data7(v2)
3180 +
3181 + ib_data0(v3)
3182 + ib_data1(v3)
3183 + ib_data2(v3)
3184 + ib_data3(v3)
3185 + ib_data4(v3)
3186 + ib_data5(v3)
3187 + ib_data6(v3)
3188 + ib_data7(v3)
3189 +
3190 + .align ALIGN64BYTES
3191 +aes_il_tab:
3192 + ib_data0(w0)
3193 + ib_data1(w0)
3194 + ib_data2(w0)
3195 + ib_data3(w0)
3196 + ib_data4(w0)
3197 + ib_data5(w0)
3198 + ib_data6(w0)
3199 + ib_data7(w0)
3200 +
3201 + ib_data0(w1)
3202 + ib_data1(w1)
3203 + ib_data2(w1)
3204 + ib_data3(w1)
3205 + ib_data4(w1)
3206 + ib_data5(w1)
3207 + ib_data6(w1)
3208 + ib_data7(w1)
3209 +
3210 + ib_data0(w2)
3211 + ib_data1(w2)
3212 + ib_data2(w2)
3213 + ib_data3(w2)
3214 + ib_data4(w2)
3215 + ib_data5(w2)
3216 + ib_data6(w2)
3217 + ib_data7(w2)
3218 +
3219 + ib_data0(w3)
3220 + ib_data1(w3)
3221 + ib_data2(w3)
3222 + ib_data3(w3)
3223 + ib_data4(w3)
3224 + ib_data5(w3)
3225 + ib_data6(w3)
3226 + ib_data7(w3)
3227 +
3228 +// The inverse mix column tables: aes_im_tab holds v0..v3 applied to every byte value (4 x 256 words)
3229 +
3230 + .align ALIGN64BYTES
3231 +aes_im_tab:
3232 + im_data0(v0)
3233 + im_data1(v0)
3234 + im_data2(v0)
3235 + im_data3(v0)
3236 + im_data4(v0)
3237 + im_data5(v0)
3238 + im_data6(v0)
3239 + im_data7(v0)
3240 +
3241 + im_data0(v1)
3242 + im_data1(v1)
3243 + im_data2(v1)
3244 + im_data3(v1)
3245 + im_data4(v1)
3246 + im_data5(v1)
3247 + im_data6(v1)
3248 + im_data7(v1)
3249 +
3250 + im_data0(v2)
3251 + im_data1(v2)
3252 + im_data2(v2)
3253 + im_data3(v2)
3254 + im_data4(v2)
3255 + im_data5(v2)
3256 + im_data6(v2)
3257 + im_data7(v2)
3258 +
3259 + im_data0(v3)
3260 + im_data1(v3)
3261 + im_data2(v3)
3262 + im_data3(v3)
3263 + im_data4(v3)
3264 + im_data5(v3)
3265 + im_data6(v3)
3266 + im_data7(v3)
3267 diff -urN linux-2.4.31-noloop/drivers/misc/aes-x86.S linux-2.4.31-AES/drivers/misc/aes-x86.S
3268 --- linux-2.4.31-noloop/drivers/misc/aes-x86.S 1970-01-01 02:00:00.000000000 +0200
3269 +++ linux-2.4.31-AES/drivers/misc/aes-x86.S 2005-06-01 20:59:27.000000000 +0300
3270 @@ -0,0 +1,922 @@
3271 +//
3272 +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
3273 +// All rights reserved.
3274 +//
3275 +// TERMS
3276 +//
3277 +// Redistribution and use in source and binary forms, with or without
3278 +// modification, are permitted subject to the following conditions:
3279 +//
3280 +// 1. Redistributions of source code must retain the above copyright
3281 +// notice, this list of conditions and the following disclaimer.
3282 +//
3283 +// 2. Redistributions in binary form must reproduce the above copyright
3284 +// notice, this list of conditions and the following disclaimer in the
3285 +// documentation and/or other materials provided with the distribution.
3286 +//
3287 +// 3. The copyright holder's name must not be used to endorse or promote
3288 +// any products derived from this software without his specific prior
3289 +// written permission.
3290 +//
3291 +// This software is provided 'as is' with no express or implied warranties
3292 +// of correctness or fitness for purpose.
3293 +
3294 +// Modified by Jari Ruusu, December 24 2001
3295 +// - Converted syntax to GNU CPP/assembler syntax
3296 +// - C programming interface converted back to "old" API
3297 +// - Minor portability cleanups and speed optimizations
3298 +
3299 +// Modified by Jari Ruusu, April 11 2002
3300 +// - Added above copyright and terms to resulting object code so that
3301 +// binary distributions can avoid legal trouble
3302 +
3303 +// An AES (Rijndael) implementation for x86 compatible processors. This
3304 +// version uses i386 instruction set but instruction scheduling is optimized
3305 +// for Pentium-2. This version only implements the standard AES block length
3306 +// (128 bits, 16 bytes). This code does not preserve the eax, ecx or edx
3307 +// registers or the arithmetic status flags. However, the ebx, esi, edi, and
3308 +// ebp registers are preserved across calls.
3309 +
3310 +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
3311 +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
3312 +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
3313 +
3314 +#if defined(USE_UNDERLINE)
3315 +# define aes_set_key _aes_set_key
3316 +# define aes_encrypt _aes_encrypt
3317 +# define aes_decrypt _aes_decrypt
3318 +#endif
3319 +#if !defined(ALIGN32BYTES)
3320 +# define ALIGN32BYTES 32
3321 +#endif
3322 +
3323 + .file "aes-x86.S"
3324 + .globl aes_set_key
3325 + .globl aes_encrypt
3326 + .globl aes_decrypt
3327 +
3328 + .text
3329 +copyright:
3330 + .ascii " \000"
3331 + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
3332 + .ascii "All rights reserved.\000"
3333 + .ascii " \000"
3334 + .ascii "TERMS\000"
3335 + .ascii " \000"
3336 + .ascii " Redistribution and use in source and binary forms, with or without\000"
3337 + .ascii " modification, are permitted subject to the following conditions:\000"
3338 + .ascii " \000"
3339 + .ascii " 1. Redistributions of source code must retain the above copyright\000"
3340 + .ascii " notice, this list of conditions and the following disclaimer.\000"
3341 + .ascii " \000"
3342 + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
3343 + .ascii " notice, this list of conditions and the following disclaimer in the\000"
3344 + .ascii " documentation and/or other materials provided with the distribution.\000"
3345 + .ascii " \000"
3346 + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
3347 + .ascii " any products derived from this software without his specific prior\000"
3348 + .ascii " written permission.\000"
3349 + .ascii " \000"
3350 + .ascii " This software is provided 'as is' with no express or implied warranties\000"
3351 + .ascii " of correctness or fitness for purpose.\000"
3352 + .ascii " \000"
3353 +
3354 +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
3355 +
3356 +// offsets to parameters with one register pushed onto stack
3357 +
3358 +#define ctx 8 // AES context structure
3359 +#define in_blk 12 // input byte array address parameter
3360 +#define out_blk 16 // output byte array address parameter
3361 +
3362 +// offsets in context structure
3363 +
3364 +#define nkey 0 // key length, size 4
3365 +#define nrnd 4 // number of rounds, size 4
3366 +#define ekey 8 // encryption key schedule base address, size 256
3367 +#define dkey 264 // decryption key schedule base address, size 256
3368 +
3369 +// This macro performs a forward encryption cycle. It is entered with the
3370 +// previous round column values in %eax, %ebx, %esi and %edi and exits with
3371 +// the new values in the same registers. p1 is the base of the four xor
3372 +// tables, p2 the round key offset from %ebp; %ecx, %edx, (%esp) and 4(%esp) are scratch.
3373 +#define fwd_rnd(p1,p2) \
3374 + mov %ebx,(%esp) ;\
3375 + movzbl %al,%edx ;\
3376 + mov %eax,%ecx ;\
3377 + mov p2(%ebp),%eax ;\
3378 + mov %edi,4(%esp) ;\
3379 + mov p2+12(%ebp),%edi ;\
3380 + xor p1(,%edx,4),%eax ;\
3381 + movzbl %ch,%edx ;\
3382 + shr $16,%ecx ;\
3383 + mov p2+4(%ebp),%ebx ;\
3384 + xor p1+tlen(,%edx,4),%edi ;\
3385 + movzbl %cl,%edx ;\
3386 + movzbl %ch,%ecx ;\
3387 + xor p1+3*tlen(,%ecx,4),%ebx ;\
3388 + mov %esi,%ecx ;\
3389 + mov p1+2*tlen(,%edx,4),%esi ;\
3390 + movzbl %cl,%edx ;\
3391 + xor p1(,%edx,4),%esi ;\
3392 + movzbl %ch,%edx ;\
3393 + shr $16,%ecx ;\
3394 + xor p1+tlen(,%edx,4),%ebx ;\
3395 + movzbl %cl,%edx ;\
3396 + movzbl %ch,%ecx ;\
3397 + xor p1+2*tlen(,%edx,4),%eax ;\
3398 + mov (%esp),%edx ;\
3399 + xor p1+3*tlen(,%ecx,4),%edi ;\
3400 + movzbl %dl,%ecx ;\
3401 + xor p2+8(%ebp),%esi ;\
3402 + xor p1(,%ecx,4),%ebx ;\
3403 + movzbl %dh,%ecx ;\
3404 + shr $16,%edx ;\
3405 + xor p1+tlen(,%ecx,4),%eax ;\
3406 + movzbl %dl,%ecx ;\
3407 + movzbl %dh,%edx ;\
3408 + xor p1+2*tlen(,%ecx,4),%edi ;\
3409 + mov 4(%esp),%ecx ;\
3410 + xor p1+3*tlen(,%edx,4),%esi ;\
3411 + movzbl %cl,%edx ;\
3412 + xor p1(,%edx,4),%edi ;\
3413 + movzbl %ch,%edx ;\
3414 + shr $16,%ecx ;\
3415 + xor p1+tlen(,%edx,4),%esi ;\
3416 + movzbl %cl,%edx ;\
3417 + movzbl %ch,%ecx ;\
3418 + xor p1+2*tlen(,%edx,4),%ebx ;\
3419 + xor p1+3*tlen(,%ecx,4),%eax
3420 +
3421 +// This macro performs an inverse encryption cycle. It is entered with the
3422 +// previous round column values in %eax, %ebx, %esi and %edi and exits with
3423 +// the new values in the same registers. p1 is the base of the four xor
3424 +// tables, p2 the round key offset from %ebp; %ecx, %edx, (%esp) and 4(%esp) are scratch.
3425 +#define inv_rnd(p1,p2) \
3426 + movzbl %al,%edx ;\
3427 + mov %ebx,(%esp) ;\
3428 + mov %eax,%ecx ;\
3429 + mov p2(%ebp),%eax ;\
3430 + mov %edi,4(%esp) ;\
3431 + mov p2+4(%ebp),%ebx ;\
3432 + xor p1(,%edx,4),%eax ;\
3433 + movzbl %ch,%edx ;\
3434 + shr $16,%ecx ;\
3435 + mov p2+12(%ebp),%edi ;\
3436 + xor p1+tlen(,%edx,4),%ebx ;\
3437 + movzbl %cl,%edx ;\
3438 + movzbl %ch,%ecx ;\
3439 + xor p1+3*tlen(,%ecx,4),%edi ;\
3440 + mov %esi,%ecx ;\
3441 + mov p1+2*tlen(,%edx,4),%esi ;\
3442 + movzbl %cl,%edx ;\
3443 + xor p1(,%edx,4),%esi ;\
3444 + movzbl %ch,%edx ;\
3445 + shr $16,%ecx ;\
3446 + xor p1+tlen(,%edx,4),%edi ;\
3447 + movzbl %cl,%edx ;\
3448 + movzbl %ch,%ecx ;\
3449 + xor p1+2*tlen(,%edx,4),%eax ;\
3450 + mov (%esp),%edx ;\
3451 + xor p1+3*tlen(,%ecx,4),%ebx ;\
3452 + movzbl %dl,%ecx ;\
3453 + xor p2+8(%ebp),%esi ;\
3454 + xor p1(,%ecx,4),%ebx ;\
3455 + movzbl %dh,%ecx ;\
3456 + shr $16,%edx ;\
3457 + xor p1+tlen(,%ecx,4),%esi ;\
3458 + movzbl %dl,%ecx ;\
3459 + movzbl %dh,%edx ;\
3460 + xor p1+2*tlen(,%ecx,4),%edi ;\
3461 + mov 4(%esp),%ecx ;\
3462 + xor p1+3*tlen(,%edx,4),%eax ;\
3463 + movzbl %cl,%edx ;\
3464 + xor p1(,%edx,4),%edi ;\
3465 + movzbl %ch,%edx ;\
3466 + shr $16,%ecx ;\
3467 + xor p1+tlen(,%edx,4),%eax ;\
3468 + movzbl %cl,%edx ;\
3469 + movzbl %ch,%ecx ;\
3470 + xor p1+2*tlen(,%edx,4),%ebx ;\
3471 + xor p1+3*tlen(,%ecx,4),%esi
3472 +
3473 +// AES (Rijndael) Encryption Subroutine
3474 +// Encrypts the 16-byte block at in_blk with the ekey schedule and stores it at out_blk.
3475 + .text
3476 + .align ALIGN32BYTES
3477 +aes_encrypt:
3478 + push %ebp // the one push assumed by the ctx/in_blk/out_blk offsets
3479 + mov ctx(%esp),%ebp // pointer to context
3480 + mov in_blk(%esp),%ecx // input block address
3481 + push %ebx
3482 + push %esi
3483 + push %edi
3484 + mov nrnd(%ebp),%edx // number of rounds
3485 + lea ekey+16(%ebp),%ebp // key pointer
3486 +
3487 +// input four columns and xor in first round key
3488 +
3489 + mov (%ecx),%eax
3490 + mov 4(%ecx),%ebx
3491 + mov 8(%ecx),%esi
3492 + mov 12(%ecx),%edi
3493 + xor -16(%ebp),%eax
3494 + xor -12(%ebp),%ebx
3495 + xor -8(%ebp),%esi
3496 + xor -4(%ebp),%edi
3497 +
3498 + sub $8,%esp // space for register saves on stack
3499 +
3500 + sub $10,%edx // nrnd == 10: key pointer is already in place
3501 + je aes_15
3502 + add $32,%ebp // more rounds: step over two more round keys
3503 + sub $2,%edx // nrnd == 12: enter at aes_13
3504 + je aes_13
3505 + add $32,%ebp // otherwise step over two more round keys again
3506 +
3507 + fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
3508 + fwd_rnd(aes_ft_tab,-48)
3509 +aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
3510 + fwd_rnd(aes_ft_tab,-16)
3511 +aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
3512 + fwd_rnd(aes_ft_tab,16)
3513 + fwd_rnd(aes_ft_tab,32)
3514 + fwd_rnd(aes_ft_tab,48)
3515 + fwd_rnd(aes_ft_tab,64)
3516 + fwd_rnd(aes_ft_tab,80)
3517 + fwd_rnd(aes_ft_tab,96)
3518 + fwd_rnd(aes_ft_tab,112)
3519 + fwd_rnd(aes_ft_tab,128)
3520 + fwd_rnd(aes_fl_tab,144) // last round uses a different table
3521 +
3522 +// move final values to the output array.
3523 +
3524 + mov out_blk+20(%esp),%ebp // +20 = three more pushes plus the 8-byte save area
3525 + add $8,%esp // release the save area
3526 + mov %eax,(%ebp)
3527 + mov %ebx,4(%ebp)
3528 + mov %esi,8(%ebp)
3529 + mov %edi,12(%ebp)
3530 + pop %edi
3531 + pop %esi
3532 + pop %ebx
3533 + pop %ebp
3534 + ret
3535 +
3536 +
3537 +// AES (Rijndael) Decryption Subroutine
3538 +// Decrypts the 16-byte block at in_blk with the dkey schedule and stores it at out_blk.
3539 + .align ALIGN32BYTES
3540 +aes_decrypt:
3541 + push %ebp // the one push assumed by the ctx/in_blk/out_blk offsets
3542 + mov ctx(%esp),%ebp // pointer to context
3543 + mov in_blk(%esp),%ecx // input block address
3544 + push %ebx
3545 + push %esi
3546 + push %edi
3547 + mov nrnd(%ebp),%edx // number of rounds
3548 + lea dkey+16(%ebp),%ebp // key pointer
3549 +
3550 +// input four columns and xor in first round key
3551 +
3552 + mov (%ecx),%eax
3553 + mov 4(%ecx),%ebx
3554 + mov 8(%ecx),%esi
3555 + mov 12(%ecx),%edi
3556 + xor -16(%ebp),%eax
3557 + xor -12(%ebp),%ebx
3558 + xor -8(%ebp),%esi
3559 + xor -4(%ebp),%edi
3560 +
3561 + sub $8,%esp // space for register saves on stack
3562 +
3563 + sub $10,%edx // nrnd == 10: key pointer is already in place
3564 + je aes_25
3565 + add $32,%ebp // more rounds: step over two more round keys
3566 + sub $2,%edx // nrnd == 12: enter at aes_23
3567 + je aes_23
3568 + add $32,%ebp // otherwise step over two more round keys again
3569 +
3570 + inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
3571 + inv_rnd(aes_it_tab,-48)
3572 +aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
3573 + inv_rnd(aes_it_tab,-16)
3574 +aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
3575 + inv_rnd(aes_it_tab,16)
3576 + inv_rnd(aes_it_tab,32)
3577 + inv_rnd(aes_it_tab,48)
3578 + inv_rnd(aes_it_tab,64)
3579 + inv_rnd(aes_it_tab,80)
3580 + inv_rnd(aes_it_tab,96)
3581 + inv_rnd(aes_it_tab,112)
3582 + inv_rnd(aes_it_tab,128)
3583 + inv_rnd(aes_il_tab,144) // last round uses a different table
3584 +
3585 +// move final values to the output array.
3586 +
3587 + mov out_blk+20(%esp),%ebp // +20 = three more pushes plus the 8-byte save area
3588 + add $8,%esp // release the save area
3589 + mov %eax,(%ebp)
3590 + mov %ebx,4(%ebp)
3591 + mov %esi,8(%ebp)
3592 + mov %edi,12(%ebp)
3593 + pop %edi
3594 + pop %esi
3595 + pop %ebx
3596 + pop %ebp
3597 + ret
3598 +
3599 +// AES (Rijndael) Key Schedule Subroutine
3600 +
3601 +// input/output parameters: offsets from %ebp inside aes_set_key, which
3602 +// pushes the flags and the old %ebp before copying %esp into %ebp
3603 +#define aes_cx 12 // AES context
3604 +#define in_key 16 // key input array address
3605 +#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
3606 +#define ed_flg 24 // 0=create both encr/decr keys, nonzero (e.g. 1)=create encr key only
3607 +
3608 +// offsets for locals
3609 +
3610 +#define cnt -4 // round counter used while building the decryption schedule
3611 +#define slen 8 // bytes reserved below %ebp for locals
3612 +
3613 +// This macro performs a column mixing operation on the 32-bit word in
3614 +// %ebx and leaves the 32-bit result in %eax. Each of the 4 input bytes
3615 +// indexes one of 4 consecutive tables of 256 32-bit words starting at p1;
3616 +// the 4 looked-up words are xored together to form the output value.
3617 +// Clobbers %ecx and leaves %ebx rotated right by 16 bits.
3618 +#define mix_col(p1) \
3619 + movzbl %bl,%ecx ;\
3620 + mov p1(,%ecx,4),%eax ;\
3621 + movzbl %bh,%ecx ;\
3622 + ror $16,%ebx ;\
3623 + xor p1+tlen(,%ecx,4),%eax ;\
3624 + movzbl %bl,%ecx ;\
3625 + xor p1+2*tlen(,%ecx,4),%eax ;\
3626 + movzbl %bh,%ecx ;\
3627 + xor p1+3*tlen(,%ecx,4),%eax
3628 +
3629 +// Key Schedule Macros: p1 is the step number and selects the aes_rcon_tab word
3630 +// ksc4: next 4 words from the previous 4 in %esi,%ebp,%edx,%ebx, stored at 16*p1(%edi)
3631 +#define ksc4(p1) \
3632 + rol $24,%ebx ;\
3633 + mix_col(aes_fl_tab) ;\
3634 + ror $8,%ebx ;\
3635 + xor 4*p1+aes_rcon_tab,%eax ;\
3636 + xor %eax,%esi ;\
3637 + xor %esi,%ebp ;\
3638 + mov %esi,16*p1(%edi) ;\
3639 + mov %ebp,16*p1+4(%edi) ;\
3640 + xor %ebp,%edx ;\
3641 + xor %edx,%ebx ;\
3642 + mov %edx,16*p1+8(%edi) ;\
3643 + mov %ebx,16*p1+12(%edi)
3644 +// ksc6: 6 words stored at 24*p1(%edi); the first 2 are chained through memory, the last 4 through registers
3645 +#define ksc6(p1) \
3646 + rol $24,%ebx ;\
3647 + mix_col(aes_fl_tab) ;\
3648 + ror $8,%ebx ;\
3649 + xor 4*p1+aes_rcon_tab,%eax ;\
3650 + xor 24*p1-24(%edi),%eax ;\
3651 + mov %eax,24*p1(%edi) ;\
3652 + xor 24*p1-20(%edi),%eax ;\
3653 + mov %eax,24*p1+4(%edi) ;\
3654 + xor %eax,%esi ;\
3655 + xor %esi,%ebp ;\
3656 + mov %esi,24*p1+8(%edi) ;\
3657 + mov %ebp,24*p1+12(%edi) ;\
3658 + xor %ebp,%edx ;\
3659 + xor %edx,%ebx ;\
3660 + mov %edx,24*p1+16(%edi) ;\
3661 + mov %ebx,24*p1+20(%edi)
3662 +// ksc8: 8 words stored at 32*p1(%edi); 4 chained through memory, then a second aes_fl_tab pass feeds the last 4
3663 +#define ksc8(p1) \
3664 + rol $24,%ebx ;\
3665 + mix_col(aes_fl_tab) ;\
3666 + ror $8,%ebx ;\
3667 + xor 4*p1+aes_rcon_tab,%eax ;\
3668 + xor 32*p1-32(%edi),%eax ;\
3669 + mov %eax,32*p1(%edi) ;\
3670 + xor 32*p1-28(%edi),%eax ;\
3671 + mov %eax,32*p1+4(%edi) ;\
3672 + xor 32*p1-24(%edi),%eax ;\
3673 + mov %eax,32*p1+8(%edi) ;\
3674 + xor 32*p1-20(%edi),%eax ;\
3675 + mov %eax,32*p1+12(%edi) ;\
3676 + push %ebx ;\
3677 + mov %eax,%ebx ;\
3678 + mix_col(aes_fl_tab) ;\
3679 + pop %ebx ;\
3680 + xor %eax,%esi ;\
3681 + xor %esi,%ebp ;\
3682 + mov %esi,32*p1+16(%edi) ;\
3683 + mov %ebp,32*p1+20(%edi) ;\
3684 + xor %ebp,%edx ;\
3685 + xor %edx,%ebx ;\
3686 + mov %edx,32*p1+24(%edi) ;\
3687 + mov %ebx,32*p1+28(%edi)
3688 +
3689 + .align ALIGN32BYTES
3690 +aes_set_key:
3691 + pushfl // flags are restored by popfl at exit (cld is used below)
3692 + push %ebp
3693 + mov %esp,%ebp // parameter offsets aes_cx..ed_flg are relative to this %ebp
3694 + sub $slen,%esp // room for locals (cnt)
3695 + push %ebx
3696 + push %esi
3697 + push %edi
3698 +
3699 + mov aes_cx(%ebp),%edx // edx -> AES context
3700 +
3701 + mov key_ln(%ebp),%ecx // key length
3702 + cmpl $128,%ecx
3703 + jb aes_30 // below 128: already a byte count
3704 + shr $3,%ecx // bits -> bytes
3705 +aes_30: cmpl $32,%ecx
3706 + je aes_32
3707 + cmpl $24,%ecx
3708 + je aes_32
3709 + mov $16,%ecx // any other length falls back to 16 bytes
3710 +aes_32: shr $2,%ecx // bytes -> 32-bit words
3711 + mov %ecx,nkey(%edx)
3712 +
3713 + lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length
3714 + mov %eax,nrnd(%edx)
3715 +
3716 + mov in_key(%ebp),%esi // key input array
3717 + lea ekey(%edx),%edi // key position in AES context
3718 + cld
3719 + push %ebp // ebp is used as a work register by the ksc macros
3720 + mov %ecx,%eax // save key length in eax
3721 + rep ; movsl // words in the key schedule
3722 + mov -4(%esi),%ebx // put some values in registers
3723 + mov -8(%esi),%edx // to allow faster code
3724 + mov -12(%esi),%ebp // (the last four words of the input key)
3725 + mov -16(%esi),%esi
3726 +
3727 + cmpl $4,%eax // jump on key size
3728 + je aes_36
3729 + cmpl $6,%eax
3730 + je aes_35
3731 +
3732 + ksc8(0)
3733 + ksc8(1)
3734 + ksc8(2)
3735 + ksc8(3)
3736 + ksc8(4)
3737 + ksc8(5)
3738 + ksc8(6)
3739 + jmp aes_37
3740 +aes_35: ksc6(0)
3741 + ksc6(1)
3742 + ksc6(2)
3743 + ksc6(3)
3744 + ksc6(4)
3745 + ksc6(5)
3746 + ksc6(6)
3747 + ksc6(7)
3748 + jmp aes_37
3749 +aes_36: ksc4(0)
3750 + ksc4(1)
3751 + ksc4(2)
3752 + ksc4(3)
3753 + ksc4(4)
3754 + ksc4(5)
3755 + ksc4(6)
3756 + ksc4(7)
3757 + ksc4(8)
3758 + ksc4(9)
3759 +aes_37: pop %ebp // frame pointer back
3760 + mov aes_cx(%ebp),%edx // edx -> AES context
3761 + cmpl $0,ed_flg(%ebp)
3762 + jne aes_39 // nonzero flag: skip the decryption schedule
3763 +
3764 +// compile decryption key schedule from encryption schedule - reverse
3765 +// order and do mix_column operation on round keys except first and last
3766 +
3767 + mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
3768 + shl $2,%eax // eax = 4 * nrnd
3769 + lea dkey(%edx,%eax,4),%edi // edi = dkey + 16 * nrnd
3770 + lea ekey(%edx),%esi // kf = cx->e_key
3771 +
3772 + movsl // copy first round key (unmodified)
3773 + movsl
3774 + movsl
3775 + movsl
3776 + sub $32,%edi // back to the start of the preceding 16-byte slot
3777 + movl $1,cnt(%ebp)
3778 +aes_38: // do mix column on each column of
3779 + lodsl // each round key
3780 + mov %eax,%ebx
3781 + mix_col(aes_im_tab)
3782 + stosl
3783 + lodsl
3784 + mov %eax,%ebx
3785 + mix_col(aes_im_tab)
3786 + stosl
3787 + lodsl
3788 + mov %eax,%ebx
3789 + mix_col(aes_im_tab)
3790 + stosl
3791 + lodsl
3792 + mov %eax,%ebx
3793 + mix_col(aes_im_tab)
3794 + stosl
3795 + sub $32,%edi // again step back to the preceding 16-byte slot
3796 +
3797 + incl cnt(%ebp)
3798 + mov cnt(%ebp),%eax
3799 + cmp nrnd(%edx),%eax
3800 + jb aes_38 // loop while cnt < number of rounds
3801 +
3802 + movsl // copy last round key (unmodified)
3803 + movsl
3804 + movsl
3805 + movsl
3806 +aes_39: pop %edi
3807 + pop %esi
3808 + pop %ebx
3809 + mov %ebp,%esp // discard locals
3810 + pop %ebp
3811 + popfl
3812 + ret
3813 +
3814 +
3815 +// finite field multiplies by {02}, {04} and {08}
3816 +// (bits shifted past bit 7 are cancelled by xoring in 1x, 2x or 4x 0x11b)
3817 +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
3818 +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
3819 +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
3820 +
3821 +// finite field multiplies required in table generation
3822 +// (xor combinations of x and the f2/f4/f8 products above)
3823 +#define f3(x) (f2(x) ^ x)
3824 +#define f9(x) (f8(x) ^ x)
3825 +#define fb(x) (f8(x) ^ f2(x) ^ x)
3826 +#define fd(x) (f8(x) ^ f4(x) ^ x)
3827 +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
3828 +
3829 +// These defines generate the forward table entries
3830 +// (byte multipliers 3,1,1,2 in u0; each following macro is rotated left 8 bits)
3831 +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
3832 +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
3833 +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
3834 +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
3835 +
3836 +// These defines generate the inverse table entries
3837 +// (byte multipliers b,d,9,e in v0; each following macro is rotated left 8 bits)
3838 +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
3839 +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
3840 +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
3841 +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
3842 +
3843 +// These defines generate entries for the last round tables
3844 +// (the unmultiplied byte placed in byte 0, 1, 2 or 3 of the word)
3845 +#define w0(x) (x)
3846 +#define w1(x) (x << 8)
3847 +#define w2(x) (x << 16)
3848 +#define w3(x) (x << 24)
3849 +
3850 +// macros that apply p1 to consecutive byte values in order (32 per macro); used to generate the inverse mix column tables (needed for the key schedule)
3851 +
3852 +#define im_data0(p1) \
3853 + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
3854 + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
3855 + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
3856 + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
3857 +#define im_data1(p1) \
3858 + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
3859 + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
3860 + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
3861 + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
3862 +#define im_data2(p1) \
3863 + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
3864 + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
3865 + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
3866 + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
3867 +#define im_data3(p1) \
3868 + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
3869 + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
3870 + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
3871 + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
3872 +#define im_data4(p1) \
3873 + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
3874 + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
3875 + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
3876 + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
3877 +#define im_data5(p1) \
3878 + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
3879 + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
3880 + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
3881 + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
3882 +#define im_data6(p1) \
3883 + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
3884 + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
3885 + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
3886 + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
3887 +#define im_data7(p1) \
3888 + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
3889 + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
3890 + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
3891 + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
3892 +
3893 +// S-box data - 256 entries
3894 +
3895 +#define sb_data0(p1) \
3896 + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
3897 + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
3898 + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
3899 + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
3900 +#define sb_data1(p1) \
3901 + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
3902 + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
3903 + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
3904 + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
3905 +#define sb_data2(p1) \
3906 + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
3907 + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
3908 + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
3909 + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
3910 +#define sb_data3(p1) \
3911 + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
3912 + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
3913 + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
3914 + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
3915 +#define sb_data4(p1) \
3916 + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
3917 + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
3918 + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
3919 + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
3920 +#define sb_data5(p1) \
3921 + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
3922 + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
3923 + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
3924 + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
3925 +#define sb_data6(p1) \
3926 + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
3927 + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
3928 + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
3929 + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
3930 +#define sb_data7(p1) \
3931 + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
3932 + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
3933 + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
3934 + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
3935 +
3936 +// Inverse S-box data - 256 entries
3937 +
3938 +#define ib_data0(p1) \
3939 + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
3940 + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
3941 + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
3942 + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
3943 +#define ib_data1(p1) \
3944 + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
3945 + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
3946 + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
3947 + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
3948 +#define ib_data2(p1) \
3949 + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
3950 + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
3951 + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
3952 + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
3953 +#define ib_data3(p1) \
3954 + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
3955 + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
3956 + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
3957 + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
3958 +#define ib_data4(p1) \
3959 + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
3960 + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
3961 + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
3962 + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
3963 +#define ib_data5(p1) \
3964 + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
3965 + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
3966 + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
3967 + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
3968 +#define ib_data6(p1) \
3969 + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
3970 + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
3971 + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
3972 + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
3973 +#define ib_data7(p1) \
3974 + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
3975 + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
3976 + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
3977 + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
3978 +
3979 +// The rcon_table (needed for the key schedule)
3980 +//
3981 +// Here is Dr Brian Gladman's original source code:
3982 +// _rcon_tab:
3983 +// %assign x 1
3984 +// %rep 29
3985 +// dd x
3986 +// %assign x f2(x)
3987 +// %endrep
3988 +//
3989 +// Here is precomputed output (it's more portable this way):
3990 +
3991 + .align ALIGN32BYTES
3992 +aes_rcon_tab:
3993 + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
3994 + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
3995 + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
3996 + .long 0xb3,0x7d,0xfa,0xef,0xc5
3997 +
3998 +// The forward xor tables
3999 +
4000 + .align ALIGN32BYTES
4001 +aes_ft_tab:
4002 + sb_data0(u0)
4003 + sb_data1(u0)
4004 + sb_data2(u0)
4005 + sb_data3(u0)
4006 + sb_data4(u0)
4007 + sb_data5(u0)
4008 + sb_data6(u0)
4009 + sb_data7(u0)
4010 +
4011 + sb_data0(u1)
4012 + sb_data1(u1)
4013 + sb_data2(u1)
4014 + sb_data3(u1)
4015 + sb_data4(u1)
4016 + sb_data5(u1)
4017 + sb_data6(u1)
4018 + sb_data7(u1)
4019 +
4020 + sb_data0(u2)
4021 + sb_data1(u2)
4022 + sb_data2(u2)
4023 + sb_data3(u2)
4024 + sb_data4(u2)
4025 + sb_data5(u2)
4026 + sb_data6(u2)
4027 + sb_data7(u2)
4028 +
4029 + sb_data0(u3)
4030 + sb_data1(u3)
4031 + sb_data2(u3)
4032 + sb_data3(u3)
4033 + sb_data4(u3)
4034 + sb_data5(u3)
4035 + sb_data6(u3)
4036 + sb_data7(u3)
4037 +
4038 + .align ALIGN32BYTES
4039 +aes_fl_tab:
4040 + sb_data0(w0)
4041 + sb_data1(w0)
4042 + sb_data2(w0)
4043 + sb_data3(w0)
4044 + sb_data4(w0)
4045 + sb_data5(w0)
4046 + sb_data6(w0)
4047 + sb_data7(w0)
4048 +
4049 + sb_data0(w1)
4050 + sb_data1(w1)
4051 + sb_data2(w1)
4052 + sb_data3(w1)
4053 + sb_data4(w1)
4054 + sb_data5(w1)
4055 + sb_data6(w1)
4056 + sb_data7(w1)
4057 +
4058 + sb_data0(w2)
4059 + sb_data1(w2)
4060 + sb_data2(w2)
4061 + sb_data3(w2)
4062 + sb_data4(w2)
4063 + sb_data5(w2)
4064 + sb_data6(w2)
4065 + sb_data7(w2)
4066 +
4067 + sb_data0(w3)
4068 + sb_data1(w3)
4069 + sb_data2(w3)
4070 + sb_data3(w3)
4071 + sb_data4(w3)
4072 + sb_data5(w3)
4073 + sb_data6(w3)
4074 + sb_data7(w3)
4075 +
4076 +// The inverse xor tables
4077 +
4078 + .align ALIGN32BYTES
4079 +aes_it_tab:
4080 + ib_data0(v0)
4081 + ib_data1(v0)
4082 + ib_data2(v0)
4083 + ib_data3(v0)
4084 + ib_data4(v0)
4085 + ib_data5(v0)
4086 + ib_data6(v0)
4087 + ib_data7(v0)
4088 +
4089 + ib_data0(v1)
4090 + ib_data1(v1)
4091 + ib_data2(v1)
4092 + ib_data3(v1)
4093 + ib_data4(v1)
4094 + ib_data5(v1)
4095 + ib_data6(v1)
4096 + ib_data7(v1)
4097 +
4098 + ib_data0(v2)
4099 + ib_data1(v2)
4100 + ib_data2(v2)
4101 + ib_data3(v2)
4102 + ib_data4(v2)
4103 + ib_data5(v2)
4104 + ib_data6(v2)
4105 + ib_data7(v2)
4106 +
4107 + ib_data0(v3)
4108 + ib_data1(v3)
4109 + ib_data2(v3)
4110 + ib_data3(v3)
4111 + ib_data4(v3)
4112 + ib_data5(v3)
4113 + ib_data6(v3)
4114 + ib_data7(v3)
4115 +
4116 + .align ALIGN32BYTES
4117 +aes_il_tab:
4118 + ib_data0(w0)
4119 + ib_data1(w0)
4120 + ib_data2(w0)
4121 + ib_data3(w0)
4122 + ib_data4(w0)
4123 + ib_data5(w0)
4124 + ib_data6(w0)
4125 + ib_data7(w0)
4126 +
4127 + ib_data0(w1)
4128 + ib_data1(w1)
4129 + ib_data2(w1)
4130 + ib_data3(w1)
4131 + ib_data4(w1)
4132 + ib_data5(w1)
4133 + ib_data6(w1)
4134 + ib_data7(w1)
4135 +
4136 + ib_data0(w2)
4137 + ib_data1(w2)
4138 + ib_data2(w2)
4139 + ib_data3(w2)
4140 + ib_data4(w2)
4141 + ib_data5(w2)
4142 + ib_data6(w2)
4143 + ib_data7(w2)
4144 +
4145 + ib_data0(w3)
4146 + ib_data1(w3)
4147 + ib_data2(w3)
4148 + ib_data3(w3)
4149 + ib_data4(w3)
4150 + ib_data5(w3)
4151 + ib_data6(w3)
4152 + ib_data7(w3)
4153 +
4154 +// The inverse mix column tables
4155 +
4156 + .align ALIGN32BYTES
4157 +aes_im_tab:
4158 + im_data0(v0)
4159 + im_data1(v0)
4160 + im_data2(v0)
4161 + im_data3(v0)
4162 + im_data4(v0)
4163 + im_data5(v0)
4164 + im_data6(v0)
4165 + im_data7(v0)
4166 +
4167 + im_data0(v1)
4168 + im_data1(v1)
4169 + im_data2(v1)
4170 + im_data3(v1)
4171 + im_data4(v1)
4172 + im_data5(v1)
4173 + im_data6(v1)
4174 + im_data7(v1)
4175 +
4176 + im_data0(v2)
4177 + im_data1(v2)
4178 + im_data2(v2)
4179 + im_data3(v2)
4180 + im_data4(v2)
4181 + im_data5(v2)
4182 + im_data6(v2)
4183 + im_data7(v2)
4184 +
4185 + im_data0(v3)
4186 + im_data1(v3)
4187 + im_data2(v3)
4188 + im_data3(v3)
4189 + im_data4(v3)
4190 + im_data5(v3)
4191 + im_data6(v3)
4192 + im_data7(v3)
4193 diff -urN linux-2.4.31-noloop/drivers/misc/aes.c linux-2.4.31-AES/drivers/misc/aes.c
4194 --- linux-2.4.31-noloop/drivers/misc/aes.c 1970-01-01 02:00:00.000000000 +0200
4195 +++ linux-2.4.31-AES/drivers/misc/aes.c 2005-06-01 20:59:27.000000000 +0300
4196 @@ -0,0 +1,1479 @@
4197 +// I retain copyright in this code but I encourage its free use provided
4198 +// that I don't carry any responsibility for the results. I am especially
4199 +// happy to see it used in free and open source software. If you do use
4200 +// it I would appreciate an acknowledgement of its origin in the code or
4201 +// the product that results and I would also appreciate knowing a little
4202 +// about the use to which it is being put. I am grateful to Frank Yellin
4203 +// for some ideas that are used in this implementation.
4204 +//
4205 +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
4206 +//
4207 +// This is an implementation of the AES encryption algorithm (Rijndael)
4208 +// designed by Joan Daemen and Vincent Rijmen. This version is designed
4209 +// to provide both fixed and dynamic block and key lengths and can also
4210 +// run with either big or little endian internal byte order (see aes.h).
4211 +// It inputs block and key lengths in bytes with the legal values being
4212 +// 16, 24 and 32.
4213 +
4214 +/*
4215 + * Modified by Jari Ruusu, May 1 2001
4216 + * - Fixed some compile warnings, code was ok but gcc warned anyway.
4217 + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
4218 + * - Major name space cleanup: Names visible to outside now begin
4219 + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
4220 + * - Removed C++ and DLL support as part of name space cleanup.
4221 + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
4222 + * - Merged precomputed constant tables to aes.c file.
4223 + * - Removed data alignment restrictions for portability reasons.
4224 + * - Made block and key lengths accept bit count (128/192/256)
4225 + * as well as byte count (16/24/32).
4226 + * - Removed all error checks. This change also eliminated the need
4227 + * to preinitialize the context struct to zero.
4228 + * - Removed some totally unused constants.
4229 + */
4230 +/*
4231 + * Modified by Jari Ruusu, April 21 2004
4232 + * - Added back code that avoids byte swaps on big endian boxes.
4233 + */
4234 +
4235 +#include "aes.h"
4236 +
4237 +// CONFIGURATION OPTIONS (see also aes.h)
4238 +//
4239 +// 1. Define UNROLL for full loop unrolling in encryption and decryption.
4240 +// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
4241 +// 3. Define FIXED_TABLES for compiled rather than dynamic tables.
4242 +// 4. Define FF_TABLES to use tables for field multiplies and inverses.
4243 +// Do not enable this without understanding stack space requirements.
4244 +// 5. Define ARRAYS to use arrays to hold the local state block. If this
4245 +// is not defined, individually declared 32-bit words are used.
4246 +// 6. Define FAST_VARIABLE if a high speed variable block implementation
4247 +// is needed (essentially three separate fixed block size code sequences)
4248 +// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
4249 +// version using 1 table (2 kbytes of table space) or 4 tables (8
4250 +// kbytes of table space) for higher speed.
4251 +// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
4252 +// increase by using tables for the last rounds but with more table
4253 +// space (2 or 8 kbytes extra).
4254 +// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
4255 +// slower version is provided.
4256 +// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
4257 +// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
4258 +
4259 +#define UNROLL
4260 +//#define PARTIAL_UNROLL
4261 +
4262 +#define FIXED_TABLES
4263 +//#define FF_TABLES
4264 +//#define ARRAYS
4265 +#define FAST_VARIABLE
4266 +
4267 +//#define ONE_TABLE
4268 +#define FOUR_TABLES
4269 +
4270 +//#define ONE_LR_TABLE
4271 +#define FOUR_LR_TABLES
4272 +
4273 +//#define ONE_IM_TABLE
4274 +#define FOUR_IM_TABLES
4275 +
4276 +#if defined(UNROLL) && defined (PARTIAL_UNROLL)
4277 +#error both UNROLL and PARTIAL_UNROLL are defined
4278 +#endif
4279 +
4280 +#if defined(ONE_TABLE) && defined (FOUR_TABLES)
4281 +#error both ONE_TABLE and FOUR_TABLES are defined
4282 +#endif
4283 +
4284 +#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
4285 +#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
4286 +#endif
4287 +
4288 +#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
4289 +#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
4290 +#endif
4291 +
4292 +#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
4293 +#error an illegal block size has been specified
4294 +#endif
4295 +
4296 +/* INTERNAL_BYTE_ORDER: 0=unknown, 1=little endian, 2=big endian; set by the first matching branch of the ladder below */
4297 +#if defined(INTERNAL_BYTE_ORDER) /* already defined before this point: keep that value */
4298 +#elif defined(__i386__)||defined(__i386)||defined(__x86_64__)||defined(__x86_64)||defined(__amd64__)||defined(__amd64)||defined(__AMD64__)||defined(__AMD64)
4299 +# define INTERNAL_BYTE_ORDER 1
4300 +# undef DATA_ALWAYS_ALIGNED
4301 +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
4302 +#elif defined(__ppc__)||defined(__ppc)||defined(__PPC__)||defined(__PPC)||defined(__powerpc__)||defined(__powerpc)||defined(__POWERPC__)||defined(__POWERPC)||defined(__PowerPC__)||defined(__PowerPC)||defined(__ppc64__)||defined(__ppc64)||defined(__PPC64__)||defined(__PPC64)||defined(__powerpc64__)||defined(__powerpc64)||defined(__s390__)||defined(__s390)
4303 +# define INTERNAL_BYTE_ORDER 2
4304 +# undef DATA_ALWAYS_ALIGNED
4305 +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
4306 +#elif defined(__alpha__)||defined(__alpha)||defined(__ia64__)||defined(__ia64)
4307 +# define INTERNAL_BYTE_ORDER 1 /* DATA_ALWAYS_ALIGNED is left as it was */
4308 +#elif defined(__hppa__)||defined(__hppa)||defined(__HPPA__)||defined(__HPPA)||defined(__parisc__)||defined(__parisc)||defined(__sparc__)||defined(__sparc)||defined(__sparc_v9__)||defined(__sparc_v9)||defined(__sparc64__)||defined(__sparc64)||defined(__mc68000__)||defined(__mc68000)
4309 +# define INTERNAL_BYTE_ORDER 2 /* DATA_ALWAYS_ALIGNED is left as it was */
4310 +#elif defined(CONFIGURE_DETECTS_BYTE_ORDER)
4311 +# if WORDS_BIGENDIAN
4312 +# define INTERNAL_BYTE_ORDER 2
4313 +# else
4314 +# define INTERNAL_BYTE_ORDER 1
4315 +# endif
4316 +#elif defined(__linux__) && defined(__KERNEL__)
4317 +# include <asm/byteorder.h>
4318 +# if defined(__BIG_ENDIAN)
4319 +# define INTERNAL_BYTE_ORDER 2
4320 +# else
4321 +# define INTERNAL_BYTE_ORDER 1
4322 +# endif
4323 +#else
4324 +# include <sys/param.h>
4325 +# if (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN))
4326 +# define INTERNAL_BYTE_ORDER 1
4327 +# elif WORDS_BIGENDIAN || defined(__BIG_ENDIAN__) || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN))
4328 +# define INTERNAL_BYTE_ORDER 2
4329 +# else
4330 +# define INTERNAL_BYTE_ORDER 0 /* undetermined; the word_in/word_out selection that follows replaces 0 with 1 */
4331 +# endif
4332 +#endif
4333 +
4334 +#if defined(DATA_ALWAYS_ALIGNED) && (INTERNAL_BYTE_ORDER > 0) /* word_in(x)/word_out(x,v) read/write a u_int32_t at x; here by dereferencing a cast pointer */
4335 +# define word_in(x) *(u_int32_t*)(x)
4336 +# define word_out(x,v) *(u_int32_t*)(x) = (v)
4337 +#elif defined(__linux__) && defined(__KERNEL__) /* kernel build: go through get_unaligned/put_unaligned */
4338 +# include <asm/unaligned.h>
4339 +# define word_in(x) get_unaligned((u_int32_t*)(x))
4340 +# define word_out(x,v) put_unaligned((v),(u_int32_t*)(x))
4341 +#else
4342 +/* unknown endianness and/or unable to handle unaligned data: access one byte at a time with byte 0 in the low bits, and force INTERNAL_BYTE_ORDER to 1 to match */
4343 +# undef INTERNAL_BYTE_ORDER
4344 +# define INTERNAL_BYTE_ORDER 1
4345 +# define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24))
4346 +# define word_out(x,v) ((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24)
4347 +#endif
4348 +
4349 +// upr(x,n): rotates bytes within words by n positions, moving bytes
4350 +// to higher index positions with wrap around into low positions
4351 +// ups(x,n): moves bytes by n positions to higher index positions in
4352 +// words but without wrap around
4353 +// bval(x,n): extracts byte n of a word; bytes2word(b0,b1,b2,b3): packs bytes so that bval(w,n) is bn
4354 +
4355 +#if (INTERNAL_BYTE_ORDER < 2)
4356 +/* little endian */
4357 +#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n))))
4358 +#define ups(x,n) ((x) << 8 * (n))
4359 +#define bval(x,n) ((unsigned char)((x) >> 8 * (n)))
4360 +#define bytes2word(b0, b1, b2, b3) \
4361 + ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0))
4362 +#else
4363 +/* big endian: byte index 0 is the most significant byte, so the shift directions are mirrored */
4364 +#define upr(x,n) (((x) >> 8 * (n)) | ((x) << (32 - 8 * (n))))
4365 +#define ups(x,n) ((x) >> 8 * (n))
4366 +#define bval(x,n) ((unsigned char)((x) >> (24 - 8 * (n))))
4367 +#define bytes2word(b0, b1, b2, b3) \
4368 + ((u_int32_t)(b0) << 24 | (u_int32_t)(b1) << 16 | (u_int32_t)(b2) << 8 | (b3))
4369 +#endif
4370 +
4371 +// Disable at least some poor combinations of options by rewriting the option macros chosen above
4372 +
4373 +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) /* no round tables: force fixed tables, drop unrolling and the LR/IM tables */
4374 +#define FIXED_TABLES
4375 +#undef UNROLL
4376 +#undef ONE_LR_TABLE
4377 +#undef FOUR_LR_TABLES
4378 +#undef ONE_IM_TABLE
4379 +#undef FOUR_IM_TABLES
4380 +#elif !defined(FOUR_TABLES) /* ONE_TABLE only: turn FOUR_LR/FOUR_IM requests into their ONE_ variants */
4381 +#ifdef FOUR_LR_TABLES
4382 +#undef FOUR_LR_TABLES
4383 +#define ONE_LR_TABLE
4384 +#endif
4385 +#ifdef FOUR_IM_TABLES
4386 +#undef FOUR_IM_TABLES
4387 +#define ONE_IM_TABLE
4388 +#endif
4389 +#elif !defined(AES_BLOCK_SIZE) /* FOUR_TABLES without a defined AES_BLOCK_SIZE: UNROLL is replaced by PARTIAL_UNROLL */
4390 +#if defined(UNROLL)
4391 +#define PARTIAL_UNROLL
4392 +#undef UNROLL
4393 +#endif
4394 +#endif
4395 +
4396 +// the finite field modular polynomial and elements
4397 +
4398 +#define ff_poly 0x011b
4399 +#define ff_hi 0x80
4400 +
4401 +// multiply four bytes in GF(2^8) by 'x' {02} in parallel: shift the low 7 bits of each byte left, then XOR 0x1b into every byte whose top bit was set
4402 +
4403 +#define m1 0x80808080 /* top bit of each of the four bytes */
4404 +#define m2 0x7f7f7f7f /* low seven bits of each of the four bytes */
4405 +#define m3 0x0000001b /* low byte of ff_poly, the per-byte reduction term */
4406 +#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))
4407 +
4408 +// The following defines provide alternative definitions of FFmulX that might
4409 +// give improved performance if a fast 32-bit multiply is not available. Note
4410 +// that a temporary variable u needs to be defined where FFmulX is used.
4411 +
4412 +// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
4413 +// #define m4 0x1b1b1b1b
4414 +// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
4415 +
4416 +// perform column mix operation on four bytes in parallel; assigns to a variable named f2 that must exist where the macro is used. NOTE(review): x is not parenthesized in "x ^ f2", so pass a simple operand
4417 +
4418 +#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr(x ^ f2,3) ^ upr(x,2) ^ upr(x,1))
4419 +
4420 +#if defined(FIXED_TABLES)
4421 +
4422 +// the S-Box table
4423 +
4424 +static const unsigned char s_box[256] =
4425 +{
4426 + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
4427 + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
4428 + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
4429 + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
4430 + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
4431 + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
4432 + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
4433 + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
4434 + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
4435 + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
4436 + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
4437 + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
4438 + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
4439 + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
4440 + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
4441 + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
4442 + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
4443 + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
4444 + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
4445 + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
4446 + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
4447 + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
4448 + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
4449 + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
4450 + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
4451 + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
4452 + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
4453 + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
4454 + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
4455 + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
4456 + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
4457 + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
4458 +};
4459 +
4460 +// the inverse S-Box table
4461 +
4462 +static const unsigned char inv_s_box[256] =
4463 +{
4464 + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
4465 + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
4466 + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
4467 + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
4468 + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
4469 + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
4470 + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
4471 + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
4472 + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
4473 + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
4474 + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
4475 + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
4476 + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
4477 + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
4478 + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
4479 + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
4480 + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
4481 + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
4482 + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
4483 + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
4484 + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
4485 + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
4486 + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
4487 + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
4488 + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
4489 + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
4490 + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
4491 + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
4492 + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
4493 + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
4494 + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
4495 + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
4496 +};
4497 +
4498 +// used to ensure table is generated in the right format
4499 +// depending on the internal byte order required
4500 +
4501 +#if (INTERNAL_BYTE_ORDER < 2)
4502 +/* little endian */
4503 +#define w0(p) 0x000000##p
4504 +#else
4505 +/* big endian */
4506 +#define w0(p) 0x##p##000000
4507 +#endif
4508 +
4509 +// Number of elements required in this table for different
4510 +// block and key lengths is:
4511 +//
4512 +// Nk = 4 6 8
4513 +// ----------
4514 +// Nb = 4 | 10 8 7
4515 +// 6 | 19 12 11
4516 +// 8 | 29 19 14
4517 +//
4518 +// this table can be a table of bytes if the key schedule
4519 +// code is adjusted accordingly
4520 +
4521 +static const u_int32_t rcon_tab[29] =
4522 +{
4523 + w0(01), w0(02), w0(04), w0(08),
4524 + w0(10), w0(20), w0(40), w0(80),
4525 + w0(1b), w0(36), w0(6c), w0(d8),
4526 + w0(ab), w0(4d), w0(9a), w0(2f),
4527 + w0(5e), w0(bc), w0(63), w0(c6),
4528 + w0(97), w0(35), w0(6a), w0(d4),
4529 + w0(b3), w0(7d), w0(fa), w0(ef),
4530 + w0(c5)
4531 +};
4532 +
4533 +#undef w0
4534 +
4535 +// used to ensure table is generated in the right format
4536 +// depending on the internal byte order required
4537 +
4538 +#if (INTERNAL_BYTE_ORDER < 2)
4539 +/* little endian */
4540 +#define r0(p,q,r,s) 0x##p##q##r##s
4541 +#define r1(p,q,r,s) 0x##q##r##s##p
4542 +#define r2(p,q,r,s) 0x##r##s##p##q
4543 +#define r3(p,q,r,s) 0x##s##p##q##r
4544 +#define w0(p) 0x000000##p
4545 +#define w1(p) 0x0000##p##00
4546 +#define w2(p) 0x00##p##0000
4547 +#define w3(p) 0x##p##000000
4548 +#else
4549 +/* big endian */
4550 +#define r0(p,q,r,s) 0x##s##r##q##p
4551 +#define r1(p,q,r,s) 0x##p##s##r##q
4552 +#define r2(p,q,r,s) 0x##q##p##s##r
4553 +#define r3(p,q,r,s) 0x##r##q##p##s
4554 +#define w0(p) 0x##p##000000
4555 +#define w1(p) 0x00##p##0000
4556 +#define w2(p) 0x0000##p##00
4557 +#define w3(p) 0x000000##p
4558 +#endif
4559 +
4560 +#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
4561 +
4562 +// data for forward tables (other than last round)
4563 +
4564 +#define f_table \
4565 + r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
4566 + r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
4567 + r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
4568 + r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
4569 + r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
4570 + r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
4571 + r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
4572 + r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
4573 + r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
4574 + r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
4575 + r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
4576 + r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
4577 + r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
4578 + r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
4579 + r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
4580 + r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
4581 + r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
4582 + r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
4583 + r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
4584 + r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
4585 + r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
4586 + r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
4587 + r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
4588 + r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
4589 + r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
4590 + r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
4591 + r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
4592 + r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
4593 + r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
4594 + r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
4595 + r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
4596 + r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
4597 + r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
4598 + r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
4599 + r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
4600 + r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
4601 + r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
4602 + r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
4603 + r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
4604 + r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
4605 + r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
4606 + r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
4607 + r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
4608 + r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
4609 + r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
4610 + r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
4611 + r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
4612 + r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
4613 + r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
4614 + r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
4615 + r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
4616 + r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
4617 + r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
4618 + r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
4619 + r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
4620 + r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
4621 + r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
4622 + r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
4623 + r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
4624 + r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
4625 + r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
4626 + r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
4627 + r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
4628 + r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
4629 +
4630 +// data for inverse tables (other than last round)
4631 +
4632 +#define i_table \
4633 + r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
4634 + r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
4635 + r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
4636 + r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
4637 + r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
4638 + r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
4639 + r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
4640 + r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
4641 + r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
4642 + r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
4643 + r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
4644 + r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
4645 + r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
4646 + r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
4647 + r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
4648 + r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
4649 + r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
4650 + r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
4651 + r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
4652 + r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
4653 + r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
4654 + r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
4655 + r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
4656 + r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
4657 + r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
4658 + r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
4659 + r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
4660 + r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
4661 + r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
4662 + r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
4663 + r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
4664 + r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
4665 + r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
4666 + r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
4667 + r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
4668 + r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
4669 + r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
4670 + r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
4671 + r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
4672 + r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
4673 + r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
4674 + r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
4675 + r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
4676 + r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
4677 + r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
4678 + r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
4679 + r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
4680 + r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
4681 + r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
4682 + r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
4683 + r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
4684 + r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
4685 + r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
4686 + r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
4687 + r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
4688 + r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
4689 + r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
4690 + r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
4691 + r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
4692 + r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
4693 + r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
4694 + r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
4695 + r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
4696 + r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
4697 +
4698 +// generate the required tables in the desired endian format: f_table and i_table are each expanded with r rebound to r0..r3
4699 +
4700 +#undef r
4701 +#define r r0
4702 +
4703 +#if defined(ONE_TABLE)
4704 +static const u_int32_t ft_tab[256] =
4705 + { f_table }; // single forward table, every entry built with r0
4706 +#elif defined(FOUR_TABLES)
4707 +static const u_int32_t ft_tab[4][256] =
4708 +{ { f_table }, // ft_tab[0]: entries built with r0
4709 +#undef r
4710 +#define r r1
4711 + { f_table }, // ft_tab[1]: same data, entries built with r1
4712 +#undef r
4713 +#define r r2
4714 + { f_table }, // ft_tab[2]: same data, entries built with r2
4715 +#undef r
4716 +#define r r3
4717 + { f_table } // ft_tab[3]: same data, entries built with r3
4718 +};
4719 +#endif
4720 +
4721 +#undef r
4722 +#define r r0
4723 +#if defined(ONE_TABLE)
4724 +static const u_int32_t it_tab[256] =
4725 + { i_table }; // single inverse table, every entry built with r0
4726 +#elif defined(FOUR_TABLES)
4727 +static const u_int32_t it_tab[4][256] =
4728 +{ { i_table }, // it_tab[0]: entries built with r0
4729 +#undef r
4730 +#define r r1
4731 + { i_table }, // it_tab[1]: same data, entries built with r1
4732 +#undef r
4733 +#define r r2
4734 + { i_table }, // it_tab[2]: same data, entries built with r2
4735 +#undef r
4736 +#define r r3
4737 + { i_table } // it_tab[3]: same data, entries built with r3
4738 +};
4739 +#endif
4740 +
4741 +#endif
4742 +
4743 +#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
4744 +
4745 +// data for inverse tables (last round)
4746 +
4747 +#define li_table \
4748 + w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
4749 + w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
4750 + w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
4751 + w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
4752 + w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
4753 + w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
4754 + w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
4755 + w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
4756 + w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
4757 + w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
4758 + w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
4759 + w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
4760 + w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
4761 + w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
4762 + w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
4763 + w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
4764 + w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
4765 + w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
4766 + w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
4767 + w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
4768 + w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
4769 + w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
4770 + w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
4771 + w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
4772 + w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
4773 + w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
4774 + w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
4775 + w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
4776 + w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
4777 + w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
4778 + w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
4779 + w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
4780 +
4781 +// generate the last-round tables in the desired endian format: fl_tab reuses f_table, il_tab expands li_table
4782 +
4783 +#undef r
4784 +#define r(p,q,r,s) w0(q) // keep only q, the second byte of each f_table entry
4785 +#if defined(ONE_LR_TABLE)
4786 +static const u_int32_t fl_tab[256] =
4787 + { f_table }; // single forward last-round table, entries built with w0
4788 +#elif defined(FOUR_LR_TABLES)
4789 +static const u_int32_t fl_tab[4][256] =
4790 +{ { f_table }, // fl_tab[0]: entries built with w0
4791 +#undef r
4792 +#define r(p,q,r,s) w1(q)
4793 + { f_table }, // fl_tab[1]: entries built with w1
4794 +#undef r
4795 +#define r(p,q,r,s) w2(q)
4796 + { f_table }, // fl_tab[2]: entries built with w2
4797 +#undef r
4798 +#define r(p,q,r,s) w3(q)
4799 + { f_table } // fl_tab[3]: entries built with w3
4800 +};
4801 +#endif
4802 +
4803 +#undef w
4804 +#define w w0
4805 +#if defined(ONE_LR_TABLE)
4806 +static const u_int32_t il_tab[256] =
4807 + { li_table }; // single inverse last-round table, entries built with w0
4808 +#elif defined(FOUR_LR_TABLES)
4809 +static const u_int32_t il_tab[4][256] =
4810 +{ { li_table }, // il_tab[0]: entries built with w0
4811 +#undef w
4812 +#define w w1
4813 + { li_table }, // il_tab[1]: entries built with w1
4814 +#undef w
4815 +#define w w2
4816 + { li_table }, // il_tab[2]: entries built with w2
4817 +#undef w
4818 +#define w w3
4819 + { li_table } // il_tab[3]: entries built with w3
4820 +};
4821 +#endif
4822 +
4823 +#endif
4824 +
4825 +#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
4826 +
4827 +#define m_table \
4828 + r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
4829 + r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
4830 + r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
4831 + r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
4832 + r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
4833 + r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
4834 + r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
4835 + r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
4836 + r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
4837 + r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
4838 + r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
4839 + r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
4840 + r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
4841 + r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
4842 + r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
4843 + r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
4844 + r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
4845 + r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
4846 + r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
4847 + r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
4848 + r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
4849 + r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
4850 + r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
4851 + r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
4852 + r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
4853 + r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
4854 + r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
4855 + r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
4856 + r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
4857 + r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
4858 + r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
4859 + r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
4860 + r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
4861 + r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
4862 + r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
4863 + r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
4864 + r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
4865 + r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
4866 + r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
4867 + r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
4868 + r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
4869 + r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
4870 + r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
4871 + r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
4872 + r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
4873 + r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
4874 + r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
4875 + r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
4876 + r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
4877 + r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
4878 + r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
4879 + r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
4880 + r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
4881 + r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
4882 + r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
4883 + r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
4884 + r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
4885 + r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
4886 + r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
4887 + r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
4888 + r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
4889 + r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
4890 + r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
4891 + r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
4892 +
4893 +#undef r
4894 +#define r r0 // m_table is expanded below with r rebound to r0..r3, one binding per table
4895 +
4896 +#if defined(ONE_IM_TABLE)
4897 +static const u_int32_t im_tab[256] =
4898 + { m_table }; // single table, every entry built with r0
4899 +#elif defined(FOUR_IM_TABLES)
4900 +static const u_int32_t im_tab[4][256] =
4901 +{ { m_table }, // im_tab[0]: entries built with r0
4902 +#undef r
4903 +#define r r1
4904 + { m_table }, // im_tab[1]: same data, entries built with r1
4905 +#undef r
4906 +#define r r2
4907 + { m_table }, // im_tab[2]: same data, entries built with r2
4908 +#undef r
4909 +#define r r3
4910 + { m_table } // im_tab[3]: same data, entries built with r3
4911 +};
4912 +#endif
4913 +
4914 +#endif
4915 +
4916 +#else
4917 +
4918 +static int tab_gen = 0; // set to 1 by the callers of gen_tabs() once the tables below are filled
4919 +
4920 +static unsigned char s_box[256]; // the S box
4921 +static unsigned char inv_s_box[256]; // the inverse S box
4922 +static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
4923 +
4924 +#if defined(ONE_TABLE)
4925 +static u_int32_t ft_tab[256]; // forward round table, written by gen_tabs()
4926 +static u_int32_t it_tab[256]; // inverse round table, written by gen_tabs()
4927 +#elif defined(FOUR_TABLES)
4928 +static u_int32_t ft_tab[4][256]; // forward round tables, written by gen_tabs()
4929 +static u_int32_t it_tab[4][256]; // inverse round tables, written by gen_tabs()
4930 +#endif
4931 +
4932 +#if defined(ONE_LR_TABLE)
4933 +static u_int32_t fl_tab[256]; // forward last-round table, written by gen_tabs()
4934 +static u_int32_t il_tab[256]; // inverse last-round table, written by gen_tabs()
4935 +#elif defined(FOUR_LR_TABLES)
4936 +static u_int32_t fl_tab[4][256]; // forward last-round tables, written by gen_tabs()
4937 +static u_int32_t il_tab[4][256]; // inverse last-round tables, written by gen_tabs()
4938 +#endif
4939 +
4940 +#if defined(ONE_IM_TABLE)
4941 +static u_int32_t im_tab[256]; // table behind inv_mcol(), written by gen_tabs()
4942 +#elif defined(FOUR_IM_TABLES)
4943 +static u_int32_t im_tab[4][256]; // tables behind inv_mcol(), written by gen_tabs()
4944 +#endif
4945 +
4946 +// Generate the tables for the dynamic table option
4947 +
4948 +#if !defined(FF_TABLES)
4949 +
4950 +// It will generally be sensible to use tables to compute finite
4951 +// field multiplies and inverses but where memory is scarce this
4952 +// code might sometimes be better.
4953 +
4954 +// return 2 to the power (n - 1), where n is the zero-based index of the
4955 +// highest bit set in x, for 1 < x < 0x00000200 (x = 0 or 1 gives 0). This
4956 +// halved form is used so that locals within FFinv can be bytes rather than words
4957 +
4958 +static unsigned char hibit(const u_int32_t x)
4959 +{ unsigned char r = (unsigned char)((x >> 1) | (x >> 2)); // x >> 1 OR-ed with x >> 2, truncated to a byte
4960 +
4961 + r |= (r >> 2); // keep OR-ing shifted copies downwards ...
4962 + r |= (r >> 4); // ... until every bit below the top set bit of r is also set
4963 + return (r + 1) >> 1; // r is all ones from its top bit down, so this leaves only that top bit
4964 +}
4965 +
4966 +// return the inverse of the finite field element x (0 and 1 are returned unchanged)
4967 +
4968 +static unsigned char FFinv(const unsigned char x)
4969 +{ unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; // NOTE(review): p2 = 0x1b with n2 = 0x80 presumably stands for modular polynomial 0x11b with its top bit dropped - confirm
4970 +
4971 + if(x < 2) return x; // nothing to compute for 0 and 1
4972 +
4973 + for(;;) // NOTE(review): looks like an extended Euclidean reduction on (p1,v1) and (p2,v2) - confirm
4974 + {
4975 + if(!n1) return v1; // n1 == 0 means hibit(p1) was 0, i.e. p1 < 2
4976 +
4977 + while(n2 >= n1) // reduce p2 by multiples of p1
4978 + {
4979 + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); // n2 / n1 is the scale factor; XOR scaled p1, v1 into p2, v2 (results truncate to a byte)
4980 + }
4981 +
4982 + if(!n2) return v2; // n2 == 0 means hibit(p2) was 0, i.e. p2 < 2
4983 +
4984 + while(n1 >= n2) // reduce p1 by multiples of p2
4985 + {
4986 + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); // mirror image of the step above
4987 + }
4988 + }
4989 +}
4990 +
4991 +// define the finite field multiplies required for Rijndael (each macro evaluates its argument more than once)
4992 +
4993 +#define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0)) // x * 02: shift the low 7 bits left, XOR 0x1b if bit 7 was set
4994 +#define FFmul03(x) ((x) ^ FFmul02(x)) // x * 03 = x ^ 2x
4995 +#define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x)))) // x * 09 = x ^ 8x
4996 +#define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x)))) // x * 0b = x ^ 2x ^ 8x
4997 +#define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x)))) // x * 0d = x ^ 4x ^ 8x
4998 +#define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x))) // x * 0e = 2x ^ 4x ^ 8x
4999 +
5000 +#else
5001 +
5002 +#define FFinv(x) ((x) ? pow[255 - log[x]]: 0) // table-driven inverse using the pow[]/log[] locals of gen_tabs; 0 maps to 0
5003 +
5004 +#define FFmul02(x) ((x) ? pow[log[x] + 0x19] : 0) // argument parenthesised (as in FFinv) so an expression argument cannot regroup around ?:
5005 +#define FFmul03(x) ((x) ? pow[log[x] + 0x01] : 0) // NOTE(review): each offset is presumably log[] of the multiplier (02, 03, 09, 0b, 0d, 0e) - confirm
5006 +#define FFmul09(x) ((x) ? pow[log[x] + 0xc7] : 0)
5007 +#define FFmul0b(x) ((x) ? pow[log[x] + 0x68] : 0)
5008 +#define FFmul0d(x) ((x) ? pow[log[x] + 0xee] : 0)
5009 +#define FFmul0e(x) ((x) ? pow[log[x] + 0xdf] : 0)
5010 +
5011 +#endif
5012 +
5013 +// The forward and inverse affine transformations used in the S-box; both assign to a variable named w that must be in scope
5014 +
5015 +#define fwd_affine(x) \
5016 + (w = (u_int32_t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8))) // bits shifted past bit 7 are folded back by w>>8, then 0x63 is XOR-ed in
5017 +
5018 +#define inv_affine(x) \
5019 + (w = (u_int32_t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8))) // same folding with shifts 1, 3 and 6, then 0x05 is XOR-ed in
5020 +
5021 +static void gen_tabs(void) // fill s_box, inv_s_box, rcon_tab and whichever ft/it/fl/il/im tables are configured
5022 +{ u_int32_t i, w;
5023 +
5024 +#if defined(FF_TABLES)
5025 +
5026 + unsigned char pow[512], log[256]; // used by the table-driven FFinv / FFmul* macros below
5027 +
5028 + // log and power tables for GF(2^8) finite field with
5029 + // 0x011b as modular polynomial - the simplest primitive
5030 + // root is 0x03, used here to generate the tables
5031 +
5032 + i = 0; w = 1;
5033 + do
5034 + {
5035 + pow[i] = (unsigned char)w;
5036 + pow[i + 255] = (unsigned char)w; // second copy 255 entries up, so pow[log[x] + k] can be indexed without wrapping
5037 + log[w] = (unsigned char)i++;
5038 + w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0); // w = w * 0x03 (w XOR its doubling); assumes ff_hi / ff_poly are the top-bit mask and modular polynomial - confirm
5039 + }
5040 + while (w != 1); // stop once the powers of 0x03 cycle back to 1
5041 +
5042 +#endif
5043 +
5044 + for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
5045 + {
5046 + rcon_tab[i] = bytes2word(w, 0, 0, 0);
5047 + w = (w << 1) ^ (w & ff_hi ? ff_poly : 0); // next round constant: double the previous one
5048 + }
5049 +
5050 + for(i = 0; i < 256; ++i)
5051 + { unsigned char b;
5052 +
5053 + s_box[i] = b = fwd_affine(FFinv((unsigned char)i)); // forward S-box: affine transform of the field inverse
5054 +
5055 + w = bytes2word(b, 0, 0, 0); // last-round entry: the S-box byte alone
5056 +#if defined(ONE_LR_TABLE)
5057 + fl_tab[i] = w;
5058 +#elif defined(FOUR_LR_TABLES)
5059 + fl_tab[0][i] = w;
5060 + fl_tab[1][i] = upr(w,1); // NOTE(review): upr(w,n) presumably rotates w by n bytes - confirm against its definition
5061 + fl_tab[2][i] = upr(w,2);
5062 + fl_tab[3][i] = upr(w,3);
5063 +#endif
5064 + w = bytes2word(FFmul02(b), b, b, FFmul03(b)); // normal-round entry: S-box byte times 02, 01, 01, 03
5065 +#if defined(ONE_TABLE)
5066 + ft_tab[i] = w;
5067 +#elif defined(FOUR_TABLES)
5068 + ft_tab[0][i] = w;
5069 + ft_tab[1][i] = upr(w,1);
5070 + ft_tab[2][i] = upr(w,2);
5071 + ft_tab[3][i] = upr(w,3);
5072 +#endif
5073 + inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i)); // inverse S-box: field inverse of the inverse affine transform
5074 +
5075 + w = bytes2word(b, 0, 0, 0); // inverse last-round entry: the inverse S-box byte alone
5076 +#if defined(ONE_LR_TABLE)
5077 + il_tab[i] = w;
5078 +#elif defined(FOUR_LR_TABLES)
5079 + il_tab[0][i] = w;
5080 + il_tab[1][i] = upr(w,1);
5081 + il_tab[2][i] = upr(w,2);
5082 + il_tab[3][i] = upr(w,3);
5083 +#endif
5084 + w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b)); // inverse S-box byte times 0e, 09, 0d, 0b
5085 +#if defined(ONE_TABLE)
5086 + it_tab[i] = w;
5087 +#elif defined(FOUR_TABLES)
5088 + it_tab[0][i] = w;
5089 + it_tab[1][i] = upr(w,1);
5090 + it_tab[2][i] = upr(w,2);
5091 + it_tab[3][i] = upr(w,3);
5092 +#endif
5093 +#if defined(ONE_IM_TABLE)
5094 + im_tab[b] = w; // indexed by b, not i: w holds the 0e/09/0d/0b multiples of b itself
5095 +#elif defined(FOUR_IM_TABLES)
5096 + im_tab[0][b] = w; // indexed by b, not i: w holds the 0e/09/0d/0b multiples of b itself
5097 + im_tab[1][b] = upr(w,1);
5098 + im_tab[2][b] = upr(w,2);
5099 + im_tab[3][b] = upr(w,3);
5100 +#endif
5101 +
5102 + }
5103 +}
5104 +
5105 +#endif
5106 +
5107 +#define no_table(x,box,vf,rf,c) bytes2word( \
5108 + box[bval(vf(x,0,c),rf(0,c))], \
5109 + box[bval(vf(x,1,c),rf(1,c))], \
5110 + box[bval(vf(x,2,c),rf(2,c))], \
5111 + box[bval(vf(x,3,c),rf(3,c))]) // four byte-table lookups packed into one word
5112 +
5113 +#define one_table(x,op,tab,vf,rf,c) \
5114 + ( tab[bval(vf(x,0,c),rf(0,c))] \
5115 + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
5116 + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
5117 + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) // one word table; op(...,n) adjusts the entry for rows 1..3
5118 +
5119 +#define four_tables(x,tab,vf,rf,c) \
5120 + ( tab[0][bval(vf(x,0,c),rf(0,c))] \
5121 + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
5122 + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
5123 + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) // one word table per row, results XOR-ed together
5124 +
5125 +#define vf1(x,r,c) (x) // value selector: always the word x itself
5126 +#define rf1(r,c) (r) // byte selector: row r unchanged
5127 +#define rf2(r,c) ((r-c)&3) // byte selector: row index less c, modulo 4
5128 +
5129 +#if defined(FOUR_LR_TABLES)
5130 +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) // ls_box: fl_tab / s_box lookup on the bytes of x, byte order offset by c
5131 +#elif defined(ONE_LR_TABLE)
5132 +#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
5133 +#else
5134 +#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
5135 +#endif
5136 +
5137 +#if defined(FOUR_IM_TABLES)
5138 +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) // inv_mcol: im_tab lookup on each byte of x
5139 +#elif defined(ONE_IM_TABLE)
5140 +#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
5141 +#else
5142 +#define inv_mcol(x) \
5143 + (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
5144 + f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1)) // table-free form: the user must have u_int32_t f2, f4, f8, f9 in scope
5145 +#endif
5146 +
5147 +// Subroutine to set the block size (if variable): n_bytes may be 16, 24 or 32
5148 +// (bytes) or 128, 192 or 256 (bits); any other value selects 4 columns.
5149 +
5150 +#if defined(AES_BLOCK_SIZE)
5151 +#define nc (AES_BLOCK_SIZE / 4) // fixed block size: the column count is a constant
5152 +#else
5153 +#define nc (cx->aes_Ncol) // variable block size: the column count is read from the context named cx
5154 +
5155 +void aes_set_blk(aes_context *cx, int n_bytes)
5156 +{
5157 +#if !defined(FIXED_TABLES)
5158 + if(!tab_gen) { gen_tabs(); tab_gen = 1; } // build the run-time tables on first use
5159 +#endif
5160 +
5161 + switch(n_bytes) {
5162 + case 32: /* bytes */
5163 + case 256: /* bits */
5164 + nc = 8; // nc expands to cx->aes_Ncol here
5165 + break;
5166 + case 24: /* bytes */
5167 + case 192: /* bits */
5168 + nc = 6;
5169 + break;
5170 + case 16: /* bytes */
5171 + case 128: /* bits */
5172 + default: // unrecognised sizes fall back to 4 columns
5173 + nc = 4;
5174 + break;
5175 + }
5176 +}
5177 +
5178 +#endif
5179 +
5180 +// Initialise the key schedule from the user supplied key. The key
5181 +// length may be given in bytes (16, 24 or 32) or in bits (128, 192
5182 +// or 256), giving Nk values of 4, 6 and 8 respectively; any other
5183 +// value is treated as a 128-bit key (Nk = 4).
5184 +
5185 +#define mx(t,f) (*t++ = inv_mcol(*f),f++) // store inv_mcol(*f) at *t, then advance both pointers
5186 +#define cp(t,f) *t++ = *f++ // copy one word and advance both pointers
5187 +
5188 +#if AES_BLOCK_SIZE == 16
5189 +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s) // 4 words; expands to several statements, so use only as a full statement
5190 +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
5191 +#elif AES_BLOCK_SIZE == 24
5192 +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
5193 + cp(d,s); cp(d,s) // 6 words
5194 +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
5195 + mx(d,s); mx(d,s) // 6 words
5196 +#elif AES_BLOCK_SIZE == 32
5197 +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
5198 + cp(d,s); cp(d,s); cp(d,s); cp(d,s) // 8 words
5199 +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
5200 + mx(d,s); mx(d,s); mx(d,s); mx(d,s) // 8 words
5201 +#else
5202 +
5203 +#define cpy(d,s) \
5204 +switch(nc) \
5205 +{ case 8: cp(d,s); cp(d,s); \
5206 + case 6: cp(d,s); cp(d,s); \
5207 + case 4: cp(d,s); cp(d,s); \
5208 + cp(d,s); cp(d,s); \
5209 +} // cases fall through on purpose: 8, 6 or 4 copies in total; other nc values copy nothing
5210 +
5211 +#define mix(d,s) \
5212 +switch(nc) \
5213 +{ case 8: mx(d,s); mx(d,s); \
5214 + case 6: mx(d,s); mx(d,s); \
5215 + case 4: mx(d,s); mx(d,s); \
5216 + mx(d,s); mx(d,s); \
5217 +} // same fall-through pattern as cpy, using mx instead of cp
5218 +
5219 +#endif
5220 +
5221 +void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f) // fills cx->aes_e_key; when f is 0 also fills cx->aes_d_key
5222 +{ u_int32_t *kf, *kt, rci;
5223 +
5224 +#if !defined(FIXED_TABLES)
5225 + if(!tab_gen) { gen_tabs(); tab_gen = 1; } // build the run-time tables on first use
5226 +#endif
5227 +
5228 + switch(n_bytes) {
5229 + case 32: /* bytes */
5230 + case 256: /* bits */
5231 + cx->aes_Nkey = 8;
5232 + break;
5233 + case 24: /* bytes */
5234 + case 192: /* bits */
5235 + cx->aes_Nkey = 6;
5236 + break;
5237 + case 16: /* bytes */
5238 + case 128: /* bits */
5239 + default: // any other length is treated as a 128-bit key
5240 + cx->aes_Nkey = 4;
5241 + break;
5242 + }
5243 +
5244 + cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6; // rounds = max(Nkey, nc) + 6
5245 +
5246 + cx->aes_e_key[0] = word_in(in_key ); // the first four key words are loaded for every key size
5247 + cx->aes_e_key[1] = word_in(in_key + 4);
5248 + cx->aes_e_key[2] = word_in(in_key + 8);
5249 + cx->aes_e_key[3] = word_in(in_key + 12);
5250 +
5251 + kf = cx->aes_e_key;
5252 + kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey; // loop bound: nc * (Nrnd + 1) words, less the Nkey words each step writes ahead
5253 + rci = 0;
5254 +
5255 + switch(cx->aes_Nkey)
5256 + {
5257 + case 4: do
5258 + { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++]; // first word of a group: ls_box of the previous word, XOR next round constant
5259 + kf[5] = kf[1] ^ kf[4];
5260 + kf[6] = kf[2] ^ kf[5];
5261 + kf[7] = kf[3] ^ kf[6];
5262 + kf += 4;
5263 + }
5264 + while(kf < kt);
5265 + break;
5266 +
5267 + case 6: cx->aes_e_key[4] = word_in(in_key + 16); // two further key words for Nkey = 6
5268 + cx->aes_e_key[5] = word_in(in_key + 20);
5269 + do
5270 + { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
5271 + kf[ 7] = kf[1] ^ kf[ 6];
5272 + kf[ 8] = kf[2] ^ kf[ 7];
5273 + kf[ 9] = kf[3] ^ kf[ 8];
5274 + kf[10] = kf[4] ^ kf[ 9];
5275 + kf[11] = kf[5] ^ kf[10];
5276 + kf += 6;
5277 + }
5278 + while(kf < kt);
5279 + break;
5280 +
5281 + case 8: cx->aes_e_key[4] = word_in(in_key + 16); // four further key words for Nkey = 8
5282 + cx->aes_e_key[5] = word_in(in_key + 20);
5283 + cx->aes_e_key[6] = word_in(in_key + 24);
5284 + cx->aes_e_key[7] = word_in(in_key + 28);
5285 + do
5286 + { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
5287 + kf[ 9] = kf[1] ^ kf[ 8];
5288 + kf[10] = kf[2] ^ kf[ 9];
5289 + kf[11] = kf[3] ^ kf[10];
5290 + kf[12] = kf[4] ^ ls_box(kf[11],0); // Nkey = 8 only: a second ls_box mid-group, with offset 0 and no round constant
5291 + kf[13] = kf[5] ^ kf[12];
5292 + kf[14] = kf[6] ^ kf[13];
5293 + kf[15] = kf[7] ^ kf[14];
5294 + kf += 8;
5295 + }
5296 + while (kf < kt);
5297 + break;
5298 + }
5299 +
5300 + if(!f) // f == 0: also build the decryption schedule, in reverse round order
5301 + { u_int32_t i;
5302 +
5303 + kt = cx->aes_d_key + nc * cx->aes_Nrnd; // start at the last nc-word slot of aes_d_key
5304 + kf = cx->aes_e_key;
5305 +
5306 + cpy(kt, kf); kt -= 2 * nc; // first nc encryption words go last, unmodified; cpy advanced kt by nc, so step back to the previous slot
5307 +
5308 + for(i = 1; i < cx->aes_Nrnd; ++i)
5309 + {
5310 +#if defined(ONE_TABLE) || defined(FOUR_TABLES)
5311 +#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
5312 + u_int32_t f2, f4, f8, f9; // scratch words used by the table-free inv_mcol macro
5313 +#endif
5314 + mix(kt, kf); // middle round keys are passed through inv_mcol
5315 +#else
5316 + cpy(kt, kf); // without round tables the middle round keys are copied as they are
5317 +#endif
5318 + kt -= 2 * nc;
5319 + }
5320 +
5321 + cpy(kt, kf); // the final nc encryption words land at the start of aes_d_key, unmodified
5322 + }
5323 +}
5324 +
5325 +// y = output word, x = input word, r = row, c = column
5326 +// for r = 0, 1, 2 and 3 = column accessed for row r
5327 +
5328 +#if defined(ARRAYS)
5329 +#define s(x,c) x[c] // state word c is an array element
5330 +#else
5331 +#define s(x,c) x##c // state word c is a separate variable whose name is x with c pasted on
5332 +#endif
5333 +
5334 +// I am grateful to Frank Yellin for the following constructions
5335 +// which, given the column (c) of the output state variable that
5336 +// is being computed, return the input state variables which are
5337 +// needed for each row (r) of the state
5338 +
5339 +// For the fixed block size options, compilers reduce these two
5340 +// expressions to fixed variable references. For variable block
5341 +// size code conditional clauses will sometimes be returned
5342 +
5343 +#define unused 77 // Sunset Strip
5344 +
5345 +#define fwd_var(x,r,c) \
5346 + ( r==0 ? \
5347 + ( c==0 ? s(x,0) \
5348 + : c==1 ? s(x,1) \
5349 + : c==2 ? s(x,2) \
5350 + : c==3 ? s(x,3) \
5351 + : c==4 ? s(x,4) \
5352 + : c==5 ? s(x,5) \
5353 + : c==6 ? s(x,6) \
5354 + : s(x,7)) \
5355 + : r==1 ? \
5356 + ( c==0 ? s(x,1) \
5357 + : c==1 ? s(x,2) \
5358 + : c==2 ? s(x,3) \
5359 + : c==3 ? nc==4 ? s(x,0) : s(x,4) \
5360 + : c==4 ? s(x,5) \
5361 + : c==5 ? nc==8 ? s(x,6) : s(x,0) \
5362 + : c==6 ? s(x,7) \
5363 + : s(x,0)) \
5364 + : r==2 ? \
5365 + ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
5366 + : c==1 ? nc==8 ? s(x,4) : s(x,3) \
5367 + : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
5368 + : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
5369 + : c==4 ? nc==8 ? s(x,7) : s(x,0) \
5370 + : c==5 ? nc==8 ? s(x,0) : s(x,1) \
5371 + : c==6 ? s(x,1) \
5372 + : s(x,2)) \
5373 + : \
5374 + ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
5375 + : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
5376 + : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
5377 + : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
5378 + : c==4 ? nc==8 ? s(x,0) : s(x,1) \
5379 + :