libparted: add support for Linux software RAID arrays (mdraid)
[parted/parted.git] / libparted / arch / linux.c
1 /* libparted - a library for manipulating disk partitions
2     Copyright (C) 1999-2010 Free Software Foundation, Inc.
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 3 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #define PROC_DEVICES_BUFSIZ 16384
19
20 #include <config.h>
21 #include <arch/linux.h>
22
23 #include <parted/parted.h>
24 #include <parted/debug.h>
25 #if defined __s390__ || defined __s390x__
26 #include <parted/fdasd.h>
27 #endif
28
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <libgen.h>
33 #include <stdio.h>
34 #include <syscall.h>
35 #include <unistd.h>
36 #include <stdbool.h>
37 #include <dirent.h>
38 #include <sys/ioctl.h>
39 #include <sys/stat.h>
40 #include <sys/types.h>
41 #include <sys/utsname.h>        /* for uname() */
42 #include <scsi/scsi.h>
43 #ifdef ENABLE_DEVICE_MAPPER
44 #include <libdevmapper.h>
45 #endif
46
47 #include "../architecture.h"
48 #include "dirname.h"
49
50 #if ENABLE_NLS
51 #  include <libintl.h>
52 #  define _(String) dgettext (PACKAGE, String)
53 #else
54 #  define _(String) (String)
55 #endif /* ENABLE_NLS */
56
57 #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
58
59 #ifndef __NR__llseek
60 #define __NR__llseek 140
61 #endif
62
63 #ifndef SCSI_IOCTL_SEND_COMMAND
64 #define SCSI_IOCTL_SEND_COMMAND 1
65 #endif
66
67 /* from <linux/hdreg.h> */
68 #define HDIO_GETGEO             0x0301  /* get device geometry */
69 #define HDIO_GET_IDENTITY       0x030d  /* get IDE identification info */
70
71 #define RD_MODE (O_RDONLY)
72 #define WR_MODE (O_WRONLY)
73 #define RW_MODE (O_RDWR)
74
/* CHS geometry record that the HDIO_GETGEO ioctl fills in.  The field
   order and widths must not change: the kernel writes into this. */
struct hd_geometry {
        unsigned char   heads;          /* head count */
        unsigned char   sectors;        /* sectors per track */
        unsigned short  cylinders;      /* cylinder count */
        unsigned long   start;          /* presumably the starting sector;
                                           unused by the code in this file */
};
81
/* Bit-field view of the ATA-7 sector-size information embedded in
   struct hd_driveid.

   The members are declared unsigned on purpose: whether a plain `int'
   bit-field is signed is implementation-defined, and a signed 1-bit
   field can only hold 0 and -1 while a signed 4-bit field turns
   multipliers >= 8 into negative numbers.  Using `unsigned int' keeps
   the storage layout (32 bits total) and makes the values read back
   as written. */
struct ata7_sectinfo {
        unsigned int    valid1:1;       /* validity flag 1 */
        unsigned int    valid2:1;       /* validity flag 2 */
        unsigned int    rsv:26;         /* reserved */
        unsigned int    multiplier:4;   /* logical sectors per physical
                                           sector */
};
88
89 /* structure returned by HDIO_GET_IDENTITY, as per ANSI ATA2 rev.2f spec */
90 struct hd_driveid {
91         unsigned short  config;         /* lots of obsolete bit flags */
92         unsigned short  cyls;           /* "physical" cyls */
93         unsigned short  reserved2;      /* reserved (word 2) */
94         unsigned short  heads;          /* "physical" heads */
95         unsigned short  track_bytes;    /* unformatted bytes per track */
96         unsigned short  sector_bytes;   /* unformatted bytes per sector */
97         unsigned short  sectors;        /* "physical" sectors per track */
98         unsigned short  vendor0;        /* vendor unique */
99         unsigned short  vendor1;        /* vendor unique */
100         unsigned short  vendor2;        /* vendor unique */
101         unsigned char   serial_no[20];  /* 0 = not_specified */
102         unsigned short  buf_type;
103         unsigned short  buf_size;       /* 512 byte increments;
104                                                          0 = not_specified */
105         unsigned short  ecc_bytes;      /* for r/w long cmds;
106                                                          0 = not_specified */
107         unsigned char   fw_rev[8];      /* 0 = not_specified */
108         char            model[40];      /* 0 = not_specified */
109         unsigned char   max_multsect;   /* 0=not_implemented */
110         unsigned char   vendor3;        /* vendor unique */
111         unsigned short  dword_io;       /* 0=not_implemented; 1=implemented */
112         unsigned char   vendor4;        /* vendor unique */
113         unsigned char   capability;     /* bits 0:DMA 1:LBA 2:IORDYsw
114                                                 3:IORDYsup*/
115         unsigned short  reserved50;     /* reserved (word 50) */
116         unsigned char   vendor5;        /* vendor unique */
117         unsigned char   tPIO;           /* 0=slow, 1=medium, 2=fast */
118         unsigned char   vendor6;        /* vendor unique */
119         unsigned char   tDMA;           /* 0=slow, 1=medium, 2=fast */
120         unsigned short  field_valid;    /* bits 0:cur_ok 1:eide_ok */
121         unsigned short  cur_cyls;       /* logical cylinders */
122         unsigned short  cur_heads;      /* logical heads */
123         unsigned short  cur_sectors;    /* logical sectors per track */
124         unsigned short  cur_capacity0;  /* logical total sectors on drive */
125         unsigned short  cur_capacity1;  /*  (2 words, misaligned int)     */
126         unsigned char   multsect;       /* current multiple sector count */
127         unsigned char   multsect_valid; /* when (bit0==1) multsect is ok */
128         unsigned int    lba_capacity;   /* total number of sectors */
129         unsigned short  dma_1word;      /* single-word dma info */
130         unsigned short  dma_mword;      /* multiple-word dma info */
131         unsigned short  eide_pio_modes; /* bits 0:mode3 1:mode4 */
132         unsigned short  eide_dma_min;   /* min mword dma cycle time (ns) */
133         unsigned short  eide_dma_time;  /* recommended mword dma cycle
134                                            time (ns) */
135         unsigned short  eide_pio;       /* min cycle time (ns), no IORDY  */
136         unsigned short  eide_pio_iordy; /* min cycle time (ns), with IORDY */
137         unsigned short  words69_70[2];  /* reserved words 69-70 */
138         /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */
139         unsigned short  words71_74[4];  /* reserved words 71-74 */
140         unsigned short  queue_depth;    /*  */
141         unsigned short  words76_79[4];  /* reserved words 76-79 */
142         unsigned short  major_rev_num;  /*  */
143         unsigned short  minor_rev_num;  /*  */
144         unsigned short  command_set_1;  /* bits 0:Smart 1:Security 2:Removable
145                                                 3:PM */
146         unsigned short  command_set_2;  /* bits 14:Smart Enabled 13:0 zero */
147         unsigned short  cfsse;          /* command set-feature supported
148                                            extensions */
149         unsigned short  cfs_enable_1;   /* command set-feature enabled */
150         unsigned short  cfs_enable_2;   /* command set-feature enabled */
151         unsigned short  csf_default;    /* command set-feature default */
152         unsigned short  dma_ultra;      /*  */
153         unsigned short  word89;         /* reserved (word 89) */
154         unsigned short  word90;         /* reserved (word 90) */
155         unsigned short  CurAPMvalues;   /* current APM values */
156         unsigned short  word92;         /* reserved (word 92) */
157         unsigned short  hw_config;      /* hardware config */
158         unsigned short  words94_105[12];/* reserved words 94-105 */
159         struct ata7_sectinfo ata7_sectinfo; /* ATAPI/ATA7 physical and logical
160                                                sector size */
161         unsigned short  words107_116[10];/* reserved words 107-116 */
162         unsigned int    logical_sectsize;/* ATAPI/ATA7 logical sector size */
163         unsigned short  words119_125[7];/* reserved words 119-125 */
164         unsigned short  last_lun;       /* reserved (word 126) */
165         unsigned short  word127;        /* reserved (word 127) */
166         unsigned short  dlf;            /* device lock function
167                                          * 15:9 reserved
168                                          * 8    security level 1:max 0:high
169                                          * 7:6  reserved
170                                          * 5    enhanced erase
171                                          * 4    expire
172                                          * 3    frozen
173                                          * 2    locked
174                                          * 1    en/disabled
175                                          * 0    capability
176                                          */
177         unsigned short  csfo;           /* current set features options
178                                          * 15:4 reserved
179                                          * 3    auto reassign
180                                          * 2    reverting
181                                          * 1    read-look-ahead
182                                          * 0    write cache
183                                          */
184         unsigned short  words130_155[26];/* reserved vendor words 130-155 */
185         unsigned short  word156;
186         unsigned short  words157_159[3]; /* reserved vendor words 157-159 */
187         unsigned short  words160_255[95];/* reserved words 160-255 */
188 };
189
190 /* from <linux/fs.h> */
191 #define BLKRRPART  _IO(0x12,95) /* re-read partition table */
192 #define BLKGETSIZE _IO(0x12,96) /* return device size */
193 #define BLKFLSBUF  _IO(0x12,97) /* flush buffer cache */
194 #define BLKSSZGET  _IO(0x12,104) /* get block device sector size */
195 #define BLKGETLASTSECT  _IO(0x12,108) /* get last sector of block device */
196 #define BLKSETLASTSECT  _IO(0x12,109) /* set last sector of block device */
197
198 /* return device size in bytes (u64 *arg) */
199 #define BLKGETSIZE64 _IOR(0x12,114,size_t)
200
/* Argument record for block-level ioctls: a block number plus a
   caller-supplied buffer and its length.  (Presumably used with
   BLKGETLASTSECT / BLKSETLASTSECT -- confirm against the callers.) */
struct blkdev_ioctl_param {
        unsigned int    block;          /* block number */
        size_t          content_length; /* size of block_contents in bytes */
        char *          block_contents; /* data buffer */
};
206
207 /* from <linux/major.h> */
208 #define IDE0_MAJOR              3
209 #define IDE1_MAJOR              22
210 #define IDE2_MAJOR              33
211 #define IDE3_MAJOR              34
212 #define IDE4_MAJOR              56
213 #define IDE5_MAJOR              57
214 #define SCSI_CDROM_MAJOR        11
215 #define SCSI_DISK0_MAJOR        8
216 #define SCSI_DISK1_MAJOR        65
217 #define SCSI_DISK2_MAJOR        66
218 #define SCSI_DISK3_MAJOR        67
219 #define SCSI_DISK4_MAJOR        68
220 #define SCSI_DISK5_MAJOR        69
221 #define SCSI_DISK6_MAJOR        70
222 #define SCSI_DISK7_MAJOR        71
223 #define COMPAQ_SMART2_MAJOR     72
224 #define COMPAQ_SMART2_MAJOR1    73
225 #define COMPAQ_SMART2_MAJOR2    74
226 #define COMPAQ_SMART2_MAJOR3    75
227 #define COMPAQ_SMART2_MAJOR4    76
228 #define COMPAQ_SMART2_MAJOR5    77
229 #define COMPAQ_SMART2_MAJOR6    78
230 #define COMPAQ_SMART2_MAJOR7    79
231 #define COMPAQ_SMART_MAJOR      104
232 #define COMPAQ_SMART_MAJOR1     105
233 #define COMPAQ_SMART_MAJOR2     106
234 #define COMPAQ_SMART_MAJOR3     107
235 #define COMPAQ_SMART_MAJOR4     108
236 #define COMPAQ_SMART_MAJOR5     109
237 #define COMPAQ_SMART_MAJOR6     110
238 #define COMPAQ_SMART_MAJOR7     111
239 #define DAC960_MAJOR            48
240 #define ATARAID_MAJOR           114
241 #define I2O_MAJOR1              80
242 #define I2O_MAJOR2              81
243 #define I2O_MAJOR3              82
244 #define I2O_MAJOR4              83
245 #define I2O_MAJOR5              84
246 #define I2O_MAJOR6              85
247 #define I2O_MAJOR7              86
248 #define I2O_MAJOR8              87
249 #define UBD_MAJOR               98
250 #define DASD_MAJOR              94
251 #define VIODASD_MAJOR           112
252 #define AOE_MAJOR               152
253 #define SX8_MAJOR1              160
254 #define SX8_MAJOR2              161
255 #define XVD_MAJOR               202
256 #define SDMMC_MAJOR             179
257 #define LOOP_MAJOR              7
258 #define MD_MAJOR                9
259
260 #define SCSI_BLK_MAJOR(M) (                                             \
261                 (M) == SCSI_DISK0_MAJOR                                 \
262                 || (M) == SCSI_CDROM_MAJOR                              \
263                 || ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR))
264
265 /* Maximum number of partitions supported by linux. */
266 #define MAX_NUM_PARTS           64
267
268 static char* _device_get_part_path (PedDevice* dev, int num);
269 static int _partition_is_mounted_by_path (const char* path);
270
/* Read everything that can be read from FD into a newly allocated,
   NUL-terminated buffer and store it in *BUF.

   Returns the number of bytes read (not counting the terminating NUL).
   On success the caller owns *BUF and must free() it.  On failure the
   function returns -1, leaves errno describing the problem and sets
   *BUF to NULL, so the caller never sees a dangling pointer.

   A read error that occurs after some data has already been collected
   is not reported: the data gathered so far is returned. */
static int
_read_fd (int fd, char **buf)
{
        size_t  capacity = PROC_DEVICES_BUFSIZ;
        int     filesize = 0;
        int     s;
        int     saved_errno;
        char*   resized;

        *buf = malloc (capacity);
        if (*buf == NULL)
                return -1;

        for (;;) {
                /* The buffer always has room for one more full chunk
                   beyond the FILESIZE bytes gathered so far. */
                s = read (fd, *buf + filesize, PROC_DEVICES_BUFSIZ);
                if (s < 0 && errno == EINTR)
                        continue;       /* interrupted: just try again */
                /* stop on error or end of file */
                if (s <= 0)
                        break;

                filesize += s;
                capacity += s;
                resized = realloc (*buf, capacity);
                if (resized == NULL)
                        goto fail;
                *buf = resized;
        }

        if (filesize == 0 && s < 0)
                goto fail;

        /* Trim the buffer to its final size.  This is best effort only:
           the current allocation is already large enough for the data
           plus the terminator, so a failed shrink is harmless. */
        resized = realloc (*buf, (size_t) filesize + 1);
        if (resized != NULL)
                *buf = resized;
        (*buf)[filesize] = '\0';

        return filesize;

fail:
        saved_errno = errno;
        free (*buf);
        *buf = NULL;
        errno = saved_errno;
        return -1;
}
319
/* Report whether /proc/devices lists MAJOR as a block device whose
   driver name is exactly TYPE.

   Only complete (newline-terminated) lines that follow the
   "Block devices:" header are examined.  A line matches when the text
   after its last space equals TYPE and the number at its start equals
   MAJOR.  Returns 1 on a match, 0 otherwise (including when the file
   cannot be opened or read). */
static int
_major_type_in_devices (int major, const char* type)
{
        char*   contents = NULL;
        char*   cursor;
        char*   eol;
        int     in_block_section = 0;
        int     found = 0;
        int     fd;

        fd = open ("/proc/devices", O_RDONLY);
        if (fd < 0)
                return 0;

        if (_read_fd(fd, &contents) < 0) {
                close(fd);
                return 0;
        }

        for (cursor = contents;
             (eol = strchr(cursor, '\n')) != NULL;
             cursor = eol + 1) {
                char*   last_space;
                int     listed_major;

                /* Terminate the current line so it can be handled as a
                   string; the buffer is private and freed below. */
                *eol = '\0';

                if (!in_block_section) {
                        /* Skip everything up to and including the header. */
                        if (strncmp(cursor, "Block devices:", 14) == 0)
                                in_block_section = 1;
                        continue;
                }

                last_space = strrchr(cursor, ' ');
                if (last_space == NULL || strcmp(last_space + 1, type) != 0)
                        continue;

                listed_major = strtol(cursor, NULL, 10);
                if (listed_major == major) {
                        found = 1;
                        break;
                }
        }

        free(contents);
        close(fd);
        return found;
}
374
/* Return 1 when MAJOR is one of the six IDE block majors
   (IDE0_MAJOR .. IDE5_MAJOR), 0 otherwise. */
static int
_is_ide_major (int major)
{
        return major == IDE0_MAJOR
            || major == IDE1_MAJOR
            || major == IDE2_MAJOR
            || major == IDE3_MAJOR
            || major == IDE4_MAJOR
            || major == IDE5_MAJOR;
}
391
/* Return 1 when MAJOR lies in either Compaq Smart Array range
   (COMPAQ_SMART2_MAJOR..COMPAQ_SMART2_MAJOR7 or
   COMPAQ_SMART_MAJOR..COMPAQ_SMART_MAJOR7), 0 otherwise. */
static int
_is_cpqarray_major (int major)
{
        if (major >= COMPAQ_SMART2_MAJOR && major <= COMPAQ_SMART2_MAJOR7)
                return 1;

        return major >= COMPAQ_SMART_MAJOR && major <= COMPAQ_SMART_MAJOR7;
}
398
/* Return 1 when MAJOR falls within I2O_MAJOR1..I2O_MAJOR8 (inclusive),
   0 otherwise. */
static int
_is_i2o_major (int major)
{
        const int below = major < I2O_MAJOR1;
        const int above = major > I2O_MAJOR8;

        return !(below || above);
}
404
/* Return 1 when MAJOR falls within SX8_MAJOR1..SX8_MAJOR2 (inclusive),
   0 otherwise. */
static int
_is_sx8_major (int major)
{
        const int below = major < SX8_MAJOR1;
        const int above = major > SX8_MAJOR2;

        return !(below || above);
}
410
/* Return nonzero when /proc/devices registers MAJOR as a "virtblk"
   block device.  The major is looked up at run time via
   _major_type_in_devices() rather than compared against a constant. */
static int
_is_virtblk_major (int major)
{
        const int is_virtblk = _major_type_in_devices (major, "virtblk");

        return is_virtblk;
}
416
417 #ifdef ENABLE_DEVICE_MAPPER
/* Return nonzero when /proc/devices registers MAJOR as a
   "device-mapper" block device.  The major is looked up at run time
   via _major_type_in_devices() rather than compared against a
   constant. */
static int
_is_dm_major (int major)
{
        const int is_dm = _major_type_in_devices (major, "device-mapper");

        return is_dm;
}
423
424 static int
425 _dm_maptype (PedDevice *dev)
426 {
427         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
428         struct dm_task *dmt;
429         uint64_t start, length;
430         char *target_type = NULL;
431         char *params;
432         int r = -1;
433         const char* dev_dir = getenv ("DM_DEV_DIR");
434
435         if (dev_dir && *dev_dir && !dm_set_dev_dir(dev_dir))
436                 return r;
437
438         if (!(dmt = dm_task_create(DM_DEVICE_TABLE)))
439                 return r;
440
441         if (!dm_task_set_major_minor(dmt, arch_specific->major,
442                                      arch_specific->minor, 0))
443                 goto bad;
444
445         dm_task_no_open_count(dmt);
446
447         if (!dm_task_run(dmt))
448                 goto bad;
449
450         dm_get_next_target(dmt, NULL, &start, &length, &target_type, &params);
451
452         arch_specific->dmtype = strdup(target_type ? target_type : "NO-TARGET");
453         if (arch_specific->dmtype == NULL)
454                 goto bad;
455         r = 0;
456 bad:
457         dm_task_destroy(dmt);
458         return r;
459 }
460
461
/* Walk /dev/mapper and hand every entry whose device major is a
   device-mapper major (per _is_dm_major()) to _ped_device_probe().

   Returns 0 when /dev/mapper cannot be opened and 1 otherwise. */
static int
_probe_dm_devices ()
{
       DIR*            dir;
       struct dirent*  entry;
       char            path [512];     /* readdir(3) claims d_name[256] */
       struct stat     info;

       dir = opendir ("/dev/mapper");
       if (dir == NULL)
               return 0;

       for (entry = readdir (dir); entry != NULL; entry = readdir (dir)) {
               const char* name = entry->d_name;

               /* Ignore the self and parent directory entries. */
               if (!strcmp (name, ".") || !strcmp (name, ".."))
                       continue;

               snprintf (path, sizeof (path), "/dev/mapper/%s", name);

               /* Entries that cannot be stat()ed are silently skipped. */
               if (stat (path, &info) == 0
                   && _is_dm_major(major(info.st_rdev)))
                       _ped_device_probe (path);
       }

       closedir (dir);
       return 1;
}
494 #endif
495
496 static int
497 _device_stat (PedDevice* dev, struct stat * dev_stat)
498 {
499         PED_ASSERT (dev != NULL, return 0);
500         PED_ASSERT (!dev->external_mode, return 0);
501
502         while (1) {
503                 if (!stat (dev->path, dev_stat)) {
504                         return 1;
505                 } else {
506                         if (ped_exception_throw (
507                                 PED_EXCEPTION_ERROR,
508                                 PED_EXCEPTION_RETRY_CANCEL,
509                                 _("Could not stat device %s - %s."),
510                                 dev->path,
511                                 strerror (errno))
512                                         != PED_EXCEPTION_RETRY)
513                                 return 0;
514                 }
515         }
516 }
517
518 static int
519 _device_probe_type (PedDevice* dev)
520 {
521         struct stat             dev_stat;
522         int                     dev_major;
523         int                     dev_minor;
524         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
525
526         if (!_device_stat (dev, &dev_stat))
527                 return 0;
528
529         if (!S_ISBLK(dev_stat.st_mode)) {
530                 dev->type = PED_DEVICE_FILE;
531                 return 1;
532         }
533
534         arch_specific->major = dev_major = major (dev_stat.st_rdev);
535         arch_specific->minor = dev_minor = minor (dev_stat.st_rdev);
536
537         if (SCSI_BLK_MAJOR (dev_major) && (dev_minor % 0x10 == 0)) {
538                 dev->type = PED_DEVICE_SCSI;
539         } else if (_is_ide_major (dev_major) && (dev_minor % 0x40 == 0)) {
540                 dev->type = PED_DEVICE_IDE;
541         } else if (dev_major == DAC960_MAJOR && (dev_minor % 0x8 == 0)) {
542                 dev->type = PED_DEVICE_DAC960;
543         } else if (dev_major == ATARAID_MAJOR && (dev_minor % 0x10 == 0)) {
544                 dev->type = PED_DEVICE_ATARAID;
545         } else if (dev_major == AOE_MAJOR && (dev_minor % 0x10 == 0)) {
546                 dev->type = PED_DEVICE_AOE;
547         } else if (dev_major == DASD_MAJOR && (dev_minor % 0x4 == 0)) {
548                 dev->type = PED_DEVICE_DASD;
549         } else if (dev_major == VIODASD_MAJOR && (dev_minor % 0x8 == 0)) {
550                 dev->type = PED_DEVICE_VIODASD;
551         } else if (_is_sx8_major(dev_major) && (dev_minor % 0x20 == 0)) {
552                 dev->type = PED_DEVICE_SX8;
553         } else if (_is_i2o_major (dev_major) && (dev_minor % 0x10 == 0)) {
554                 dev->type = PED_DEVICE_I2O;
555         } else if (_is_cpqarray_major (dev_major) && (dev_minor % 0x10 == 0)) {
556                 dev->type = PED_DEVICE_CPQARRAY;
557         } else if (dev_major == UBD_MAJOR && (dev_minor % 0x10 == 0)) {
558                 dev->type = PED_DEVICE_UBD;
559 #ifdef ENABLE_DEVICE_MAPPER
560         } else if (_is_dm_major(dev_major)) {
561                 dev->type = PED_DEVICE_DM;
562                 if (_dm_maptype(dev)) {
563                         ped_exception_throw (
564                                 PED_EXCEPTION_BUG,
565                                 PED_EXCEPTION_CANCEL,
566                                 _("Unable to determine the dm type of %s."),
567                                 dev->path);
568                 }
569 #endif
570         } else if (dev_major == XVD_MAJOR && (dev_minor % 0x10 == 0)) {
571                 dev->type = PED_DEVICE_XVD;
572         } else if (dev_major == SDMMC_MAJOR && (dev_minor % 0x08 == 0)) {
573                 dev->type = PED_DEVICE_SDMMC;
574         } else if (_is_virtblk_major(dev_major)) {
575                 dev->type = PED_DEVICE_VIRTBLK;
576         } else if (dev_major == LOOP_MAJOR) {
577                 dev->type = PED_DEVICE_FILE;
578         } else if (dev_major == MD_MAJOR) {
579                 dev->type = PED_DEVICE_MD;
580         } else {
581                 dev->type = PED_DEVICE_UNKNOWN;
582         }
583
584         return 1;
585 }
586
/* Return the running kernel's version encoded with KERNEL_VERSION(),
   or 0 when it cannot be determined.  The answer is computed once and
   cached in a function-local static.

   Both three-component ("2.6.32-...") and two-component ("3.0")
   release strings are accepted; a missing third component counts as 0.
   The third component is capped at 255 so that it cannot spill into
   the minor field of the encoded value. */
static int
_get_linux_version (void)
{
        static int kver = -1;

        struct utsname uts;
        /* unsigned to match the %u conversions below */
        unsigned int ver_major = 0;
        unsigned int ver_minor = 0;
        unsigned int ver_patch = 0;
        int fields;

        if (kver != -1)
                return kver;

        if (uname (&uts))
                return kver = 0;

        fields = sscanf (uts.release, "%u.%u.%u",
                         &ver_major, &ver_minor, &ver_patch);
        if (fields < 2)
                return kver = 0;
        if (fields == 2)
                ver_patch = 0;
        if (ver_patch > 255)
                ver_patch = 255;

        return kver = (int) KERNEL_VERSION (ver_major, ver_minor, ver_patch);
}
607
/* Return 1 when the running kernel is version 2.6.0 or later and 0
   otherwise.  The answer is computed on the first call and cached. */
static int
_have_kern26 ()
{
        static int have_kern26 = -1;

        if (have_kern26 == -1) {
                if (_get_linux_version() >= KERNEL_VERSION (2,6,0))
                        have_kern26 = 1;
                else
                        have_kern26 = 0;
        }

        return have_kern26;
}
620
621 #if USE_BLKID
622 static void
623 get_blkid_topology (LinuxSpecific *arch_specific)
624 {
625         arch_specific->probe = blkid_new_probe ();
626         if (!arch_specific->probe)
627                 return;
628
629         if (blkid_probe_set_device(arch_specific->probe,
630                                    arch_specific->fd, 0, 0))
631                 return;
632
633         arch_specific->topology =
634                 blkid_probe_get_topology(arch_specific->probe);
635 }
636 #endif
637
638 static void
639 _device_set_sector_size (PedDevice* dev)
640 {
641         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
642         int sector_size;
643
644         dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
645         dev->phys_sector_size = PED_SECTOR_SIZE_DEFAULT;
646
647         PED_ASSERT (dev->open_count, return);
648
649         if (_get_linux_version() < KERNEL_VERSION (2,3,0)) {
650                 dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
651                 return;
652         }
653
654         if (ioctl (arch_specific->fd, BLKSSZGET, &sector_size)) {
655                 ped_exception_throw (
656                         PED_EXCEPTION_WARNING,
657                         PED_EXCEPTION_OK,
658                         _("Could not determine sector size for %s: %s.\n"
659                           "Using the default sector size (%lld)."),
660                         dev->path, strerror (errno), PED_SECTOR_SIZE_DEFAULT);
661         } else {
662                 dev->sector_size = (long long)sector_size;
663                 dev->phys_sector_size = dev->sector_size;
664         }
665
666 #if USE_BLKID
667         get_blkid_topology(arch_specific);
668         if (!arch_specific->topology) {
669                 dev->phys_sector_size = 0;
670         } else {
671                 dev->phys_sector_size =
672                         blkid_topology_get_physical_sector_size(
673                                 arch_specific->topology);
674         }
675         if (dev->phys_sector_size == 0) {
676                 ped_exception_throw (
677                         PED_EXCEPTION_WARNING,
678                         PED_EXCEPTION_OK,
679                         _("Could not determine physical sector size for %s.\n"
680                           "Using the logical sector size (%lld)."),
681                         dev->path, dev->sector_size);
682                 dev->phys_sector_size = dev->sector_size;
683         }
684 #endif
685
686 #if defined __s390__ || defined __s390x__
687         /* Return PED_SECTOR_SIZE_DEFAULT for DASDs. */
688         if (dev->type == PED_DEVICE_DASD) {
689                 arch_specific->real_sector_size = dev->sector_size;
690                 dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
691         }
692 #endif
693 }
694
/* Return 1 when the running kernel is expected to support the
   BLKGETSIZE64 ioctl: any kernel from 2.5.4 on, or a 2.4 kernel from
   2.4.18 on (i.e. below 2.5.0).  Returns 0 otherwise. */
static int
_kernel_has_blkgetsize64(void)
{
        const int kver = _get_linux_version();
        const int new_enough_25 = kver >= KERNEL_VERSION (2,5,4);
        const int new_enough_24 = kver >= KERNEL_VERSION (2,4,18)
                                  && kver < KERNEL_VERSION (2,5,0);

        return (new_enough_25 || new_enough_24) ? 1 : 0;
}
705
706 /* TODO: do a binary search if BLKGETSIZE doesn't work?! */
707 static PedSector
708 _device_get_length (PedDevice* dev)
709 {
710         unsigned long           size;
711         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
712         uint64_t bytes=0;
713
714
715         PED_ASSERT (dev->open_count > 0, return 0);
716         PED_ASSERT (dev->sector_size % PED_SECTOR_SIZE_DEFAULT == 0, return 0);
717
718         if (_kernel_has_blkgetsize64()) {
719                 if (ioctl(arch_specific->fd, BLKGETSIZE64, &bytes) == 0) {
720                         return bytes / dev->sector_size;
721                 }
722         }
723
724         if (ioctl (arch_specific->fd, BLKGETSIZE, &size)) {
725                 ped_exception_throw (
726                         PED_EXCEPTION_BUG,
727                         PED_EXCEPTION_CANCEL,
728                         _("Unable to determine the size of %s (%s)."),
729                         dev->path,
730                         strerror (errno));
731                 return 0;
732         }
733
734         return size;
735 }
736
737 static int
738 _device_probe_geometry (PedDevice* dev)
739 {
740         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
741         struct stat             dev_stat;
742         struct hd_geometry      geometry;
743
744         if (!_device_stat (dev, &dev_stat))
745                 return 0;
746         PED_ASSERT (S_ISBLK (dev_stat.st_mode), return 0);
747
748         _device_set_sector_size (dev);
749
750         dev->length = _device_get_length (dev);
751         if (!dev->length)
752                 return 0;
753
754         /* The GETGEO ioctl is no longer useful (as of linux 2.6.x).  We could
755          * still use it in 2.4.x, but this is contentious.  Perhaps we should
756          * move to EDD. */
757         dev->bios_geom.sectors = 63;
758         dev->bios_geom.heads = 255;
759         dev->bios_geom.cylinders
760                 = dev->length / (63 * 255);
761
762         /* FIXME: what should we put here?  (TODO: discuss on linux-kernel) */
763         if (!ioctl (arch_specific->fd, HDIO_GETGEO, &geometry)
764                         && geometry.sectors && geometry.heads) {
765                 dev->hw_geom.sectors = geometry.sectors;
766                 dev->hw_geom.heads = geometry.heads;
767                 dev->hw_geom.cylinders
768                         = dev->length / (dev->hw_geom.heads
769                                          * dev->hw_geom.sectors);
770         } else {
771                 dev->hw_geom = dev->bios_geom;
772         }
773
774         return 1;
775 }
776
/* Squeeze the whitespace in STR in place and return a malloc()ed copy
   of the result (NULL if the copy cannot be allocated; the caller
   frees it).

   A whitespace character is kept only when the character that follows
   it is neither whitespace nor the terminating NUL.  Runs of blanks
   therefore collapse to their last blank and trailing blanks vanish;
   a single blank in front of the first word is retained.

   The characters are converted to unsigned char before being passed to
   isspace(), because calling it with a negative plain-char value (any
   byte >= 0x80 where char is signed) is undefined behaviour. */
static char*
strip_name(char* str)
{
        size_t  in;
        size_t  out = 0;

        for (in = 0; str[in] != '\0'; in++) {
                const unsigned char cur = (unsigned char) str[in];
                const unsigned char next = (unsigned char) str[in + 1];

                if (!isspace (cur) || (next != '\0' && !isspace (next)))
                        str[out++] = str[in];
        }
        str[out] = '\0';

        return strdup (str);
}
793
794 static int
795 init_ide (PedDevice* dev)
796 {
797         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
798         struct stat             dev_stat;
799         struct hd_driveid       hdi;
800         PedExceptionOption      ex_status;
801         char                    hdi_buf[41];
802         int                     sector_multiplier = 0;
803
804         if (!_device_stat (dev, &dev_stat))
805                 goto error;
806
807         if (!ped_device_open (dev))
808                 goto error;
809
810         if (ioctl (arch_specific->fd, HDIO_GET_IDENTITY, &hdi)) {
811                 ex_status = ped_exception_throw (
812                                 PED_EXCEPTION_WARNING,
813                                 PED_EXCEPTION_IGNORE_CANCEL,
814                                 _("Could not get identity of device %s - %s"),
815                                 dev->path, strerror (errno));
816                 switch (ex_status) {
817                         case PED_EXCEPTION_CANCEL:
818                                 goto error_close_dev;
819
820                         case PED_EXCEPTION_UNHANDLED:
821                                 ped_exception_catch ();
822                         case PED_EXCEPTION_IGNORE:
823                                 dev->model = strdup(_("Generic IDE"));
824                                 break;
825                         default:
826                                 PED_ASSERT (0, (void) 0);
827                                 break;
828                 }
829         } else {
830                 /* hdi.model is not guaranteed to be NULL terminated */
831                 memcpy (hdi_buf, hdi.model, 40);
832                 hdi_buf[40] = '\0';
833                 dev->model = strip_name (hdi_buf);
834
835                 if (!hdi.ata7_sectinfo.valid1 && hdi.ata7_sectinfo.valid2)
836                         sector_multiplier = hdi.ata7_sectinfo.multiplier;
837                 else
838                         sector_multiplier = 1;
839
840                 if (sector_multiplier != 1) {
841                         ex_status = ped_exception_throw (
842                                 PED_EXCEPTION_WARNING,
843                                 PED_EXCEPTION_IGNORE_CANCEL,
844                                 _("Device %s has multiple (%d) logical sectors "
845                                   "per physical sector.\n"
846                                   "GNU Parted supports this EXPERIMENTALLY for "
847                                   "some special disk label/file system "
848                                   "combinations, e.g. GPT and ext2/3.\n"
849                                   "Please consult the web site for up-to-date "
850                                   "information."),
851                                 dev->path, sector_multiplier);
852
853                         switch (ex_status) {
854                                 case PED_EXCEPTION_CANCEL:
855                                         goto error_close_dev;
856
857                                 case PED_EXCEPTION_UNHANDLED:
858                                         ped_exception_catch ();
859                                 case PED_EXCEPTION_IGNORE:
860                                         break;
861                                 default:
862                                         PED_ASSERT (0, (void) 0);
863                                         break;
864                         }
865                 }
866
867                 /* XXX sector_size has not been set yet! */
868                 /* dev->phys_sector_size = dev->sector_size
869                    * sector_multiplier;*/
870                 dev->phys_sector_size = PED_SECTOR_SIZE_DEFAULT;
871         }
872
873         if (!_device_probe_geometry (dev))
874                 goto error_close_dev;
875
876         ped_device_close (dev);
877         return 1;
878
879 error_close_dev:
880         ped_device_close (dev);
881 error:
882         return 0;
883 }
884
885 /* This function reads the /sys entry named "file" for device "dev". */
886 static char *
887 read_device_sysfs_file (PedDevice *dev, const char *file)
888 {
889         FILE *f;
890         char name_buf[128];
891         char buf[256];
892
893         snprintf (name_buf, 127, "/sys/block/%s/device/%s",
894                   last_component (dev->path), file);
895
896         if ((f = fopen (name_buf, "r")) == NULL)
897                 return NULL;
898
899         if (fgets (buf, 255, f) == NULL)
900                 return NULL;
901
902         fclose (f);
903         return strip_name (buf);
904 }
905
906 /* This function sends a query to a SCSI device for vendor and product
907  * information.  It uses the deprecated SCSI_IOCTL_SEND_COMMAND to
908  * issue this query.
909  */
910 static int
911 scsi_query_product_info (PedDevice* dev, char **vendor, char **product)
912 {
913         /* The following are defined by the SCSI-2 specification. */
914         typedef struct _scsi_inquiry_cmd
915         {
916                 uint8_t op;
917                 uint8_t lun;          /* bits 5-7 denote the LUN */
918                 uint8_t page_code;
919                 uint8_t reserved;
920                 uint8_t alloc_length;
921                 uint8_t control;
922         } __attribute__((packed)) scsi_inquiry_cmd_t;
923
924         typedef struct _scsi_inquiry_data
925         {
926                 uint8_t peripheral_info;
927                 uint8_t device_info;
928                 uint8_t version_info;
929                 uint8_t _field1;
930                 uint8_t additional_length;
931                 uint8_t _reserved1;
932                 uint8_t _reserved2;
933                 uint8_t _field2;
934                 uint8_t vendor_id[8];
935                 uint8_t product_id[16];
936                 uint8_t product_revision[4];
937                 uint8_t vendor_specific[20];
938                 uint8_t _reserved3[40];
939         } __attribute__((packed)) scsi_inquiry_data_t;
940
941         struct scsi_arg
942         {
943                 unsigned int inlen;
944                 unsigned int outlen;
945
946                 union arg_data
947                 {
948                         scsi_inquiry_data_t out;
949                         scsi_inquiry_cmd_t  in;
950                 } data;
951         } arg;
952
953         LinuxSpecific* arch_specific = LINUX_SPECIFIC (dev);
954         char    buf[32];
955
956         *vendor = NULL;
957         *product = NULL;
958
959         memset (&arg, 0x00, sizeof(struct scsi_arg));
960         arg.inlen  = 0;
961         arg.outlen = sizeof(scsi_inquiry_data_t);
962         arg.data.in.op  = INQUIRY;
963         arg.data.in.lun = dev->host << 5;
964         arg.data.in.alloc_length = sizeof(scsi_inquiry_data_t);
965         arg.data.in.page_code = 0;
966         arg.data.in.reserved = 0;
967         arg.data.in.control = 0;
968
969         if (ioctl (arch_specific->fd, SCSI_IOCTL_SEND_COMMAND, &arg) < 0)
970                 return 0;
971
972         memcpy (buf, arg.data.out.vendor_id, 8);
973         buf[8] = '\0';
974         *vendor = strip_name (buf);
975
976         memcpy (buf, arg.data.out.product_id, 16);
977         buf[16] = '\0';
978         *product = strip_name (buf);
979
980         return 1;
981 }
982
983 /* This function provides the vendor and product name for a SCSI device.
984  * It supports both the modern /sys interface and direct queries
985  * via the deprecated ioctl, SCSI_IOCTL_SEND_COMMAND.
986  */
987 static int
988 scsi_get_product_info (PedDevice* dev, char **vendor, char **product)
989 {
990         *vendor = read_device_sysfs_file (dev, "vendor");
991         *product = read_device_sysfs_file (dev, "model");
992         if (*vendor && *product)
993                 return 1;
994
995         return scsi_query_product_info (dev, vendor, product);
996 }
997
998 static int
999 init_scsi (PedDevice* dev)
1000 {
1001         struct scsi_idlun
1002         {
1003                 uint32_t dev_id;
1004                 uint32_t host_unique_id;
1005         } idlun;
1006
1007         LinuxSpecific* arch_specific = LINUX_SPECIFIC (dev);
1008         char* vendor;
1009         char* product;
1010
1011         if (!ped_device_open (dev))
1012                 goto error;
1013
1014         if (ioctl (arch_specific->fd, SCSI_IOCTL_GET_IDLUN, &idlun) < 0) {
1015                 dev->host = 0;
1016                 dev->did = 0;
1017                 if (ped_exception_throw (
1018                         PED_EXCEPTION_ERROR, PED_EXCEPTION_IGNORE_CANCEL,
1019                         _("Error initialising SCSI device %s - %s"),
1020                         dev->path, strerror (errno))
1021                                 != PED_EXCEPTION_IGNORE)
1022                         goto error_close_dev;
1023                 if (!_device_probe_geometry (dev))
1024                         goto error_close_dev;
1025                 ped_device_close (dev);
1026                 return 1;
1027         }
1028
1029         dev->host = idlun.host_unique_id;
1030         dev->did  = idlun.dev_id;
1031
1032         dev->model = (char*) ped_malloc (8 + 16 + 2);
1033         if (!dev->model)
1034                 goto error_close_dev;
1035
1036         if (scsi_get_product_info (dev, &vendor, &product)) {
1037                 sprintf (dev->model, "%.8s %.16s", vendor, product);
1038                 free (vendor);
1039                 free (product);
1040         } else {
1041                 strcpy (dev->model, "Generic SCSI");
1042         }
1043
1044         if (!_device_probe_geometry (dev))
1045                 goto error_close_dev;
1046
1047         ped_device_close (dev);
1048         return 1;
1049
1050 error_close_dev:
1051         ped_device_close (dev);
1052 error:
1053         return 0;
1054 }
1055
1056 static int
1057 init_file (PedDevice* dev)
1058 {
1059         struct stat     dev_stat;
1060
1061         if (!_device_stat (dev, &dev_stat))
1062                 goto error;
1063         if (!ped_device_open (dev))
1064                 goto error;
1065
1066         dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
1067         char *p = getenv ("PARTED_SECTOR_SIZE");
1068         if (p) {
1069                 int s = atoi (p);
1070                 if (0 < s && s % 512 == 0)
1071                         dev->sector_size = s;
1072         }
1073         dev->phys_sector_size = dev->sector_size;
1074
1075         if (S_ISBLK(dev_stat.st_mode))
1076                 dev->length = _device_get_length (dev);
1077         else
1078                 dev->length = dev_stat.st_size / dev->sector_size;
1079         if (dev->length <= 0) {
1080                 ped_exception_throw (
1081                         PED_EXCEPTION_ERROR,
1082                         PED_EXCEPTION_CANCEL,
1083                         _("The device %s has zero length, and can't possibly "
1084                           "store a file system or partition table.  Perhaps "
1085                           "you selected the wrong device?"),
1086                         dev->path);
1087                 goto error_close_dev;
1088         }
1089
1090         ped_device_close (dev);
1091
1092         dev->bios_geom.cylinders = dev->length / 4 / 32;
1093         dev->bios_geom.heads = 4;
1094         dev->bios_geom.sectors = 32;
1095         dev->hw_geom = dev->bios_geom;
1096         dev->model = strdup ("");
1097
1098         return 1;
1099
1100 error_close_dev:
1101         ped_device_close (dev);
1102 error:
1103         return 0;
1104 }
1105
1106 #if defined __s390__ || defined __s390x__
1107 static int
1108 init_dasd (PedDevice* dev, const char* model_name)
1109 {
1110         struct stat             dev_stat;
1111         struct hd_geometry      geo;
1112         dasd_information_t dasd_info;
1113
1114         if (!_device_stat (dev, &dev_stat))
1115                 goto error;
1116
1117         if (!ped_device_open (dev))
1118                 goto error;
1119
1120         LinuxSpecific* arch_specific = LINUX_SPECIFIC (dev);
1121
1122         PED_ASSERT (S_ISBLK (dev_stat.st_mode), return 0);
1123
1124         _device_set_sector_size (dev);
1125         if (!dev->sector_size)
1126                 goto error_close_dev;
1127
1128         dev->length = _device_get_length (dev);
1129         if (!dev->length)
1130                 goto error_close_dev;
1131
1132         if (!ioctl (arch_specific->fd, HDIO_GETGEO, &geo)) {
1133                 dev->hw_geom.sectors = geo.sectors;
1134                 dev->hw_geom.heads = geo.heads;
1135                 dev->hw_geom.cylinders = dev->length
1136                         / (dev->hw_geom.heads * dev->hw_geom.sectors)
1137                         / (dev->sector_size / PED_SECTOR_SIZE_DEFAULT);
1138                 dev->bios_geom = dev->hw_geom;
1139         } else {
1140                 dev->bios_geom.sectors = 12;
1141                 dev->bios_geom.heads = 15;
1142                 dev->bios_geom.cylinders = dev->length
1143                         / (dev->hw_geom.heads * dev->hw_geom.sectors)
1144                         / (dev->sector_size / PED_SECTOR_SIZE_DEFAULT);
1145                 dev->hw_geom = dev->bios_geom;
1146         }
1147
1148         if (!ioctl(arch_specific->fd, BIODASDINFO, &dasd_info)) {
1149                 arch_specific->devno = dasd_info.devno;
1150         } else {
1151                 arch_specific->devno = arch_specific->major * 256 +
1152                                        arch_specific->minor;
1153         }
1154
1155         dev->model = strdup (model_name);
1156
1157         ped_device_close (dev);
1158         return 1;
1159
1160 error_close_dev:
1161         ped_device_close (dev);
1162 error:
1163         return 0;
1164 }
1165 #endif
1166
1167 static int
1168 init_generic (PedDevice* dev, const char* model_name)
1169 {
1170         struct stat             dev_stat;
1171         PedExceptionOption      ex_status;
1172
1173         if (!_device_stat (dev, &dev_stat))
1174                 goto error;
1175
1176         if (!ped_device_open (dev))
1177                 goto error;
1178
1179         ped_exception_fetch_all ();
1180         if (_device_probe_geometry (dev)) {
1181                 ped_exception_leave_all ();
1182         } else {
1183                 /* hack to allow use of files, for testing */
1184                 ped_exception_catch ();
1185                 ped_exception_leave_all ();
1186
1187                 ex_status = ped_exception_throw (
1188                                 PED_EXCEPTION_WARNING,
1189                                 PED_EXCEPTION_IGNORE_CANCEL,
1190                                 _("Unable to determine geometry of "
1191                                 "file/device %s.  You should not use Parted "
1192                                 "unless you REALLY know what you're doing!"),
1193                                 dev->path);
1194                 switch (ex_status) {
1195                         case PED_EXCEPTION_CANCEL:
1196                                 goto error_close_dev;
1197
1198                         case PED_EXCEPTION_UNHANDLED:
1199                                 ped_exception_catch ();
1200                         case PED_EXCEPTION_IGNORE:
1201                                 break;
1202                         default:
1203                                 PED_ASSERT (0, (void) 0);
1204                                 break;
1205                 }
1206
1207                 /* what should we stick in here? */
1208                 dev->length = dev_stat.st_size / PED_SECTOR_SIZE_DEFAULT;
1209                 dev->bios_geom.cylinders = dev->length / 4 / 32;
1210                 dev->bios_geom.heads = 4;
1211                 dev->bios_geom.sectors = 32;
1212                 dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
1213                 dev->phys_sector_size = PED_SECTOR_SIZE_DEFAULT;
1214         }
1215
1216         dev->model = strdup (model_name);
1217
1218         ped_device_close (dev);
1219         return 1;
1220
1221 error_close_dev:
1222         ped_device_close (dev);
1223 error:
1224         return 0;
1225 }
1226
1227 static int
1228 sdmmc_get_product_info (PedDevice* dev, char **type, char **name)
1229 {
1230         *type = read_device_sysfs_file (dev, "type");
1231         *name = read_device_sysfs_file (dev, "name");
1232         if (*type && *name)
1233                 return 1;
1234
1235         return 0;
1236 }
1237
1238 static int
1239 init_sdmmc (PedDevice* dev)
1240 {
1241         char id[128];
1242         char *type, *name;
1243
1244         if (sdmmc_get_product_info (dev, &type, &name)) {
1245                 snprintf (id, sizeof(id) - 1, "%s %s", type, name);
1246                 free (type);
1247                 free (name);
1248         } else {
1249                 snprintf (id, sizeof(id) - 1, "%s",
1250                           _("Generic SD/MMC Storage Card"));
1251         }
1252         return init_generic(dev, id);
1253 }
1254
1255 static PedDevice*
1256 linux_new (const char* path)
1257 {
1258         PedDevice*      dev;
1259         LinuxSpecific*  arch_specific;
1260
1261         PED_ASSERT (path != NULL, return NULL);
1262
1263         dev = (PedDevice*) ped_malloc (sizeof (PedDevice));
1264         if (!dev)
1265                 goto error;
1266
1267         dev->path = strdup (path);
1268         if (!dev->path)
1269                 goto error_free_dev;
1270
1271         dev->arch_specific
1272                 = (LinuxSpecific*) ped_malloc (sizeof (LinuxSpecific));
1273         if (!dev->arch_specific)
1274                 goto error_free_path;
1275         arch_specific = LINUX_SPECIFIC (dev);
1276         arch_specific->dmtype = NULL;
1277 #if USE_BLKID
1278         arch_specific->probe = NULL;
1279         arch_specific->topology = NULL;
1280 #endif
1281
1282         dev->open_count = 0;
1283         dev->read_only = 0;
1284         dev->external_mode = 0;
1285         dev->dirty = 0;
1286         dev->boot_dirty = 0;
1287
1288         if (!_device_probe_type (dev))
1289                 goto error_free_arch_specific;
1290
1291         switch (dev->type) {
1292         case PED_DEVICE_IDE:
1293                 if (!init_ide (dev))
1294                         goto error_free_arch_specific;
1295                 break;
1296
1297         case PED_DEVICE_SCSI:
1298                 if (!init_scsi (dev))
1299                         goto error_free_arch_specific;
1300                 break;
1301
1302         case PED_DEVICE_DAC960:
1303                 if (!init_generic (dev, _("DAC960 RAID controller")))
1304                         goto error_free_arch_specific;
1305                 break;
1306
1307         case PED_DEVICE_SX8:
1308                 if (!init_generic (dev, _("Promise SX8 SATA Device")))
1309                         goto error_free_arch_specific;
1310                 break;
1311
1312         case PED_DEVICE_AOE:
1313                 if (!init_generic (dev, _("ATA over Ethernet Device")))
1314                     goto error_free_arch_specific;
1315                 break;
1316
1317 #if defined __s390__ || defined __s390x__
1318         case PED_DEVICE_DASD:
1319                 if (!init_dasd (dev, _("IBM S390 DASD drive")))
1320                         goto error_free_arch_specific;
1321                 break;
1322 #endif
1323
1324         case PED_DEVICE_VIODASD:
1325                 if (!init_generic (dev, _("IBM iSeries Virtual DASD")))
1326                         goto error_free_arch_specific;
1327                 break;
1328
1329         case PED_DEVICE_CPQARRAY:
1330                 if (!init_generic (dev, _("Compaq Smart Array")))
1331                         goto error_free_arch_specific;
1332                 break;
1333
1334         case PED_DEVICE_ATARAID:
1335                 if (!init_generic (dev, _("ATARAID Controller")))
1336                         goto error_free_arch_specific;
1337                 break;
1338
1339         case PED_DEVICE_I2O:
1340                 if (!init_generic (dev, _("I2O Controller")))
1341                         goto error_free_arch_specific;
1342                 break;
1343
1344         case PED_DEVICE_UBD:
1345                 if (!init_generic (dev, _("User-Mode Linux UBD")))
1346                         goto error_free_arch_specific;
1347                 break;
1348
1349         case PED_DEVICE_FILE:
1350                 if (!init_file (dev))
1351                         goto error_free_arch_specific;
1352                 break;
1353
1354         case PED_DEVICE_DM:
1355                 {
1356                   char* type;
1357                   if (arch_specific->dmtype == NULL
1358                       || asprintf(&type, _("Linux device-mapper (%s)"),
1359                                   arch_specific->dmtype) == -1)
1360                         goto error_free_arch_specific;
1361                   bool ok = init_generic (dev, type);
1362                   free (type);
1363                   if (!ok)
1364                     goto error_free_arch_specific;
1365                   break;
1366                 }
1367
1368         case PED_DEVICE_XVD:
1369                 if (!init_generic (dev, _("Xen Virtual Block Device")))
1370                         goto error_free_arch_specific;
1371                 break;
1372
1373         case PED_DEVICE_UNKNOWN:
1374                 if (!init_generic (dev, _("Unknown")))
1375                         goto error_free_arch_specific;
1376                 break;
1377
1378         case PED_DEVICE_SDMMC:
1379                 if (!init_sdmmc (dev))
1380                         goto error_free_arch_specific;
1381                 break;
1382         case PED_DEVICE_VIRTBLK:
1383                 if (!init_generic(dev, _("Virtio Block Device")))
1384                         goto error_free_arch_specific;
1385                 break;
1386
1387         case PED_DEVICE_MD:
1388                 if (!init_generic(dev, _("Linux Software RAID Array")))
1389                         goto error_free_arch_specific;
1390                 break;
1391
1392         default:
1393                 ped_exception_throw (PED_EXCEPTION_NO_FEATURE,
1394                                 PED_EXCEPTION_CANCEL,
1395                                 _("ped_device_new()  Unsupported device type"));
1396                 goto error_free_arch_specific;
1397         }
1398         return dev;
1399
1400 error_free_arch_specific:
1401         free (dev->arch_specific);
1402 error_free_path:
1403         free (dev->path);
1404 error_free_dev:
1405         free (dev);
1406 error:
1407         return NULL;
1408 }
1409
1410 static void
1411 linux_destroy (PedDevice* dev)
1412 {
1413         LinuxSpecific *arch_specific = LINUX_SPECIFIC(dev);
1414         void *p = arch_specific->dmtype;
1415
1416 #if USE_BLKID
1417         if (arch_specific->probe)
1418                 blkid_free_probe(arch_specific->probe);
1419 #endif
1420         free (p);
1421         free (dev->arch_specific);
1422         free (dev->path);
1423         free (dev->model);
1424         free (dev);
1425 }
1426
1427 static int
1428 linux_is_busy (PedDevice* dev)
1429 {
1430         int     i;
1431         char*   part_name;
1432
1433         if (_partition_is_mounted_by_path (dev->path))
1434                 return 1;
1435
1436         for (i = 0; i < 32; i++) {
1437                 int status;
1438
1439                 part_name = _device_get_part_path (dev, i);
1440                 if (!part_name)
1441                         return 1;
1442                 status = _partition_is_mounted_by_path (part_name);
1443                 free (part_name);
1444
1445                 if (status)
1446                         return 1;
1447         }
1448
1449         return 0;
1450 }
1451
1452 /* we need to flush the master device, and with kernel < 2.6 all the partition
1453  * devices, because there is no coherency between the caches with old kernels.
1454  * We should only flush unmounted partition devices, because:
1455  *  - there is never a need to flush them (we're not doing IO there)
1456  *  - flushing a device that is mounted causes unnecessary IO, and can
1457  * even screw journaling & friends up.  Even cause oopsen!
1458  */
1459 static void
1460 _flush_cache (PedDevice* dev)
1461 {
1462         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
1463         int             i;
1464
1465         if (dev->read_only)
1466                 return;
1467         dev->dirty = 0;
1468
1469         ioctl (arch_specific->fd, BLKFLSBUF);
1470
1471         /* With linux-2.6.0 and newer, we're done.  */
1472         if (_have_kern26())
1473                 return;
1474
1475         for (i = 1; i < 16; i++) {
1476                 char*           name;
1477                 int             fd;
1478
1479                 name = _device_get_part_path (dev, i);
1480                 if (!name)
1481                         break;
1482                 if (!_partition_is_mounted_by_path (name)) {
1483                         fd = open (name, WR_MODE, 0);
1484                         if (fd > 0) {
1485                                 ioctl (fd, BLKFLSBUF);
1486 retry:
1487                                 if (fsync (fd) < 0 || close (fd) < 0)
1488                                         if (ped_exception_throw (
1489                                                 PED_EXCEPTION_WARNING,
1490                                                 PED_EXCEPTION_RETRY +
1491                                                         PED_EXCEPTION_IGNORE,
1492                                                 _("Error fsyncing/closing %s: %s"),
1493                                                 name, strerror (errno))
1494                                                         == PED_EXCEPTION_RETRY)
1495                                                 goto retry;
1496                         }
1497                 }
1498                 free (name);
1499         }
1500 }
1501
1502 static int
1503 linux_open (PedDevice* dev)
1504 {
1505         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
1506
1507 retry:
1508         arch_specific->fd = open (dev->path, RW_MODE);
1509
1510         if (arch_specific->fd == -1) {
1511                 char*   rw_error_msg = strerror (errno);
1512
1513                 arch_specific->fd = open (dev->path, RD_MODE);
1514
1515                 if (arch_specific->fd == -1) {
1516                         if (ped_exception_throw (
1517                                 PED_EXCEPTION_ERROR,
1518                                 PED_EXCEPTION_RETRY_CANCEL,
1519                                 _("Error opening %s: %s"),
1520                                 dev->path, strerror (errno))
1521                                         != PED_EXCEPTION_RETRY) {
1522                                 return 0;
1523                         } else {
1524                                 goto retry;
1525                         }
1526                 } else {
1527                         ped_exception_throw (
1528                                 PED_EXCEPTION_WARNING,
1529                                 PED_EXCEPTION_OK,
1530                                 _("Unable to open %s read-write (%s).  %s has "
1531                                   "been opened read-only."),
1532                                 dev->path, rw_error_msg, dev->path);
1533                         dev->read_only = 1;
1534                 }
1535         } else {
1536                 dev->read_only = 0;
1537         }
1538
1539         /* With kernels < 2.6 flush cache for cache coherence issues */
1540         if (!_have_kern26())
1541                 _flush_cache (dev);
1542
1543         return 1;
1544 }
1545
1546 static int
1547 linux_refresh_open (PedDevice* dev)
1548 {
1549         return 1;
1550 }
1551
1552 static int
1553 linux_close (PedDevice* dev)
1554 {
1555         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
1556
1557         if (dev->dirty)
1558                 _flush_cache (dev);
1559 retry:
1560         if (fsync (arch_specific->fd) < 0 || close (arch_specific->fd) < 0)
1561                 if (ped_exception_throw (
1562                         PED_EXCEPTION_WARNING,
1563                         PED_EXCEPTION_RETRY + PED_EXCEPTION_IGNORE,
1564                         _("Error fsyncing/closing %s: %s"),
1565                         dev->path, strerror (errno))
1566                                 == PED_EXCEPTION_RETRY)
1567                         goto retry;
1568         return 1;
1569 }
1570
1571 static int
1572 linux_refresh_close (PedDevice* dev)
1573 {
1574         if (dev->dirty)
1575                 _flush_cache (dev);
1576         return 1;
1577 }
1578
1579 #if SIZEOF_OFF_T < 8
1580
1581 static _syscall5(int,_llseek,
1582                  unsigned int, fd,
1583                  unsigned long, offset_high,
1584                  unsigned long, offset_low,
1585                  loff_t*, result,
1586                  unsigned int, origin)
1587
1588 loff_t
1589 llseek (unsigned int fd, loff_t offset, unsigned int whence)
1590 {
1591         loff_t result;
1592         int retval;
1593
1594         retval = _llseek(fd,
1595                          ((unsigned long long)offset) >> 32,
1596                          ((unsigned long long)offset) & 0xffffffff,
1597                          &result,
1598                          whence);
1599         return (retval==-1 ? (loff_t) retval : result);
1600 }
1601
1602 #endif /* SIZEOF_OFF_T < 8 */
1603
1604 static int
1605 _device_seek (const PedDevice* dev, PedSector sector)
1606 {
1607         LinuxSpecific*  arch_specific;
1608
1609         PED_ASSERT (dev->sector_size % PED_SECTOR_SIZE_DEFAULT == 0, return 0);
1610         PED_ASSERT (dev != NULL, return 0);
1611         PED_ASSERT (!dev->external_mode, return 0);
1612
1613         arch_specific = LINUX_SPECIFIC (dev);
1614
1615 #if SIZEOF_OFF_T < 8
1616         if (sizeof (off_t) < 8) {
1617                 loff_t  pos = (loff_t)(sector * dev->sector_size);
1618                 return llseek (arch_specific->fd, pos, SEEK_SET) == pos;
1619         } else
1620 #endif
1621         {
1622                 off_t   pos = sector * dev->sector_size;
1623                 return lseek (arch_specific->fd, pos, SEEK_SET) == pos;
1624         }
1625 }
1626
1627 static int
1628 _read_lastoddsector (const PedDevice* dev, void* buffer)
1629 {
1630         LinuxSpecific*                  arch_specific;
1631         struct blkdev_ioctl_param       ioctl_param;
1632
1633         PED_ASSERT(dev != NULL, return 0);
1634         PED_ASSERT(buffer != NULL, return 0);
1635
1636         arch_specific = LINUX_SPECIFIC (dev);
1637
1638 retry:
1639         ioctl_param.block = 0; /* read the last sector */
1640         ioctl_param.content_length = dev->sector_size;
1641         ioctl_param.block_contents = buffer;
1642
1643         if (ioctl(arch_specific->fd, BLKGETLASTSECT, &ioctl_param) == -1) {
1644                 PedExceptionOption      opt;
1645                 opt = ped_exception_throw (
1646                         PED_EXCEPTION_ERROR,
1647                         PED_EXCEPTION_RETRY_IGNORE_CANCEL,
1648                         _("%s during read on %s"),
1649                         strerror (errno), dev->path);
1650
1651                 if (opt == PED_EXCEPTION_CANCEL)
1652                         return 0;
1653                 if (opt == PED_EXCEPTION_RETRY)
1654                         goto retry;
1655         }
1656
1657         return 1;
1658 }
1659
1660 static int
1661 linux_read (const PedDevice* dev, void* buffer, PedSector start,
1662             PedSector count)
1663 {
1664         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
1665         PedExceptionOption      ex_status;
1666         void*                   diobuf = NULL;
1667
1668         PED_ASSERT (dev != NULL, return 0);
1669         PED_ASSERT (dev->sector_size % PED_SECTOR_SIZE_DEFAULT == 0, return 0);
1670
1671         if (_get_linux_version() < KERNEL_VERSION (2,6,0)) {
1672                 /* Kludge.  This is necessary to read/write the last
1673                    block of an odd-sized disk, until Linux 2.5.x kernel fixes.
1674                 */
1675                 if (dev->type != PED_DEVICE_FILE && (dev->length & 1)
1676                     && start + count - 1 == dev->length - 1)
1677                         return ped_device_read (dev, buffer, start, count - 1)
1678                                 && _read_lastoddsector (
1679                                         dev, (char *) buffer + (count-1) * 512);
1680         }
1681         while (1) {
1682                 if (_device_seek (dev, start))
1683                         break;
1684
1685                 ex_status = ped_exception_throw (
1686                         PED_EXCEPTION_ERROR,
1687                         PED_EXCEPTION_RETRY_IGNORE_CANCEL,
1688                         _("%s during seek for read on %s"),
1689                         strerror (errno), dev->path);
1690
1691                 switch (ex_status) {
1692                         case PED_EXCEPTION_IGNORE:
1693                                 return 1;
1694
1695                         case PED_EXCEPTION_RETRY:
1696                                 break;
1697
1698                         case PED_EXCEPTION_UNHANDLED:
1699                                 ped_exception_catch ();
1700                         case PED_EXCEPTION_CANCEL:
1701                                 return 0;
1702                         default:
1703                                 PED_ASSERT (0, (void) 0);
1704                                 break;
1705                 }
1706         }
1707
1708         size_t read_length = count * dev->sector_size;
1709         if (posix_memalign (&diobuf, dev->sector_size, read_length) != 0)
1710                 return 0;
1711
1712         while (1) {
1713                 ssize_t status = read (arch_specific->fd, diobuf, read_length);
1714                 if (status > 0)
1715                         memcpy(buffer, diobuf, status);
1716                 if (status == (ssize_t) read_length)
1717                         break;
1718                 if (status > 0) {
1719                         read_length -= status;
1720                         buffer = (char *) buffer + status;
1721                         continue;
1722                 }
1723
1724                 ex_status = ped_exception_throw (
1725                         PED_EXCEPTION_ERROR,
1726                         PED_EXCEPTION_RETRY_IGNORE_CANCEL,
1727                         (status == 0
1728                          ? _("end of file while reading %s")
1729                          : _("%s during read on %s")),
1730                         strerror (errno),
1731                         dev->path);
1732
1733                 switch (ex_status) {
1734                         case PED_EXCEPTION_IGNORE:
1735                                 free(diobuf);
1736                                 return 1;
1737
1738                         case PED_EXCEPTION_RETRY:
1739                                 break;
1740
1741                         case PED_EXCEPTION_UNHANDLED:
1742                                 ped_exception_catch ();
1743                         case PED_EXCEPTION_CANCEL:
1744                                 free(diobuf);
1745                                 return 0;
1746                         default:
1747                                 PED_ASSERT (0, (void) 0);
1748                                 break;
1749                 }
1750         }
1751
1752         free (diobuf);
1753
1754         return 1;
1755 }
1756
1757 static int
1758 _write_lastoddsector (PedDevice* dev, const void* buffer)
1759 {
1760         LinuxSpecific*                  arch_specific;
1761         struct blkdev_ioctl_param       ioctl_param;
1762
1763         PED_ASSERT(dev != NULL, return 0);
1764         PED_ASSERT(buffer != NULL, return 0);
1765
1766         arch_specific = LINUX_SPECIFIC (dev);
1767
1768 retry:
1769         ioctl_param.block = 0; /* write the last sector */
1770         ioctl_param.content_length = dev->sector_size;
1771         ioctl_param.block_contents = (void*) buffer;
1772
1773         if (ioctl(arch_specific->fd, BLKSETLASTSECT, &ioctl_param) == -1) {
1774                 PedExceptionOption      opt;
1775                 opt = ped_exception_throw (
1776                         PED_EXCEPTION_ERROR,
1777                         PED_EXCEPTION_RETRY_IGNORE_CANCEL,
1778                         _("%s during write on %s"),
1779                         strerror (errno), dev->path);
1780
1781                 if (opt == PED_EXCEPTION_CANCEL)
1782                         return 0;
1783                 if (opt == PED_EXCEPTION_RETRY)
1784                         goto retry;
1785         }
1786
1787         return 1;
1788 }
1789
1790 static int
1791 linux_write (PedDevice* dev, const void* buffer, PedSector start,
1792              PedSector count)
1793 {
1794         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
1795         PedExceptionOption      ex_status;
1796         void*                   diobuf;
1797         void*                   diobuf_start;
1798
1799         PED_ASSERT(dev->sector_size % PED_SECTOR_SIZE_DEFAULT == 0, return 0);
1800
1801         if (dev->read_only) {
1802                 if (ped_exception_throw (
1803                         PED_EXCEPTION_ERROR,
1804                         PED_EXCEPTION_IGNORE_CANCEL,
1805                         _("Can't write to %s, because it is opened read-only."),
1806                         dev->path)
1807                                 != PED_EXCEPTION_IGNORE)
1808                         return 0;
1809                 else
1810                         return 1;
1811         }
1812
1813         if (_get_linux_version() < KERNEL_VERSION (2,6,0)) {
1814                 /* Kludge.  This is necessary to read/write the last
1815                    block of an odd-sized disk, until Linux 2.5.x kernel fixes.
1816                 */
1817                 if (dev->type != PED_DEVICE_FILE && (dev->length & 1)
1818                     && start + count - 1 == dev->length - 1)
1819                         return ped_device_write (dev, buffer, start, count - 1)
1820                                 && _write_lastoddsector (
1821                                         dev, ((char*) buffer
1822                                               + (count-1) * dev->sector_size));
1823         }
1824         while (1) {
1825                 if (_device_seek (dev, start))
1826                         break;
1827
1828                 ex_status = ped_exception_throw (
1829                         PED_EXCEPTION_ERROR, PED_EXCEPTION_RETRY_IGNORE_CANCEL,
1830                         _("%s during seek for write on %s"),
1831                         strerror (errno), dev->path);
1832
1833                 switch (ex_status) {
1834                         case PED_EXCEPTION_IGNORE:
1835                                 return 1;
1836
1837                         case PED_EXCEPTION_RETRY:
1838                                 break;
1839
1840                         case PED_EXCEPTION_UNHANDLED:
1841                                 ped_exception_catch ();
1842                         case PED_EXCEPTION_CANCEL:
1843                                 return 0;
1844                         default:
1845                                 PED_ASSERT (0, (void) 0);
1846                                 break;
1847                 }
1848         }
1849
1850 #ifdef READ_ONLY
1851         printf ("ped_device_write (\"%s\", %p, %d, %d)\n",
1852                 dev->path, buffer, (int) start, (int) count);
1853 #else
1854         size_t write_length = count * dev->sector_size;
1855         dev->dirty = 1;
1856         if (posix_memalign(&diobuf, dev->sector_size, write_length) != 0)
1857                 return 0;
1858         memcpy(diobuf, buffer, write_length);
1859         diobuf_start = diobuf;
1860         while (1) {
1861                 ssize_t status = write (arch_specific->fd, diobuf, write_length);
1862                 if (status == write_length) break;
1863                 if (status > 0) {
1864                         write_length -= status;
1865                         diobuf = (char *) diobuf + status;
1866                         continue;
1867                 }
1868
1869                 ex_status = ped_exception_throw (
1870                         PED_EXCEPTION_ERROR,
1871                         PED_EXCEPTION_RETRY_IGNORE_CANCEL,
1872                         _("%s during write on %s"),
1873                         strerror (errno), dev->path);
1874
1875                 switch (ex_status) {
1876                         case PED_EXCEPTION_IGNORE:
1877                                 free(diobuf_start);
1878                                 return 1;
1879
1880                         case PED_EXCEPTION_RETRY:
1881                                 break;
1882
1883                         case PED_EXCEPTION_UNHANDLED:
1884                                 ped_exception_catch ();
1885                         case PED_EXCEPTION_CANCEL:
1886                                 free(diobuf_start);
1887                                 return 0;
1888                         default:
1889                                 PED_ASSERT (0, (void) 0);
1890                                 break;
1891                 }
1892         }
1893         free(diobuf_start);
1894 #endif /* !READ_ONLY */
1895         return 1;
1896 }
1897
1898 /* returns the number of sectors that are ok.
1899  */
1900 static PedSector
1901 linux_check (PedDevice* dev, void* buffer, PedSector start, PedSector count)
1902 {
1903         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
1904         PedSector       done = 0;
1905         int             status;
1906         void*           diobuf;
1907
1908         PED_ASSERT(dev != NULL, return 0);
1909
1910         if (!_device_seek (dev, start))
1911                 return 0;
1912
1913         if (posix_memalign(&diobuf, PED_SECTOR_SIZE_DEFAULT,
1914                            count * PED_SECTOR_SIZE_DEFAULT) != 0)
1915                 return 0;
1916
1917         for (done = 0; done < count; done += status / dev->sector_size) {
1918                 status = read (arch_specific->fd, diobuf,
1919                                (size_t) ((count-done) * dev->sector_size));
1920                 if (status > 0)
1921                         memcpy(buffer, diobuf, status);
1922                 if (status < 0)
1923                         break;
1924         }
1925         free(diobuf);
1926
1927         return done;
1928 }
1929
1930 static int
1931 _do_fsync (PedDevice* dev)
1932 {
1933         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
1934         int                     status;
1935         PedExceptionOption      ex_status;
1936
1937         while (1) {
1938                 status = fsync (arch_specific->fd);
1939                 if (status >= 0) break;
1940
1941                 ex_status = ped_exception_throw (
1942                         PED_EXCEPTION_ERROR,
1943                         PED_EXCEPTION_RETRY_IGNORE_CANCEL,
1944                         _("%s during write on %s"),
1945                         strerror (errno), dev->path);
1946
1947                 switch (ex_status) {
1948                         case PED_EXCEPTION_IGNORE:
1949                                 return 1;
1950
1951                         case PED_EXCEPTION_RETRY:
1952                                 break;
1953
1954                         case PED_EXCEPTION_UNHANDLED:
1955                                 ped_exception_catch ();
1956                         case PED_EXCEPTION_CANCEL:
1957                                 return 0;
1958                         default:
1959                                 PED_ASSERT (0, (void) 0);
1960                                 break;
1961                 }
1962         }
1963         return 1;
1964 }
1965
1966 static int
1967 linux_sync (PedDevice* dev)
1968 {
1969         PED_ASSERT (dev != NULL, return 0);
1970         PED_ASSERT (!dev->external_mode, return 0);
1971
1972         if (dev->read_only)
1973                 return 1;
1974         if (!_do_fsync (dev))
1975                 return 0;
1976         _flush_cache (dev);
1977         return 1;
1978 }
1979
1980 static int
1981 linux_sync_fast (PedDevice* dev)
1982 {
1983         PED_ASSERT (dev != NULL, return 0);
1984         PED_ASSERT (!dev->external_mode, return 0);
1985
1986         if (dev->read_only)
1987                 return 1;
1988         if (!_do_fsync (dev))
1989                 return 0;
1990         /* no cache flush... */
1991         return 1;
1992 }
1993
/* Return 1 if "CH is a decimal digit" equals NEED_DIGIT (0 or 1),
 * 0 otherwise.
 */
static inline int
_compare_digit_state (char ch, int need_digit)
{
        /* isdigit() is only defined for unsigned char values and EOF, so
           a plain (possibly negative) char must be converted first.  */
        return !!isdigit ((unsigned char) ch) == need_digit;
}
1999
/* Test NAME, from its last '/' onwards (the whole string when it has no
 * '/'), against the regexp "[^0-9]+[0-9]+[^0-9]+[0-9]+$".
 * Motivation: accept devices looking like /dev/rd/c0d0, but
 * not looking like /dev/hda1 and /dev/rd/c0d0p1
 * Returns 1 on a match, 0 otherwise.
 */
static int
_match_rd_device (const char* name)
{
        const char*     last_slash = strrchr (name, '/');
        const char*     cursor = last_slash ? last_slash : name;
        int             run;

        /* four consecutive runs: non-digits, digits, non-digits, digits */
        for (run = 0; run < 4; run++) {
                int digits_expected = run & 1;

                for (;;) {
                        /* every run needs at least one character, and the
                           string must not end inside a run being consumed */
                        if (*cursor == '\0')
                                return 0;
                        if (!_compare_digit_state (*cursor, digits_expected))
                                return 0;
                        cursor++;
                        if (!_compare_digit_state (*cursor, digits_expected))
                                break;
                }
        }

        /* the last run of digits has to reach the end of the string */
        return *cursor == '\0';
}
2032
/* Probe every whole device listed in /proc/partitions.
 *
 * The first two lines of the file are skipped; each following line is
 * parsed as "major minor size name" and parsing stops at the first line
 * that does not fit.  Names that end in a digit and do not match
 * _match_rd_device are taken to be partitions and skipped.
 *
 * Returns 1 if the file could be opened and its two leading lines read,
 * 0 otherwise.
 */
static int
_probe_proc_partitions ()
{
        FILE*           proc_part_file;
        int             major, minor, size;
        char            buf [512];
        char            part_name [256];
        /* room for "/dev/" followed by the longest possible part_name */
        char            dev_name [sizeof ("/dev/") + sizeof (part_name)];
        int             result = 0;

        proc_part_file = fopen ("/proc/partitions", "r");
        if (!proc_part_file)
                return 0;

        /* skip the two leading lines; close the file if that fails */
        if (fgets (buf, sizeof (buf), proc_part_file) == NULL
            || fgets (buf, sizeof (buf), proc_part_file) == NULL)
                goto out;

        while (fgets (buf, sizeof (buf), proc_part_file)
               && sscanf (buf, "%d %d %d %255s", &major, &minor, &size,
                          part_name) == 4) {
                /* Heuristic for telling partitions and devices apart
                 * Probably needs to be improved
                 */
                if (!_match_rd_device (part_name)
                    && isdigit ((unsigned char)
                                part_name [strlen (part_name) - 1]))
                        continue;

                snprintf (dev_name, sizeof (dev_name), "/dev/%s", part_name);
                _ped_device_probe (dev_name);
        }
        result = 1;

out:
        fclose (proc_part_file);
        return result;
}
2070
/* A name prefix together with its length (terminating NUL not counted). */
struct _entry {
        const char *name;
        size_t len;
};

/* Return 1 if NAME starts with one of the prefixes ".", "..", "dm-",
 * "loop" or "ram", 0 otherwise.  Used by _probe_sys_block to filter
 * /sys/block directory entries.
 */
static int
_skip_entry (const char *name)
{
#define SKIP_PREFIX(s) { s, sizeof (s) - 1 }
        static struct _entry entries[] = {
                SKIP_PREFIX ("."),
                SKIP_PREFIX (".."),
                SKIP_PREFIX ("dm-"),
                SKIP_PREFIX ("loop"),
                SKIP_PREFIX ("ram"),
                { 0, 0 },       /* end of table */
        };
#undef SKIP_PREFIX
        size_t idx = 0;

        while (entries[idx].name != 0) {
                if (!strncmp (name, entries[idx].name, entries[idx].len))
                        return 1;
                idx++;
        }

        return 0;
}
2096
/* Probe a /dev node for every entry of /sys/block that _skip_entry does
 * not filter out.  Entries whose name would not fit the path buffer are
 * ignored.
 * Returns 1 if /sys/block could be opened, 0 otherwise.
 */
static int
_probe_sys_block ()
{
        DIR *sysdir = opendir ("/sys/block");
        struct dirent *ent;
        char node_path [256];
        size_t pos;

        if (!sysdir)
                return 0;

        for (ent = readdir (sysdir); ent; ent = readdir (sysdir)) {
                if (_skip_entry (ent->d_name))
                        continue;

                /* leave room for "/dev/" and the terminating NUL */
                if (strlen (ent->d_name) > sizeof (node_path) - 6)
                        continue; /* device name too long! */

                strcpy (node_path, "/dev/");
                strcpy (node_path + 5, ent->d_name);

                /* in /sys/block, '/'s are replaced with '!' or '.' */
                for (pos = 0; node_path[pos] != '\0'; pos++) {
                        switch (node_path[pos]) {
                                case '!':
                                case '.':
                                        node_path[pos] = '/';
                                        break;
                                default:
                                        break;
                        }
                }

                _ped_device_probe (node_path);
        }

        closedir (sysdir);
        return 1;
}
2127
/* Probe a fixed list of conventional device nodes: /dev/hda .. /dev/hdh
 * followed by /dev/sda .. /dev/sdf.  Always returns 1.
 */
static int
_probe_standard_devices ()
{
        static const char* const well_known[] = {
                "/dev/hda", "/dev/hdb", "/dev/hdc", "/dev/hdd",
                "/dev/hde", "/dev/hdf", "/dev/hdg", "/dev/hdh",

                "/dev/sda", "/dev/sdb", "/dev/sdc",
                "/dev/sdd", "/dev/sde", "/dev/sdf",
        };
        size_t i;

        for (i = 0; i < sizeof (well_known) / sizeof (well_known[0]); i++)
                _ped_device_probe (well_known[i]);

        return 1;
}
2149
/* Probe all devices this backend knows how to find: the conventional
 * device nodes, device-mapper devices (when enabled), and then whatever
 * /sys/block or, failing that, /proc/partitions lists.
 */
static void
linux_probe_all ()
{
        /* The standard nodes are probed even though /proc/partitions may be
         * consulted below: that file might list devfs names, and devfs
         * might not be available.
         */
        _probe_standard_devices ();

#ifdef ENABLE_DEVICE_MAPPER
        /* /proc/partitions either omits device-mapper devices or lists
         * them as dm-X, so they get their own dedicated probe.
         */
        _probe_dm_devices ();
#endif

        /* Prefer /sys/block, which is more reliable and consistent ... */
        if (_probe_sys_block ())
                return;

        /* ... and fall back to /proc/partitions only when it is missing. */
        _probe_proc_partitions ();
}
2173
2174 static char*
2175 _device_get_part_path (PedDevice* dev, int num)
2176 {
2177         int             path_len = strlen (dev->path);
2178         int             result_len = path_len + 16;
2179         char*           result;
2180
2181         result = (char*) ped_malloc (result_len);
2182         if (!result)
2183                 return NULL;
2184
2185         /* Check for devfs-style /disc => /partN transformation
2186            unconditionally; the system might be using udev with devfs rules,
2187            and if not the test is harmless. */
2188         if (!strcmp (dev->path + path_len - 5, "/disc")) {
2189                 /* replace /disc with /path%d */
2190                 strcpy (result, dev->path);
2191                 snprintf (result + path_len - 5, 16, "/part%d", num);
2192         } else if (dev->type == PED_DEVICE_DAC960
2193                         || dev->type == PED_DEVICE_CPQARRAY
2194                         || dev->type == PED_DEVICE_ATARAID
2195                         || dev->type == PED_DEVICE_DM
2196                         || isdigit (dev->path[path_len - 1]))
2197                 snprintf (result, result_len, "%sp%d", dev->path, num);
2198         else
2199                 snprintf (result, result_len, "%s%d", dev->path, num);
2200
2201         return result;
2202 }
2203
2204 static char*
2205 linux_partition_get_path (const PedPartition* part)
2206 {
2207         return _device_get_part_path (part->disk->dev, part->num);
2208 }
2209
2210 static dev_t
2211 _partition_get_part_dev (const PedPartition* part)
2212 {
2213         struct stat dev_stat;
2214         int dev_major, dev_minor;
2215
2216         if (stat (part->disk->dev->path, &dev_stat))
2217                 return (dev_t)0;
2218         dev_major = major (dev_stat.st_rdev);
2219         dev_minor = minor (dev_stat.st_rdev);
2220         return (dev_t)makedev (dev_major, dev_minor + part->num);
2221 }
2222
/* Scan the text file FILE_NAME line by line; the first whitespace-delimited
 * word of each line is stat()ed and its st_rdev compared with DEV.
 * Returns 1 on the first match, 0 if there is none or the file cannot be
 * opened.
 */
static int
_mount_table_search (const char* file_name, dev_t dev)
{
        FILE*           table = fopen (file_name, "r");
        char            entry[512];
        char            first_word[512];
        struct stat     word_stat;
        int             found = 0;

        if (!table)
                return 0;

        while (!found && fgets (entry, 512, table)) {
                if (sscanf (entry, "%s", first_word) != 1)
                        continue;
                if (stat (first_word, &word_stat) != 0)
                        continue;
                found = (word_stat.st_rdev == dev);
        }

        fclose (table);
        return found;
}
2246
/* Return 1 if DEV is found, via _mount_table_search, in /proc/mounts,
 * /proc/swaps or /etc/mtab (checked in that order), 0 otherwise.
 */
static int
_partition_is_mounted_by_dev (dev_t dev)
{
        static const char* const tables[] = {
                "/proc/mounts",
                "/proc/swaps",
                "/etc/mtab",
        };
        size_t i;

        for (i = 0; i < sizeof (tables) / sizeof (tables[0]); i++) {
                if (_mount_table_search (tables[i], dev))
                        return 1;
        }

        return 0;
}
2254
/* Return the result of _partition_is_mounted_by_dev for the block device
 * node at PATH; 0 if PATH cannot be stat()ed or is not a block device.
 */
static int
_partition_is_mounted_by_path (const char *path)
{
        struct stat node_stat;

        if (stat (path, &node_stat) == 0 && S_ISBLK(node_stat.st_mode))
                return _partition_is_mounted_by_dev (node_stat.st_rdev);

        return 0;
}
2265
2266 static int
2267 _partition_is_mounted (const PedPartition *part)
2268 {
2269         dev_t dev;
2270         if (!ped_partition_is_active (part))
2271                 return 0;
2272         dev = _partition_get_part_dev (part);
2273         if (!dev)
2274                 return 0;
2275         return _partition_is_mounted_by_dev (dev);
2276 }
2277
2278 static int
2279 _has_partitions (const PedDisk* disk)
2280 {
2281         PED_ASSERT(disk != NULL, return 0);
2282
2283         /* Some devices can't be partitioned. */
2284         if (!strcmp (disk->type->name, "loop"))
2285                 return 0;
2286
2287         return 1;
2288 }
2289
2290 static int
2291 linux_partition_is_busy (const PedPartition* part)
2292 {
2293         PedPartition*   walk;
2294
2295         PED_ASSERT (part != NULL, return 0);
2296
2297         if (_partition_is_mounted (part))
2298                 return 1;
2299         if (part->type == PED_PARTITION_EXTENDED) {
2300                 for (walk = part->part_list; walk; walk = walk->next) {
2301                         if (linux_partition_is_busy (walk))
2302                                 return 1;
2303                 }
2304         }
2305         return 0;
2306 }
2307
2308 #ifdef ENABLE_DEVICE_MAPPER
2309 static int
2310 _dm_remove_map_name(char *name)
2311 {
2312         struct dm_task  *task = NULL;
2313         int             rc;
2314
2315         task = dm_task_create(DM_DEVICE_REMOVE);
2316         if (!task)
2317                 return 1;
2318
2319         dm_task_set_name (task, name);
2320
2321         rc = dm_task_run(task);
2322         dm_task_update_nodes();
2323         dm_task_destroy(task);
2324         if (rc < 0)
2325                 return 1;
2326
2327         return 0;
2328 }
2329
2330 static int
2331 _dm_is_part (struct dm_info *this, char *name)
2332 {
2333         struct dm_task* task = NULL;
2334         struct dm_info* info = alloca(sizeof *info);
2335         struct dm_deps* deps = NULL;
2336         int             rc = 0;
2337         unsigned int    i;
2338
2339         task = dm_task_create(DM_DEVICE_DEPS);
2340         if (!task)
2341                 return 0;
2342
2343         dm_task_set_name(task, name);
2344         rc = dm_task_run(task);
2345         if (rc < 0) {
2346                 rc = 0;
2347                 goto err;
2348         }
2349         rc = 0;
2350
2351         memset(info, '\0', sizeof *info);
2352         dm_task_get_info(task, info);
2353         if (!info->exists)
2354                 goto err;
2355
2356         deps = dm_task_get_deps(task);
2357         if (!deps)
2358                 goto err;
2359
2360         rc = 0;
2361         for (i = 0; i < deps->count; i++) {
2362                 unsigned int ma = major(deps->device[i]),
2363                              mi = minor(deps->device[i]);
2364
2365                 if (ma == this->major && mi == this->minor)
2366                         rc = 1;
2367         }
2368
2369 err:
2370         dm_task_destroy(task);
2371         return rc;
2372 }
2373
2374 static int
2375 _dm_remove_parts (PedDevice* dev)
2376 {
2377         struct dm_task*         task = NULL;
2378         struct dm_info*         info = alloca(sizeof *info);
2379         struct dm_names*        names = NULL;
2380         unsigned int            next = 0;
2381         int                     rc;
2382         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
2383
2384         task = dm_task_create(DM_DEVICE_LIST);
2385         if (!task)
2386                 goto err;
2387
2388         if (!dm_task_set_major_minor (task, arch_specific->major,
2389                                       arch_specific->minor, 0))
2390                 goto err;
2391
2392         rc = dm_task_run(task);
2393         if (rc < 0)
2394                 goto err;
2395
2396         memset(info, '\0', sizeof *info);
2397         dm_task_get_info(task, info);
2398         if (!info->exists)
2399                 goto err;
2400
2401         names = dm_task_get_names(task);
2402         if (!names)
2403                 goto err;
2404
2405         rc = 0;
2406         do {
2407                 names = (void *) ((char *) names + next);
2408
2409                 if (_dm_is_part(info, names->name))
2410                         rc += _dm_remove_map_name(names->name);
2411
2412                 next = names->next;
2413         } while (next);
2414
2415         dm_task_update_nodes();
2416         dm_task_destroy(task);
2417         task = NULL;
2418
2419         if (!rc)
2420                 return 1;
2421 err:
2422         if (task)
2423                 dm_task_destroy(task);
2424         ped_exception_throw (PED_EXCEPTION_WARNING, PED_EXCEPTION_IGNORE,
2425                 _("parted was unable to re-read the partition "
2426                   "table on %s (%s).  This means Linux won't know "
2427                   "anything about the modifications you made. "),
2428                 dev->path, strerror (errno));
2429         return 0;
2430 }
2431
2432 static int
2433 _dm_add_partition (PedDisk* disk, PedPartition* part)
2434 {
2435         int             rc;
2436         char*           vol_name = NULL;
2437         const char*     dev_name = NULL;
2438         char*           params = NULL;
2439         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (disk->dev);
2440
2441         if (!_has_partitions(disk))
2442                 return 0;
2443
2444         /* Get map name from devicemapper */
2445         struct dm_task *task = dm_task_create (DM_DEVICE_INFO);
2446         if (!task)
2447                 goto err;
2448
2449         if (!dm_task_set_major_minor (task, arch_specific->major,
2450                                       arch_specific->minor, 0))
2451                 goto err;
2452
2453         rc = dm_task_run(task);
2454         if (rc < 0)
2455                 goto err;
2456
2457         dev_name = dm_task_get_name (task);
2458
2459         if (asprintf (&vol_name, "%sp%d", dev_name, part->num) == -1)
2460                 goto err;
2461
2462         /* Caution: dm_task_destroy frees dev_name.  */
2463         dm_task_destroy (task);
2464         task = NULL;
2465
2466         if (asprintf (&params, "%d:%d %lld", arch_specific->major,
2467                       arch_specific->minor, part->geom.start) == -1)
2468                 goto err;
2469
2470         task = dm_task_create (DM_DEVICE_CREATE);
2471         if (!task)
2472                 goto err;
2473
2474         dm_task_set_name (task, vol_name);
2475         dm_task_add_target (task, 0, part->geom.length,
2476                 "linear", params);
2477         rc = dm_task_run (task);
2478         if (rc >= 0) {
2479                 //printf("0 %ld linear %s\n", part->geom.length, params);
2480                 dm_task_update_nodes();
2481                 dm_task_destroy(task);
2482                 free(params);
2483                 free(vol_name);
2484                 return 1;
2485         } else {
2486                 _dm_remove_map_name(vol_name);
2487         }
2488 err:
2489         dm_task_update_nodes();
2490         if (task)
2491                 dm_task_destroy (task);
2492         free (params);
2493         free (vol_name);
2494         return 0;
2495 }
2496
2497 static int
2498 _dm_reread_part_table (PedDisk* disk)
2499 {
2500         int largest_partnum = ped_disk_get_last_partition_num (disk);
2501         if (largest_partnum <= 0)
2502           return 1;
2503
2504         int     rc = 1;
2505         int     last = PED_MIN (largest_partnum, 16);
2506         int     i;
2507
2508         sync();
2509         if (!_dm_remove_parts(disk->dev))
2510                 rc = 0;
2511
2512         for (i = 1; i <= last; i++) {
2513                 PedPartition*      part;
2514
2515                 part = ped_disk_get_partition (disk, i);
2516                 if (!part)
2517                         continue;
2518
2519                 if (!_dm_add_partition (disk, part))
2520                         rc = 0;
2521         }
2522         return rc;
2523 }
2524 #endif
2525
2526 static int
2527 _kernel_reread_part_table (PedDevice* dev)
2528 {
2529         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
2530         int             retry_count = 9;
2531
2532         sync();
2533         while (ioctl (arch_specific->fd, BLKRRPART)) {
2534                 retry_count--;
2535                 sync();
2536                 if (retry_count == 3)
2537                         sleep(1); /* Pause to allow system to settle */
2538
2539                 if (!retry_count) {
2540                         ped_exception_throw (
2541                                 PED_EXCEPTION_WARNING,
2542                                 PED_EXCEPTION_IGNORE,
2543                         _("WARNING: the kernel failed to re-read the partition "
2544                           "table on %s (%s).  As a result, it may not "
2545                           "reflect all of your changes until after reboot."),
2546                                 dev->path, strerror (errno));
2547                         return 0;
2548                 }
2549         }
2550
2551         return 1;
2552 }
2553
2554 static int
2555 linux_disk_commit (PedDisk* disk)
2556 {
2557        if (!_has_partitions (disk))
2558                return 1;
2559
2560 #ifdef ENABLE_DEVICE_MAPPER
2561         if (disk->dev->type == PED_DEVICE_DM)
2562                 return _dm_reread_part_table (disk);
2563 #endif
2564         if (disk->dev->type != PED_DEVICE_FILE) {
2565                 return _kernel_reread_part_table (disk->dev);
2566         }
2567
2568         return 1;
2569 }
2570
2571 #if USE_BLKID
2572 static PedAlignment*
2573 linux_get_minimum_alignment(const PedDevice *dev)
2574 {
2575         blkid_topology tp = LINUX_SPECIFIC(dev)->topology;
2576         if (!tp)
2577                 return NULL;
2578
2579         if (blkid_topology_get_minimum_io_size(tp) == 0)
2580                 return ped_alignment_new(
2581                         blkid_topology_get_alignment_offset(tp) /
2582                                 dev->sector_size,
2583                         dev->phys_sector_size / dev->sector_size);
2584
2585         return ped_alignment_new(
2586                 blkid_topology_get_alignment_offset(tp) / dev->sector_size,
2587                 blkid_topology_get_minimum_io_size(tp) / dev->sector_size);
2588 }
2589
2590 static PedAlignment*
2591 linux_get_optimum_alignment(const PedDevice *dev)
2592 {
2593         blkid_topology tp = LINUX_SPECIFIC(dev)->topology;
2594         if (!tp)
2595                 return NULL;
2596
2597         /* If optimal_io_size is 0 _and_ alignment_offset is 0 _and_
2598            minimum_io_size is a power of 2 then go with the device.c default */
2599         unsigned long minimum_io_size = blkid_topology_get_minimum_io_size(tp);
2600         if (blkid_topology_get_optimal_io_size(tp) == 0 &&
2601             blkid_topology_get_alignment_offset(tp) == 0 &&
2602             (minimum_io_size & (minimum_io_size - 1)) == 0)
2603                 return NULL;
2604
2605         /* If optimal_io_size is 0 and we don't meet the other criteria
2606            for using the device.c default, return the minimum alignment. */
2607         if (blkid_topology_get_optimal_io_size(tp) == 0)
2608                 return linux_get_minimum_alignment(dev);
2609
2610         return ped_alignment_new(
2611                 blkid_topology_get_alignment_offset(tp) / dev->sector_size,
2612                 blkid_topology_get_optimal_io_size(tp) / dev->sector_size);
2613 }
2614 #endif
2615
2616 static PedDeviceArchOps linux_dev_ops = {
2617         _new:           linux_new,
2618         destroy:        linux_destroy,
2619         is_busy:        linux_is_busy,
2620         open:           linux_open,
2621         refresh_open:   linux_refresh_open,
2622         close:          linux_close,
2623         refresh_close:  linux_refresh_close,
2624         read:           linux_read,
2625         write:          linux_write,
2626         check:          linux_check,
2627         sync:           linux_sync,
2628         sync_fast:      linux_sync_fast,
2629         probe_all:      linux_probe_all,
2630 #if USE_BLKID
2631         get_minimum_alignment:  linux_get_minimum_alignment,
2632         get_optimum_alignment:  linux_get_optimum_alignment,
2633 #endif
2634 };
2635
2636 PedDiskArchOps linux_disk_ops =  {
2637         partition_get_path:     linux_partition_get_path,
2638         partition_is_busy:      linux_partition_is_busy,
2639         disk_commit:            linux_disk_commit
2640 };
2641
2642 PedArchitecture ped_linux_arch = {
2643         dev_ops:        &linux_dev_ops,
2644         disk_ops:       &linux_disk_ops
2645 };