libparted: improve BLKPG error checking
[parted/parted.git] / libparted / arch / linux.c
index 3527f5d..8e7d185 100644 (file)
@@ -1,5 +1,5 @@
 /* libparted - a library for manipulating disk partitions
-    Copyright (C) 1999 - 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+    Copyright (C) 1999-2010 Free Software Foundation, Inc.
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
 #define PROC_DEVICES_BUFSIZ 16384
 
 #include <config.h>
-
+#include <arch/linux.h>
+#include <linux/blkpg.h>
 #include <parted/parted.h>
 #include <parted/debug.h>
-#include <parted/linux.h>
+#if defined __s390__ || defined __s390x__
+#include <parted/fdasd.h>
+#endif
 
 #include <ctype.h>
 #include <errno.h>
@@ -41,8 +44,8 @@
 #include <libdevmapper.h>
 #endif
 
-#include "blkpg.h"
 #include "../architecture.h"
+#include "dirname.h"
 
 #if ENABLE_NLS
 #  include <libintl.h>
 #define HDIO_GETGEO             0x0301  /* get device geometry */
 #define HDIO_GET_IDENTITY       0x030d  /* get IDE identification info */
 
-#if defined(O_DIRECT) && !(defined(__s390__) || defined(__s390x__))
-#define RD_MODE (O_RDONLY | O_DIRECT)
-#define WR_MODE (O_WRONLY | O_DIRECT)
-#define RW_MODE (O_RDWR | O_DIRECT)
-#else
 #define RD_MODE (O_RDONLY)
 #define WR_MODE (O_WRONLY)
 #define RW_MODE (O_RDWR)
-#endif
 
 struct hd_geometry {
         unsigned char heads;
@@ -252,96 +249,27 @@ struct blkdev_ioctl_param {
 #define UBD_MAJOR               98
 #define DASD_MAJOR              94
 #define VIODASD_MAJOR           112
+#define AOE_MAJOR               152
 #define SX8_MAJOR1              160
 #define SX8_MAJOR2              161
 #define XVD_MAJOR               202
 #define SDMMC_MAJOR             179
+#define LOOP_MAJOR              7
+#define MD_MAJOR                9
 
 #define SCSI_BLK_MAJOR(M) (                                             \
                 (M) == SCSI_DISK0_MAJOR                                 \
                 || (M) == SCSI_CDROM_MAJOR                              \
                 || ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR))
 
+/* Maximum number of partitions supported by linux. */
+#define MAX_NUM_PARTS          64
+
 static char* _device_get_part_path (PedDevice* dev, int num);
 static int _partition_is_mounted_by_path (const char* path);
 
 static int
-_is_ide_major (int major)
-{
-        switch (major) {
-                case IDE0_MAJOR:
-                case IDE1_MAJOR:
-                case IDE2_MAJOR:
-                case IDE3_MAJOR:
-                case IDE4_MAJOR:
-                case IDE5_MAJOR:
-                        return 1;
-
-                default:
-                        return 0;
-        }
-}
-
-static int
-_is_cpqarray_major (int major)
-{
-        return ((COMPAQ_SMART2_MAJOR <= major && major <= COMPAQ_SMART2_MAJOR7)
-             || (COMPAQ_SMART_MAJOR <= major && major <= COMPAQ_SMART_MAJOR7));
-}
-
-static int
-_is_i2o_major (int major)
-{
-        return (I2O_MAJOR1 <= major && major <= I2O_MAJOR8);
-}
-
-static int
-_is_sx8_major (int major)
-{
-        return (SX8_MAJOR1 <= major && major <= SX8_MAJOR2);
-}
-
-#ifdef ENABLE_DEVICE_MAPPER
-static int
-_dm_maptype (PedDevice *dev)
-{
-        LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
-        struct dm_task *dmt;
-        void *next;
-        uint64_t start, length;
-        char *target_type = NULL;
-        char *params;
-        int r = -1;
-        const char* dev_dir = getenv ("DM_DEV_DIR");
-
-        if (dev_dir && *dev_dir && !dm_set_dev_dir(dev_dir))
-                return r;
-
-        if (!(dmt = dm_task_create(DM_DEVICE_TABLE)))
-                return r;
-
-        if (!dm_task_set_name(dmt, dev->path))
-                goto bad;
-
-        dm_task_no_open_count(dmt);
-
-        if (!dm_task_run(dmt))
-                goto bad;
-
-        next = dm_get_next_target(dmt, NULL, &start, &length,
-                                  &target_type, &params);
-
-        arch_specific->dmtype = strdup(target_type);
-        if (arch_specific->dmtype == NULL)
-                goto bad;
-        r = 0;
-bad:
-        dm_task_destroy(dmt);
-        return r;
-}
-
-static int
-readFD (int fd, char **buf)
+_read_fd (int fd, char **buf)
 {
         char* p;
         size_t size = PROC_DEVICES_BUFSIZ;
@@ -360,7 +288,14 @@ readFD (int fd, char **buf)
                         break;
                 filesize += s;
                 size += s;
-                *buf = realloc (*buf, size);
+                char *new_buf = realloc (*buf, size);
+                if (new_buf == NULL) {
+                        int saved_errno = errno;
+                        free (*buf);
+                        errno = saved_errno;
+                        return -1;
+                }
+                *buf = new_buf;
         } while (1);
 
         if (filesize == 0 && s < 0) {
@@ -368,8 +303,14 @@ readFD (int fd, char **buf)
                 *buf = NULL;
                 return -1;
         } else {
-                /* there is always some excess memory left unused */
-                *buf = realloc (*buf, filesize+1);
+                char *new_buf = realloc (*buf, filesize + 1);
+                if (new_buf == NULL) {
+                        int saved_errno = errno;
+                        free (*buf);
+                        errno = saved_errno;
+                        return -1;
+                }
+                *buf = new_buf;
                 (*buf)[filesize] = '\0';
         }
 
@@ -377,7 +318,7 @@ readFD (int fd, char **buf)
 }
 
 static int
-_is_dm_major (int major)
+_major_type_in_devices (int major, const char* type)
 {
         int fd;
         char* buf = NULL;
@@ -390,7 +331,7 @@ _is_dm_major (int major)
         if (fd < 0)
                 return 0;
 
-        if (readFD(fd, &buf) < 0) {
+        if (_read_fd(fd, &buf) < 0) {
                 close(fd);
                 return 0;
         }
@@ -411,7 +352,7 @@ _is_dm_major (int major)
                 }
 
                 name = strrchr(line, ' ');
-                if (!name || strcmp(name+1, "device-mapper"))
+                if (!name || strcmp(name+1, type))
                         goto next;
 
                 maj = strtol(line, &name, 10);
@@ -432,6 +373,93 @@ next:
 }
 
 static int
+_is_ide_major (int major)
+{
+        switch (major) {        /* 1 for any IDE major listed, else 0 */
+                case IDE0_MAJOR:
+                case IDE1_MAJOR:
+                case IDE2_MAJOR:
+                case IDE3_MAJOR:
+                case IDE4_MAJOR:
+                case IDE5_MAJOR:
+                        return 1;
+
+                default:
+                        return 0;
+        }
+}
+/* Return nonzero if MAJOR lies in COMPAQ_SMART2_MAJOR..COMPAQ_SMART2_MAJOR7
+   or in COMPAQ_SMART_MAJOR..COMPAQ_SMART_MAJOR7 (bounds inclusive).  */
+static int
+_is_cpqarray_major (int major)
+{
+        return ((COMPAQ_SMART2_MAJOR <= major && major <= COMPAQ_SMART2_MAJOR7)
+             || (COMPAQ_SMART_MAJOR <= major && major <= COMPAQ_SMART_MAJOR7));
+}
+/* Return nonzero if MAJOR lies in I2O_MAJOR1..I2O_MAJOR8 (inclusive).  */
+static int
+_is_i2o_major (int major)
+{
+        return (I2O_MAJOR1 <= major && major <= I2O_MAJOR8);
+}
+/* Return nonzero if MAJOR lies in SX8_MAJOR1..SX8_MAJOR2 (inclusive).  */
+static int
+_is_sx8_major (int major)
+{
+        return (SX8_MAJOR1 <= major && major <= SX8_MAJOR2);
+}
+/* Return whatever _major_type_in_devices reports for MAJOR and the
+   name "virtblk".  */
+static int
+_is_virtblk_major (int major)
+{
+        return _major_type_in_devices (major, "virtblk");
+}
+/* _is_dm_major: return whatever _major_type_in_devices reports for MAJOR
+   and the name "device-mapper".  Built only with ENABLE_DEVICE_MAPPER.  */
+#ifdef ENABLE_DEVICE_MAPPER
+static int
+_is_dm_major (int major)
+{
+        return _major_type_in_devices (major, "device-mapper");
+}
+/* Query the device-mapper table of DEV, identified by the major/minor
+   numbers held in its LinuxSpecific data, and store a strdup'd copy of the
+   first target's type in arch_specific->dmtype.  The string "NO-TARGET" is
+   stored when dm_get_next_target leaves the type NULL.  If the DM_DEV_DIR
+   environment variable is set and non-empty, it is handed to
+   dm_set_dev_dir first.
+   Return 0 on success, or -1 when dm_set_dev_dir, dm_task_create,
+   dm_task_set_major_minor, dm_task_run or strdup fails.  */
+static int
+_dm_maptype (PedDevice *dev)
+{
+        LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
+        struct dm_task *dmt;
+        uint64_t start, length;
+        char *target_type = NULL;
+        char *params;
+        int r = -1;
+        const char* dev_dir = getenv ("DM_DEV_DIR");
+
+        if (dev_dir && *dev_dir && !dm_set_dev_dir(dev_dir))
+                return r;
+
+        if (!(dmt = dm_task_create(DM_DEVICE_TABLE)))
+                return r;
+
+        if (!dm_task_set_major_minor(dmt, arch_specific->major,
+                                     arch_specific->minor, 0))
+                goto bad;
+
+        dm_task_no_open_count(dmt);
+
+        if (!dm_task_run(dmt))
+                goto bad;
+
+        dm_get_next_target(dmt, NULL, &start, &length, &target_type, &params);
+
+        arch_specific->dmtype = strdup(target_type ? target_type : "NO-TARGET");
+        if (arch_specific->dmtype == NULL)
+                goto bad;
+        r = 0;
+bad:
+        dm_task_destroy(dmt);   /* reached on success and on failure */
+        return r;
+}
+
+
+static int
 _probe_dm_devices ()
 {
        DIR*            mapper_dir;
@@ -493,6 +521,7 @@ _device_probe_type (PedDevice* dev)
         struct stat             dev_stat;
         int                     dev_major;
         int                     dev_minor;
+        LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
 
         if (!_device_stat (dev, &dev_stat))
                 return 0;
@@ -502,8 +531,8 @@ _device_probe_type (PedDevice* dev)
                 return 1;
         }
 
-        dev_major = major (dev_stat.st_rdev);
-        dev_minor = minor (dev_stat.st_rdev);
+        arch_specific->major = dev_major = major (dev_stat.st_rdev);
+        arch_specific->minor = dev_minor = minor (dev_stat.st_rdev);
 
         if (SCSI_BLK_MAJOR (dev_major) && (dev_minor % 0x10 == 0)) {
                 dev->type = PED_DEVICE_SCSI;
@@ -513,6 +542,8 @@ _device_probe_type (PedDevice* dev)
                 dev->type = PED_DEVICE_DAC960;
         } else if (dev_major == ATARAID_MAJOR && (dev_minor % 0x10 == 0)) {
                 dev->type = PED_DEVICE_ATARAID;
+        } else if (dev_major == AOE_MAJOR && (dev_minor % 0x10 == 0)) {
+                dev->type = PED_DEVICE_AOE;
         } else if (dev_major == DASD_MAJOR && (dev_minor % 0x4 == 0)) {
                 dev->type = PED_DEVICE_DASD;
         } else if (dev_major == VIODASD_MAJOR && (dev_minor % 0x8 == 0)) {
@@ -540,6 +571,12 @@ _device_probe_type (PedDevice* dev)
                 dev->type = PED_DEVICE_XVD;
         } else if (dev_major == SDMMC_MAJOR && (dev_minor % 0x08 == 0)) {
                 dev->type = PED_DEVICE_SDMMC;
+        } else if (_is_virtblk_major(dev_major)) {
+                dev->type = PED_DEVICE_VIRTBLK;
+        } else if (dev_major == LOOP_MAJOR) {
+                dev->type = PED_DEVICE_FILE;
+        } else if (dev_major == MD_MAJOR) {
+                dev->type = PED_DEVICE_MD;
         } else {
                 dev->type = PED_DEVICE_UNKNOWN;
         }
@@ -569,27 +606,41 @@ _get_linux_version ()
 }
 
 static int
-_have_devfs ()
+_have_kern26 ()
 {
-        static int have_devfs = -1;
-        struct stat sb;
+        static int have_kern26 = -1;    /* -1: not computed yet */
+        int kver;
 
-        if (have_devfs != -1)
-                return have_devfs;
+        if (have_kern26 != -1)
+                return have_kern26;     /* reuse the cached answer */
 
-        /* the presence of /dev/.devfsd implies that DevFS is active */
-        if (stat("/dev/.devfsd", &sb) < 0)
-                return have_devfs = 0;
+        kver = _get_linux_version();    /* compared against 2.6.0 below */
+        return have_kern26 = kver >= KERNEL_VERSION (2,6,0) ? 1 : 0;
+}
+/* Create a blkid probe in ARCH_SPECIFIC->probe, attach it to
+   ARCH_SPECIFIC->fd and, if both steps succeed, store the result of
+   blkid_probe_get_topology in ARCH_SPECIFIC->topology.  On either failure
+   the function returns early and leaves ->topology untouched.  */
+#if USE_BLKID
+static void
+get_blkid_topology (LinuxSpecific *arch_specific)
+{
+        arch_specific->probe = blkid_new_probe ();
+        if (!arch_specific->probe)
+                return;
 
-        return have_devfs = S_ISCHR(sb.st_mode) ? 1 : 0;
+        if (blkid_probe_set_device(arch_specific->probe,
+                                   arch_specific->fd, 0, 0))
+                return;         /* nonzero is treated as failure */
+
+        arch_specific->topology =
+                blkid_probe_get_topology(arch_specific->probe);
 }
+#endif
 
-static void 
+static void
 _device_set_sector_size (PedDevice* dev)
 {
         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
         int sector_size;
-        
+
         dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
         dev->phys_sector_size = PED_SECTOR_SIZE_DEFAULT;
 
@@ -599,7 +650,7 @@ _device_set_sector_size (PedDevice* dev)
                 dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
                 return;
         }
-        
+
         if (ioctl (arch_specific->fd, BLKSSZGET, &sector_size)) {
                 ped_exception_throw (
                         PED_EXCEPTION_WARNING,
@@ -609,22 +660,36 @@ _device_set_sector_size (PedDevice* dev)
                         dev->path, strerror (errno), PED_SECTOR_SIZE_DEFAULT);
         } else {
                 dev->sector_size = (long long)sector_size;
+                dev->phys_sector_size = dev->sector_size;
         }
 
-        /* Return PED_SECTOR_SIZE_DEFAULT for DASDs. */
-        if (dev->type == PED_DEVICE_DASD) {
-                dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
+#if USE_BLKID
+        get_blkid_topology(arch_specific);
+        if (!arch_specific->topology) {
+                dev->phys_sector_size = 0;
+        } else {
+                dev->phys_sector_size =
+                        blkid_topology_get_physical_sector_size(
+                                arch_specific->topology);
         }
-
-        if (dev->sector_size != PED_SECTOR_SIZE_DEFAULT) {
+        if (dev->phys_sector_size == 0) {
                 ped_exception_throw (
                         PED_EXCEPTION_WARNING,
                         PED_EXCEPTION_OK,
-                        _("Device %s has a logical sector size of %lld.  Not "
-                          "all parts of GNU Parted support this at the moment, "
-                          "and the working code is HIGHLY EXPERIMENTAL.\n"),
+                        _("Could not determine physical sector size for %s.\n"
+                          "Using the logical sector size (%lld)."),
                         dev->path, dev->sector_size);
+                dev->phys_sector_size = dev->sector_size;
         }
+#endif
+
+#if defined __s390__ || defined __s390x__
+        /* Return PED_SECTOR_SIZE_DEFAULT for DASDs. */
+        if (dev->type == PED_DEVICE_DASD) {
+                arch_specific->real_sector_size = dev->sector_size;
+                dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
+        }
+#endif
 }
 
 static int
@@ -681,7 +746,7 @@ _device_probe_geometry (PedDevice* dev)
         PED_ASSERT (S_ISBLK (dev_stat.st_mode), return 0);
 
         _device_set_sector_size (dev);
-        
+
         dev->length = _device_get_length (dev);
         if (!dev->length)
                 return 0;
@@ -731,17 +796,14 @@ init_ide (PedDevice* dev)
 {
         LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
         struct stat             dev_stat;
-        int                     dev_major;
         struct hd_driveid       hdi;
         PedExceptionOption      ex_status;
         char                    hdi_buf[41];
         int                     sector_multiplier = 0;
-        
+
         if (!_device_stat (dev, &dev_stat))
                 goto error;
 
-        dev_major = major (dev_stat.st_rdev);
-
         if (!ped_device_open (dev))
                 goto error;
 
@@ -769,12 +831,12 @@ init_ide (PedDevice* dev)
                 memcpy (hdi_buf, hdi.model, 40);
                 hdi_buf[40] = '\0';
                 dev->model = strip_name (hdi_buf);
-                
+
                 if (!hdi.ata7_sectinfo.valid1 && hdi.ata7_sectinfo.valid2)
                         sector_multiplier = hdi.ata7_sectinfo.multiplier;
                 else
                         sector_multiplier = 1;
-                        
+
                 if (sector_multiplier != 1) {
                         ex_status = ped_exception_throw (
                                 PED_EXCEPTION_WARNING,
@@ -787,7 +849,7 @@ init_ide (PedDevice* dev)
                                   "Please consult the web site for up-to-date "
                                   "information."),
                                 dev->path, sector_multiplier);
-                        
+
                         switch (ex_status) {
                                 case PED_EXCEPTION_CANCEL:
                                         goto error_close_dev;
@@ -801,7 +863,7 @@ init_ide (PedDevice* dev)
                                         break;
                         }
                 }
-                
+
                 /* XXX sector_size has not been set yet! */
                 /* dev->phys_sector_size = dev->sector_size
                    * sector_multiplier;*/
@@ -829,7 +891,7 @@ read_device_sysfs_file (PedDevice *dev, const char *file)
         char buf[256];
 
         snprintf (name_buf, 127, "/sys/block/%s/device/%s",
-                  basename (dev->path), file);
+                  last_component (dev->path), file);
 
         if ((f = fopen (name_buf, "r")) == NULL)
                 return NULL;
@@ -995,16 +1057,25 @@ static int
 init_file (PedDevice* dev)
 {
         struct stat     dev_stat;
+
         if (!_device_stat (dev, &dev_stat))
                 goto error;
         if (!ped_device_open (dev))
                 goto error;
 
+        dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
+        char *p = getenv ("PARTED_SECTOR_SIZE");
+        if (p) {
+                int s = atoi (p);
+                if (0 < s && s % 512 == 0)
+                        dev->sector_size = s;
+        }
+        dev->phys_sector_size = dev->sector_size;
+
         if (S_ISBLK(dev_stat.st_mode))
                 dev->length = _device_get_length (dev);
         else
-                dev->length = dev_stat.st_size / 512;
+                dev->length = dev_stat.st_size / dev->sector_size;
         if (dev->length <= 0) {
                 ped_exception_throw (
                         PED_EXCEPTION_ERROR,
@@ -1022,24 +1093,23 @@ init_file (PedDevice* dev)
         dev->bios_geom.heads = 4;
         dev->bios_geom.sectors = 32;
         dev->hw_geom = dev->bios_geom;
-        dev->sector_size = PED_SECTOR_SIZE_DEFAULT;
-        dev->phys_sector_size = PED_SECTOR_SIZE_DEFAULT;
         dev->model = strdup ("");
-        
+
         return 1;
-        
+
 error_close_dev:
         ped_device_close (dev);
 error:
         return 0;
 }
 
+#if defined __s390__ || defined __s390x__
 static int
 init_dasd (PedDevice* dev, const char* model_name)
 {
         struct stat             dev_stat;
         struct hd_geometry      geo;
-        char *errstr = 0;
+        dasd_information_t dasd_info;
 
         if (!_device_stat (dev, &dev_stat))
                 goto error;
@@ -1075,20 +1145,24 @@ init_dasd (PedDevice* dev, const char* model_name)
                 dev->hw_geom = dev->bios_geom;
         }
 
+        if (!ioctl(arch_specific->fd, BIODASDINFO, &dasd_info)) {
+                arch_specific->devno = dasd_info.devno;
+        } else {
+                arch_specific->devno = arch_specific->major * 256 +
+                                       arch_specific->minor;
+        }
+
         dev->model = strdup (model_name);
 
         ped_device_close (dev);
         return 1;
 
-        ped_exception_throw ( PED_EXCEPTION_ERROR,
-                              PED_EXCEPTION_IGNORE_CANCEL,
-                              errstr );
-
 error_close_dev:
         ped_device_close (dev);
 error:
         return 0;
 }
+#endif
 
 static int
 init_generic (PedDevice* dev, const char* model_name)
@@ -1200,6 +1274,10 @@ linux_new (const char* path)
                 goto error_free_path;
         arch_specific = LINUX_SPECIFIC (dev);
         arch_specific->dmtype = NULL;
+#if USE_BLKID
+        arch_specific->probe = NULL;
+        arch_specific->topology = NULL;
+#endif
 
         dev->open_count = 0;
         dev->read_only = 0;
@@ -1231,10 +1309,17 @@ linux_new (const char* path)
                         goto error_free_arch_specific;
                 break;
 
+        case PED_DEVICE_AOE:
+                if (!init_generic (dev, _("ATA over Ethernet Device")))
+                    goto error_free_arch_specific;
+                break;
+
+#if defined __s390__ || defined __s390x__
         case PED_DEVICE_DASD:
                 if (!init_dasd (dev, _("IBM S390 DASD drive")))
                         goto error_free_arch_specific;
                 break;
+#endif
 
         case PED_DEVICE_VIODASD:
                 if (!init_generic (dev, _("IBM iSeries Virtual DASD")))
@@ -1294,6 +1379,15 @@ linux_new (const char* path)
                 if (!init_sdmmc (dev))
                         goto error_free_arch_specific;
                 break;
+        case PED_DEVICE_VIRTBLK:
+                if (!init_generic(dev, _("Virtio Block Device")))
+                        goto error_free_arch_specific;
+                break;
+
+        case PED_DEVICE_MD:
+                if (!init_generic(dev, _("Linux Software RAID Array")))
+                        goto error_free_arch_specific;
+                break;
 
         default:
                 ped_exception_throw (PED_EXCEPTION_NO_FEATURE,
@@ -1316,7 +1410,14 @@ error:
 static void
 linux_destroy (PedDevice* dev)
 {
-        free (((LinuxSpecific*)dev->arch_specific)->dmtype);
+        LinuxSpecific *arch_specific = LINUX_SPECIFIC(dev);
+        void *p = arch_specific->dmtype;
+
+#if USE_BLKID
+        if (arch_specific->probe)
+                blkid_free_probe(arch_specific->probe);
+#endif
+        free (p);
         free (dev->arch_specific);
         free (dev->path);
         free (dev->model);
@@ -1348,8 +1449,8 @@ linux_is_busy (PedDevice* dev)
         return 0;
 }
 
-/* we need to flush the master device, and all the partition devices,
- * because there is no coherency between the caches.
+/* we need to flush the master device, and with kernel < 2.6 all the partition
+ * devices, because there is no coherency between the caches with old kernels.
  * We should only flush unmounted partition devices, because:
  *  - there is never a need to flush them (we're not doing IO there)
  *  - flushing a device that is mounted causes unnecessary IO, and can
@@ -1367,6 +1468,10 @@ _flush_cache (PedDevice* dev)
 
         ioctl (arch_specific->fd, BLKFLSBUF);
 
+        /* With linux-2.6.0 and newer, we're done.  */
+        if (_have_kern26())
+                return;
+
         for (i = 1; i < 16; i++) {
                 char*           name;
                 int             fd;
@@ -1378,7 +1483,16 @@ _flush_cache (PedDevice* dev)
                         fd = open (name, WR_MODE, 0);
                         if (fd > 0) {
                                 ioctl (fd, BLKFLSBUF);
-                                close (fd);
+retry:
+                                if (fsync (fd) < 0 || close (fd) < 0)
+                                       if (ped_exception_throw (
+                                               PED_EXCEPTION_WARNING,
+                                               PED_EXCEPTION_RETRY +
+                                                       PED_EXCEPTION_IGNORE,
+                                               _("Error fsyncing/closing %s: %s"),
+                                               name, strerror (errno))
+                                                       == PED_EXCEPTION_RETRY)
+                                               goto retry;
                         }
                 }
                 free (name);
@@ -1422,7 +1536,9 @@ retry:
                 dev->read_only = 0;
         }
 
-        _flush_cache (dev);
+        /* Kernels older than 2.6 need a flush here for cache coherence. */
+        if (!_have_kern26())
+                _flush_cache (dev);
 
         return 1;
 }
@@ -1440,7 +1556,15 @@ linux_close (PedDevice* dev)
 
         if (dev->dirty)
                 _flush_cache (dev);
-        close (arch_specific->fd);
+retry:
+        if (fsync (arch_specific->fd) < 0 || close (arch_specific->fd) < 0)
+               if (ped_exception_throw (
+                       PED_EXCEPTION_WARNING,
+                       PED_EXCEPTION_RETRY + PED_EXCEPTION_IGNORE,
+                       _("Error fsyncing/closing %s: %s"),
+                       dev->path, strerror (errno))
+                               == PED_EXCEPTION_RETRY)
+                       goto retry;
         return 1;
 }
 
@@ -1481,11 +1605,11 @@ static int
 _device_seek (const PedDevice* dev, PedSector sector)
 {
         LinuxSpecific*  arch_specific;
-        
+
         PED_ASSERT (dev->sector_size % PED_SECTOR_SIZE_DEFAULT == 0, return 0);
         PED_ASSERT (dev != NULL, return 0);
         PED_ASSERT (!dev->external_mode, return 0);
-        
+
         arch_specific = LINUX_SPECIFIC (dev);
 
 #if SIZEOF_OFF_T < 8
@@ -1508,14 +1632,14 @@ _read_lastoddsector (const PedDevice* dev, void* buffer)
 
         PED_ASSERT(dev != NULL, return 0);
         PED_ASSERT(buffer != NULL, return 0);
-        
+
         arch_specific = LINUX_SPECIFIC (dev);
 
 retry:
         ioctl_param.block = 0; /* read the last sector */
         ioctl_param.content_length = dev->sector_size;
         ioctl_param.block_contents = buffer;
-        
+
         if (ioctl(arch_specific->fd, BLKGETLASTSECT, &ioctl_param) == -1) {
                 PedExceptionOption      opt;
                 opt = ped_exception_throw (
@@ -1600,7 +1724,9 @@ linux_read (const PedDevice* dev, void* buffer, PedSector start,
                 ex_status = ped_exception_throw (
                         PED_EXCEPTION_ERROR,
                         PED_EXCEPTION_RETRY_IGNORE_CANCEL,
-                        _("%s during read on %s"),
+                        (status == 0
+                         ? _("end of file while reading %s")
+                         : _("%s during read on %s")),
                         strerror (errno),
                         dev->path);
 
@@ -1636,14 +1762,14 @@ _write_lastoddsector (PedDevice* dev, const void* buffer)
 
         PED_ASSERT(dev != NULL, return 0);
         PED_ASSERT(buffer != NULL, return 0);
-        
+
         arch_specific = LINUX_SPECIFIC (dev);
 
 retry:
         ioctl_param.block = 0; /* write the last sector */
         ioctl_param.content_length = dev->sector_size;
         ioctl_param.block_contents = (void*) buffer;
-        
+
         if (ioctl(arch_specific->fd, BLKSETLASTSECT, &ioctl_param) == -1) {
                 PedExceptionOption      opt;
                 opt = ped_exception_throw (
@@ -1692,7 +1818,8 @@ linux_write (PedDevice* dev, const void* buffer, PedSector start,
                     && start + count - 1 == dev->length - 1)
                         return ped_device_write (dev, buffer, start, count - 1)
                                 && _write_lastoddsector (
-                                        dev, (char*) buffer + (count-1) * 512);
+                                        dev, ((char*) buffer
+                                              + (count-1) * dev->sector_size));
         }
         while (1) {
                 if (_device_seek (dev, start))
@@ -1779,7 +1906,7 @@ linux_check (PedDevice* dev, void* buffer, PedSector start, PedSector count)
         void*           diobuf;
 
         PED_ASSERT(dev != NULL, return 0);
-        
+
         if (!_device_seek (dev, start))
                 return 0;
 
@@ -1832,7 +1959,7 @@ _do_fsync (PedDevice* dev)
                                 PED_ASSERT (0, (void) 0);
                                 break;
                 }
-        } 
+        }
         return 1;
 }
 
@@ -1993,7 +2120,7 @@ _probe_sys_block ()
                }
                _ped_device_probe (dev_name);
        }
-       
+
        closedir (blockdir);
        return 1;
 }
@@ -2086,7 +2213,7 @@ _partition_get_part_dev (const PedPartition* part)
         struct stat dev_stat;
         int dev_major, dev_minor;
 
-        if (!_device_stat (part->disk->dev, &dev_stat))
+        if (stat (part->disk->dev->path, &dev_stat))
                 return (dev_t)0;
         dev_major = major (dev_stat.st_rdev);
         dev_minor = minor (dev_stat.st_rdev);
@@ -2100,14 +2227,13 @@ _mount_table_search (const char* file_name, dev_t dev)
         char line[512];
         char part_name[512];
         FILE* file;
-        int junk;
 
         file = fopen (file_name, "r");
         if (!file)
                 return 0;
         while (fgets (line, 512, file)) {
-                junk = sscanf (line, "%s", part_name);
-                if (stat (part_name, &part_stat) == 0) {
+                if (sscanf (line, "%s", part_name) == 1
+                    && stat (part_name, &part_stat) == 0) {
                         if (part_stat.st_rdev == dev) {
                                 fclose (file);
                                 return 1;
@@ -2144,10 +2270,24 @@ _partition_is_mounted (const PedPartition *part)
         if (!ped_partition_is_active (part))
                 return 0;
         dev = _partition_get_part_dev (part);
+        if (!dev)
+                return 0;
         return _partition_is_mounted_by_dev (dev);
 }
 
 static int
+_has_partitions (const PedDisk* disk)
+{
+        PED_ASSERT(disk != NULL, return 0);
+
+        /* Some devices can't be partitioned: return 0 for a disk whose
+           disk type is named "loop", and 1 for every other disk. */
+        if (!strcmp (disk->type->name, "loop"))
+                return 0;
+
+        return 1;
+}
+
+static int
 linux_partition_is_busy (const PedPartition* part)
 {
         PedPartition*   walk;
@@ -2190,6 +2330,9 @@ _blkpg_add_partition (PedDisk* disk, const PedPartition *part)
         PED_ASSERT(disk->dev->sector_size % PED_SECTOR_SIZE_DEFAULT == 0,
                    return 0);
 
+        if (!_has_partitions (disk))
+                return 0;
+
         if (ped_disk_type_check_feature (disk->type,
                                          PED_DISK_TYPE_PARTITION_NAME))
                 vol_name = ped_partition_get_name (part);
@@ -2238,50 +2381,165 @@ _blkpg_remove_partition (PedDisk* disk, int n)
 {
         struct blkpg_partition  linux_part;
 
+        if (!_has_partitions (disk))
+                return 0;
+
         memset (&linux_part, 0, sizeof (linux_part));
         linux_part.pno = n;
         return _blkpg_part_command (disk->dev, &linux_part,
                                     BLKPG_DEL_PARTITION);
 }
 
+/*
+ * Return the number of partitions that DEV can have, which depends on the
+ * kernel: the value is read from /sys/block/DEV/range.  If it cannot be
+ * found there (or is not positive), MAX_NUM_PARTS is returned instead.
+ */
+static unsigned int
+_device_get_partition_range(PedDevice* dev)
+{
+        int         range, r;
+        char        path[128];
+        FILE*       fp;
+        bool        ok;
+        /* DEV is last_component(dev->path) -- presumably the device's base name. */
+        r = snprintf(path, sizeof(path), "/sys/block/%s/range",
+                     last_component(dev->path));
+        if(r < 0 || r >= sizeof(path))  /* output error or truncated path */
+                return MAX_NUM_PARTS;
+
+        fp = fopen(path, "r");
+        if(!fp)
+                return MAX_NUM_PARTS;
+
+        ok = fscanf(fp, "%d", &range) == 1;  /* exactly one integer expected */
+        fclose(fp);
+
+        /* A range <= 0 makes no sense; fall back to the default. */
+        return ok && range > 0 ? range : MAX_NUM_PARTS;
+}
+
+/*
+ * Sync the kernel's partition table in a two-step process:
+ * 1. Remove every partition number from the kernel's tables, recording the
+ *    result and errno of each removal attempt.
+ * 2. Add all the partitions that we hold in disk.  A partition whose removal
+ *    failed with EBUSY is not re-added; it is listed in a warning unless its
+ *    start and length are unchanged.
+ *
+ * Both steps cover partition numbers 1..lpn, where lpn is the smaller of
+ * what linux supports and what the label type supports (only the former
+ * when the label type's maximum cannot be obtained), e.g.
+ * number=MIN(max_parts_supported_in_linux,max_parts_supported_in_msdos_tables)
+ * Returns 1 on success (also when the warning was issued), 0 on failure.
+ */
 static int
 _disk_sync_part_table (PedDisk* disk)
 {
-        int largest_partnum = ped_disk_get_last_partition_num (disk);
-        if (largest_partnum <= 0)
-          return 1;
+        PED_ASSERT(disk != NULL, return 0);
+        PED_ASSERT(disk->dev != NULL, return 0);
+        int lpn;
 
-        int     last = 16;
-        int*    rets = ped_malloc(sizeof(int) * last);
-        int*    errnums = ped_malloc(sizeof(int) * last);
-        int     ret = 1;
-        int     i;
+        /* lpn = largest partition number. */
+        if(ped_disk_get_max_supported_partition_count(disk, &lpn))
+                lpn = PED_MIN(lpn, _device_get_partition_range(disk->dev));
+        else
+                lpn = _device_get_partition_range(disk->dev);
 
-        for (i = 1; i <= last; i++) {
+        /* It's not possible to support lpn < 0.
+         * lpn == 0 (no partitions supported) is let through: both loops
+         * below are then empty. */
+        if(lpn < 0)
+                return 0;
+        int ret = 0;
+        int *rets = ped_malloc(sizeof(int) * lpn);
+        if (!rets)
+                return 0;
+        int *errnums = ped_malloc(sizeof(int) * lpn);
+        if (!errnums)
+                goto free_rets;
+        int i;
+
+        for (i = 1; i <= lpn; i++) {
                 rets[i - 1] = _blkpg_remove_partition (disk, i);
                 errnums[i - 1] = errno;  /* only consulted when the removal failed */
         }
 
-        for (i = 1; i <= last; i++) {
-                const PedPartition *part;
-
-                part = ped_disk_get_partition (disk, i);
+        for (i = 1; i <= lpn; i++) {
+                const PedPartition *part = ped_disk_get_partition (disk, i);
                 if (part) {
-                        /* busy... so we won't (can't!) disturb ;)  Prolly
-                         * doesn't matter anyway, because users shouldn't be
-                         * changing mounted partitions anyway...
-                         */
-                        if (!rets[i - 1] && errnums[i - 1] == EBUSY)
-                                        continue;
+                        if (!rets[i - 1] && errnums[i - 1] == EBUSY) {
+                                struct hd_geometry geom;
+                                int fd;
+                                unsigned long long length = 0;
+                                /* get start and length of existing partition */
+                                char *dev_name = _device_get_part_path (disk->dev, i);
+                                if (!dev_name)
+                                        goto free_errnums;
+                                fd = open (dev_name, O_RDONLY);
+                                if (fd == -1 ||
+                                    ioctl (fd, HDIO_GETGEO, &geom) ||
+                                    ioctl (fd, BLKGETSIZE64, &length)) {
+                                        ped_exception_throw (
+                                                             PED_EXCEPTION_BUG,
+                                                             PED_EXCEPTION_CANCEL,
+                                                             _("Unable to determine the start and length of %s."),
+                                                             dev_name);
+                                        if( fd != -1 )
+                                                close (fd);
+                                        free (dev_name);
+                                        goto free_errnums;
+                                }
+                                free (dev_name);
+                                length /= disk->dev->sector_size;
+                                close (fd);
+                                if (geom.start == part->geom.start &&
+                                    length == part->geom.length)
+                                        rets[i - 1] = 1;
+                                /* if the new partition is unchanged and the existing
+                                   one was not removed because it was in use, then
+                                   the error flag has been reset above; either way,
+                                   skip adding it since it is already there */
+                                continue;
+                        }
 
                         /* add the (possibly modified or new) partition */
-                        if (!_blkpg_add_partition (disk, part))
-                                ret = 0;
+                        if (!_blkpg_add_partition (disk, part)) {
+                                ped_exception_throw (
+                                        PED_EXCEPTION_ERROR,
+                                        PED_EXCEPTION_RETRY_CANCEL,
+                                        _("Failed to add partition %i (%s)"),
+                                        i, strerror (errno));
+                                goto free_errnums;
+                        }
                 }
         }
 
-        free (rets);
+        char *parts = ped_malloc ((size_t) lpn * 12 + 1); /* "%i, " <= 12 bytes each, + NUL */
+        if (!parts)
+                goto free_errnums;
+        parts[0] = 0;
+        /* now warn about any errors (ENXIO failures are not reported) */
+        for (i = 1; i <= lpn; i++)
+                if (!rets[i - 1] && errnums[i - 1] != ENXIO)
+                        sprintf (parts + strlen (parts), "%i, ", i);
+        if (parts[0]) {
+                parts[strlen (parts) - 2] = 0;  /* drop the trailing ", " */
+                ped_exception_throw (
+                        PED_EXCEPTION_WARNING,
+                        PED_EXCEPTION_IGNORE,
+                        _("Partition(s) %s on %s could not be modified, probably "
+                          "because it/they is/are in use.  As a result, the old partition(s) "
+                          "will remain in use until after reboot. You should reboot "
+                          "now before making further changes."),
+                        parts, disk->dev->path);
+        }
+        free (parts);
+        ret = 1;
+ free_errnums:
         free (errnums);
+ free_rets:
+        free (rets);
         return ret;
 }
 
@@ -2301,13 +2559,13 @@ _dm_remove_map_name(char *name)
         rc = dm_task_run(task);
         dm_task_update_nodes();
         dm_task_destroy(task);
-        if (rc < 0)
+        if (!rc)
                 return 1;
 
         return 0;
 }
 
-static int 
+static int
 _dm_is_part (struct dm_info *this, char *name)
 {
         struct dm_task* task = NULL;
@@ -2319,14 +2577,10 @@ _dm_is_part (struct dm_info *this, char *name)
         task = dm_task_create(DM_DEVICE_DEPS);
         if (!task)
                 return 0;
-        
+
         dm_task_set_name(task, name);
-        rc = dm_task_run(task);
-        if (rc < 0) {
-                rc = 0;
+        if (!dm_task_run(task))
                 goto err;
-        }
-        rc = 0;
 
         memset(info, '\0', sizeof *info);
         dm_task_get_info(task, info);
@@ -2337,7 +2591,6 @@ _dm_is_part (struct dm_info *this, char *name)
         if (!deps)
                 goto err;
 
-        rc = 0;
         for (i = 0; i < deps->count; i++) {
                 unsigned int ma = major(deps->device[i]),
                              mi = minor(deps->device[i]);
@@ -2354,25 +2607,22 @@ err:
 static int
 _dm_remove_parts (PedDevice* dev)
 {
-        struct stat             dev_stat;
         struct dm_task*         task = NULL;
         struct dm_info*         info = alloca(sizeof *info);
         struct dm_names*        names = NULL;
         unsigned int            next = 0;
         int                     rc;
-
-        if (!_device_stat (dev, &dev_stat))
-                goto err;
+        LinuxSpecific*          arch_specific = LINUX_SPECIFIC (dev);
 
         task = dm_task_create(DM_DEVICE_LIST);
         if (!task)
                 goto err;
 
-        dm_task_set_major (task, major (dev_stat.st_rdev));
-        dm_task_set_minor (task, minor (dev_stat.st_rdev));
+        if (!dm_task_set_major_minor (task, arch_specific->major,
+                                      arch_specific->minor, 0))
+                goto err;
 
-        rc = dm_task_run(task);
-        if (rc < 0)
+        if (!dm_task_run(task))
                 goto err;
 
         memset(info, '\0', sizeof *info);
@@ -2386,7 +2636,7 @@ _dm_remove_parts (PedDevice* dev)
 
         rc = 0;
         do {
-                names = (void *)names + next;
+                names = (void *) ((char *) names + next);
 
                 if (_dm_is_part(info, names->name))
                         rc += _dm_remove_map_name(names->name);
@@ -2414,33 +2664,37 @@ err:
 static int
 _dm_add_partition (PedDisk* disk, PedPartition* part)
 {
-        struct stat     dev_stat;
-        struct dm_task* task = NULL;
-        int             rc;
         char*           vol_name = NULL;
-        char*           dev_name = NULL;
+        const char*     dev_name = NULL;
         char*           params = NULL;
+        LinuxSpecific*  arch_specific = LINUX_SPECIFIC (disk->dev);
 
-        dev_name = _device_get_part_path (disk->dev, part->num);
-        if (!dev_name)
+        if (!_has_partitions(disk))
                 return 0;
 
-        vol_name = strrchr (dev_name, '/');
-        if (vol_name && *vol_name && *(++vol_name))
-                vol_name = strdup (vol_name);
-        else
-                vol_name = strdup (dev_name);
-        if (!vol_name)
-                return 0;
+        /* Get map name from devicemapper */
+        struct dm_task *task = dm_task_create (DM_DEVICE_INFO);
+        if (!task)
+                goto err;
+
+        if (!dm_task_set_major_minor (task, arch_specific->major,
+                                      arch_specific->minor, 0))
+                goto err;
 
-        if (!_device_stat (disk->dev, &dev_stat))
+        if (!dm_task_run(task))
                 goto err;
 
-        if (asprintf (&params, "%d:%d %lld", major (dev_stat.st_rdev),
-                      minor (dev_stat.st_rdev), part->geom.start) == -1)
+        dev_name = dm_task_get_name (task);
+
+        if (asprintf (&vol_name, "%sp%d", dev_name, part->num) == -1)
                 goto err;
 
-        if (!params)
+        /* Caution: dm_task_destroy frees dev_name.  */
+        dm_task_destroy (task);
+        task = NULL;
+
+        if (asprintf (&params, "%d:%d %lld", arch_specific->major,
+                      arch_specific->minor, part->geom.start) == -1)
                 goto err;
 
         task = dm_task_create (DM_DEVICE_CREATE);
@@ -2450,8 +2704,7 @@ _dm_add_partition (PedDisk* disk, PedPartition* part)
         dm_task_set_name (task, vol_name);
         dm_task_add_target (task, 0, part->geom.length,
                 "linear", params);
-        rc = dm_task_run(task);
-        if (rc >= 0) {
+        if (dm_task_run (task)) {
                 //printf("0 %ld linear %s\n", part->geom.length, params);
                 dm_task_update_nodes();
                 dm_task_destroy(task);
@@ -2503,22 +2756,23 @@ static int
 _kernel_reread_part_table (PedDevice* dev)
 {
         LinuxSpecific*  arch_specific = LINUX_SPECIFIC (dev);
-        int             retry_count = 5;
+        int             retry_count = 9;
 
         sync();
         while (ioctl (arch_specific->fd, BLKRRPART)) {
                 retry_count--;
                 sync();
+                if (retry_count == 3)
+                        sleep(1); /* Pause to allow system to settle */
+
                 if (!retry_count) {
                         ped_exception_throw (
                                 PED_EXCEPTION_WARNING,
                                 PED_EXCEPTION_IGNORE,
-                        _("The kernel was unable to re-read the partition "
-                          "table on %s (%s).  This means Linux won't know "
-                          "anything about the modifications you made "
-                          "until you reboot.  You should reboot your computer "
-                          "before doing anything with %s."),
-                                dev->path, strerror (errno), dev->path);
+                        _("WARNING: the kernel failed to re-read the partition "
+                          "table on %s (%s).  As a result, it may not "
+                          "reflect all of your changes until after reboot."),
+                                dev->path, strerror (errno));
                         return 0;
                 }
         }
@@ -2542,6 +2796,9 @@ _have_blkpg ()
 static int
 linux_disk_commit (PedDisk* disk)
 {
+        if (!_has_partitions (disk))
+                return 1;
+
 #ifdef ENABLE_DEVICE_MAPPER
         if (disk->dev->type == PED_DEVICE_DM)
                 return _dm_reread_part_table (disk);
@@ -2555,7 +2812,7 @@ linux_disk_commit (PedDisk* disk)
                  */
                 /* Work around kernel dasd problem so we really do BLKRRPART */
                 if (disk->dev->type != PED_DEVICE_DASD &&
-                    _have_blkpg () && !_have_devfs ()) {
+                    _have_blkpg () ) {
                         if (_disk_sync_part_table (disk))
                                 return 1;
                 }
@@ -2566,6 +2823,51 @@ linux_disk_commit (PedDisk* disk)
         return 1;
 }
 
+#if USE_BLKID
+static PedAlignment*  /* minimum alignment from the blkid topology; NULL if none */
+linux_get_minimum_alignment(const PedDevice *dev)
+{
+        blkid_topology tp = LINUX_SPECIFIC(dev)->topology;
+        if (!tp)
+                return NULL;
+        /* No minimum I/O size reported: use phys_sector_size in its place. */
+        if (blkid_topology_get_minimum_io_size(tp) == 0)
+                return ped_alignment_new(
+                        blkid_topology_get_alignment_offset(tp) /
+                                dev->sector_size,
+                        dev->phys_sector_size / dev->sector_size);
+        /* Otherwise offset and minimum I/O size both come from the topology. */
+        return ped_alignment_new(
+                blkid_topology_get_alignment_offset(tp) / dev->sector_size,
+                blkid_topology_get_minimum_io_size(tp) / dev->sector_size);
+}
+
+static PedAlignment*  /* optimum alignment from the blkid topology, or NULL */
+linux_get_optimum_alignment(const PedDevice *dev)
+{
+        blkid_topology tp = LINUX_SPECIFIC(dev)->topology;
+        if (!tp)
+                return NULL;
+        /* NULL is also returned in the case described next. */
+        /* If optimal_io_size is 0 _and_ alignment_offset is 0 _and_
+           minimum_io_size is a power of 2 then go with the device.c default */
+        unsigned long minimum_io_size = blkid_topology_get_minimum_io_size(tp);
+        if (blkid_topology_get_optimal_io_size(tp) == 0 &&
+            blkid_topology_get_alignment_offset(tp) == 0 &&
+            (minimum_io_size & (minimum_io_size - 1)) == 0)  /* also true for 0 */
+                return NULL;
+
+        /* If optimal_io_size is 0 and we don't meet the other criteria
+           for using the device.c default, return the minimum alignment. */
+        if (blkid_topology_get_optimal_io_size(tp) == 0)
+                return linux_get_minimum_alignment(dev);
+        /* Otherwise use the topology's offset and optimal I/O size. */
+        return ped_alignment_new(
+                blkid_topology_get_alignment_offset(tp) / dev->sector_size,
+                blkid_topology_get_optimal_io_size(tp) / dev->sector_size);
+}
+#endif
+
 static PedDeviceArchOps linux_dev_ops = {
         _new:           linux_new,
         destroy:        linux_destroy,
@@ -2579,7 +2881,11 @@ static PedDeviceArchOps linux_dev_ops = {
         check:          linux_check,
         sync:           linux_sync,
         sync_fast:      linux_sync_fast,
-        probe_all:      linux_probe_all
+        probe_all:      linux_probe_all,
+#if USE_BLKID
+        get_minimum_alignment: linux_get_minimum_alignment,
+        get_optimum_alignment: linux_get_optimum_alignment,
+#endif
 };
 
 PedDiskArchOps linux_disk_ops =  {