grub segmentation fault on RAID 1 lvm
Bill Rugolsky Jr.
brugolsky at telemetry-investments.com
Fri Aug 26 16:09:37 UTC 2005
On Fri, Aug 26, 2005 at 09:38:30AM -0500, Shawn Iverson wrote:
> I have come across an error while preparing to reboot my system
> with an updated kernel. I am receiving a segmentation fault during the
> setup phase of grub. I have RAID 1, so grub-install does not work, of
> course. I have instead either done it by hand or used my own script
> (see below). I have been able to perform this task successfully in the
> past up to this point with prior kernels on this hardware. Below is the
> output when doing it manually, with /boot on /dev/hda1 mirrored to
> /dev/hdb1 on an ext3 volume (/dev/md0). My lvm volume is at /dev/hda2
> mirrored to /dev/hdb2 (/dev/md1). Both volumes are clean, and /dev/hdc1
> and /dev/hdc2 function as hotspares for their respective partitions.
>
> I have noted the thread "grub segmentation fault when trying to do
> setup" on July 19th with an issue like this one. I was hoping that
> someone would have some insight into this before I go clear across town
> to boot into rescue mode to complete this task.
After many trials with GRUB and RAID, including "corruption" due to
out-of-sync RAID1 components, I've taken to doing the following:
1. Install ms-sys (http://ms-sys.sourceforge.net/) in the MBR
of both disks.
2. Mark my boot partitions active, e.g.,
echo ',,,*' | sfdisk /dev/sda -N1
echo ',,,*' | sfdisk /dev/sdb -N1
3. Apply the attached patch from Sergey Vlasov <vsu at altlinux.ru>, which
I've forward-ported to grub-0.97, that allows one to map partitions
as well as drives in device.map:
(fd0) /dev/fd0
(hd0) /dev/sda
(hd1) /dev/sdb
(hd0,0) /dev/md1
(hd1,0) /dev/md1
[You can probably find other versions of the patch in ALTLinux
grub SRPMS.]
4. Set up grub with the stage1 in the boot partition bootsector,
which, since /dev/sd[ab]1 is automatically redirected to /dev/md1,
updates both drives simultaneously and coherently. As long as I
patch GRUB before updating to a later version, no worries.
If one doesn't do this, one should be careful that the RAID1 doesn't
get out of sync. When I have to use an unpatched GRUB without ms-sys,
I do the following:
1. umount /boot
2. mdadm --stop /dev/md1
3. grub-install or equivalent for sda, sdb, to get the MBR in place.
4. mdadm -A /dev/md1 /dev/sda1
5. mdadm /dev/md1 -a /dev/sdb1
6. mount /boot
There are circumstances under which the above will not work, but with
modern disks with linear addressing, blah, blah, it works fine.
Regards,
Bill Rugolsky
-------------- next part --------------
--- grub-0.97/stage2/disk_io.c.alt-partition-map 2004-05-23 12:35:24.000000000 -0400
+++ grub-0.97/stage2/disk_io.c 2005-06-08 13:36:03.000000000 -0400
@@ -373,7 +373,8 @@
embed a Stage 1.5 into a partition instead of a MBR, use system
calls directly instead of biosdisk, because of the bug in
Linux. *sigh* */
- return write_to_partition (device_map, current_drive, current_partition,
+ return write_to_partition (device_map, partition_map,
+ current_drive, current_partition,
sector, sector_count, buf);
}
else
--- grub-0.97/stage2/shared.h.alt-partition-map 2005-06-08 13:36:03.000000000 -0400
+++ grub-0.97/stage2/shared.h 2005-06-08 13:36:03.000000000 -0400
@@ -581,6 +581,8 @@
extern char **device_map;
/* The filename which stores the information about a device map. */
extern char *device_map_file;
+/* The map between drive/partition numbers and UNIX device file names. */
+extern struct partition_map_entry *partition_map;
/* The array of geometries. */
extern struct geometry *disks;
/* Assign DRIVE to a device name DEVICE. */
--- grub-0.97/lib/device.c.alt-partition-map 2005-03-27 18:14:25.000000000 -0500
+++ grub-0.97/lib/device.c 2005-06-08 13:36:03.000000000 -0400
@@ -131,6 +131,122 @@
#include <shared.h>
#include <device.h>
+#if defined(__linux__)
+/* The 2.6 kernel has removed all of the geometry handling for IDE drives
+ * that did fixups for LBA, etc. This means that the geometry we get
+ * with the ioctl has a good chance of being wrong. So, we get to
+ * also know about partition tables and try to read what the geometry
+ * is there. *grumble* Very closely based on code from cfdisk
+ */
+static void get_kernel_geometry(int fd, int *cyl, int *heads, int *sectors) {
+ struct hd_geometry hdg;
+
+ if (ioctl (fd, HDIO_GETGEO, &hdg))
+ return;
+
+ *cyl = hdg.cylinders;
+ *heads = hdg.heads;
+ *sectors = hdg.sectors;
+}
+
+struct partition {
+ unsigned char boot_ind; /* 0x80 - active */
+ unsigned char head; /* starting head */
+ unsigned char sector; /* starting sector */
+ unsigned char cyl; /* starting cylinder */
+ unsigned char sys_ind; /* What partition type */
+ unsigned char end_head; /* end head */
+ unsigned char end_sector; /* end sector */
+ unsigned char end_cyl; /* end cylinder */
+ unsigned char start4[4]; /* starting sector counting from 0 */
+ unsigned char size4[4]; /* nr of sectors in partition */
+};
+
+#define ALIGNMENT 2
+typedef union {
+ struct {
+ unsigned char align[ALIGNMENT];
+ unsigned char b[SECTOR_SIZE];
+ } c;
+ struct {
+ unsigned char align[ALIGNMENT];
+ unsigned char buffer[0x1BE];
+ struct partition part[4];
+ unsigned char magicflag[2];
+ } p;
+} partition_table;
+
+#define PART_TABLE_FLAG0 0x55
+#define PART_TABLE_FLAG1 0xAA
+
+static void
+get_partition_table_geometry(partition_table *bufp, int *cyl, int *heads,
+ int *sectors) {
+ struct partition *p;
+ int i,h,s,hh,ss;
+ int first = 1;
+ int bad = 0;
+
+ if (bufp->p.magicflag[0] != PART_TABLE_FLAG0 ||
+ bufp->p.magicflag[1] != PART_TABLE_FLAG1) {
+ /* Matthew Wilcox: slightly friendlier version of
+ fatal(_("Bad signature on partition table"), 3);
+ */
+ fprintf(stderr, "Unknown partition table signature\n");
+ return;
+ }
+
+ hh = ss = 0;
+ for (i=0; i<4; i++) {
+ p = &(bufp->p.part[i]);
+ if (p->sys_ind != 0) {
+ h = p->end_head + 1;
+ s = (p->end_sector & 077);
+ if (first) {
+ hh = h;
+ ss = s;
+ first = 0;
+ } else if (hh != h || ss != s)
+ bad = 1;
+ }
+ }
+
+ if (!first && !bad) {
+ *heads = hh;
+ *sectors = ss;
+ }
+}
+
+static void get_linux_geometry (int fd, struct geometry *geom) {
+ int kern_cyl = 0, kern_head = 0, kern_sectors = 0;
+ int pt_cyl = 0, pt_head = 0, pt_sectors = 0;
+ partition_table bufp;
+
+ get_kernel_geometry(fd, &kern_cyl, &kern_head, &kern_sectors);
+
+ if (read(fd, bufp.c.b, SECTOR_SIZE) == SECTOR_SIZE) {
+ get_partition_table_geometry(&bufp, &pt_cyl, &pt_head, &pt_sectors);
+ } else {
+ fprintf(stderr, "Unable to read partition table: %s\n", strerror(errno));
+ }
+
+ if (pt_head && pt_sectors) {
+ int cyl_size;
+
+ geom->heads = pt_head;
+ geom->sectors = pt_sectors;
+ cyl_size = pt_head * pt_sectors;
+ geom->cylinders = geom->total_sectors/cyl_size;
+ } else {
+ geom->heads = kern_head;
+ geom->sectors = kern_sectors;
+ geom->cylinders = kern_cyl;
+ }
+
+ return;
+}
+#endif
+
/* Get the geometry of a drive DRIVE. */
void
get_drive_geometry (struct geometry *geom, char **map, int drive)
@@ -151,20 +267,16 @@
#if defined(__linux__)
/* Linux */
{
- struct hd_geometry hdg;
unsigned long nr;
- if (ioctl (fd, HDIO_GETGEO, &hdg))
- goto fail;
-
if (ioctl (fd, BLKGETSIZE, &nr))
goto fail;
-
- /* Got the geometry, so save it. */
- geom->cylinders = hdg.cylinders;
- geom->heads = hdg.heads;
- geom->sectors = hdg.sectors;
+
geom->total_sectors = nr;
+ get_linux_geometry(fd, geom);
+
+ if (!geom->heads && !geom->cylinders && !geom->sectors)
+ goto fail;
goto success;
}
@@ -489,9 +601,27 @@
return 1;
}
+#ifdef __linux__
+/* Find device name for PARTITION on DRIVE in MAP. */
+static const char *
+find_device_for_partition (struct partition_map_entry *map,
+ int drive, int partition)
+{
+ while (map)
+ {
+ if ((map->drive == drive) && (map->partition == partition))
+ return map->device_name;
+ map = map->next;
+ }
+
+ return NULL;
+}
+#endif /* __linux__ */
+
/* Read mapping information from FP, and write it to MAP. */
static int
-read_device_map (FILE *fp, char **map, const char *map_file)
+read_device_map (FILE *fp, char **map, const char *map_file,
+ struct partition_map_entry **partition_map)
{
auto void show_error (int no, const char *msg);
auto void show_warning (int no, const char *msg, ...);
@@ -521,6 +651,9 @@
char *ptr, *eptr;
int drive;
int is_floppy = 0;
+#ifdef __linux__
+ int partition = -1;
+#endif /* __linux__ */
/* Increase the number of lines. */
line_number++;
@@ -571,6 +704,27 @@
if (! is_floppy)
drive += 0x80;
+
+#ifdef __linux__
+ /* Check for a possible partition map entry. */
+ if (*ptr == ',')
+ {
+ if (is_floppy)
+ {
+ show_error (line_number,
+ "Partitions on floppy drives are not allowed");
+ return 0;
+ }
+
+ ptr++;
+ partition = strtoul (ptr, &ptr, 10);
+ if (partition < 0 || partition > 255) /* XXX: max value? */
+ {
+ show_error (line_number, "Bad partition number");
+ return 0;
+ }
+ }
+#endif /* __linux__ */
if (*ptr != ')')
{
@@ -595,6 +749,35 @@
eptr++;
*eptr = 0;
+#ifdef __linux__
+ if (partition != -1)
+ {
+ struct partition_map_entry *new_entry;
+ /* Multiple entries for a given partition is not allowed. */
+ if (find_device_for_partition (*partition_map, drive, partition))
+ {
+ show_error (line_number, "Duplicated entry found");
+ return 0;
+ }
+
+ /* Allocate a new partition map entry. */
+ new_entry = malloc (sizeof (struct partition_map_entry));
+ assert (new_entry);
+
+ /* Fill the entry. */
+ new_entry->next = *partition_map;
+ new_entry->drive = drive;
+ new_entry->partition = partition;
+ new_entry->device_name = strdup (ptr);
+ assert (new_entry->device_name);
+
+ /* Place the new entry at the beginning of the list. */
+ *partition_map = new_entry;
+
+ continue;
+ }
+#endif
+
/* Multiple entries for a given drive is not allowed. */
if (map[drive])
{
@@ -616,7 +799,8 @@
If it is zero, don't probe any floppy at all. If it is one, probe one
floppy. If it is two, probe two floppies. And so on. */
int
-init_device_map (char ***map, const char *map_file, int floppy_disks)
+init_device_map (char ***map, struct partition_map_entry **partition_map,
+ const char *map_file, int floppy_disks)
{
int i;
int num_hd = 0;
@@ -641,7 +825,7 @@
{
int ret;
- ret = read_device_map (fp, *map, map_file);
+ ret = read_device_map (fp, *map, map_file, partition_map);
fclose (fp);
return ret;
}
@@ -812,7 +996,7 @@
/* Restore the memory consumed for MAP. */
void
-restore_device_map (char **map)
+restore_device_map (char **map, struct partition_map_entry *partition_map)
{
int i;
@@ -821,6 +1005,17 @@
free (map[i]);
free (map);
+
+#ifdef __linux__
+ while (partition_map)
+ {
+ struct partition_map_entry *next = partition_map->next;
+ if (partition_map->device_name)
+ free (partition_map->device_name);
+ free (partition_map);
+ partition_map = next;
+ }
+#endif /* __linux__ */
}
#ifdef __linux__
@@ -839,10 +1034,12 @@
}
int
-write_to_partition (char **map, int drive, int partition,
+write_to_partition (char **map, struct partition_map_entry *partition_map,
+ int drive, int partition,
int sector, int size, const char *buf)
{
char dev[PATH_MAX]; /* XXX */
+ const char *partition_dev;
int fd;
if ((partition & 0x00FF00) != 0x00FF00)
@@ -852,16 +1049,27 @@
errnum = ERR_DEV_VALUES;
return 1;
}
-
- assert (map[drive] != 0);
-
- strcpy (dev, map[drive]);
- if (have_devfs ())
+
+ /* First try to find the entry in PARTITION_MAP. */
+ partition_dev = find_device_for_partition (partition_map, drive,
+ (partition >> 16) & 0xFF);
+ if (partition_dev)
+ {
+ strcpy (dev, partition_dev); /* XXX */
+ }
+ else
{
- if (strcmp (dev + strlen(dev) - 5, "/disc") == 0)
- strcpy (dev + strlen(dev) - 5, "/part");
+ assert (map[drive] != 0);
+
+ strcpy (dev, map[drive]);
+ if (have_devfs ())
+ {
+ if (strcmp (dev + strlen(dev) - 5, "/disc") == 0)
+ strcpy (dev + strlen(dev) - 5, "/part");
+ }
+
+ sprintf (dev + strlen(dev), "%d", ((partition >> 16) & 0xFF) + 1);
}
- sprintf (dev + strlen(dev), "%d", ((partition >> 16) & 0xFF) + 1);
/* Open the partition. */
fd = open (dev, O_RDWR);
--- grub-0.97/lib/device.h.alt-partition-map 2004-05-23 12:35:00.000000000 -0400
+++ grub-0.97/lib/device.h 2005-06-08 13:37:58.000000000 -0400
@@ -32,17 +32,31 @@
#define DEFAULT_HD_HEADS 128
#define DEFAULT_HD_SECTORS 63
+
+struct partition_map_entry
+{
+ struct partition_map_entry *next;
+ int drive;
+ int partition;
+ char *device_name;
+};
+
/* Function prototypes. */
extern void get_drive_geometry (struct geometry *geom, char **map, int drive);
extern int check_device (const char *device);
-extern int init_device_map (char ***map, const char *map_file,
+extern int init_device_map (char ***map,
+ struct partition_map_entry **part_map,
+ const char *map_file,
int no_floppies);
-extern void restore_device_map (char **map);
+extern void restore_device_map (char **map,
+ struct partition_map_entry *part_map);
#ifdef __linux__
extern int is_disk_device (char **map, int drive);
-extern int write_to_partition (char **map, int drive, int partition,
- int offset, int size, const char *buf);
+extern int write_to_partition (char **map,
+ struct partition_map_entry *part_map,
+ int drive, int partition,
+ int offset, int size, const char *buf);
#endif /* __linux__ */
#endif /* DEVICE_MAP_HEADER */
--- grub-0.97/grub/asmstub.c.alt-partition-map 2005-06-08 13:36:03.000000000 -0400
+++ grub-0.97/grub/asmstub.c 2005-06-08 13:36:03.000000000 -0400
@@ -86,6 +86,9 @@
/* The map between BIOS drives and UNIX device file names. */
char **device_map = 0;
+/* The map between drive/partition numbers and UNIX device file names. */
+struct partition_map_entry *partition_map = 0;
+
/* The jump buffer for exiting correctly. */
static jmp_buf env_for_exit;
@@ -155,7 +158,8 @@
for (i = 0; i < NUM_DISKS; i++)
disks[i].flags = -1;
- if (! init_device_map (&device_map, device_map_file, floppy_disks))
+ if (! init_device_map (&device_map, &partition_map, device_map_file,
+ floppy_disks))
return 1;
/* Check some invariants. */
@@ -213,8 +217,9 @@
close (serial_fd);
/* Release memory. */
- restore_device_map (device_map);
+ restore_device_map (device_map, partition_map);
device_map = 0;
+ partition_map = 0;
free (disks);
disks = 0;
free (scratch);
More information about the fedora-list
mailing list