rpms/libhugetlbfs/F-12 huge_page_setup_helper.py, NONE, 1.1 libhugetlbfs-2.6-hugeadm-enhancements.patch, NONE, 1.1 libhugetlbfs.spec, 1.31, 1.32

Jarod Wilson jwilson at fedoraproject.org
Mon Oct 5 19:01:40 UTC 2009


Author: jwilson

Update of /cvs/pkgs/rpms/libhugetlbfs/F-12
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv9228

Modified Files:
	libhugetlbfs.spec 
Added Files:
	huge_page_setup_helper.py 
	libhugetlbfs-2.6-hugeadm-enhancements.patch 
Log Message:
* Fri Oct 02 2009 Jarod Wilson <jarod at redhat.com> 2.6-3
- Add hopefully-about-to-be-merged-upstream hugeadm enhancements
- Add huge pages setup helper script, using new hugeadm enhancements



--- NEW FILE huge_page_setup_helper.py ---
#!/usr/bin/python

#
# Tool to set up Linux large page support with minimal effort
#
# by Jarod Wilson <jarod at redhat.com>
# (c) Red Hat, Inc., 2009
#
# Requires hugeadm from libhugetlbfs 2.7 (or backported support)
#
import os

debug = False

# Config files this tool rewrites. Each one must be writable; when it is
# not, complain and (unless running in debug mode) bail out immediately.
sysctlConf = "/etc/sysctl.conf"
limitsConf = "/etc/security/limits.d/hugepages.conf"
for confPath in (sysctlConf, limitsConf):
    if os.access(confPath, os.W_OK):
        continue
    print("Cannot access %s" % confPath)
    if not debug:
        os._exit(1)


# Work out how much memory the machine has, according to hugeadm --explain
memTotal = 0
hugePageSize = 0
hugePages = 0

hugeadmexplain = os.popen("/usr/bin/hugeadm --explain 2>/dev/null").readlines()

# The first "Total System Memory:" line carries the amount as its 4th word
for explainLine in hugeadmexplain:
    if not explainLine.startswith("Total System Memory:"):
        continue
    memTotal = int(explainLine.split()[3])
    break

# No such line (or a zero value) means hugeadm did not report total memory
if not memTotal:
    print("Your version of libhugetlbfs' hugeadm utility is too old!")
    os._exit(1)


# Pick the default huge page size and see how many pages are allocated.
# The default pool is the row whose fifth column is "*". Rows with fewer
# than five columns (presumably non-default pools, whose marker column is
# empty -- confirm against hugeadm's pool_list output) are skipped instead
# of being indexed blindly, which used to raise IndexError.
poolList = os.popen("/usr/bin/hugeadm --pool-list").readlines()
for line in poolList:
    poolFields = line.split()
    if len(poolFields) > 4 and poolFields[4] == '*':
        hugePageSize = int(poolFields[0])
        hugePages = int(poolFields[2])
        break

# The page count calculation later in this script divides by the huge page
# size, so stop here with a clear message if no default pool was found.
if hugePageSize == 0:
    print("Unable to determine the default huge page size from hugeadm --pool-list!")
    os._exit(1)


# Starting values of the sysctl knobs, taken from the hugeadm --explain text
shmmax = 0
hugeGID = 0

# current shmmax: 5th word of the first "A /proc/sys/kernel/shmmax value of" line
shmmaxReports = [entry for entry in hugeadmexplain
                 if entry.startswith("A /proc/sys/kernel/shmmax value of")]
if shmmaxReports:
    shmmax = int(shmmaxReports[0].split()[4])

# current huge page group: 3rd word of the first "vm.hugetlb_shm_group = " line
shmGroupReports = [entry for entry in hugeadmexplain
                   if entry.strip().startswith("vm.hugetlb_shm_group = ")]
if shmGroupReports:
    hugeGID = int(shmGroupReports[0].split()[2])


# Look up the textual name of the current huge page group; it stays "null"
# when no entry in the group database has a matching gid.
hugeGIDName = "null"
groupNames = os.popen("/usr/bin/getent group").readlines()
for groupEntry in groupNames:
    # getent group lines are colon separated: field 0 is the name, field 2 the gid
    groupFields = groupEntry.split(":")
    if int(groupFields[2]) == hugeGID:
        hugeGIDName = groupFields[0]
        break


# Show the configuration as we found it, before anything gets changed
bytesPerMB = 1024 * 1024
hugePoolMB = hugePages * hugePageSize / bytesPerMB
print("Current configuration:")
print(" * Total System Memory......: %6d MB" % memTotal)
print(" * Shared Mem Max Mapping...: %6d MB" % (shmmax / bytesPerMB))
print(" * System Huge Page Size....: %6d MB" % (hugePageSize / bytesPerMB))
print(" * Number of Huge Pages.....: %6d"    % hugePages)
print(" * Total size of Huge Pages.: %6d MB" % hugePoolMB)
print(" * Remaining System Memory..: %6d MB" % (memTotal - hugePoolMB))
print(" * Huge Page User Group.....:  %s (%d)" % (hugeGIDName, hugeGID))
print("")


# ask how much memory they want to allocate for huge pages
# (a bare number or an M/MB suffix means megabytes, G/GB means gigabytes)
userIn = None
while not userIn:
    try:
        userIn = raw_input("How much memory would you like to allocate for huge pages? "
                           "(input in MB, unless postfixed with GB): ")
        # tolerate surrounding whitespace and lower-case unit suffixes
        sizeStr = userIn.strip().upper()
        if sizeStr[-2:] == "GB":
            userHugePageReqMB = int(sizeStr[0:-2]) * 1024
        elif sizeStr[-1:] == "G":
            userHugePageReqMB = int(sizeStr[0:-1]) * 1024
        elif sizeStr[-2:] == "MB":
            userHugePageReqMB = int(sizeStr[0:-2])
        elif sizeStr[-1:] == "M":
            userHugePageReqMB = int(sizeStr[0:-1])
        else:
            userHugePageReqMB = int(sizeStr)
        if userHugePageReqMB < 0:
            # int() happily parses "-512"; a negative pool size makes no sense
            userIn = None
            print("Cannot allocate a negative amount of memory, please try again!")
        # As a sanity safeguard, require at least 128M not be allocated to huge pages
        elif userHugePageReqMB > (memTotal - 128):
            userIn = None
            print("Refusing to allocate %d, you must leave at least 128MB for the system" % userHugePageReqMB)
        else:
            break
    except ValueError:
        # raised by int() for empty or non-numeric input
        userIn = None
        print("Input must be an integer, please try again!")
# convert the request to KB and then to a number of default-sized huge pages
userHugePageReqKB = userHugePageReqMB * 1024
userHugePagesReq = userHugePageReqKB / (hugePageSize / 1024)
print("Okay, we'll try to allocate %d MB for huge pages..." % userHugePageReqMB)
print("")


# some basic user input validation
# The group name is later interpolated into shell command lines, so shell
# metacharacters (including '|' and '#') are rejected here.
badchars = list(' \\\'":;~`!$^&*(){}[]?/><,|#')
inputIsValid = False
foundbad = False
# ask for the name of the group allowed access to huge pages
while not inputIsValid:
    userGroupReq = raw_input("What group should have access to the huge pages? "
                             "(The group will be created, if need be): ")
    # start every attempt with a clean slate, otherwise a single bad entry
    # would make all later (valid) entries be rejected forever
    foundbad = False
    if not userGroupReq:
        foundbad = True
        print("Group name cannot be empty, please try again!")
    elif userGroupReq[0].isdigit():
        foundbad = True
        print("Group names cannot start with a number, please try again!")
    for char in badchars:
        if char in userGroupReq:
            foundbad = True
            print("Illegal characters in group name, please try again!")
            break
    if not foundbad:
        inputIsValid = True
print("Okay, we'll give group %s access to the huge pages" % userGroupReq)


# see if group already exists, use it if it does, if not, create it
userGIDReq = -1
for line in groupNames:
    # getent group lines are colon separated: field 0 is the name, field 2 the gid
    groupFields = line.split(":")
    if groupFields[0] == userGroupReq:
        userGIDReq = int(groupFields[2])
        break

if userGIDReq > -1:
    print("Group %s (gid %d) already exists, we'll use it" % (userGroupReq, userGIDReq))
else:
    if debug == False:
        # close() waits for groupadd to finish before we query the group
        os.popen("/usr/sbin/groupadd %s" % userGroupReq).close()
    else:
        print("/usr/sbin/groupadd %s" % userGroupReq)
    groupNames = os.popen("/usr/bin/getent group %s" % userGroupReq).readlines()
    for line in groupNames:
        groupFields = line.split(":")
        if groupFields[0] == userGroupReq:
            userGIDReq = int(groupFields[2])
            break
    # If the group still cannot be found, groupadd failed. Carrying on would
    # hand gid -1 to hugeadm and write it to sysctl.conf, so stop instead.
    # (In debug mode nothing was created, so -1 is expected and only shown.)
    if userGIDReq < 0 and debug == False:
        print("Failed to create group %s, giving up!" % userGroupReq)
        os._exit(1)
    print("Created group %s (gid %d) for huge page use" % (userGroupReq, userGIDReq))
print("")


# basic user input validation, take 2
# space is valid in this case, wasn't in the prior incarnation
# The names end up on shell command lines below, so shell metacharacters
# (including '|' and '#') are rejected.
badchars = list('\\\'":;~`!$^&*(){}[]?/><,|#')
inputIsValid = False
# ask for user(s) that should be in the huge page access group
while not inputIsValid:
    userUsersReq = raw_input("What user(s) should have access to the huge pages (space-delimited list, users created as needed)? ")
    # start every attempt with a clean slate, otherwise a single bad entry
    # would make all later (valid) entries be rejected forever
    foundbad = False
    for char in badchars:
        if char in userUsersReq:
            foundbad = True
            print("Illegal characters in user name(s) or invalid list format, please try again!")
            break
    if not foundbad:
        inputIsValid = True
# see if user(s) already exist(s)
curUserList = os.popen("/usr/bin/getent passwd").readlines()
# login names are the first colon separated field of each passwd entry
existingUsers = set(entry.split(":")[0] for entry in curUserList)
hugePageUserList = userUsersReq.split()
for hugeUser in hugePageUserList:
    if hugeUser in existingUsers:
        # existing account: just append the huge page group to its groups
        print("Adding user %s to huge page group" % hugeUser)
        if debug == False:
            os.popen("/usr/sbin/usermod -a -G %s %s" % (userGroupReq, hugeUser))
        else:
            print("/usr/sbin/usermod -a -G %s %s" % (userGroupReq, hugeUser))
    else:
        # unknown account: create it with the huge page group as a supplementary group
        print("Creating user %s with membership in huge page group" % hugeUser)
        if debug == False:
            os.popen("/usr/sbin/useradd %s -G %s" % (hugeUser, userGroupReq))
        else:
            print("/usr/sbin/useradd %s -G %s" % (hugeUser, userGroupReq))
print("")


# Apply the new settings to the running system (or, in debug mode, just
# show the hugeadm invocations that would have been made)
hugeadmCmds = [
    "/usr/bin/hugeadm --pool-pages-min DEFAULT:%sM" % userHugePageReqMB,
    "/usr/bin/hugeadm --pool-pages-max DEFAULT:%sM" % userHugePageReqMB,
    "/usr/bin/hugeadm --set-shm-group %d" % userGIDReq,
    "/usr/bin/hugeadm --set-recommended-shmmax",
]
for hugeadmCmd in hugeadmCmds:
    if debug:
        print(hugeadmCmd)
    else:
        os.popen(hugeadmCmd)
if debug:
    print("")

# Re-read hugeadm --explain and pick up the shmmax value it now reports:
# the 3rd word of the first "kernel.shmmax = " line
hugeadmexplain = os.popen("/usr/bin/hugeadm --explain 2>/dev/null").readlines()
shmmaxSettings = [entry for entry in hugeadmexplain
                  if entry.strip().startswith("kernel.shmmax = ")]
if shmmaxSettings:
    shmmax = int(shmmaxSettings[0].split()[2])

# write out sysctl config changes to persist across reboot
if debug == False:
    # Fallback content, used when the existing file cannot be read. This is
    # a list of lines (not a bare string) so the loop below sees whole lines.
    sysctlConfLines = ["# sysctl configuration\n"]
    if os.access(sysctlConf, os.W_OK):
        try:
            fd = open(sysctlConf)
            try:
                sysctlConfLines = fd.readlines()
            finally:
                fd.close()
            os.rename(sysctlConf, sysctlConf + ".backup")
            print("Saved original %s as %s.backup" % (sysctlConf, sysctlConf))
        except (IOError, OSError):
            # Backing up is best effort: carry on, but tell the user
            print("Warning: could not back up %s" % sysctlConf)

    # Settings owned by this tool; existing lines for them are dropped and
    # fresh ones appended, every other line is copied through unchanged.
    managedKeys = ("kernel.shmmax", "vm.nr_hugepages", "vm.hugetlb_shm_group")
    fd = open(sysctlConf, "w")
    try:
        for line in sysctlConfLines:
            if not line.startswith(managedKeys):
                fd.write(line)

        fd.write("kernel.shmmax = %d\n" % shmmax)
        fd.write("vm.nr_hugepages = %d\n" % userHugePagesReq)
        fd.write("vm.hugetlb_shm_group = %d\n" % userGIDReq)
    finally:
        fd.close()

else:
    print("Add to %s:" % sysctlConf)
    print("kernel.shmmax = %d" % shmmax)
    print("vm.nr_hugepages = %d" % userHugePagesReq)
    print("vm.hugetlb_shm_group = %d" % userGIDReq)
    print("")


# write out limits.conf changes to persist across reboot
if debug == False:
    # Fallback content, used when the existing file cannot be read. This is
    # a list of lines (not a bare string) so the loop below sees whole lines.
    limitsConfLines = ["# Huge page access configuration\n"]
    if os.access(limitsConf, os.W_OK):
        try:
            fd = open(limitsConf)
            try:
                limitsConfLines = fd.readlines()
            finally:
                fd.close()
            os.rename(limitsConf, limitsConf + ".backup")
            print("Saved original %s as %s.backup" % (limitsConf, limitsConf))
        except (IOError, OSError):
            # Backing up is best effort: carry on, but tell the user
            print("Warning: could not back up %s" % limitsConf)

    fd = open(limitsConf, "w")
    try:
        for line in limitsConfLines:
            # Drop existing entries for the users we are about to (re)write.
            # Blank lines split into an empty list and are copied through.
            fields = line.split()
            if fields and fields[0] in hugePageUserList:
                continue
            fd.write(line)

        # soft and hard memlock limits for each user, using the requested size in KB
        for hugeUser in hugePageUserList:
            fd.write("%s		soft	memlock		%d\n" % (hugeUser, userHugePageReqKB))
            fd.write("%s		hard	memlock		%d\n" % (hugeUser, userHugePageReqKB))
    finally:
        fd.close()

else:
    print("Add to %s:" % limitsConf)
    for hugeUser in hugePageUserList:
        print("%s		soft	memlock		%d" % (hugeUser, userHugePageReqKB))
        print("%s		hard	memlock		%d" % (hugeUser, userHugePageReqKB))


# Show the resulting configuration now that all the tweaking is done
bytesPerMB = 1024 * 1024
if debug:
    # This should be what we *would* have set it to, had we actually run hugeadm --set-recommended-shmmax
    finalShmmaxMB = userHugePagesReq * hugePageSize / bytesPerMB
else:
    finalShmmaxMB = shmmax / bytesPerMB
print("")
print("Final configuration:")
print(" * Total System Memory......: %6d MB" % memTotal)
print(" * Shared Mem Max Mapping...: %6d MB" % finalShmmaxMB)
print(" * System Huge Page Size....: %6d MB" % (hugePageSize / bytesPerMB))
print(" * Available Huge Pages.....: %6d"    % userHugePagesReq)
print(" * Total size of Huge Pages.: %6d MB" % (userHugePagesReq * hugePageSize / bytesPerMB))
print(" * Remaining System Memory..: %6d MB" % (memTotal - userHugePageReqMB))
print(" * Huge Page User Group.....:  %s (%d)" % (userGroupReq, userGIDReq))
print("")



libhugetlbfs-2.6-hugeadm-enhancements.patch:
 hugeadm.c     |  223 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 man/hugeadm.8 |   41 ++++++++--
 2 files changed, 245 insertions(+), 19 deletions(-)

--- NEW FILE libhugetlbfs-2.6-hugeadm-enhancements.patch ---
 hugeadm.c     |  223 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 man/hugeadm.8 |   41 ++++++++---
 2 files changed, 245 insertions(+), 19 deletions(-)

diff --git a/hugeadm.c b/hugeadm.c
index a793267..1666e27 100644
--- a/hugeadm.c
+++ b/hugeadm.c
@@ -67,12 +67,15 @@ extern char *optarg;
 #define PROCMOUNTS "/proc/mounts"
 #define PROCHUGEPAGES_MOVABLE "/proc/sys/vm/hugepages_treat_as_movable"
 #define PROCMINFREEKBYTES "/proc/sys/vm/min_free_kbytes"
+#define PROCSHMMAX "/proc/sys/kernel/shmmax"
+#define PROCHUGETLBGROUP "/proc/sys/vm/hugetlb_shm_group"
 #define PROCZONEINFO "/proc/zoneinfo"
 #define FS_NAME "hugetlbfs"
 #define MIN_COL 20
 #define MAX_SIZE_MNTENT (64 + PATH_MAX + 32 + 128 + 2 * sizeof(int))
 #define FORMAT_LEN 20
 
+#define MEM_TOTAL "MemTotal:"
 #define SWAP_FREE "SwapFree:"
 #define SWAP_TOTAL "SwapTotal:"
 
@@ -86,13 +89,17 @@ void print_usage()
 	OPTION("--hard", "specified with --pool-pages-min to make");
 	CONT("multiple attempts at adjusting the pool size to the");
 	CONT("specified count on failure");
-	OPTION("--pool-pages-min <size>:[+|-]<count>", "");
+	OPTION("--pool-pages-min <size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", "");
 	CONT("Adjust pool 'size' lower bound");
-	OPTION("--pool-pages-max <size>:[+|-]<count>", "");
+	OPTION("--pool-pages-max <size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", "");
 	CONT("Adjust pool 'size' upper bound");
 	OPTION("--set-recommended-min_free_kbytes", "");
 	CONT("Sets min_free_kbytes to a recommended value to improve availability of");
 	CONT("huge pages at runtime");
+	OPTION("--set-recommended-shmmax", "Sets shmmax to a recommended value to");
+	CONT("maximise the size possible for shared memory pools");
+	OPTION("--set-shm-group <gid|groupname>", "Sets hugetlb_shm_group to the");
+	CONT("specified group, which has permission to use hugetlb shared memory pools");
 	OPTION("--add-temp-swap[=count]", "Specified with --pool-pages-min to create");
 	CONT("temporary swap space for the duration of the pool resize. Default swap");
 	CONT("size is 5 huge pages. Optional arg sets size to 'count' huge pages");
@@ -135,6 +142,8 @@ int opt_dry_run = 0;
 int opt_hard = 0;
 int opt_movable = -1;
 int opt_set_recommended_minfreekbytes = 0;
+int opt_set_recommended_shmmax = 0;
+int opt_set_hugetlb_shm_group = 0;
 int opt_temp_swap = 0;
 int opt_ramdisk_swap = 0;
 int opt_swap_persist = 0;
@@ -215,6 +224,8 @@ void verbose_expose(void)
 #define LONG_POOL_MAX_ADJ	(LONG_POOL|'M')
 
 #define LONG_SET_RECOMMENDED_MINFREEKBYTES	('k' << 8)
+#define LONG_SET_RECOMMENDED_SHMMAX		('x' << 8)
+#define LONG_SET_HUGETLB_SHM_GROUP		('R' << 8)
 
 #define LONG_MOVABLE		('z' << 8)
 #define LONG_MOVABLE_ENABLE	(LONG_MOVABLE|'e')
@@ -589,6 +600,19 @@ void create_mounts(char *user, char *group, char *base, mode_t mode)
 }
 
 /**
+ * show_mem shouldn't change the behavior of any of its
+ * callers, it only prints a message to the user showing the
+ * total amount of memory in the system (in megabytes).
+ */
+void show_mem()
+{
+	long mem_total;
+
+	mem_total = read_meminfo(MEM_TOTAL);
+	printf("Total System Memory: %ld MB\n\n", mem_total / 1024);
+}
+
+/**
  * check_swap shouldn't change the behavior of any of its
  * callers, it only prints a message to the user if something
  * is being done that might fail without swap available.  i.e.
@@ -668,12 +692,130 @@ void check_minfreekbytes(void)
 	/* There should be at least one pageblock free per zone in the system */
 	if (recommended_min > min_free_kbytes) {
 		printf("\n");
-		printf("The " PROCMINFREEKBYTES " of %ld is too small. To maximiuse efficiency\n", min_free_kbytes);
+		printf("The " PROCMINFREEKBYTES " of %ld is too small. To maximise efficiency\n", min_free_kbytes);
 		printf("of fragmentation avoidance, there should be at least one huge page free per zone\n");
 		printf("in the system which minimally requires a min_free_kbytes value of %ld\n", recommended_min);
 	}
 }
 
+long recommended_shmmax(void)
+{
+	struct hpage_pool pools[MAX_POOLS];
+	long recommended_shmmax = 0;
+	int pos, cnt;
+
+	cnt = hpool_sizes(pools, MAX_POOLS);
+	if (cnt < 0) {
+		ERROR("unable to obtain pools list");
+		exit(EXIT_FAILURE);
+	}
+
+	for (pos = 0; cnt--; pos++)
+		recommended_shmmax += (pools[pos].maximum * pools[pos].pagesize);
+
+	return recommended_shmmax;
+}
+
+void set_recommended_shmmax(void)
+{
+	int ret;
+	long recommended = recommended_shmmax();
+
+	if (recommended == 0) {
+		printf("\n");
+		WARNING("We can only set a recommended shmmax when huge pages are configured!\n");
+		return;
+	}
+
+	DEBUG("Setting shmmax to %ld\n", recommended);
+	ret = file_write_ulong(PROCSHMMAX, (unsigned long)recommended);
+
+	if (!ret) {
+		INFO("To make shmmax settings persistent, add the following line to /etc/sysctl.conf:\n");
+		INFO("  kernel.shmmax = %ld\n", recommended);
+	}
+}
+
+void check_shmmax(void)
+{
+	long current_shmmax = file_read_ulong(PROCSHMMAX, NULL);
+	long recommended = recommended_shmmax();
+
+	if (current_shmmax != recommended) {
+		printf("\n");
+		printf("A " PROCSHMMAX " value of %ld bytes may be sub-optimal. To maximise\n", current_shmmax);
+		printf("shared memory usage, this should be set to the size of the largest shared memory\n");
+		printf("segment size you want to be able to use. Alternatively, set it to a size matching\n");
+		printf("the maximum possible allocation size of all huge pages. This can be done\n");
+		printf("automatically, using the --set-recommended-shmmax option.\n");
+	}
+
+	if (recommended == 0) {
+		printf("\n");
+		WARNING("We can't make a shmmax recommendation until huge pages are configured!\n");
+		return;
+	}
+
+	printf("\n");
+	printf("The recommended shmmax for your currently allocated huge pages is %ld bytes.\n", recommended);
+	printf("To make shmmax settings persistent, add the following line to /etc/sysctl.conf:\n");
+	printf("  kernel.shmmax = %ld\n", recommended);
+}
+
+void set_hugetlb_shm_group(gid_t gid, char *group)
+{
+	int ret;
+
+	DEBUG("Setting hugetlb_shm_group to %d (%s)\n", gid, group);
+	ret = file_write_ulong(PROCHUGETLBGROUP, (unsigned long)gid);
+
+	if (!ret) {
+		INFO("To make hugetlb_shm_group settings persistent, add the following line to /etc/sysctl.conf:\n");
+		INFO("  vm.hugetlb_shm_group = %d\n", gid);
+	}
+}
+
+/* heisted from shadow-utils/libmisc/list.c::is_on_list() */
+static int user_in_group(char *const *list, const char *member)
+{
+	while (*list != NULL) {
+		if (strcmp(*list, member) == 0) {
+			return 1;
+		}
+		list++;
+	}
+
+	return 0;
+}
+
+void check_user(void)
+{
+	uid_t uid;
+	gid_t gid;
+	struct passwd *pwd;
+	struct group *grp;
+
+	gid = (gid_t)file_read_ulong(PROCHUGETLBGROUP, NULL);
+	grp = getgrgid(gid);
+	if (!grp) {
+		printf("\n");
+		WARNING("Group ID %d in hugetlb_shm_group doesn't appear to be a valid group!\n", gid);
+		return;
+	}
+
+	uid = getuid();
+	pwd = getpwuid(uid);
+
+	if (gid != pwd->pw_gid && !user_in_group(grp->gr_mem, pwd->pw_name) && uid != 0) {
+		printf("\n");
+		WARNING("User %s (uid: %d) is not a member of the hugetlb_shm_group %s (gid: %d)!\n", pwd->pw_name, uid, grp->gr_name, gid);
+	} else {
+		printf("\n");
+		printf("To make your hugetlb_shm_group settings persistent, add the following line to /etc/sysctl.conf:\n");
+		printf("  vm.hugetlb_shm_group = %d\n", gid);
+	}
+}
+
 void add_temp_swap(long page_size)
 {
 	char path[PATH_MAX];
@@ -828,18 +970,37 @@ enum {
 	POOL_BOTH,
 };
 
-static long value_adjust(char *adjust_str, long base)
+static long value_adjust(char *adjust_str, long base, long page_size)
 {
 	long adjust;
 	char *iter;
 
 	/* Convert and validate the adjust. */
+	errno = 0;
 	adjust = strtol(adjust_str, &iter, 0);
-	if (*iter) {
+	/* Catch strtol errors and sizes that overflow the native word size */
+	if (errno || adjust_str == iter) {
+		if (errno == ERANGE)
+			errno = EOVERFLOW;
+		else
+			errno = EINVAL;
 		ERROR("%s: invalid adjustment\n", adjust_str);
 		exit(EXIT_FAILURE);
 	}
 
+	switch (*iter) {
+	case 'G':
+	case 'g':
+		adjust = size_to_smaller_unit(adjust);
+	case 'M':
+	case 'm':
+		adjust = size_to_smaller_unit(adjust);
+	case 'K':
+	case 'k':
+		adjust = size_to_smaller_unit(adjust);
+		adjust = adjust / page_size;
+	}
+
 	if (adjust_str[0] != '+' && adjust_str[0] != '-')
 		base = 0;
 
@@ -852,6 +1013,8 @@ static long value_adjust(char *adjust_str, long base)
 	}
 	base += adjust;
 
+	DEBUG("Returning page count of %ld\n", base);
+
 	return base;
 }
 
@@ -885,7 +1048,12 @@ void pool_adjust(char *cmd, unsigned int counter)
 					page_size_str, adjust_str, counter);
 
 	/* Convert and validate the page_size. */
-	page_size = parse_page_size(page_size_str);
+	if (strcmp(page_size_str, "DEFAULT") == 0)
+		page_size = kernel_default_hugepage_size();
+	else
+		page_size = parse_page_size(page_size_str);
+
+	DEBUG("Working with page_size of %ld\n", page_size);
 
 	cnt = hpool_sizes(pools, MAX_POOLS);
 	if (cnt < 0) {
@@ -905,14 +1073,14 @@ void pool_adjust(char *cmd, unsigned int counter)
 	max = pools[pos].maximum;
 
 	if (counter == POOL_BOTH) {
-		min = value_adjust(adjust_str, min);
+		min = value_adjust(adjust_str, min, page_size);
 		max = min;
 	} else if (counter == POOL_MIN) {
-		min = value_adjust(adjust_str, min);
+		min = value_adjust(adjust_str, min, page_size);
 		if (min > max)
 			max = min;
 	} else {
-		max = value_adjust(adjust_str, max);
+		max = value_adjust(adjust_str, max, page_size);
 		if (max < min)
 			min = max;
 	}
@@ -1003,13 +1171,16 @@ void page_sizes(int all)
 
 void explain()
 {
+	show_mem();
 	mounts_list_all();
 	printf("\nHuge page pools:\n");
 	pool_list();
 	printf("\nHuge page sizes with configured pools:\n");
 	page_sizes(0);
 	check_minfreekbytes();
+	check_shmmax();
 	check_swap();
+	check_user();
 	printf("\nNote: Permanent swap space should be preferred when dynamic "
 		"huge page pools are used.\n");
 }
@@ -1027,6 +1198,9 @@ int main(int argc, char** argv)
 	int opt_global_mounts = 0, opt_pgsizes = 0, opt_pgsizes_all = 0;
 	int opt_explain = 0, minadj_count = 0, maxadj_count = 0;
 	int ret = 0, index = 0;
+	gid_t opt_gid = 0;
+	struct group *opt_grp = NULL;
+	int group_invalid = 0;
 	struct option long_opts[] = {
 		{"help",       no_argument, NULL, 'h'},
 		{"verbose",    required_argument, NULL, 'v' },
@@ -1036,6 +1210,8 @@ int main(int argc, char** argv)
 		{"pool-pages-min", required_argument, NULL, LONG_POOL_MIN_ADJ},
 		{"pool-pages-max", required_argument, NULL, LONG_POOL_MAX_ADJ},
 		{"set-recommended-min_free_kbytes", no_argument, NULL, LONG_SET_RECOMMENDED_MINFREEKBYTES},
+		{"set-recommended-shmmax", no_argument, NULL, LONG_SET_RECOMMENDED_SHMMAX},
+		{"set-shm-group", required_argument, NULL, LONG_SET_HUGETLB_SHM_GROUP},
 		{"enable-zone-movable", no_argument, NULL, LONG_MOVABLE_ENABLE},
 		{"disable-zone-movable", no_argument, NULL, LONG_MOVABLE_DISABLE},
 		{"hard", no_argument, NULL, LONG_HARD},
@@ -1153,6 +1329,29 @@ int main(int argc, char** argv)
 			opt_set_recommended_minfreekbytes = 1;
 			break;
 
+		case LONG_SET_RECOMMENDED_SHMMAX:
+			opt_set_recommended_shmmax = 1;
+			break;
+
+		case LONG_SET_HUGETLB_SHM_GROUP:
+			opt_grp = getgrnam(optarg);
+			if (!opt_grp) {
+				opt_gid = atoi(optarg);
+				if (opt_gid == 0 && strcmp(optarg, "0"))
+					group_invalid = 1;
+				opt_grp = getgrgid(opt_gid);
+				if (!opt_grp)
+					group_invalid = 1;
+			} else {
+				opt_gid = opt_grp->gr_gid;
+			}
+			if (group_invalid) {
+				ERROR("Invalid group specification (%s)\n", optarg);
+				exit(EXIT_FAILURE);
+			}
+			opt_set_hugetlb_shm_group = 1;
+			break;
+
 		case LONG_MOVABLE_DISABLE:
 			opt_movable = 0;
 			break;
@@ -1208,6 +1407,12 @@ int main(int argc, char** argv)
 	if (opt_set_recommended_minfreekbytes)
 		set_recommended_minfreekbytes();
 
+	if (opt_set_recommended_shmmax)
+		set_recommended_shmmax();
+
+	if (opt_set_hugetlb_shm_group)
+		set_hugetlb_shm_group(opt_gid, opt_grp->gr_name);
+
 	while (--minadj_count >= 0) {
 		if (! kernel_has_overcommit())
 			pool_adjust(opt_min_adj[minadj_count], POOL_BOTH);
diff --git a/man/hugeadm.8 b/man/hugeadm.8
index 6342980..fc8c8ac 100644
--- a/man/hugeadm.8
+++ b/man/hugeadm.8
@@ -2,7 +2,7 @@
 .\" First parameter, NAME, should be all caps
 .\" Second parameter, SECTION, should be 1-8, maybe w/ subsection
 .\" other parameters are allowed: see man(7), man(1)
-.TH HUGEADM 8 "October 10, 2008"
+.TH HUGEADM 8 "October 1, 2009"
 .\" Please adjust this date whenever revising the manpage.
 .\"
 .\" Some roff macros, for reference:
@@ -87,6 +87,22 @@ avoiding mixing is to increase /proc/sys/vm/min_free_kbytes. This parameter
 sets min_free_kbytes to a recommended value to aid fragmentation avoidance.
 
 .TP
+.B --set-recommended-shmmax
+
+The maximum shared memory segment size should be set to at least the size
+of the largest shared memory segment size you want available for applications
+using huge pages, via /proc/sys/kernel/shmmax. Optionally, it can be set
+automatically to match the maximum possible size of all huge page allocations
+and thus the maximum possible shared memory segment size, using this switch.
+
+.TP
+.B --set-shm-group=<gid|groupname>
+
+Users in the group specified in /proc/sys/vm/hugetlb_shm_group are granted
+full access to huge pages. The sysctl takes a numeric gid, but this hugeadm
+option can set it for you, using either a gid or group name.
+
+.TP
 .B --page-sizes
 
 This displays every page size supported by the system and has a pool
@@ -107,25 +123,30 @@ This displays all active mount points for hugetlbfs.
 The following options configure the pool.
 
 .TP
-.B --pool-pages-min=<size>:[+|-]<count>
+.B --pool-pages-min=<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>
 
 This option sets or adjusts the Minimum number of hugepages in the pool for
 pagesize \fBsize\fP. \fBsize\fP may be specified in bytes or in kilobytes,
-megabytes, or gigabytes by appending K, M, or G respectively. The pool is set
-to \fBcount\fP pages if + or - are not specified. If + or - are specified,
-then the size of the pool will adjust by that amount. Note that there is
-no guarantee that the system can allocate the hugepages requested for the
-Minimum pool. The size of the pools should be checked after executing this
-command to ensure they were successful.
+megabytes, or gigabytes by appending K, M, or G respectively, or as DEFAULT,
+which uses the system's default huge page size for \fBsize\fP. The pool size
+adjustment can be specified by \fBpagecount\fP pages or by \fBmemsize\fP, if
+postfixed with G, M, or K, for gigabytes, megabytes, or kilobytes,
+respectively. If the adjustment is specified via \fBmemsize\fP, then the
+\fBpagecount\fP will be calculated for you, based on page size \fBsize\fP.
+The pool is set to \fBpagecount\fP pages if + or - are not specified. If
++ or - are specified, then the size of the pool will adjust by that amount.
+Note that there is no guarantee that the system can allocate the hugepages
+requested for the Minimum pool. The size of the pools should be checked after
+executing this command to ensure they were successful.
 
 .TP
-.B --pool-pages-max=<size>:[+|-]<count>
+.B --pool-pages-max=<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>
 
 This option sets or adjusts the Maximum number of hugepages. Note that while
 the Minimum number of pages are guaranteed to be available to applications,
 there is not guarantee that the system can allocate the pages on demand when
 the number of huge pages requested by applications is between the Minimum and
-Maximum pool sizes.
+Maximum pool sizes. See --pool-pages-min for usage syntax.
 
 .TP
 .B --enable-zone-movable


Index: libhugetlbfs.spec
===================================================================
RCS file: /cvs/pkgs/rpms/libhugetlbfs/F-12/libhugetlbfs.spec,v
retrieving revision 1.31
retrieving revision 1.32
diff -u -p -r1.31 -r1.32
--- libhugetlbfs.spec	3 Sep 2009 09:47:36 -0000	1.31
+++ libhugetlbfs.spec	5 Oct 2009 19:01:37 -0000	1.32
@@ -1,13 +1,17 @@
 Name: libhugetlbfs
 Version: 2.6
-Release: 2%{?dist}
+Release: 3%{?dist}
 Summary: A library which provides easy access to huge pages of memory
 
 Group: System Environment/Libraries
 License: LGPLv2+
 URL: http://libhugetlbfs.sourceforge.net/
 Source0: http://downloads.sourceforge.net/libhugetlbfs/%{name}-%{version}.tar.gz
+# Sent upstream for review and possible inclusion in next release on 2009.10.02
+Source1: huge_page_setup_helper.py
 Patch0: libhugetlbfs-2.6-s390x-build.patch
+# Sent upstream for review 2009.10.02, all acked at least once now
+Patch1: libhugetlbfs-2.6-hugeadm-enhancements.patch
 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
 BuildRequires: kernel-devel glibc-headers
 Conflicts: kernel < 2.6.16
@@ -43,6 +47,7 @@ pool size control. pagesize lists page s
 %prep
 %setup -q -n %{name}-%{version}
 %patch0 -p1 -b .s390x-build
+%patch1 -p1 -b .enhanced
 
 %build
 # Parallel builds are not reliable
@@ -51,6 +56,9 @@ make BUILDTYPE=NATIVEONLY
 %install
 rm -rf $RPM_BUILD_ROOT
 make install PREFIX=%{_prefix} DESTDIR=$RPM_BUILD_ROOT LDSCRIPTDIR=%{ldscriptdir} BUILDTYPE=NATIVEONLY
+install -p -m755 %{SOURCE1} $RPM_BUILD_ROOT/%{_bindir}/
+mkdir -p -m755 $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d
+touch $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/hugepages.conf
 
 # remove statically built libraries:
 rm -f $RPM_BUILD_ROOT/%{_libdir}/*.a
@@ -69,6 +77,7 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/libhugetlbfs.so*
 %{_datadir}/%{name}/
 %{_mandir}/man7/libhugetlbfs.7.gz
+%ghost %config(noreplace) %{_sysconfdir}/security/limits.d/hugepages.conf
 %exclude %{_libdir}/libhugetlbfs_privutils.so
 %doc README HOWTO LGPL-2.1 NEWS
 
@@ -88,6 +97,7 @@ rm -rf $RPM_BUILD_ROOT
 %{_bindir}/hugeadm
 %{_bindir}/hugectl
 %{_bindir}/pagesize
+%{_bindir}/huge_page_setup_helper.py*
 %exclude %{_bindir}/cpupcstat
 %exclude %{_bindir}/oprofile_map_events.pl
 %exclude %{_bindir}/oprofile_start.sh
@@ -99,6 +109,10 @@ rm -rf $RPM_BUILD_ROOT
 %exclude /usr/lib/perl5/TLBC
 
 %changelog
+* Fri Oct 02 2009 Jarod Wilson <jarod at redhat.com> 2.6-3
+- Add hopefully-about-to-be-merged-upstream hugeadm enhancements
+- Add huge pages setup helper script, using new hugeadm enhancements
+
 * Thu Sep 03 2009 Nils Philippsen <nils at redhat.com> 2.6-2
 - fix building on s390x
 




More information about the fedora-extras-commits mailing list