public inbox for lvm2-cvs@sourceware.org
* LVM2 ./WHATS_NEW doc/example.conf.in lib/confi ...
@ 2012-01-20 17:00 zkabelac
  0 siblings, 0 replies; 6+ messages in thread
From: zkabelac @ 2012-01-20 17:00 UTC (permalink / raw)
  To: lvm-devel, lvm2-cvs

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	zkabelac@sourceware.org	2012-01-20 17:00:00

Modified files:
	.              : WHATS_NEW 
	doc            : example.conf.in 
	lib/config     : defaults.h 
	lib/display    : display.c 

Log message:
	Update lvdisplay to show more info about thin LVs
	
	Reformat how the LV name and path are presented, with an lvm1-compatible
	option to switch back to the old format - which had a number of problems,
	e.g. it showed links that do not exist, since we do not create them for
	private devices.
	Add more info about thin pools and volumes.
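
	For illustration, a minimal sketch of the new toggle as it would appear
	in the global section of lvm.conf, based on the example.conf.in hunk in
	this patch (the value shown is the non-default one):

	    global {
	        # Print the single lvm1-style "/dev/vgname/lvname" LV Name line
	        # instead of the separate LV Path and LV Name lines.
	        lvm1_compatible_display = 1
	    }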

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/WHATS_NEW.diff?cvsroot=lvm2&r1=1.2227&r2=1.2228
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/doc/example.conf.in.diff?cvsroot=lvm2&r1=1.41&r2=1.42
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/config/defaults.h.diff?cvsroot=lvm2&r1=1.93&r2=1.94
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/display/display.c.diff?cvsroot=lvm2&r1=1.124&r2=1.125

--- LVM2/WHATS_NEW	2012/01/20 10:55:28	1.2227
+++ LVM2/WHATS_NEW	2012/01/20 16:59:58	1.2228
@@ -1,5 +1,6 @@
 Version 2.02.89 - 
 ==================================
+  Update lvdisplay with backward compat. config opt. lvm1_compatible_display.
   Do not report linear segtype for non-striped targets.
   Keep info about creation host and time for each logical volume.
 Make error message clearer when preallocated memlock memory is exceeded.
--- LVM2/doc/example.conf.in	2012/01/12 01:51:56	1.41
+++ LVM2/doc/example.conf.in	2012/01/20 16:59:59	1.42
@@ -445,6 +445,11 @@
     # Specify the '--type <mirror|raid1>' option to override this default
     # setting.
     mirror_segtype_default = "mirror"
+
+    # Whether lvdisplay shows the LV name in lvm1-compatible format /dev/vgname/lvname.
+    # The default is now to display the LV name and path (if it exists) separately.
+    # If the old behavior is preferred, set this to 1.
+    # lvm1_compatible_display = 1
 }
 
 activation {
--- LVM2/lib/config/defaults.h	2012/01/19 15:34:32	1.93
+++ LVM2/lib/config/defaults.h	2012/01/20 16:59:59	1.94
@@ -49,6 +49,7 @@
 #define DEFAULT_PRIORITISE_WRITE_LOCKS 1
 #define DEFAULT_USE_MLOCKALL 0
 #define DEFAULT_METADATA_READ_ONLY 0
+#define DEFAULT_LVM1_COMPATIBLE_DISPLAY 0
 
 #define DEFAULT_MIRROR_SEGTYPE "mirror"
 #define DEFAULT_MIRRORLOG "disk"
--- LVM2/lib/display/display.c	2012/01/19 15:23:50	1.124
+++ LVM2/lib/display/display.c	2012/01/20 16:59:59	1.125
@@ -19,6 +19,7 @@
 #include "activate.h"
 #include "toolcontext.h"
 #include "segtype.h"
+#include "defaults.h"
 
 #define SIZE_BUF 128
 
@@ -503,7 +504,13 @@
 	char uuid[64] __attribute__((aligned(8)));
 	const char *access_str;
 	struct lv_segment *snap_seg = NULL, *mirror_seg = NULL;
+	struct lv_segment *seg = NULL;
+	int lvm1compat;
 	percent_t snap_percent;
+	int thin_data_active = 0, thin_metadata_active = 0;
+	percent_t thin_data_percent, thin_metadata_percent;
+	int thin_active = 0;
+	percent_t thin_percent;
 
 	if (!id_write_format(&lv->lvid.id[1], uuid, sizeof(uuid)))
 		return_0;
@@ -519,13 +526,28 @@
 
 	log_print("--- Logical volume ---");
 
-	log_print("LV Name                %s%s/%s", lv->vg->cmd->dev_dir,
-		  lv->vg->name, lv->name);
-	log_print("VG Name                %s", lv->vg->name);
+	lvm1compat = find_config_tree_int(cmd, "global/lvm1_compatible_display",
+					  DEFAULT_LVM1_COMPATIBLE_DISPLAY);
 
-	log_print("LV UUID                %s", uuid);
+	if (lvm1compat) {
+		/* Note: Invisible devices do not get /dev/vg/lv */
+		log_print("LV Name                %s%s/%s",
+			  lv->vg->cmd->dev_dir, lv->vg->name, lv->name);
+	} else if (lv_is_visible(lv)) {
+		/* Thin pool does not have /dev/vg/name link */
+		if (!lv_is_thin_pool(lv))
+			log_print("LV Path                %s%s/%s",
+				  lv->vg->cmd->dev_dir,
+				  lv->vg->name, lv->name);
+		log_print("LV Name                %s", lv->name);
+	} else
+		log_print("Invisible LV Name      %s", lv->name);
 
+	log_print("VG Name                %s", lv->vg->name);
+	log_print("LV UUID                %s", uuid);
 	log_print("LV Write Access        %s", access_str);
+	log_print("LV Creation host, time %s, %s",
+		  lv_host_dup(cmd->mem, lv), lv_time_dup(cmd->mem, lv));
 
 	if (lv_is_origin(lv)) {
 		log_print("LV snapshot status     source of");
@@ -537,10 +559,15 @@
 							       &snap_percent)))
 				if (snap_percent == PERCENT_INVALID)
 					snap_active = 0;
-			log_print("                       %s%s/%s [%s]",
-				  lv->vg->cmd->dev_dir, lv->vg->name,
-				  snap_seg->cow->name,
-				  snap_active ? "active" : "INACTIVE");
+			if (lvm1compat)
+				log_print("                       %s%s/%s [%s]",
+					  lv->vg->cmd->dev_dir, lv->vg->name,
+					  snap_seg->cow->name,
+					  snap_active ? "active" : "INACTIVE");
+			else
+				log_print("                       %s [%s]",
+					  snap_seg->cow->name,
+					  snap_active ? "active" : "INACTIVE");
 		}
 		snap_seg = NULL;
 	} else if ((snap_seg = find_cow(lv))) {
@@ -550,25 +577,39 @@
 			if (snap_percent == PERCENT_INVALID)
 				snap_active = 0;
 
-		log_print("LV snapshot status     %s destination for %s%s/%s",
-			  snap_active ? "active" : "INACTIVE",
-			  lv->vg->cmd->dev_dir, lv->vg->name,
-			  snap_seg->origin->name);
+		if (lvm1compat)
+			log_print("LV snapshot status     %s destination for %s%s/%s",
+				  snap_active ? "active" : "INACTIVE",
+				  lv->vg->cmd->dev_dir, lv->vg->name,
+				  snap_seg->origin->name);
+		else
+			log_print("LV snapshot status     %s destination for %s",
+				  snap_active ? "active" : "INACTIVE",
+				  snap_seg->origin->name);
 	}
 
 	if (lv_is_thin_volume(lv)) {
-		log_print("LV Thin pool           %s%s/%s", lv->vg->cmd->dev_dir,
-			  lv->vg->name, first_seg(lv)->pool_lv->name);
+		seg = first_seg(lv);
+		log_print("LV Pool name           %s", seg->pool_lv->name);
+		if (seg->origin)
+			log_print("LV Thin origin name    %s",
+				  seg->origin->name);
+		if (inkernel)
+			thin_active = lv_thin_percent(lv, 0, &thin_percent);
 	} else if (lv_is_thin_pool(lv)) {
+		if (inkernel) {
+			thin_data_active = lv_thin_pool_percent(lv, 0, &thin_data_percent);
+			thin_metadata_active = lv_thin_pool_percent(lv, 1, &thin_metadata_percent);
+		}
 		/* FIXME: display thin_pool targets transid for activated LV as well */
-		log_print("LV Thin transaction ID %" PRIu64,
-			  first_seg(lv)->transaction_id);
-		log_print("LV Thin metadata       %s%s/%s", lv->vg->cmd->dev_dir,
-			  lv->vg->name, first_seg(lv)->metadata_lv->name);
-		log_print("LV Thin data pool      %s%s/%s", lv->vg->cmd->dev_dir,
-			  lv->vg->name, seg_lv(first_seg(lv), 0)->name);
+		seg = first_seg(lv);
+		log_print("LV Pool transaction ID %" PRIu64, seg->transaction_id);
+		log_print("LV Pool metadata       %s", seg->metadata_lv->name);
+		log_print("LV Pool data           %s", seg_lv(seg, 0)->name);
+		log_print("LV Pool chunk size     %s",
+			  display_size(cmd, seg->data_block_size));
 		log_print("LV Zero new blocks     %s",
-			  first_seg(lv)->zero_new_blocks ? "yes" : "no");
+			  seg->zero_new_blocks ? "yes" : "no");
 	}
 
 	if (inkernel && info.suspended)
@@ -588,6 +629,18 @@
 		  display_size(cmd,
 			       snap_seg ? snap_seg->origin->size : lv->size));
 
+	if (thin_data_active)
+		log_print("Allocated pool data    %.2f%%",
+			  percent_to_float(thin_data_percent));
+
+	if (thin_metadata_active)
+		log_print("Allocated metadata     %.2f%%",
+			  percent_to_float(thin_metadata_percent));
+
+	if (thin_active)
+		log_print("Mapped size            %.2f%%",
+			  percent_to_float(thin_percent));
+
 	log_print("Current LE             %u",
 		  snap_seg ? snap_seg->origin->le_count : lv->le_count);
 
@@ -597,7 +650,7 @@
 		log_print("COW-table LE           %u", lv->le_count);
 
 		if (snap_active)
-			log_print("Allocated to snapshot  %.2f%% ",
+			log_print("Allocated to snapshot  %.2f%%",
 				  percent_to_float(snap_percent));
 
 		log_print("Snapshot chunk size    %s",
@@ -605,7 +658,7 @@
 	}
 
 	if (lv->status & MIRRORED) {
- 		mirror_seg = first_seg(lv);
+		mirror_seg = first_seg(lv);
 		log_print("Mirrored volumes       %" PRIu32, mirror_seg->area_count);
 		if (lv->status & CONVERTING)
 			log_print("LV type        Mirror undergoing conversion");

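As a rough sketch of the effect of this patch with the default (non-lvm1-compatible)
setting, lvdisplay's header block now prints the path and name separately and adds
the new thin fields; all names and values below are hypothetical:

    --- Logical volume ---
    LV Path                /dev/vg0/lvol0
    LV Name                lvol0
    VG Name                vg0
    ...
    LV Pool name           pool0
    Mapped size            12.50%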


* LVM2 ./WHATS_NEW doc/example.conf.in lib/confi ...
@ 2011-04-12 21:59 snitzer
  0 siblings, 0 replies; 6+ messages in thread
From: snitzer @ 2011-04-12 21:59 UTC (permalink / raw)
  To: lvm-devel, lvm2-cvs

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	snitzer@sourceware.org	2011-04-12 21:59:02

Modified files:
	.              : WHATS_NEW 
	doc            : example.conf.in 
	lib/config     : defaults.h 
	lib/device     : dev-io.c device.c device.h 
	lib/metadata   : pv_manip.c 
	man            : lvm.conf.5.in 

Log message:
	Add "devices/issue_discards" to lvm.conf.
	Issue discards on lvremove if enabled and both storage and kernel have support.
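
	A minimal standalone sketch of the BLKDISCARD ioctl that the new
	dev_discard_blocks() below wraps; the device path and the 1 MiB range
	are hypothetical and error handling is trimmed:

	#include <errno.h>
	#include <fcntl.h>
	#include <linux/fs.h>	/* BLKDISCARD */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	int main(void)
	{
		/* { offset_bytes, size_bytes }, as passed by _dev_discard_blocks() */
		uint64_t range[2] = { 0, 1024 * 1024 };
		int fd = open("/dev/sdX", O_WRONLY);	/* hypothetical device */

		if (fd < 0 || ioctl(fd, BLKDISCARD, &range) < 0)
			fprintf(stderr, "discard failed: %s\n", strerror(errno));

		if (fd >= 0)
			close(fd);

		return 0;
	}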

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/WHATS_NEW.diff?cvsroot=lvm2&r1=1.1970&r2=1.1971
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/doc/example.conf.in.diff?cvsroot=lvm2&r1=1.20&r2=1.21
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/config/defaults.h.diff?cvsroot=lvm2&r1=1.72&r2=1.73
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/device/dev-io.c.diff?cvsroot=lvm2&r1=1.75&r2=1.76
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/device/device.c.diff?cvsroot=lvm2&r1=1.37&r2=1.38
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/device/device.h.diff?cvsroot=lvm2&r1=1.47&r2=1.48
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/pv_manip.c.diff?cvsroot=lvm2&r1=1.28&r2=1.29
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/man/lvm.conf.5.in.diff?cvsroot=lvm2&r1=1.18&r2=1.19

--- LVM2/WHATS_NEW	2011/04/09 19:05:23	1.1970
+++ LVM2/WHATS_NEW	2011/04/12 21:59:01	1.1971
@@ -1,5 +1,7 @@
 Version 2.02.85 - 
 ===================================
+  Add "devices/issue_discards" to lvm.conf.
+  Issue discards on lvremove if enabled and both storage and kernel have support.
   Fix incorrect tests for dm_snprintf() failure.
 Fix some mismatched sign comparison gcc warnings in the code.
   Allow lv_extend() to work on zero length intrinsically layered LVs.
--- LVM2/doc/example.conf.in	2011/04/12 20:44:41	1.20
+++ LVM2/doc/example.conf.in	2011/04/12 21:59:01	1.21
@@ -151,6 +151,14 @@
     # Example: Ignore devices smaller than 2MB (i.e. floppy drives).
     # pv_min_size = 2048
     pv_min_size = 512
+
+    # Issue discards to an LV's underlying PV(s) when the LV is removed.
+    # Discards inform the storage that a region is no longer in use.  If set
+    # to 1, discards will only be issued if both the storage and kernel provide
+    # support.  Not all storage will support or benefit from discards but SSDs
+    # or thinly provisioned LUNs generally do.
+    # 1 enables; 0 disables.
+    issue_discards = 0
 }
 
 # This section allows you to configure the way in which LVM selects
--- LVM2/lib/config/defaults.h	2011/02/27 00:38:32	1.72
+++ LVM2/lib/config/defaults.h	2011/04/12 21:59:01	1.73
@@ -37,6 +37,7 @@
 #define DEFAULT_REQUIRE_RESTOREFILE_WITH_UUID 1
 #define DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION 1
 #define DEFAULT_DATA_ALIGNMENT_DETECTION 1
+#define DEFAULT_ISSUE_DISCARDS 0
 
 #define DEFAULT_LOCKING_LIB "liblvm2clusterlock.so"
 #define DEFAULT_FALLBACK_TO_LOCAL_LOCKING 1
--- LVM2/lib/device/dev-io.c	2011/03/29 20:19:03	1.75
+++ LVM2/lib/device/dev-io.c	2011/04/12 21:59:01	1.76
@@ -36,6 +36,9 @@
 #  ifndef BLKGETSIZE64		/* fs.h out-of-date */
 #    define BLKGETSIZE64 _IOR(0x12, 114, size_t)
 #  endif /* BLKGETSIZE64 */
+#  ifndef BLKDISCARD
+#    define BLKDISCARD	_IO(0x12,119)
+#  endif
 #else
 #  include <sys/disk.h>
 #  define BLKBSZGET DKIOCGETBLOCKSIZE
@@ -301,6 +304,33 @@
 	return 1;
 }
 
+static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes)
+{
+	uint64_t discard_range[2];
+
+	if (!dev_open(dev))
+		return_0;
+
+	discard_range[0] = offset_bytes;
+	discard_range[1] = size_bytes;
+
+	log_debug("Discarding %" PRIu64 " bytes offset %" PRIu64 " bytes on %s.",
+		  size_bytes, offset_bytes, dev_name(dev));
+	if (ioctl(dev->fd, BLKDISCARD, &discard_range) < 0) {
+		log_error("%s: BLKDISCARD ioctl at offset %" PRIu64 " size %" PRIu64 " failed: %s.",
+			  dev_name(dev), offset_bytes, size_bytes, strerror(errno));
+		if (!dev_close(dev))
+			stack;
+		/* It doesn't matter if discard failed, so return success. */
+		return 1;
+	}
+
+	if (!dev_close(dev))
+		stack;
+
+	return 1;
+}
+
 /*-----------------------------------------------------------------
  * Public functions
  *---------------------------------------------------------------*/
@@ -329,6 +359,17 @@
 	return _dev_read_ahead_dev(dev, read_ahead);
 }
 
+int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes)
+{
+	if (!dev)
+		return 0;
+
+	if (dev->flags & DEV_REGULAR)
+		return 1;
+
+	return _dev_discard_blocks(dev, offset_bytes, size_bytes);
+}
+
 /* FIXME Unused
 int dev_get_sectsize(struct device *dev, uint32_t *size)
 {
--- LVM2/lib/device/device.c	2011/03/13 22:52:20	1.37
+++ LVM2/lib/device/device.c	2011/04/12 21:59:01	1.38
@@ -455,6 +455,20 @@
 				       sysfs_dir, dev);
 }
 
+unsigned long dev_discard_max_bytes(const char *sysfs_dir,
+				    struct device *dev)
+{
+	return _dev_topology_attribute("queue/discard_max_bytes",
+				       sysfs_dir, dev);
+}
+
+unsigned long dev_discard_granularity(const char *sysfs_dir,
+				      struct device *dev)
+{
+	return _dev_topology_attribute("queue/discard_granularity",
+				       sysfs_dir, dev);
+}
+
 #else
 
 int get_primary_dev(const char *sysfs_dir,
@@ -481,4 +495,16 @@
 	return 0UL;
 }
 
+unsigned long dev_discard_max_bytes(const char *sysfs_dir,
+				    struct device *dev)
+{
+	return 0UL;
+}
+
+unsigned long dev_discard_granularity(const char *sysfs_dir,
+				      struct device *dev)
+{
+	return 0UL;
+}
+
 #endif
--- LVM2/lib/device/device.h	2011/02/18 23:09:55	1.47
+++ LVM2/lib/device/device.h	2011/04/12 21:59:01	1.48
@@ -68,6 +68,7 @@
 int dev_get_size(const struct device *dev, uint64_t *size);
 int dev_get_sectsize(struct device *dev, uint32_t *size);
 int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
+int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
 
 /* Use quiet version if device number could change e.g. when opening LV */
 int dev_open(struct device *dev);
@@ -115,4 +116,10 @@
 unsigned long dev_optimal_io_size(const char *sysfs_dir,
 				  struct device *dev);
 
+unsigned long dev_discard_max_bytes(const char *sysfs_dir,
+				    struct device *dev);
+
+unsigned long dev_discard_granularity(const char *sysfs_dir,
+				      struct device *dev);
+
 #endif
--- LVM2/lib/metadata/pv_manip.c	2011/03/29 20:19:04	1.28
+++ LVM2/lib/metadata/pv_manip.c	2011/04/12 21:59:01	1.29
@@ -20,6 +20,7 @@
 #include "archiver.h"
 #include "locking.h"
 #include "lvmcache.h"
+#include "defaults.h"
 
 static struct pv_segment *_alloc_pv_segment(struct dm_pool *mem,
 					    struct physical_volume *pv,
@@ -190,12 +191,38 @@
 
 int release_pv_segment(struct pv_segment *peg, uint32_t area_reduction)
 {
+	uint64_t discard_offset;
+	uint64_t pe_start = peg->pv->pe_start;
+	uint64_t discard_area_reduction = area_reduction;
+
 	if (!peg->lvseg) {
 		log_error("release_pv_segment with unallocated segment: "
 			  "%s PE %" PRIu32, pv_dev_name(peg->pv), peg->pe);
 		return 0;
 	}
 
+	/*
+	 * Only issue discards if enabled in lvm.conf and both
+	 * the device and kernel (>= 2.6.35) support discards.
+	 */
+	if (find_config_tree_bool(peg->pv->fmt->cmd,
+				  "devices/issue_discards", DEFAULT_ISSUE_DISCARDS) &&
+	    dev_discard_max_bytes(peg->pv->fmt->cmd->sysfs_dir, peg->pv->dev) &&
+	    dev_discard_granularity(peg->pv->fmt->cmd->sysfs_dir, peg->pv->dev)) {
+		if (!pe_start) {
+			/* skip the first extent */
+			pe_start = peg->pv->vg->extent_size;
+			discard_area_reduction--;
+		}
+		discard_offset = peg->pe + peg->lvseg->area_len - area_reduction;
+		discard_offset = (discard_offset * peg->pv->vg->extent_size) + pe_start;
+		log_debug("Discarding %" PRIu32 " extents offset %" PRIu64 " sectors on %s.",
+			 discard_area_reduction, discard_offset, dev_name(peg->pv->dev));
+		if (!dev_discard_blocks(peg->pv->dev, discard_offset << SECTOR_SHIFT,
+					discard_area_reduction * peg->pv->vg->extent_size * SECTOR_SIZE))
+			return_0;
+	}
+
 	if (peg->lvseg->area_len == area_reduction) {
 		peg->pv->pe_alloc_count -= area_reduction;
 		peg->lvseg->lv->vg->free_count += area_reduction;
--- LVM2/man/lvm.conf.5.in	2011/04/12 21:21:08	1.18
+++ LVM2/man/lvm.conf.5.in	2011/04/12 21:59:02	1.19
@@ -180,6 +180,13 @@
 .IP
 pv_min_size = 2048
 .IP
+\fBissue_discards\fP \(em
+Issue discards to an LV's underlying PV(s) when the LV is removed.  Discards
+inform the storage that a region is no longer in use.  If set to 1, discards will
+only be issued if both the storage and kernel provide support.  Not all storage
+will support or benefit from discards but SSDs or thinly provisioned LUNs
+generally do.
+.IP
 .TP
 \fBallocation\fP \(em Space allocation policies
 .IP

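To make the unit conversions in release_pv_segment() above concrete, a worked
example with assumed values: extent_size = 8192 sectors (4 MiB), pe_start = 2048
sectors, area_reduction = 5, and peg->pe + area_len - area_reduction = 10:

    discard_offset  = 10 * 8192 + 2048            = 83968 sectors
    offset in bytes = 83968 << SECTOR_SHIFT(9)    = 42991616
    length in bytes = 5 * 8192 * SECTOR_SIZE(512) = 20971520 (20 MiB)

If pe_start is 0, the code instead skips the first extent (pe_start is taken as
one extent_size) and shortens the discard by one extent.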


* LVM2 ./WHATS_NEW doc/example.conf.in lib/confi ...
@ 2011-02-27  0:38 agk
  0 siblings, 0 replies; 6+ messages in thread
From: agk @ 2011-02-27  0:38 UTC (permalink / raw)
  To: lvm-devel, lvm2-cvs

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	agk@sourceware.org	2011-02-27 00:38:32

Modified files:
	.              : WHATS_NEW 
	doc            : example.conf.in 
	lib/config     : defaults.h 
	lib/metadata   : lv_manip.c 

Log message:
	Various changes to the allocation algorithms: expect some fallout.
	There is a lot to test.
	
	Two new config settings were added that are intended to make the code
	behave much as it did before - worth a try if you run into problems.
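
	For reference, a sketch of the two settings as they would appear in the
	allocation section of lvm.conf; these values restore the pre-2.02.85
	behaviour (the shipped defaults, per defaults.h below, are the opposite):

	    allocation {
	        maximise_cling = 0
	        mirror_logs_require_separate_pvs = 1
	    }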

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/WHATS_NEW.diff?cvsroot=lvm2&r1=1.1926&r2=1.1927
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/doc/example.conf.in.diff?cvsroot=lvm2&r1=1.18&r2=1.19
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/config/defaults.h.diff?cvsroot=lvm2&r1=1.71&r2=1.72
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/lv_manip.c.diff?cvsroot=lvm2&r1=1.248&r2=1.249

--- LVM2/WHATS_NEW	2011/02/25 14:08:54	1.1926
+++ LVM2/WHATS_NEW	2011/02/27 00:38:31	1.1927
@@ -1,5 +1,9 @@
 Version 2.02.85 - 
 ===================================
+  Extend normal policy to allow mirror logs on same devs as images if necessary.
+  Improve cling policy to recognise devs already allocated in the transaction.
+  Improve normal allocation algorithm to include clinging to existing areas.
+  Add allocation/maximise_cling & mirror_logs_require_separate_pvs to lvm.conf.
   Fix metadata balance code to work with recent changes in metadata handling.
   Add old_uuid field to physical_volume and fix pvchange -u for recent changes.
   Allow pvresize on a PV with two metadata areas (for PVs not in a VG).
--- LVM2/doc/example.conf.in	2011/02/18 14:11:22	1.18
+++ LVM2/doc/example.conf.in	2011/02/27 00:38:32	1.19
@@ -171,6 +171,19 @@
 #
 #    cling_tag_list = [ "@site1", "@site2" ]
 #    cling_tag_list = [ "@*" ]
+#
+#    Changes made in version 2.02.85 extended the reach of the 'cling'
+#    policies to detect more situations where data can be grouped
+#    onto the same disks.  Set this to 0 to revert to the previous
+#    algorithm.
+#
+#    maximise_cling = 1
+#
+#    Set to 1 to guarantee that mirror logs will always be placed on 
+#    different PVs from the mirror images.  This was the default
+#    until version 2.02.85.
+#
+#    mirror_logs_require_separate_pvs = 0
 #}
 
 # This section that allows you to configure the nature of the
--- LVM2/lib/config/defaults.h	2010/10/25 11:20:55	1.71
+++ LVM2/lib/config/defaults.h	2011/02/27 00:38:32	1.72
@@ -79,6 +79,8 @@
 #define DEFAULT_MAX_PV 0
 #define DEFAULT_MAX_LV 0
 #define DEFAULT_ALLOC_POLICY ALLOC_NORMAL
+#define DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS 0
+#define DEFAULT_MAXIMISE_CLING 1
 #define DEFAULT_CLUSTERED 0
 
 #define DEFAULT_MSG_PREFIX "  "
--- LVM2/lib/metadata/lv_manip.c	2011/02/18 14:47:30	1.248
+++ LVM2/lib/metadata/lv_manip.c	2011/02/27 00:38:32	1.249
@@ -26,6 +26,41 @@
 #include "archiver.h"
 #include "activate.h"
 #include "str_list.h"
+#include "defaults.h"
+
+typedef enum {
+	PREFERRED,
+	USE_AREA,
+	NEXT_PV,
+	NEXT_AREA
+} area_use_t;
+
+/* FIXME These ended up getting used differently from first intended.  Refactor. */
+#define A_CONTIGUOUS		0x01
+#define A_CLING			0x02
+#define A_CLING_BY_TAGS		0x04
+#define A_CLING_TO_ALLOCED	0x08	/* Only for ALLOC_NORMAL */
+#define A_CAN_SPLIT		0x10
+
+/*
+ * Constant parameters during a single allocation attempt.
+ */
+struct alloc_parms {
+	alloc_policy_t alloc;
+	unsigned flags;		/* Holds A_* */
+	struct lv_segment *prev_lvseg;
+	uint32_t extents_still_needed;
+};
+
+/*
+ * Holds varying state of each allocation attempt.
+ */
+struct alloc_state {
+	struct pv_area_used *areas;
+	uint32_t areas_size;
+	uint32_t log_area_count_still_needed;	/* Number of areas still needing to be allocated for the log */
+	uint32_t allocated;	/* Total number of extents allocated so far */
+};
 
 struct lv_names {
 	const char *old;
@@ -526,6 +561,9 @@
 	uint32_t region_size;		/* Mirror region size */
 	uint32_t total_area_len;	/* Total number of parallel extents */
 
+	unsigned maximise_cling;
+	unsigned mirror_logs_separate;	/* Must mirror logs be on separate PVs? */
+
 	const struct config_node *cling_tag_list_cn;
 
 	struct dm_list *parallel_areas;	/* PVs to avoid */
@@ -644,6 +682,10 @@
 
 	ah->cling_tag_list_cn = find_config_tree_node(cmd, "allocation/cling_tag_list");
 
+	ah->maximise_cling = find_config_tree_bool(cmd, "allocation/maximise_cling", DEFAULT_MAXIMISE_CLING);
+
+	ah->mirror_logs_separate = find_config_tree_bool(cmd, "allocation/mirror_logs_require_separate_pvs", DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS);
+
 	return ah;
 }
 
@@ -653,6 +695,69 @@
 		dm_pool_destroy(ah->mem);
 }
 
+/* Is there enough total space or should we give up immediately? */
+static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms, uint32_t allocated, uint32_t extents_still_needed)
+{
+	uint32_t total_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
+	uint32_t free_pes = pv_maps_size(pvms);
+
+	if (total_extents_needed > free_pes) {
+		log_error("Insufficient free space: %" PRIu32 " extents needed,"
+			  " but only %" PRIu32 " available",
+			  total_extents_needed, free_pes);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* For striped mirrors, all the areas are counted, through the mirror layer */
+static uint32_t _stripes_per_mimage(struct lv_segment *seg)
+{
+	struct lv_segment *last_lvseg;
+
+	if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
+		last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
+		if (seg_is_striped(last_lvseg))
+			return last_lvseg->area_count;
+	}
+
+	return 1;
+}
+
+static void _init_alloc_parms(struct alloc_handle *ah, struct alloc_parms *alloc_parms, alloc_policy_t alloc,
+			      struct lv_segment *prev_lvseg, unsigned can_split,
+			      uint32_t allocated, uint32_t extents_still_needed)
+{
+	alloc_parms->alloc = alloc;
+	alloc_parms->prev_lvseg = prev_lvseg;
+	alloc_parms->flags = 0;
+	alloc_parms->extents_still_needed = extents_still_needed;
+
+	/* Are there any preceding segments we must follow on from? */
+	if (alloc_parms->prev_lvseg) {
+		if ((alloc_parms->alloc == ALLOC_CONTIGUOUS))
+			alloc_parms->flags |= A_CONTIGUOUS;
+		else if ((alloc_parms->alloc == ALLOC_CLING))
+			alloc_parms->flags |= A_CLING;
+		else if ((alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
+			alloc_parms->flags |= A_CLING;
+			alloc_parms->flags |= A_CLING_BY_TAGS;
+		}
+	}
+
+	/*
+	 * For normal allocations, if any extents have already been found 
+	 * for allocation, prefer to place further extents on the same disks as
+	 * have already been used.
+	 */
+	if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL && allocated != alloc_parms->extents_still_needed)
+		alloc_parms->flags |= A_CLING_TO_ALLOCED;
+
+	if (can_split)
+		alloc_parms->flags |= A_CAN_SPLIT;
+}
+
 static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas)
 {
 	struct seg_pvs *spvs;
@@ -759,14 +864,13 @@
  * If the complete area is not needed then it gets split.
  * The part used is removed from the pv_map so it can't be allocated twice.
  */
-static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t needed,
-				struct pv_area_used *areas, uint32_t *allocated,
-				unsigned log_needs_allocating, uint32_t ix_log_offset)
+static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate,
+				struct alloc_state *alloc_state, uint32_t ix_log_offset)
 {
-	uint32_t area_len, len, remaining;
+	uint32_t area_len, len;
 	uint32_t s;
 	uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
-	uint32_t total_area_count = ah->area_count + (log_needs_allocating ? ah->log_area_count : 0);
+	uint32_t total_area_count = ah->area_count + alloc_state->log_area_count_still_needed;
 	struct alloced_area *aa;
 
 	if (!total_area_count) {
@@ -774,13 +878,12 @@
 		return 1;
 	}
 
-	remaining = needed - *allocated;
-	area_len = remaining / ah->area_multiple;
+	area_len = max_to_allocate / ah->area_multiple;
 
 	/* Reduce area_len to the smallest of the areas */
 	for (s = 0; s < ah->area_count; s++)
-		if (area_len > areas[s].used)
-			area_len = areas[s].used;
+		if (area_len > alloc_state->areas[s].used)
+			area_len = alloc_state->areas[s].used;
 
 	if (!(aa = dm_pool_alloc(ah->mem, sizeof(*aa) * total_area_count))) {
 		log_error("alloced_area allocation failed");
@@ -799,36 +902,22 @@
 			len = ah->log_len;
 		}
 
-		aa[s].pv = areas[s + ix_log_skip].pva->map->pv;
-		aa[s].pe = areas[s + ix_log_skip].pva->start;
+		aa[s].pv = alloc_state->areas[s + ix_log_skip].pva->map->pv;
+		aa[s].pe = alloc_state->areas[s + ix_log_skip].pva->start;
 		aa[s].len = len;
 
 		log_debug("Allocating parallel area %" PRIu32
 			  " on %s start PE %" PRIu32 " length %" PRIu32 ".",
 			  s, dev_name(aa[s].pv->dev), aa[s].pe, len);
 
-		consume_pv_area(areas[s + ix_log_skip].pva, len);
+		consume_pv_area(alloc_state->areas[s + ix_log_skip].pva, len);
 
 		dm_list_add(&ah->alloced_areas[s], &aa[s].list);
 	}
 
 	ah->total_area_len += area_len;
 
-	*allocated += area_len * ah->area_multiple;
-
-	return 1;
-}
-
-/* For striped mirrors, all the areas are counted, through the mirror layer */
-static uint32_t _stripes_per_mimage(struct lv_segment *seg)
-{
-	struct lv_segment *last_lvseg;
-
-	if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
-		last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
-		if (seg_is_striped(last_lvseg))
-			return last_lvseg->area_count;
-	}
+	alloc_state->allocated += area_len * ah->area_multiple;
 
 	return 1;
 }
@@ -1026,12 +1115,28 @@
 	return 1;
 }
 
+static void _reserve_area(struct pv_area_used *area_used, struct pv_area *pva, uint32_t required,
+			  uint32_t ix_pva, uint32_t unreserved)
+{
+	log_debug("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
+		  " length %" PRIu32 " leaving %" PRIu32 ".",
+		  area_used->pva ? "Changing   " : "Considering", 
+		  ix_pva - 1, area_used->pva ? "to" : "as", 
+		  dev_name(pva->map->pv->dev), pva->start, required, unreserved);
+
+	area_used->pva = pva;
+	area_used->used = required;
+}
+
 static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
 			 struct pv_segment *pvseg, uint32_t s,
 			 void *data)
 {
 	struct pv_match *pvmatch = data;
 
+	if (pvmatch->areas[s].pva)
+		return 1;	/* Area already assigned */
+
 	if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
 		return 1;	/* Continue */
 
@@ -1039,16 +1144,10 @@
 		return 1;
 
 	/*
-	 * Only used for cling and contiguous policies so it's safe to say all
-	 * the available space is used.
+	 * Only used for cling and contiguous policies (which only make one allocation per PV)
+	 * so it's safe to say all the available space is used.
 	 */
-	pvmatch->areas[s].pva = pvmatch->pva;
-	pvmatch->areas[s].used = pvmatch->pva->count;
-
-	log_debug("Trying allocation area %" PRIu32 " on %s start PE %" PRIu32
-		  " length %" PRIu32 ".",
-		  s, dev_name(pvmatch->pva->map->pv->dev), pvmatch->pva->start, 
-		  pvmatch->pva->count);
+	_reserve_area(&pvmatch->areas[s], pvmatch->pva, pvmatch->pva->count, s + 1, 0);
 
 	return 2;	/* Finished */
 }
@@ -1056,23 +1155,33 @@
 /*
  * Is pva on same PV as any existing areas?
  */
-static int _check_cling(struct cmd_context *cmd,
+static int _check_cling(struct alloc_handle *ah,
 			const struct config_node *cling_tag_list_cn,
 			struct lv_segment *prev_lvseg, struct pv_area *pva,
-			struct pv_area_used *areas, uint32_t areas_size)
+			struct alloc_state *alloc_state)
 {
 	struct pv_match pvmatch;
 	int r;
+	uint32_t le, len;
 
 	pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
-	pvmatch.areas = areas;
-	pvmatch.areas_size = areas_size;
+	pvmatch.areas = alloc_state->areas;
+	pvmatch.areas_size = alloc_state->areas_size;
 	pvmatch.pva = pva;
 	pvmatch.cling_tag_list_cn = cling_tag_list_cn;
 
+	if (ah->maximise_cling) {
+		/* Check entire LV */
+		le = 0;
+		len = prev_lvseg->le + prev_lvseg->len;
+	} else {
+		/* Only check 1 LE at end of previous LV segment */
+		le = prev_lvseg->le + prev_lvseg->len - 1;
+		len = 1;
+	}
+
 	/* FIXME Cope with stacks by flattening */
-	if (!(r = _for_each_pv(cmd, prev_lvseg->lv,
-			       prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL,
+	if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL,
 			       0, 0, -1, 1,
 			       _is_condition, &pvmatch)))
 		stack;
@@ -1088,14 +1197,14 @@
  */
 static int _check_contiguous(struct cmd_context *cmd,
 			     struct lv_segment *prev_lvseg, struct pv_area *pva,
-			     struct pv_area_used *areas, uint32_t areas_size)
+			     struct alloc_state *alloc_state)
 {
 	struct pv_match pvmatch;
 	int r;
 
 	pvmatch.condition = _is_contiguous;
-	pvmatch.areas = areas;
-	pvmatch.areas_size = areas_size;
+	pvmatch.areas = alloc_state->areas;
+	pvmatch.areas_size = alloc_state->areas_size;
 	pvmatch.pva = pva;
 	pvmatch.cling_tag_list_cn = NULL;
 
@@ -1113,262 +1222,465 @@
 }
 
 /*
- * Choose sets of parallel areas to use, respecting any constraints.
+ * Is pva on same PV as any areas already used in this allocation attempt?
+ */
+static int _check_cling_to_alloced(struct alloc_handle *ah, struct pv_area *pva, struct alloc_state *alloc_state)
+{
+	unsigned s;
+	struct alloced_area *aa;
+
+	/*
+	 * Ignore log areas.  They are always allocated whole as part of the
+	 * first allocation.  If they aren't yet set, we know we've nothing to do.
+	 */
+	if (alloc_state->log_area_count_still_needed)
+		return 0;
+
+	for (s = 0; s < ah->area_count; s++) {
+		if (alloc_state->areas[s].pva)
+			continue;	/* Area already assigned */
+		dm_list_iterate_items(aa, &ah->alloced_areas[s]) {
+			if (pva->map->pv == aa[0].pv) {
+				_reserve_area(&alloc_state->areas[s], pva, pva->count, s + 1, 0);
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs)
+{
+	struct pv_list *pvl;
+
+	dm_list_iterate_items(pvl, parallel_pvs)
+		if (pv == pvl->pv)
+			return 1;
+
+	return 0;
+}
+
+/*
+ * Decide whether or not to try allocation from supplied area pva.
+ * alloc_state->areas may get modified.
+ */
+static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint32_t still_needed,
+			     const struct alloc_parms *alloc_parms, struct alloc_state *alloc_state,
+			     unsigned already_found_one, unsigned iteration_count, unsigned log_iteration_count)
+{
+	unsigned s;
+
+	/* Skip fully-reserved areas (which are not currently removed from the list). */
+	if (!pva->unreserved)
+		return NEXT_AREA;
+
+	if (iteration_count + log_iteration_count) {
+		/*
+		 * Don't use an area twice.
+		 * Only ALLOC_ANYWHERE currently supports that, by destroying the data structures,
+		 * which is OK because they are not needed again afterwards.
+		 */
+		for (s = 0; s < alloc_state->areas_size; s++)
+			if (alloc_state->areas[s].pva == pva)
+				return NEXT_AREA;
+	}
+
+	/* If maximise_cling is set, perform several checks, otherwise perform exactly one. */
+	if (!iteration_count && !log_iteration_count && alloc_parms->flags & (A_CONTIGUOUS | A_CLING | A_CLING_TO_ALLOCED)) {
+		/* Contiguous? */
+		if (((alloc_parms->flags & A_CONTIGUOUS) || ah->maximise_cling) &&
+		    alloc_parms->prev_lvseg && _check_contiguous(ah->cmd, alloc_parms->prev_lvseg, pva, alloc_state))
+			return PREFERRED;
+	
+		/* Try next area on same PV if looking for contiguous space */
+		if (alloc_parms->flags & A_CONTIGUOUS)
+			return NEXT_AREA;
+	
+		/* Cling_to_alloced? */
+		if ((alloc_parms->flags & A_CLING_TO_ALLOCED) &&
+		    _check_cling_to_alloced(ah, pva, alloc_state))
+			return PREFERRED;
+
+		/* Cling? */
+		if (!(alloc_parms->flags & A_CLING_BY_TAGS) &&
+		    alloc_parms->prev_lvseg && _check_cling(ah, NULL, alloc_parms->prev_lvseg, pva, alloc_state))
+			/* If this PV is suitable, use this first area */
+			return PREFERRED;
+
+		if (!ah->maximise_cling && !(alloc_parms->flags & A_CLING_BY_TAGS))
+			return NEXT_PV;
+
+		/* Cling_by_tags? */
+		if ((alloc_parms->flags & (A_CLING_BY_TAGS | A_CLING_TO_ALLOCED)) && ah->cling_tag_list_cn &&
+		    alloc_parms->prev_lvseg && _check_cling(ah, ah->cling_tag_list_cn, alloc_parms->prev_lvseg, pva, alloc_state))
+			return PREFERRED;
+	
+		if (alloc_parms->flags & A_CLING_BY_TAGS)
+			return NEXT_PV;
+
+		/* All areas on this PV give same result so pointless checking more */
+		return NEXT_PV;
+	}
+
+	/* Normal/Anywhere */
+
+	/* Is it big enough on its own? */
+	if (pva->unreserved * ah->area_multiple < still_needed &&
+	    ((!(alloc_parms->flags & A_CAN_SPLIT) && !ah->log_area_count) ||
+	     (already_found_one && alloc_parms->alloc != ALLOC_ANYWHERE)))
+		return NEXT_PV;
+
+	return USE_AREA;
+}
+
+/*
+ * Decide how many extents we're trying to obtain from a given area.
+ * Removes the extents from further consideration.
+ */
+static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *pva, unsigned ix_pva, uint32_t max_to_allocate, alloc_policy_t alloc)
+{
+	uint32_t required = max_to_allocate / ah->area_multiple;
+
+	/* FIXME Maintain unreserved all the time, so other policies can split areas too. */
+
+	if (alloc == ALLOC_ANYWHERE) {
+		/*
+		 * Update amount unreserved - effectively splitting an area 
+		 * into two or more parts.  If the whole stripe doesn't fit,
+		 * reduce amount we're looking for.
+		 */
+		if (ix_pva - 1 >= ah->area_count)
+			required = ah->log_len;
+		if (required >= pva->unreserved) {
+			required = pva->unreserved;
+			pva->unreserved = 0;
+		} else {
+			pva->unreserved -= required;
+			reinsert_reduced_pv_area(pva);
+		}
+	} else {
+		if (required < ah->log_len)
+			required = ah->log_len;
+		if (required > pva->count)
+			required = pva->count;
+	}
+
+	return required;
+}
+
+static int _reserve_required_area(struct alloc_handle *ah, uint32_t max_to_allocate,
+				  unsigned ix_pva, struct pv_area *pva,
+				  struct alloc_state *alloc_state, alloc_policy_t alloc)
+{
+	uint32_t required = _calc_required_extents(ah, pva, ix_pva, max_to_allocate, alloc);
+	uint32_t s;
+
+	/* Expand areas array if needed after an area was split. */
+	if (ix_pva > alloc_state->areas_size) {
+		alloc_state->areas_size *= 2;
+		if (!(alloc_state->areas = dm_realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
+			log_error("Memory reallocation for parallel areas failed.");
+			return 0;
+		}
+		for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
+			alloc_state->areas[s].pva = NULL;
+	}
+
+	_reserve_area(&alloc_state->areas[ix_pva - 1], pva, required, ix_pva, 
+		  (alloc == ALLOC_ANYWHERE) ? pva->unreserved : pva->count - required);
+
+	return 1;
+}
+
+static void _clear_areas(struct alloc_state *alloc_state)
+{
+	uint32_t s;
+
+	for (s = 0; s < alloc_state->areas_size; s++)
+		alloc_state->areas[s].pva = NULL;
+}
+
+/*
+ * Returns 1 regardless of whether any space was found, except on error.
  */
-static int _find_parallel_space(struct alloc_handle *ah, alloc_policy_t alloc,
-				struct dm_list *pvms, struct pv_area_used **areas_ptr,
-				uint32_t *areas_size_ptr, unsigned can_split,
-				struct lv_segment *prev_lvseg,
-				uint32_t *allocated, uint32_t *log_needs_allocating, uint32_t needed)
+static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc_parms *alloc_parms,
+				     struct dm_list *pvms, struct alloc_state *alloc_state,
+				     struct dm_list *parallel_pvs, uint32_t max_to_allocate)
 {
+	unsigned ix = 0;
+	unsigned last_ix;
 	struct pv_map *pvm;
 	struct pv_area *pva;
-	struct pv_list *pvl;
-	unsigned already_found_one = 0;
-	unsigned contiguous = 0, cling = 0, use_cling_tags = 0, preferred_count = 0;
-	unsigned ix, last_ix;
+	unsigned preferred_count = 0;
+	unsigned already_found_one;
 	unsigned ix_offset = 0;	/* Offset for non-preferred allocations */
 	unsigned ix_log_offset; /* Offset to start of areas to use for log */
 	unsigned too_small_for_log_count; /* How many too small for log? */
-	uint32_t max_parallel;	/* Maximum extents to allocate */
-	uint32_t next_le;
-	uint32_t required;	/* Extents we're trying to obtain from a given area */
-	struct seg_pvs *spvs;
-	struct dm_list *parallel_pvs;
-	uint32_t free_pes;
+	unsigned iteration_count = 0; /* cling_to_alloced may need 2 iterations */
+	unsigned log_iteration_count = 0; /* extra iteration for logs on data devices */
 	struct alloced_area *aa;
 	uint32_t s;
-	uint32_t total_extents_needed = (needed - *allocated) * ah->area_count / ah->area_multiple;
-
-	/* Is there enough total space? */
-	free_pes = pv_maps_size(pvms);
-	if (total_extents_needed > free_pes) {
-		log_error("Insufficient free space: %" PRIu32 " extents needed,"
-			  " but only %" PRIu32 " available",
-			  total_extents_needed, free_pes);
-		return 0;
-	}
-
-	/* FIXME Select log PV appropriately if there isn't one yet */
 
-	/* Are there any preceding segments we must follow on from? */
-	if (prev_lvseg) {
-		ix_offset = _stripes_per_mimage(prev_lvseg) * prev_lvseg->area_count;
-		if ((alloc == ALLOC_CONTIGUOUS))
-			contiguous = 1;
-		else if ((alloc == ALLOC_CLING))
-			cling = 1;
-		else if ((alloc == ALLOC_CLING_BY_TAGS)) {
-			cling = 1;
-			use_cling_tags = 1;
-		} else
-			ix_offset = 0;
-	}
+	/* ix_offset holds the number of parallel allocations that must be contiguous/cling */
+	if (alloc_parms->flags & (A_CONTIGUOUS | A_CLING) && alloc_parms->prev_lvseg)
+		ix_offset = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;
+
+	if (alloc_parms->flags & A_CLING_TO_ALLOCED)
+		ix_offset = ah->area_count;
+
+	if (alloc_parms->alloc == ALLOC_NORMAL)
+		log_debug("Cling_to_allocated is %sset",
+			  alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");
+
+	_clear_areas(alloc_state);
+
+	log_debug("Still need %" PRIu32 " extents for %" PRIu32 " parallel areas and %" PRIu32 " log areas of %" PRIu32 " extents. "
+		  "(Total %" PRIu32 " extents.)",
+		  (ah->new_extents - alloc_state->allocated) / ah->area_multiple,
+		  ah->area_count, alloc_state->log_area_count_still_needed,
+		  alloc_state->log_area_count_still_needed ? ah->log_len : 0,
+		  (ah->new_extents - alloc_state->allocated) * ah->area_count / ah->area_multiple +
+			alloc_state->log_area_count_still_needed * ah->log_len);
 
-	/* FIXME This algorithm needs a lot of cleaning up! */
-	/* FIXME anywhere doesn't find all space yet */
-	/* ix_offset holds the number of allocations that must be contiguous */
 	/* ix holds the number of areas found on other PVs */
 	do {
-		ix = 0;
-		preferred_count = 0;
+		if (log_iteration_count) {
+			log_debug("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, ah->area_count, alloc_state->log_area_count_still_needed);
+		} else if (iteration_count)
+			log_debug("Filled %u out of %u preferred areas so far.", preferred_count, ix_offset);
 
-		parallel_pvs = NULL;
-		max_parallel = needed;
+		/*
+		 * Provide for escape from the loop if no progress is made.
+		 * This should not happen: ALLOC_ANYWHERE should be able to use
+		 * all available space. (If there aren't enough extents, the code
+		 * should not reach this point.)
+		 */
+		last_ix = ix;
 
 		/*
-		 * If there are existing parallel PVs, avoid them and reduce
-		 * the maximum we can allocate in one go accordingly.
+		 * Put the smallest area of each PV that is at least the
+		 * size we need into areas array.  If there isn't one
+		 * that fits completely and we're allowed more than one
+		 * LV segment, then take the largest remaining instead.
 		 */
-		if (ah->parallel_areas) {
-			next_le = (prev_lvseg ? prev_lvseg->le + prev_lvseg->len : 0) + *allocated / ah->area_multiple;
-			dm_list_iterate_items(spvs, ah->parallel_areas) {
-				if (next_le >= spvs->le + spvs->len)
-					continue;
+		dm_list_iterate_items(pvm, pvms) {
+			/* PV-level checks */
+			if (dm_list_empty(&pvm->areas))
+				continue;	/* Next PV */
+
+			if (alloc_parms->alloc != ALLOC_ANYWHERE) {
+				/* Don't allocate onto the log PVs */
+				if (ah->log_area_count)
+					dm_list_iterate_items(aa, &ah->alloced_areas[ah->area_count])
+						for (s = 0; s < ah->log_area_count; s++)
+							if (!aa[s].pv)
+								goto next_pv;
 
-				if (max_parallel > (spvs->le + spvs->len) * ah->area_multiple)
-					max_parallel = (spvs->le + spvs->len) * ah->area_multiple;
-				parallel_pvs = &spvs->pvs;
-				break;
+				/* FIXME Split into log and non-log parallel_pvs and only check the log ones if log_iteration? */
+				/* (I've temporarily disabled the check.) */
+				/* Avoid PVs used by existing parallel areas */
+				if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs))
+					goto next_pv;
+
+				/*
+				 * Avoid PVs already set aside for log.  
+				 * We only reach here if there were enough PVs for the main areas but
+				 * not enough for the logs.
+				 */
+				if (log_iteration_count) {
+					for (s = ah->area_count; s < ix + ix_offset; s++)
+						if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
+							goto next_pv;
+				/* On a second pass, avoid PVs already used in an uncommitted area */
+ 				} else if (iteration_count)
+					for (s = 0; s < ah->area_count; s++)
+						if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
+							goto next_pv;
 			}
-		}
 
-		do {
-			/*
-			 * Provide for escape from the loop if no progress is made.
-			 * This should not happen: ALLOC_ANYWHERE should be able to use
-			 * all available space. (If there aren't enough extents, the code
-			 * should not reach this point.)
-			 */
-			last_ix = ix;
-
-			/*
-			 * Put the smallest area of each PV that is at least the
-			 * size we need into areas array.  If there isn't one
-			 * that fits completely and we're allowed more than one
-			 * LV segment, then take the largest remaining instead.
-			 */
-			dm_list_iterate_items(pvm, pvms) {
-				if (dm_list_empty(&pvm->areas))
-					continue;	/* Next PV */
-
-				if (alloc != ALLOC_ANYWHERE) {
-					/* Don't allocate onto the log pv */
-					if (ah->log_area_count)
-						dm_list_iterate_items(aa, &ah->alloced_areas[ah->area_count])
-							for (s = 0; s < ah->log_area_count; s++)
-								if (!aa[s].pv)
-									goto next_pv;
-
-					/* Avoid PVs used by existing parallel areas */
-					if (parallel_pvs)
-						dm_list_iterate_items(pvl, parallel_pvs)
-							if (pvm->pv == pvl->pv)
-								goto next_pv;
-				}
+			already_found_one = 0;
+			/* First area in each list is the largest */
+			dm_list_iterate_items(pva, &pvm->areas) {
+				/*
+				 * There are two types of allocations, which can't be mixed at present.
+				 * PREFERRED are stored immediately in a specific parallel slot.
+				 * USE_AREA are stored for later, then sorted and chosen from.
+				 */
+				switch(_check_pva(ah, pva, max_to_allocate, alloc_parms,
+						  alloc_state, already_found_one, iteration_count, log_iteration_count)) {
 
-				already_found_one = 0;
-				/* First area in each list is the largest */
-				dm_list_iterate_items(pva, &pvm->areas) {
-					/* Skip fully-reserved areas (which are not currently removed from the list). */
-					if (!pva->unreserved)
-						continue;
-					if (contiguous) {
-						if (prev_lvseg &&
-						    _check_contiguous(ah->cmd,
-								      prev_lvseg,
-								      pva, *areas_ptr,
-								      *areas_size_ptr)) {
-							preferred_count++;
-							goto next_pv;
-						}
-						continue;
-					}
+				case PREFERRED:
+					preferred_count++;
 
-					if (cling) {
-						if (prev_lvseg &&
-						    _check_cling(ah->cmd,
-								 use_cling_tags ? ah->cling_tag_list_cn : NULL,
-								 prev_lvseg,
-								 pva, *areas_ptr,
-								 *areas_size_ptr)) {
-							preferred_count++;
-						}
-						goto next_pv;
-					}
+				case NEXT_PV:
+					goto next_pv;
 
-					/* Is it big enough on its own? */
-					if (pva->unreserved * ah->area_multiple <
-					    max_parallel - *allocated &&
-					    ((!can_split && !ah->log_area_count) ||
-					     (already_found_one &&
-					      !(alloc == ALLOC_ANYWHERE))))
-						goto next_pv;
+				case NEXT_AREA:
+					continue;
 
+				case USE_AREA:
 					/*
 					 * Except with ALLOC_ANYWHERE, replace first area with this
 					 * one which is smaller but still big enough.
 					 */
 					if (!already_found_one ||
-					    alloc == ALLOC_ANYWHERE) {
+					    alloc_parms->alloc == ALLOC_ANYWHERE) {
 						ix++;
 						already_found_one = 1;
 					}
 
-					required = (max_parallel - *allocated) / ah->area_multiple;
-
-					if (alloc == ALLOC_ANYWHERE) {
-						/*
-						 * Update amount unreserved - effectively splitting an area 
-						 * into two or more parts.  If the whole stripe doesn't fit,
-						 * reduce amount we're looking for.
-						 */
-						if (ix + ix_offset - 1 >= ah->area_count)
-							required = ah->log_len;
-						if (required >= pva->unreserved) {
-							required = pva->unreserved;
-							pva->unreserved = 0;
-						} else {
-							pva->unreserved -= required;
-							reinsert_reduced_pv_area(pva);
-						}
-					} else {
-						if (required < ah->log_len)
-							required = ah->log_len;
-						if (required > pva->count)
-							required = pva->count;
-					}
-
-					/* Expand areas array if needed after an area was split. */
-					if (ix + ix_offset > *areas_size_ptr) {
-						*areas_size_ptr *= 2;
-						if (!(*areas_ptr = dm_realloc(*areas_ptr,
-									     sizeof(**areas_ptr) *
-									     (*areas_size_ptr)))) {
-							log_error("Memory reallocation for parallel areas failed.");
-							return 0;
-						}
-					}
-					(*areas_ptr)[ix + ix_offset - 1].pva = pva;
-						(*areas_ptr)[ix + ix_offset - 1].used = required;
-					log_debug("Trying allocation area %" PRIu32 " on %s start PE %" PRIu32
-						  " length %" PRIu32 " leaving %" PRIu32 ".",
-						  ix + ix_offset - 1, dev_name(pva->map->pv->dev), pva->start, required,
-						  (alloc == ALLOC_ANYWHERE) ? pva->unreserved : pva->count - required);
+					/* Reserve required amount of pva */
+					if (!_reserve_required_area(ah, max_to_allocate, ix + ix_offset,
+								    pva, alloc_state, alloc_parms->alloc))
+						return_0;
 				}
-			next_pv:
-				/* With ALLOC_ANYWHERE we ignore further PVs once we have at least enough areas */
-				/* With cling and contiguous we stop if we found a match for *all* the areas */
-				/* FIXME Rename these variables! */
-				if ((alloc == ALLOC_ANYWHERE &&
-				    ix + ix_offset >= ah->area_count + (*log_needs_allocating ? ah->log_area_count : 0)) ||
-				    (preferred_count == ix_offset &&
-				     (ix_offset == ah->area_count + (*log_needs_allocating ? ah->log_area_count : 0))))
-					break;
+
 			}
-		} while (alloc == ALLOC_ANYWHERE && last_ix != ix && ix < ah->area_count + (*log_needs_allocating ? ah->log_area_count : 0));
 
-		if (preferred_count < ix_offset)
-			break;
+		next_pv:
+			/* With ALLOC_ANYWHERE we ignore further PVs once we have at least enough areas */
+			/* With cling and contiguous we stop if we found a match for *all* the areas */
+			/* FIXME Rename these variables! */
+			if ((alloc_parms->alloc == ALLOC_ANYWHERE &&
+			    ix + ix_offset >= ah->area_count + alloc_state->log_area_count_still_needed) ||
+			    (preferred_count == ix_offset &&
+			     (ix_offset == ah->area_count + alloc_state->log_area_count_still_needed)))
+				break;
+		}
+	} while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < ah->area_count + alloc_state->log_area_count_still_needed) ||
+		/* With cling_to_alloced, if there were gaps in the preferred areas, have a second iteration */
+		 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count &&
+		  (preferred_count < ix_offset || alloc_state->log_area_count_still_needed) &&
+		  (alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) ||
+		/* Extra iteration needed to fill log areas on PVs already used? */
+		 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == ix_offset && !ah->mirror_logs_separate &&
+		  (ix + preferred_count < ah->area_count + alloc_state->log_area_count_still_needed) && !log_iteration_count++));
 
-		if (ix + ix_offset < ah->area_count +
-		   (*log_needs_allocating ? ah->log_area_count : 0))
-			break;
 
-		/* Sort the areas so we allocate from the biggest */
-		if (ix > 1)
-			qsort((*areas_ptr) + ix_offset, ix, sizeof(**areas_ptr),
+	if (preferred_count < ix_offset && !(alloc_parms->flags & A_CLING_TO_ALLOCED))
+		return 1;
+
+	if (ix + preferred_count < ah->area_count + alloc_state->log_area_count_still_needed)
+		return 1;
+
+	/* Sort the areas so we allocate from the biggest */
+	if (log_iteration_count) {
+		if (ix > ah->area_count + 1) {
+			log_debug("Sorting %u log areas", ix - ah->area_count);
+			qsort(alloc_state->areas + ah->area_count, ix - ah->area_count, sizeof(*alloc_state->areas),
 			      _comp_area);
+		}
+	} else if (ix > 1) {
+		log_debug("Sorting %u areas", ix);
+		qsort(alloc_state->areas + ix_offset, ix, sizeof(*alloc_state->areas),
+		      _comp_area);
+	}
+
+	/* If there are gaps in our preferred areas, fill them from the sorted part of the array */
+	if (preferred_count && preferred_count != ix_offset) {
+		for (s = 0; s < ah->area_count; s++)
+			if (!alloc_state->areas[s].pva) {
+				alloc_state->areas[s].pva = alloc_state->areas[ix_offset].pva;
+				alloc_state->areas[s].used = alloc_state->areas[ix_offset].used;
+				alloc_state->areas[ix_offset++].pva = NULL;
+			}
+	}
+	
+	/*
+	 * First time around, if there's a log, allocate it on the
+	 * smallest device that has space for it.
+	 */
+	too_small_for_log_count = 0;
+	ix_log_offset = 0;
+
+	/* FIXME This logic is due to its heritage and can be simplified! */
+	if (alloc_state->log_area_count_still_needed) {
+		/* How many areas are too small for the log? */
+		while (too_small_for_log_count < ix_offset + ix &&
+		       (*(alloc_state->areas + ix_offset + ix - 1 -
+			  too_small_for_log_count)).used < ah->log_len)
+			too_small_for_log_count++;
+		ix_log_offset = ix_offset + ix - too_small_for_log_count - ah->log_area_count;
+	}
+
+	if (ix + ix_offset < ah->area_count +
+	    (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
+				    too_small_for_log_count : 0))
+		return 1;
+
+	/*
+	 * Finally add the space identified to the list of areas to be used.
+	 */
+	if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset))
+		return_0;
+
+	/*
+	 * Log is always allocated first time.
+	 */
+	alloc_state->log_area_count_still_needed = 0;
+
+	return 1;
+}
+
+/*
+ * Choose sets of parallel areas to use, respecting any constraints 
+ * supplied in alloc_parms.
+ */
+static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, struct alloc_parms *alloc_parms,
+						   struct dm_list *pvms, struct alloc_state *alloc_state)
+{
+	uint32_t max_to_allocate;	/* Maximum extents to allocate this time */
+	uint32_t old_allocated;
+	uint32_t next_le;
+	struct seg_pvs *spvs;
+	struct dm_list *parallel_pvs;
+
+	/* FIXME This algorithm needs a lot of cleaning up! */
+	/* FIXME anywhere doesn't find all space yet */
+	do {
+		parallel_pvs = NULL;
+		max_to_allocate = alloc_parms->extents_still_needed - alloc_state->allocated;
 
 		/*
-		 * First time around, if there's a log, allocate it on the
-		 * smallest device that has space for it.
+		 * If there are existing parallel PVs, avoid them and reduce
+		 * the maximum we can allocate in one go accordingly.
 		 */
-		too_small_for_log_count = 0;
-		ix_log_offset = 0;
+		if (ah->parallel_areas) {
+			next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) + alloc_state->allocated / ah->area_multiple;
+			dm_list_iterate_items(spvs, ah->parallel_areas) {
+				if (next_le >= spvs->le + spvs->len)
+					continue;
 
-		/* FIXME This logic is due to its heritage and can be simplified! */
-		if (*log_needs_allocating) {
-			/* How many areas are too small for the log? */
-			while (too_small_for_log_count < ix_offset + ix &&
-			       (*((*areas_ptr) + ix_offset + ix - 1 -
-				  too_small_for_log_count)).used < ah->log_len)
-				too_small_for_log_count++;
-			ix_log_offset = ix_offset + ix - too_small_for_log_count - ah->log_area_count;
-		}
-
-		if (ix + ix_offset < ah->area_count +
-		    (*log_needs_allocating ? ah->log_area_count +
-					    too_small_for_log_count : 0))
-			break;
+				if (max_to_allocate + alloc_state->allocated > (spvs->le + spvs->len) * ah->area_multiple)
+					max_to_allocate = (spvs->le + spvs->len) * ah->area_multiple - alloc_state->allocated;
+				parallel_pvs = &spvs->pvs;
+				break;
+			}
+		}
 
-		if (!_alloc_parallel_area(ah, max_parallel, *areas_ptr, allocated,
-					  *log_needs_allocating, ix_log_offset))
-			return_0;
+		old_allocated = alloc_state->allocated;
 
-		*log_needs_allocating = 0;
+		if (!_find_some_parallel_space(ah, alloc_parms, pvms, alloc_state, parallel_pvs, max_to_allocate))
+			return_0;
 
-	} while ((alloc != ALLOC_CONTIGUOUS) && *allocated != needed && can_split);
+		/*
+		 * If we didn't allocate anything this time and had
+		 * A_CLING_TO_ALLOCED set, try again without it.
+		 *
+		 * For ALLOC_NORMAL, if we did allocate something without the
+		 * flag set, set it and continue so that further allocations
+		 * remain on the same disks where possible.
+		 */
+		if (old_allocated == alloc_state->allocated) {
+			if (alloc_parms->flags & A_CLING_TO_ALLOCED)
+				alloc_parms->flags &= ~A_CLING_TO_ALLOCED;
+			else
+				break;	/* Give up */
+		} else if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL &&
+			   !(alloc_parms->flags & A_CLING_TO_ALLOCED))
+			alloc_parms->flags |= A_CLING_TO_ALLOCED;
+	} while ((alloc_parms->alloc != ALLOC_CONTIGUOUS) && alloc_state->allocated != alloc_parms->extents_still_needed && (alloc_parms->flags & A_CAN_SPLIT));
 
 	return 1;
 }
@@ -1384,23 +1696,22 @@
 		     unsigned can_split,
 		     struct dm_list *allocatable_pvs)
 {
-	struct pv_area_used *areas;
-	uint32_t allocated = lv ? lv->le_count : 0;
 	uint32_t old_allocated;
 	struct lv_segment *prev_lvseg = NULL;
 	int r = 0;
 	struct dm_list *pvms;
-	uint32_t areas_size;
 	alloc_policy_t alloc;
-	unsigned log_needs_allocating = 0;
+	struct alloc_parms alloc_parms;
+	struct alloc_state alloc_state;
+
+	alloc_state.allocated = lv ? lv->le_count : 0;
 
-	if (allocated >= ah->new_extents && !ah->log_area_count) {
+	if (alloc_state.allocated >= ah->new_extents && !ah->log_area_count) {
 		log_error("_allocate called with no work to do!");
 		return 1;
 	}
 
-	if (ah->log_area_count)
-		log_needs_allocating = 1;
+	alloc_state.log_area_count_still_needed = ah->log_area_count;
 
 	if (ah->alloc == ALLOC_CONTIGUOUS)
 		can_split = 0;
@@ -1417,24 +1728,24 @@
 	if (!_log_parallel_areas(ah->mem, ah->parallel_areas))
 		stack;
 
-	areas_size = dm_list_size(pvms);
-	if (areas_size && areas_size < (ah->area_count + ah->log_area_count)) {
-		if (ah->alloc != ALLOC_ANYWHERE) {
+	alloc_state.areas_size = dm_list_size(pvms);
+	if (alloc_state.areas_size && alloc_state.areas_size < (ah->area_count + ah->log_area_count)) {
+		if (ah->alloc != ALLOC_ANYWHERE && ah->mirror_logs_separate) {
 			log_error("Not enough PVs with free space available "
 				  "for parallel allocation.");
 			log_error("Consider --alloc anywhere if desperate.");
 			return 0;
 		}
-		areas_size = ah->area_count + ah->log_area_count;
+		alloc_state.areas_size = ah->area_count + ah->log_area_count;
 	}
 
 	/* Upper bound if none of the PVs in prev_lvseg is in pvms */
 	/* FIXME Work size out properly */
 	if (prev_lvseg)
-		areas_size += _stripes_per_mimage(prev_lvseg) * prev_lvseg->area_count;
+		alloc_state.areas_size += _stripes_per_mimage(prev_lvseg) * prev_lvseg->area_count;
 
 	/* Allocate an array of pv_areas to hold the largest space on each PV */
-	if (!(areas = dm_malloc(sizeof(*areas) * areas_size))) {
+	if (!(alloc_state.areas = dm_malloc(sizeof(*alloc_state.areas) * alloc_state.areas_size))) {
 		log_error("Couldn't allocate areas array.");
 		return 0;
 	}
@@ -1451,36 +1762,33 @@
 		/* Skip cling_by_tags if no list defined */
 		if (alloc == ALLOC_CLING_BY_TAGS && !ah->cling_tag_list_cn)
 			continue;
-		old_allocated = allocated;
-		log_debug("Trying allocation using %s policy.  "
-			  "Need %" PRIu32 " extents for %" PRIu32 " parallel areas and %" PRIu32 " log areas of %" PRIu32 " extents. "
-			  "(Total %" PRIu32 " extents.)",
-			  get_alloc_string(alloc),
-			  (ah->new_extents - allocated) / ah->area_multiple,
-			  ah->area_count, log_needs_allocating ? ah->log_area_count : 0,
-			  log_needs_allocating ? ah->log_len : 0,
-			  (ah->new_extents - allocated) * ah->area_count / ah->area_multiple +
-				(log_needs_allocating ? ah->log_area_count * ah->log_len : 0));
-		if (!_find_parallel_space(ah, alloc, pvms, &areas,
-					  &areas_size, can_split,
-					  prev_lvseg, &allocated, &log_needs_allocating, ah->new_extents))
+		old_allocated = alloc_state.allocated;
+		log_debug("Trying allocation using %s policy.", get_alloc_string(alloc));
+
+		if (!_sufficient_pes_free(ah, pvms, alloc_state.allocated, ah->new_extents))
 			goto_out;
-		if ((allocated == ah->new_extents && !log_needs_allocating) || (ah->alloc == alloc) ||
-		    (!can_split && (allocated != old_allocated)))
+
+		_init_alloc_parms(ah, &alloc_parms, alloc, prev_lvseg, can_split, alloc_state.allocated, ah->new_extents);
+
+		if (!_find_max_parallel_space_for_one_policy(ah, &alloc_parms, pvms, &alloc_state))
+			goto_out;
+
+		if ((alloc_state.allocated == ah->new_extents && !alloc_state.log_area_count_still_needed) || (ah->alloc == alloc) ||
+		    (!can_split && (alloc_state.allocated != old_allocated)))
 			break;
 	}
 
-	if (allocated != ah->new_extents) {
+	if (alloc_state.allocated != ah->new_extents) {
 		log_error("Insufficient suitable %sallocatable extents "
 			  "for logical volume %s: %u more required",
 			  can_split ? "" : "contiguous ",
 			  lv ? lv->name : "",
-			  (ah->new_extents - allocated) * ah->area_count
+			  (ah->new_extents - alloc_state.allocated) * ah->area_count
 			  / ah->area_multiple);
 		goto out;
 	}
 
-	if (log_needs_allocating) {
+	if (alloc_state.log_area_count_still_needed) {
 		log_error("Insufficient free space for log allocation "
 			  "for logical volume %s.",
 			  lv ? lv->name : "");
@@ -1490,7 +1798,7 @@
 	r = 1;
 
       out:
-	dm_free(areas);
+	dm_free(alloc_state.areas);
 	return r;
 }
 

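A quick command-line illustration of the policy loop above (VG and LV
names are placeholders, not taken from the patch).  Assuming the usual
policy ordering, each command walks the allocation policies from the
strictest up to the one requested, and with enough -v verbosity logs
"Trying allocation using <policy> policy." at each step:

  lvcreate --alloc contiguous -n lv_strict -L 64m vg0  # contiguous only
  lvcreate --alloc normal -n lv_normal -L 64m vg0      # contiguous, cling, then normal
  lvs -o lv_name,devices vg0                           # see where the extents landed

With maximise_cling, a successful ALLOC_NORMAL pass now sets
A_CLING_TO_ALLOCED so that the remaining extents stay on the same disks
where possible.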


* LVM2 ./WHATS_NEW doc/example.conf.in lib/confi ...
@ 2010-08-20 20:59 snitzer
  0 siblings, 0 replies; 6+ messages in thread
From: snitzer @ 2010-08-20 20:59 UTC (permalink / raw)
  To: lvm-devel, lvm2-cvs

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	snitzer@sourceware.org	2010-08-20 20:59:07

Modified files:
	.              : WHATS_NEW 
	doc            : example.conf.in 
	lib/config     : defaults.h 
	lib/format_text: format-text.c 
	lib/metadata   : metadata.c 

Log message:
	Update heuristic used for default and detected data alignment.
	
	Add "devices/default_data_alignment" to lvm.conf to control the internal
	default that LVM2 uses: 0==64k, 1==1MB, 2==2MB, etc.
	
	If --dataalignment (or lvm.conf's "devices/data_alignment") is specified,
	it is always used to align the start of the data area.  This means
	md_chunk_alignment and data_alignment_detection are disabled when it is set.
	
	(The same now applies to pvcreate --dataalignmentoffset: the specified
	value is used instead of the result from data_alignment_offset_detection.)
	
	set_pe_align() still prefers the determined default alignment (based on
	lvm.conf's default_data_alignment) if the default is a multiple of the
	MD- or topology-detected values.
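
A sketch of that precedence (device name and values are placeholders):

  # An explicit alignment always wins and disables MD/topology detection:
  pvcreate --dataalignment 256k /dev/sdx
  # Changing only the built-in default (value in MiB; 0 selects the old 64KiB):
  pvcreate --config 'devices { default_data_alignment = 2 }' /dev/sdx
  pvs --units k -o pv_name,pe_start /dev/sdx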

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/WHATS_NEW.diff?cvsroot=lvm2&r1=1.1718&r2=1.1719
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/doc/example.conf.in.diff?cvsroot=lvm2&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/config/defaults.h.diff?cvsroot=lvm2&r1=1.66&r2=1.67
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/format_text/format-text.c.diff?cvsroot=lvm2&r1=1.142&r2=1.143
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/metadata.c.diff?cvsroot=lvm2&r1=1.395&r2=1.396

--- LVM2/WHATS_NEW	2010/08/20 20:35:55	1.1718
+++ LVM2/WHATS_NEW	2010/08/20 20:59:05	1.1719
@@ -1,5 +1,7 @@
 Version 2.02.74 - 
 ==================================
+  Update heuristic used for default and detected data alignment.
+  Add "devices/default_data_alignment" to lvm.conf.
   Add implementation for simple numeric 'get' property functions.
   Define GET_NUM_PROPERTY_FN macro to simplify numeric property 'get' function
   Add properties.[ch] to lib/report, defined based on columns.h.
--- LVM2/doc/example.conf.in	2010/08/16 22:54:36	1.8
+++ LVM2/doc/example.conf.in	2010/08/20 20:59:05	1.9
@@ -98,6 +98,11 @@
     # 1 enables; 0 disables.
     md_chunk_alignment = 1
 
+    # Default alignment of the start of a data area in MiB.  If set to 0,
+    # a small value of 64KiB will be used, which was the default until
+    # LVM2 version 2.02.73.  Set to 1 for 1MiB, 2 for 2MiB, etc.
+    default_data_alignment = 1
+
     # By default, the start of a PV's data area will be a multiple of
     # the 'minimum_io_size' or 'optimal_io_size' exposed in sysfs.
     # - minimum_io_size - the smallest request the device can perform
@@ -111,9 +116,9 @@
     data_alignment_detection = 1
 
     # Alignment (in KB) of start of data area when creating a new PV.
-    # If a PV is placed directly upon an md device and md_chunk_alignment or
-    # data_alignment_detection is enabled this parameter is ignored.
-    # Set to 0 for the default alignment of 1MB or page size, if larger.
+    # md_chunk_alignment and data_alignment_detection are disabled if this
+    # is set.  Set to 0 for the default alignment (see: default_data_alignment)
+    # or the page size, if larger.
     data_alignment = 0
 
     # By default, the start of the PV's aligned data area will be shifted by
@@ -122,6 +127,7 @@
     # windows partitioning will have an alignment_offset of 3584 bytes
     # (sector 7 is the lowest aligned logical block, the 4KB sectors start
     # at LBA -1, and consequently sector 63 is aligned on a 4KB boundary).
+    # But note that pvcreate --dataalignmentoffset will skip this detection.
     # 1 enables; 0 disables.
     data_alignment_offset_detection = 1
 
--- LVM2/lib/config/defaults.h	2010/08/16 22:54:36	1.66
+++ LVM2/lib/config/defaults.h	2010/08/20 20:59:05	1.67
@@ -17,6 +17,7 @@
 #define _LVM_DEFAULTS_H
 
 #define DEFAULT_PE_ALIGN 2048
+#define DEFAULT_PE_ALIGN_OLD 128
 
 #define DEFAULT_ARCHIVE_ENABLED 1
 #define DEFAULT_BACKUP_ENABLED 1
@@ -33,6 +34,7 @@
 #define DEFAULT_MD_CHUNK_ALIGNMENT 1
 #define DEFAULT_IGNORE_SUSPENDED_DEVICES 1
 #define DEFAULT_REQUIRE_RESTOREFILE_WITH_UUID 1
+#define DEFAULT_DATA_ALIGNMENT 1
 #define DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION 1
 #define DEFAULT_DATA_ALIGNMENT_DETECTION 1
 
--- LVM2/lib/format_text/format-text.c	2010/08/03 18:19:42	1.142
+++ LVM2/lib/format_text/format-text.c	2010/08/20 20:59:07	1.143
@@ -1861,16 +1861,20 @@
 						      0) * 2;
 
 		if (set_pe_align(pv, data_alignment) != data_alignment &&
-		    data_alignment)
-			log_warn("WARNING: %s: Overriding data alignment to "
-				 "%lu sectors (requested %lu sectors)",
-				 pv_dev_name(pv), pv->pe_align, data_alignment);
+		    data_alignment) {
+			log_error("%s: invalid data alignment of "
+				  "%lu sectors (requested %lu sectors)",
+				  pv_dev_name(pv), pv->pe_align, data_alignment);
+			return 0;
+		}
 
 		if (set_pe_align_offset(pv, data_alignment_offset) != data_alignment_offset &&
-		    data_alignment_offset)
-			log_warn("WARNING: %s: Overriding data alignment offset to "
-				 "%lu sectors (requested %lu sectors)",
-				 pv_dev_name(pv), pv->pe_align_offset, data_alignment_offset);
+		    data_alignment_offset) {
+			log_error("%s: invalid data alignment offset of "
+				  "%lu sectors (requested %lu sectors)",
+				  pv_dev_name(pv), pv->pe_align_offset, data_alignment_offset);
+			return 0;
+		}
 
 		if (pv->pe_align < pv->pe_align_offset) {
 			log_error("%s: pe_align (%lu sectors) must not be less "
--- LVM2/lib/metadata/metadata.c	2010/08/20 12:43:49	1.395
+++ LVM2/lib/metadata/metadata.c	2010/08/20 20:59:07	1.396
@@ -62,23 +62,38 @@
 const char _really_init[] =
     "Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
 
-static int _alignment_overrides_default(unsigned long data_alignment)
+static int _alignment_overrides_default(unsigned long data_alignment,
+					unsigned long default_pe_align)
 {
-	return data_alignment && (DEFAULT_PE_ALIGN % data_alignment);
+	return data_alignment && (default_pe_align % data_alignment);
 }
 
 unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
 {
-	unsigned long temp_pe_align;
+	unsigned long default_pe_align, temp_pe_align;
 
 	if (pv->pe_align)
 		goto out;
 
-	if (data_alignment)
+	if (data_alignment) {
+		/* Always use specified data_alignment */
 		pv->pe_align = data_alignment;
+		goto out;
+	}
+
+	default_pe_align = find_config_tree_int(pv->fmt->cmd,
+						"devices/default_data_alignment",
+						DEFAULT_DATA_ALIGNMENT);
+
+	if (default_pe_align)
+		/* align on 1 MiB multiple */
+		default_pe_align *= DEFAULT_PE_ALIGN;
 	else
-		pv->pe_align = MAX((DEFAULT_PE_ALIGN << SECTOR_SHIFT),
-				   lvm_getpagesize()) >> SECTOR_SHIFT;
+		/* align on 64 KiB multiple (old default) */
+		default_pe_align = DEFAULT_PE_ALIGN_OLD;
+
+	pv->pe_align = MAX((default_pe_align << SECTOR_SHIFT),
+			   lvm_getpagesize()) >> SECTOR_SHIFT;
 
 	if (!pv->dev)
 		goto out;
@@ -89,8 +104,8 @@
 	if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
 				  DEFAULT_MD_CHUNK_ALIGNMENT)) {
 		temp_pe_align = dev_md_stripe_width(pv->fmt->cmd->sysfs_dir, pv->dev);
-		if (_alignment_overrides_default(temp_pe_align))
-			pv->pe_align = temp_pe_align;
+		if (_alignment_overrides_default(temp_pe_align, default_pe_align))
+			pv->pe_align = MAX(pv->pe_align, temp_pe_align);
 	}
 
 	/*
@@ -104,18 +119,18 @@
 				  "devices/data_alignment_detection",
 				  DEFAULT_DATA_ALIGNMENT_DETECTION)) {
 		temp_pe_align = dev_minimum_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
-		if (_alignment_overrides_default(temp_pe_align))
-			pv->pe_align = temp_pe_align;
+		if (_alignment_overrides_default(temp_pe_align, default_pe_align))
+			pv->pe_align = MAX(pv->pe_align, temp_pe_align);
 
 		temp_pe_align = dev_optimal_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
-		if (_alignment_overrides_default(temp_pe_align))
-			pv->pe_align = temp_pe_align;
+		if (_alignment_overrides_default(temp_pe_align, default_pe_align))
+			pv->pe_align = MAX(pv->pe_align, temp_pe_align);
 	}
 
+out:
 	log_very_verbose("%s: Setting PE alignment to %lu sectors.",
 			 dev_name(pv->dev), pv->pe_align);
 
-out:
 	return pv->pe_align;
 }
 
@@ -125,8 +140,11 @@
 	if (pv->pe_align_offset)
 		goto out;
 
-	if (data_alignment_offset)
+	if (data_alignment_offset) {
+		/* Always use specified data_alignment_offset */
 		pv->pe_align_offset = data_alignment_offset;
+		goto out;
+	}
 
 	if (!pv->dev)
 		goto out;
@@ -142,10 +160,10 @@
 		pv->pe_align_offset = MAX(pv->pe_align_offset, align_offset);
 	}
 
+out:
 	log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
 			 dev_name(pv->dev), pv->pe_align_offset);
 
-out:
 	return pv->pe_align_offset;
 }
 

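Worked numbers for the new MAX() behaviour in set_pe_align() (the
detected stripe widths below are assumptions, given in 512-byte sectors
against the 1MiB == 2048-sector default):

  echo $((2048 % 1536))  # 512: a 768KiB stripe width passes the override
                         # test, but MAX(2048, 1536) keeps pe_align at 2048
  echo $((2048 % 3072))  # 2048: a 1536KiB stripe width also passes, and
                         # MAX(2048, 3072) raises pe_align to 3072

So MD/topology detection can now only raise the alignment above the
configured default, never lower it.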


* LVM2 ./WHATS_NEW doc/example.conf.in lib/confi ...
@ 2010-08-12  4:11 snitzer
  0 siblings, 0 replies; 6+ messages in thread
From: snitzer @ 2010-08-12  4:11 UTC (permalink / raw)
  To: lvm-devel, lvm2-cvs

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	snitzer@sourceware.org	2010-08-12 04:11:50

Modified files:
	.              : WHATS_NEW 
	doc            : example.conf.in 
	lib/config     : defaults.h 
	lib/metadata   : metadata.c 
	test           : lvm-utils.sh t-pvcreate-operation-md.sh 
	                 t-pvcreate-usage.sh t-topology-support.sh 
	                 t-vgcreate-usage.sh t-vgextend-usage.sh 
	                 t-vgsplit-operation.sh test-utils.sh 

Log message:
	Change default alignment of pe_start to 1MB.
	
	The new standard in the storage industry is to align data areas to 1MB
	by default.  fdisk, parted, and mdadm have all been updated to this
	default.
	
	Update LVM to align the PV's data area start (pe_start) to 1MB.  This
	provides a more useful default than the previous default of 64K (which
	generally ended up being a 192K pe_start once the first metadata area
	was created).
	
	Before this patch:
	# pvs -o name,vg_mda_size,pe_start
	PV         VMdaSize  1st PE
	/dev/sdd     188.00k 192.00k
	
	After this patch:
	# pvs -o name,vg_mda_size,pe_start
	PV         VMdaSize  1st PE
	/dev/sdd    1020.00k   1.00m
	
	The heuristic for setting the default alignment for LVM data areas
	(a worked sketch follows the list) is:
	- If the default value (1MB) is a multiple of the detected alignment
	then just use the default.
	- Otherwise, use the detected value.
	
	In practice this means we'll almost always use 1MB -- that is unless:
	- the alignment was explicitly specified with --dataalignment
	- or MD's full stripe width, or the {minimum,optimal}_io_size exceeds
	1MB
	- or the specified/detected value is not a power-of-2
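
A worked sketch of the heuristic, in the style of the test scripts
($dev1 is a placeholder; the detected values are assumptions, given in
512-byte sectors against the 1MiB == 2048-sector default):

  # optimal_io_size 64KiB == 128 sectors: 2048 % 128 == 0, keep the 1MiB default
  # MD full stripe width 768KiB == 1536 sectors: 2048 % 1536 != 0, use 1536
  pvcreate $dev1
  check_pv_field_ $dev1 pe_start 1.00m  # expected whenever the default wins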

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/WHATS_NEW.diff?cvsroot=lvm2&r1=1.1700&r2=1.1701
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/doc/example.conf.in.diff?cvsroot=lvm2&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/config/defaults.h.diff?cvsroot=lvm2&r1=1.64&r2=1.65
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/metadata.c.diff?cvsroot=lvm2&r1=1.390&r2=1.391
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/lvm-utils.sh.diff?cvsroot=lvm2&r1=1.26&r2=1.27
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-pvcreate-operation-md.sh.diff?cvsroot=lvm2&r1=1.7&r2=1.8
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-pvcreate-usage.sh.diff?cvsroot=lvm2&r1=1.15&r2=1.16
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-topology-support.sh.diff?cvsroot=lvm2&r1=1.4&r2=1.5
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-vgcreate-usage.sh.diff?cvsroot=lvm2&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-vgextend-usage.sh.diff?cvsroot=lvm2&r1=1.4&r2=1.5
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-vgsplit-operation.sh.diff?cvsroot=lvm2&r1=1.25&r2=1.26
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/test-utils.sh.diff?cvsroot=lvm2&r1=1.47&r2=1.48

--- LVM2/WHATS_NEW	2010/08/12 04:08:59	1.1700
+++ LVM2/WHATS_NEW	2010/08/12 04:11:48	1.1701
@@ -1,5 +1,6 @@
 Version 2.02.73 - 
 ================================
+  Change default alignment of pe_start to 1MB.
   Add --norestorefile option to pvcreate.
   Require --restorefile when using pvcreate --uuid.
   Recognise and give preference to md device partitions (blkext major).
--- LVM2/doc/example.conf.in	2010/08/12 04:08:59	1.6
+++ LVM2/doc/example.conf.in	2010/08/12 04:11:48	1.7
@@ -113,7 +113,7 @@
     # Alignment (in KB) of start of data area when creating a new PV.
     # If a PV is placed directly upon an md device and md_chunk_alignment or
     # data_alignment_detection is enabled this parameter is ignored.
-    # Set to 0 for the default alignment of 64KB or page size, if larger.
+    # Set to 0 for the default alignment of 1MB or page size, if larger.
     data_alignment = 0
 
     # By default, the start of the PV's aligned data area will be shifted by
--- LVM2/lib/config/defaults.h	2010/08/12 04:08:59	1.64
+++ LVM2/lib/config/defaults.h	2010/08/12 04:11:48	1.65
@@ -16,6 +16,8 @@
 #ifndef _LVM_DEFAULTS_H
 #define _LVM_DEFAULTS_H
 
+#define DEFAULT_PE_ALIGN 2048
+
 #define DEFAULT_ARCHIVE_ENABLED 1
 #define DEFAULT_BACKUP_ENABLED 1
 
--- LVM2/lib/metadata/metadata.c	2010/07/30 16:47:27	1.390
+++ LVM2/lib/metadata/metadata.c	2010/08/12 04:11:49	1.391
@@ -62,15 +62,23 @@
 const char _really_init[] =
     "Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
 
+static int _alignment_overrides_default(unsigned long data_alignment)
+{
+	return data_alignment && (DEFAULT_PE_ALIGN % data_alignment);
+}
+
 unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
 {
+	unsigned long temp_pe_align;
+
 	if (pv->pe_align)
 		goto out;
 
 	if (data_alignment)
 		pv->pe_align = data_alignment;
 	else
-		pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
+		pv->pe_align = MAX((DEFAULT_PE_ALIGN << SECTOR_SHIFT),
+				   lvm_getpagesize()) >> SECTOR_SHIFT;
 
 	if (!pv->dev)
 		goto out;
@@ -79,10 +87,11 @@
 	 * Align to stripe-width of underlying md device if present
 	 */
 	if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
-				  DEFAULT_MD_CHUNK_ALIGNMENT))
-		pv->pe_align = MAX(pv->pe_align,
-				   dev_md_stripe_width(pv->fmt->cmd->sysfs_dir,
-						       pv->dev));
+				  DEFAULT_MD_CHUNK_ALIGNMENT)) {
+		temp_pe_align = dev_md_stripe_width(pv->fmt->cmd->sysfs_dir, pv->dev);
+		if (_alignment_overrides_default(temp_pe_align))
+			pv->pe_align = temp_pe_align;
+	}
 
 	/*
 	 * Align to topology's minimum_io_size or optimal_io_size if present
@@ -94,13 +103,13 @@
 	if (find_config_tree_bool(pv->fmt->cmd,
 				  "devices/data_alignment_detection",
 				  DEFAULT_DATA_ALIGNMENT_DETECTION)) {
-		pv->pe_align = MAX(pv->pe_align,
-				   dev_minimum_io_size(pv->fmt->cmd->sysfs_dir,
-						       pv->dev));
-
-		pv->pe_align = MAX(pv->pe_align,
-				   dev_optimal_io_size(pv->fmt->cmd->sysfs_dir,
-						       pv->dev));
+		temp_pe_align = dev_minimum_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
+		if (_alignment_overrides_default(temp_pe_align))
+			pv->pe_align = temp_pe_align;
+
+		temp_pe_align = dev_optimal_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
+		if (_alignment_overrides_default(temp_pe_align))
+			pv->pe_align = temp_pe_align;
 	}
 
 	log_very_verbose("%s: Setting PE alignment to %lu sectors.",
--- LVM2/test/lvm-utils.sh	2010/08/02 13:20:50	1.26
+++ LVM2/test/lvm-utils.sh	2010/08/12 04:11:49	1.27
@@ -103,9 +103,10 @@
     local pv=$1;
     local field=$2;
     local expected=$3;
+    local pvs_args=$4; # optional
     local actual;
 
-    actual=$(trim $(pvs --noheadings -o $field $pv))
+    actual=$(trim $(pvs --noheadings $pvs_args -o $field $pv))
 if test "$verbose" = "t"
 then
   echo "check_pv_field_ PV=$pv, field=$field, actual=$actual, expected=$expected"
--- LVM2/test/t-pvcreate-operation-md.sh	2010/07/21 14:12:47	1.7
+++ LVM2/test/t-pvcreate-operation-md.sh	2010/08/12 04:11:49	1.8
@@ -52,14 +52,14 @@
 
 # Test alignment of PV on MD without any MD-aware or topology-aware detection
 # - should treat $mddev just like any other block device
-pv_align="192.00k"
+pv_align="1.00m"
 pvcreate --metadatasize 128k \
     --config 'devices {md_chunk_alignment=0 data_alignment_detection=0 data_alignment_offset_detection=0}' \
     $mddev
 check_pv_field_ $mddev pe_start $pv_align
 
 # Test md_chunk_alignment independent of topology-aware detection
-pv_align="256.00k"
+pv_align="1.00m"
 pvcreate --metadatasize 128k \
     --config 'devices {data_alignment_detection=0 data_alignment_offset_detection=0}' \
     $mddev
@@ -71,7 +71,8 @@
 # Test newer topology-aware alignment detection
 # - first added to 2.6.31 but not "reliable" until 2.6.33
 if [ $linux_minor -ge 33 ]; then
-    pv_align="256.00k"
+    pv_align="1.00m"
+    # optimal_io_size=131072, minimum_io_size=65536
     pvcreate --metadatasize 128k \
 	--config 'devices { md_chunk_alignment=0 }' $mddev
     check_pv_field_ $mddev pe_start $pv_align
@@ -103,15 +104,9 @@
 	alignment_offset=`cat $sysfs_alignment_offset` || \
 	alignment_offset=0
 
-    if [ "$alignment_offset" = "512" ]; then
-	pv_align="256.50k"
-	pvcreate --metadatasize 128k $mddev_p
-	check_pv_field_ $mddev_p pe_start $pv_align
-	pvremove $mddev_p
-    elif [ "$alignment_offset" = "2048" ]; then
-	pv_align="258.00k"
-	pvcreate --metadatasize 128k $mddev_p
-	check_pv_field_ $mddev_p pe_start $pv_align
-	pvremove $mddev_p
-    fi
+    # default alignment is 1M, add alignment_offset
+    pv_align=$((1048576+$alignment_offset))B
+    pvcreate --metadatasize 128k $mddev_p
+    check_pv_field_ $mddev_p pe_start $pv_align "--units b"
+    pvremove $mddev_p
 fi
--- LVM2/test/t-pvcreate-usage.sh	2010/06/30 12:17:55	1.15
+++ LVM2/test/t-pvcreate-usage.sh	2010/08/12 04:11:50	1.16
@@ -119,11 +119,11 @@
 pvcreate --metadatasize 128k --metadatacopies 2 --dataalignment 3.5k $dev1
 check_pv_field_ $dev1 pe_start $pv_align
 
-# data area is aligned to 64k by default,
+# data area is aligned to 1M by default,
 # data area start is shifted by the specified alignment_offset
-pv_align="195.50k"
+pv_align="1052160B" # 1048576 + (7*512)
 pvcreate --metadatasize 128k --dataalignmentoffset 7s $dev1
-check_pv_field_ $dev1 pe_start $pv_align
+check_pv_field_ $dev1 pe_start $pv_align "--units b"
 
 # 2nd metadata area is created without problems when
 # data area start is shifted by the specified alignment_offset
--- LVM2/test/t-topology-support.sh	2010/05/24 19:27:38	1.4
+++ LVM2/test/t-topology-support.sh	2010/08/12 04:11:50	1.5
@@ -57,7 +57,7 @@
 # FIXME add more topology-specific tests and validation (striped LVs, etc)
 
 NUM_DEVS=1
-PER_DEV_SIZE=33
+PER_DEV_SIZE=34
 DEV_SIZE=$(($NUM_DEVS*$PER_DEV_SIZE))
 
 # ---------------------------------------------
--- LVM2/test/t-vgcreate-usage.sh	2010/06/28 20:39:39	1.16
+++ LVM2/test/t-vgcreate-usage.sh	2010/08/12 04:11:50	1.17
@@ -130,11 +130,11 @@
 vgremove -f $vg
 pvremove -f $dev1
 
-# data area is aligned to 64k by default,
+# data area is aligned to 1M by default,
 # data area start is shifted by the specified alignment_offset
-pv_align="195.50k"
+pv_align="1052160B" # 1048576 + (7*512)
 vgcreate -c n --metadatasize 128k --dataalignmentoffset 7s $vg $dev1
-check_pv_field_ $dev1 pe_start $pv_align
+check_pv_field_ $dev1 pe_start $pv_align "--units b"
 vgremove -f $vg
 pvremove -f $dev1
 
--- LVM2/test/t-vgextend-usage.sh	2010/06/30 13:04:59	1.4
+++ LVM2/test/t-vgextend-usage.sh	2010/08/12 04:11:50	1.5
@@ -67,11 +67,11 @@
 vgreduce $vg $dev1
 pvremove -f $dev1
 
-# data area is aligned to 64k by default,
+# data area is aligned to 1M by default,
 # data area start is shifted by the specified alignment_offset
-pv_align="195.50k"
+pv_align="1052160B" # 1048576 + (7*512)
 vgextend --metadatasize 128k --dataalignmentoffset 7s $vg $dev1
-check_pv_field_ $dev1 pe_start $pv_align
+check_pv_field_ $dev1 pe_start $pv_align "--units b"
 vgremove -f $vg
 pvremove -f $dev1
 
--- LVM2/test/t-vgsplit-operation.sh	2010/03/29 16:40:51	1.25
+++ LVM2/test/t-vgsplit-operation.sh	2010/08/12 04:11:50	1.26
@@ -17,7 +17,7 @@
 	LAST_TEST="$@"
 }
 
-prepare_pvs 5 257
+prepare_pvs 5 258
 # FIXME: parameterize lvm1 vs lvm2 metadata; most of these tests should run
 # fine with lvm1 metadata as well; for now, just add disks 5 and 6 as lvm1
 # metadata
--- LVM2/test/test-utils.sh	2010/08/02 13:18:42	1.47
+++ LVM2/test/test-utils.sh	2010/08/12 04:11:50	1.48
@@ -264,7 +264,7 @@
 	local n="$1"
 	test -z "$n" && n=3
 	local devsize="$2"
-	test -z "$devsize" && devsize=33
+	test -z "$devsize" && devsize=34
 	local pvname="$3"
 	test -z "$pvname" && pvname="pv"
 



* LVM2 ./WHATS_NEW doc/example.conf.in lib/confi ...
@ 2010-08-12  4:09 snitzer
  0 siblings, 0 replies; 6+ messages in thread
From: snitzer @ 2010-08-12  4:09 UTC (permalink / raw)
  To: lvm-devel, lvm2-cvs

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	snitzer@sourceware.org	2010-08-12 04:09:00

Modified files:
	.              : WHATS_NEW 
	doc            : example.conf.in 
	lib/config     : defaults.h 
	man            : pvcreate.8.in 
	test           : t-covercmd.sh t-pvcreate-operation.sh 
	                 t-vgcfgbackup-usage.sh 
	tools          : args.h commands.h pvcreate.c 

Log message:
	Require --restorefile when using pvcreate --uuid.
	
	Introduce --norestorefile to allow the user to override the new requirement.
	
	This can also be overridden with "devices/require_restorefile_with_uuid"
	in lvm.conf -- however, the default is 1 (a restorefile is required).
	
	Signed-off-by: Mike Snitzer <snitzer@redhat.com>
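
A short sketch of the resulting behaviour, in the style of the test
scripts ($uuid1, $dev1 and $backupfile are placeholders):

  not pvcreate --uuid $uuid1 $dev1                        # now rejected by default
  pvcreate --uuid $uuid1 --restorefile $backupfile $dev1  # intended recovery path
  pvcreate --norestorefile --uuid $uuid1 $dev1            # explicit opt-out
  # or relax the default via configuration:
  pvcreate --config 'devices { require_restorefile_with_uuid = 0 }' \
      --uuid $uuid1 $dev1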

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/WHATS_NEW.diff?cvsroot=lvm2&r1=1.1699&r2=1.1700
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/doc/example.conf.in.diff?cvsroot=lvm2&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/config/defaults.h.diff?cvsroot=lvm2&r1=1.63&r2=1.64
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/man/pvcreate.8.in.diff?cvsroot=lvm2&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-covercmd.sh.diff?cvsroot=lvm2&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-pvcreate-operation.sh.diff?cvsroot=lvm2&r1=1.18&r2=1.19
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/test/t-vgcfgbackup-usage.sh.diff?cvsroot=lvm2&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/tools/args.h.diff?cvsroot=lvm2&r1=1.78&r2=1.79
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/tools/commands.h.diff?cvsroot=lvm2&r1=1.153&r2=1.154
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/tools/pvcreate.c.diff?cvsroot=lvm2&r1=1.90&r2=1.91

--- LVM2/WHATS_NEW	2010/08/11 12:14:23	1.1699
+++ LVM2/WHATS_NEW	2010/08/12 04:08:59	1.1700
@@ -1,5 +1,7 @@
 Version 2.02.73 - 
 ================================
+  Add --norestorefile option to pvcreate.
+  Require --restorefile when using pvcreate --uuid.
   Recognise and give preference to md device partitions (blkext major).
   Never scan internal LVM devices.
   Split-mirror operations were ignoring user-specified PVs.
--- LVM2/doc/example.conf.in	2010/07/02 02:09:57	1.5
+++ LVM2/doc/example.conf.in	2010/08/12 04:08:59	1.6
@@ -130,6 +130,9 @@
     # Set this to 1 to skip such devices.  This should only be needed
     # in recovery situations.
     ignore_suspended_devices = 0
+
+    # Set to 0 to allow pvcreate --uuid to proceed without --restorefile.
+    require_restorefile_with_uuid = 1
 }
 
 # This section that allows you to configure the nature of the
--- LVM2/lib/config/defaults.h	2010/06/30 12:49:28	1.63
+++ LVM2/lib/config/defaults.h	2010/08/12 04:08:59	1.64
@@ -30,6 +30,7 @@
 #define DEFAULT_MD_COMPONENT_DETECTION 1
 #define DEFAULT_MD_CHUNK_ALIGNMENT 1
 #define DEFAULT_IGNORE_SUSPENDED_DEVICES 1
+#define DEFAULT_REQUIRE_RESTOREFILE_WITH_UUID 1
 #define DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION 1
 #define DEFAULT_DATA_ALIGNMENT_DETECTION 1
 
--- LVM2/man/pvcreate.8.in	2010/07/13 15:04:23	1.8
+++ LVM2/man/pvcreate.8.in	2010/08/12 04:09:00	1.9
@@ -17,6 +17,7 @@
 .RB [ \-\-dataalignment alignment ]
 .RB [ \-\-dataalignmentoffset alignment_offset ]
 .RB [ \-\-restorefile file ]
+.RB [ \-\-norestorefile ]
 .RB [ \-\-setphysicalvolumesize size ]
 .RB [ \-u | \-\-uuid uuid ]
 .RB [ \-\-version ]
@@ -60,7 +61,9 @@
 Without this option, \fBpvcreate\fP generates a random uuid.
 All of your physical volumes must have unique uuids.
 You need to use this option before restoring a backup of LVM metadata 
-onto a replacement device - see \fBvgcfgrestore\fP(8).
+onto a replacement device - see \fBvgcfgrestore\fP(8).  As such, use of
+\fB--restorefile\fP is compulsory unless \fB--norestorefile\fP is
+used.
 .TP
 .BR \-y ", " \-\-yes
 Answer yes to all questions.
@@ -138,6 +141,10 @@
 a mechanism to upgrade the metadata format or to add/remove metadata
 areas. Use with care. See also \fBvgconvert\fP(8).
 .TP
+.BR \-\-norestorefile
+In conjunction with \fB--uuid\fP, this allows a uuid to be specified
+without also requiring that a backup of the metadata be provided.
+.TP
 .BR \-\-labelsector " sector"
 By default the PV is labelled with an LVM2 identifier in its second 
 sector (sector 1).  This lets you use a different sector near the
--- LVM2/test/t-covercmd.sh	2010/06/28 21:49:31	1.8
+++ LVM2/test/t-covercmd.sh	2010/08/12 04:09:00	1.9
@@ -30,7 +30,7 @@
 pvcreate --metadatacopies 0 $dev2
 pvcreate --metadatacopies 0 $dev3
 pvcreate $dev4
-pvcreate -u $TEST_UUID --metadatacopies 0 $dev5
+pvcreate --norestorefile -u $TEST_UUID --metadatacopies 0 $dev5
 vgcreate -c n $vg $devs
 lvcreate -n $lv -l 5 -i5 -I256 $vg
 
--- LVM2/test/t-pvcreate-operation.sh	2010/04/07 14:46:26	1.18
+++ LVM2/test/t-pvcreate-operation.sh	2010/08/12 04:09:00	1.19
@@ -95,17 +95,20 @@
 bogusuuid=fred
 
 # pvcreate rejects uuid option with less than 32 characters
-not pvcreate --uuid $bogusuuid $dev1
+not pvcreate --norestorefile --uuid $bogusuuid $dev1
+
+# pvcreate rejects uuid option without restorefile
+not pvcreate --uuid $uuid1 $dev1
 
 # pvcreate rejects uuid already in use
-pvcreate --uuid $uuid1 $dev1
-not pvcreate --uuid $uuid1 $dev2
+pvcreate --norestorefile --uuid $uuid1 $dev1
+not pvcreate --norestorefile --uuid $uuid1 $dev2
 
 # pvcreate rejects non-existent file given with restorefile
 not pvcreate --uuid $uuid1 --restorefile $backupfile $dev1
 
 # pvcreate rejects restorefile with uuid not found in file
-pvcreate --uuid $uuid1 $dev1
+pvcreate --norestorefile --uuid $uuid1 $dev1
 vgcfgbackup -f $backupfile
 not pvcreate --uuid $uuid2 --restorefile $backupfile $dev2
 
--- LVM2/test/t-vgcfgbackup-usage.sh	2010/04/12 19:00:23	1.3
+++ LVM2/test/t-vgcfgbackup-usage.sh	2010/08/12 04:09:00	1.4
@@ -37,6 +37,6 @@
 pvcreate -ff -y $dev2
 vgcfgbackup -f "$(pwd)/backup.$$" $vg
 sed 's/flags = \[\"MISSING\"\]/flags = \[\]/' "$(pwd)/backup.$$" > "$(pwd)/backup.$$1"
-pvcreate -ff -y -u $pv1_uuid $dev1
-pvcreate -ff -y -u $pv2_uuid $dev2
+pvcreate -ff -y --norestorefile -u $pv1_uuid $dev1
+pvcreate -ff -y --norestorefile -u $pv2_uuid $dev2
 vgcfgrestore -f "$(pwd)/backup.$$1" $vg
--- LVM2/tools/args.h	2010/06/30 20:21:03	1.78
+++ LVM2/tools/args.h	2010/08/12 04:09:00	1.79
@@ -27,6 +27,7 @@
 arg(metadatacopies_ARG, '\0', "metadatacopies", metadatacopies_arg, 0)
 arg(metadatasize_ARG, '\0', "metadatasize", size_mb_arg, 0)
 arg(metadataignore_ARG, '\0', "metadataignore", yes_no_arg, 0)
+arg(norestorefile_ARG, '\0', "norestorefile", NULL, 0)
 arg(restorefile_ARG, '\0', "restorefile", string_arg, 0)
 arg(labelsector_ARG, '\0', "labelsector", int_arg, 0)
 arg(driverloaded_ARG, '\0', "driverloaded", yes_no_arg, 0)
--- LVM2/tools/commands.h	2010/07/07 19:14:58	1.153
+++ LVM2/tools/commands.h	2010/08/12 04:09:00	1.154
@@ -497,6 +497,7 @@
    "Initialize physical volume(s) for use by LVM",
    0,
    "pvcreate " "\n"
+   "\t[--norestorefile]\n"
    "\t[--restorefile file]\n"
    "\t[-d|--debug]" "\n"
    "\t[-f[f]|--force [--force]] " "\n"
@@ -517,8 +518,9 @@
    "\tPhysicalVolume [PhysicalVolume...]\n",
 
    dataalignment_ARG, dataalignmentoffset_ARG, force_ARG, test_ARG,
-   labelsector_ARG, metadatatype_ARG, metadatacopies_ARG, pvmetadatacopies_ARG,
-   metadatasize_ARG, metadataignore_ARG, physicalvolumesize_ARG,
+   labelsector_ARG, metadatatype_ARG, metadatacopies_ARG,
+   metadatasize_ARG, metadataignore_ARG, norestorefile_ARG,
+   physicalvolumesize_ARG, pvmetadatacopies_ARG,
    restorefile_ARG, uuidstr_ARG, yes_ARG, zero_ARG)
 
 xx(pvdata,
--- LVM2/tools/pvcreate.c	2010/03/16 15:48:27	1.90
+++ LVM2/tools/pvcreate.c	2010/08/12 04:09:00	1.91
@@ -36,6 +36,16 @@
 		return 0;
 	}
 
+	if (!arg_count(cmd, restorefile_ARG) && arg_count(cmd, uuidstr_ARG)) {
+		if (!arg_count(cmd, norestorefile_ARG) &&
+		    find_config_tree_bool(cmd,
+					  "devices/require_restorefile_with_uuid",
+					  DEFAULT_REQUIRE_RESTOREFILE_WITH_UUID)) {
+			log_error("--restorefile is required with --uuid");
+			return 0;
+		}
+	}
+
 	if (arg_count(cmd, uuidstr_ARG) && argc != 1) {
 		log_error("Can only set uuid on one volume at once");
 		return 0;


