From 14645e7643103a0ce9eccbc3a44773e76b4674a9 Mon Sep 17 00:00:00 2001
From: Thomas Bellman <bellman@nsc.liu.se>
Date: Fri, 12 Aug 2022 19:15:08 +0200
Subject: [PATCH] Add function llnl_hostlist_expand().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function takes an "LLNL hostlist" as parameter, and returns the
expansion of it as a list of the items the parameter represents.

An LLNL hostlist is a compact string representation for a set of many
items (typically host names), in particular when the items are names
in combination with one or more sequence numbers.  A few examples:

    LLNL hostlist: foo[8-11]
    Meaning:       foo8, foo9, foo10, foo11

    LLNL hostlist: rack[01-3]-node[01-2,5]
    Meaning:       rack01-node01, rack01-node02, rack01-node5,
                   rack02-node01, rack02-node02, rack02-node5,
                   rack03-node01, rack03-node02, rack03-node5

    LLNL hostlist: d[1-5],n[01-05]
    Meaning:       d1, d2, d3, d4, d5, n01, n02, n03, n04, n05

This syntax is used by several programs originating at the Lawrence
Livermore National Laboratory, used for dealing with HPC clusters,
e.g. the batch queue system SLURM, or the pdsh program for running
a command on many nodes using ssh in parallel.

The implementation of this function is a pretty straight translation
of the "python-hostlist" Python module into Ruby.  Python-hostlist is
written mainly by Kent Engström at National Supercomputer Centre (NSC)
in Sweden.  It is free software, released under the GNU General Public License
version 2 or later; thus we can release it here under GPL v3+.  Python-
hostlist can be found at https://www.nsc.liu.se/~kent/python-hostlist/,
with Git repository at git://www.nsc.liu.se/~kent/python-hostlist.git.

There are half a dozen people who have made contributions to the
python-hostlist package, but the only ones who have touched the code
doing the actual parsing and expansion of LLNL hostlists are Kent and
Torbjörn Lönnemark, thus they are the only ones included in the list
of copyright holders at the beginning of the file (plus me, who have
done the translation into Ruby).

A difference from the python-hostlist function, is that this version
*always* sorts the result, and *always* removes duplicates.  No option
is offered for those, as I believe there are no real use-cases for not
sorting or for keeping duplicates.
---
 .../parser/functions/llnl_hostlist_expand.rb  | 218 ++++++++++++++++++
 1 file changed, 218 insertions(+)
 create mode 100644 lib/puppet/parser/functions/llnl_hostlist_expand.rb

diff --git a/lib/puppet/parser/functions/llnl_hostlist_expand.rb b/lib/puppet/parser/functions/llnl_hostlist_expand.rb
new file mode 100644
index 0000000..9699bc4
--- /dev/null
+++ b/lib/puppet/parser/functions/llnl_hostlist_expand.rb
@@ -0,0 +1,218 @@
+#	-*- coding: utf-8 -*-
+#
+# Copyright (C) 2008-2022
+#		Thomas Bellman, National Supercomputer Centre, Sweden
+#		Kent Engström, National Supercomputer Centre, Sweden
+#		Torbjörn Lönnemark, National Supercomputer Centre, Sweden
+#
+# Licensed under the GNU GPL v3+; see the README file for more information.
+#
+# This implementation is heavily based on the python-hostlist Python
+# module by the above people (mostly Kent Engström).  It can be found
+# at https://www.nsc.liu.se/~kent/python-hostlist/.
+
+
+module Puppet::Parser::Functions
+    newfunction(:llnl_hostlist_expand, :type => :rvalue, :doc => "\
+	Expand an LLNL hostlist expression into a list of individual names.
+
+	(Note: While this talks about 'hosts', there is no connection with
+	e.g. DNS.  These are just strings, and you can use them to name
+	anything or nothing.)
+
+	A couple of examples probably is the easiest way to explain:
+
+	    llnl_hostlist_expand('n[8-11]')
+	      ==>  ['n8', 'n9', 'n10', 'n11']
+
+	    llnl_hostlist_expand('n[008-11]')
+	      ==>  ['n008', 'n009', 'n010', 'n011']
+
+	    llnl_hostlist_expand('n[6-8,01-3]')
+	      ==>  ['n01', 'n02', 'n03', 'n6', 'n7', 'n8']
+
+	    llnl_hostlist_expand('n[1-3]b,x[1-3].[07-08],a[30,20,10]')
+	      ==>  ['a10', 'a20', 'a30', 'n1b', 'n2b', 'n3b',
+		    'x1.07', 'x1.08', 'x2.07', 'x2.08', 'x3.07', 'x3.08']
+
+	The resulting lists are always sorted in a \"natural\" order, and
+	duplicates are removed.
+
+	This syntax is used by several programs originating at the Lawrence
+	Livermore National Laboratory, e.g. SLURM and pdsh.
+    ") \
+    do |args|
+	# Exactly one argument, the hostlist expression, is accepted
+	raise(Puppet::ParseError,
+	      "llnl_hostlist_expand(): Wrong number of arguments") \
+	    unless args.length == 1
+	# Hand the expression over to the helper module for expansion
+	NSC_Utils::llnl_hostlist_expand(args.first)
+    end
+end
+
+
+# Helper functions, doing the actual work
+module NSC_Utils
+
+    # Guard against ridiculously long expanded lists
+    LLNL_HOSTLIST_MAXSIZE = 100000
+
+    # Exception raised for bad hostlists
+    class BadLLNLHostlist < RuntimeError; end
+
+
+    # Expand a hostlist expression string to a sorted Array of names.
+    #
+    # Example: llnl_hostlist_expand("n[9-11],d[01-02]") ==>
+    #          ['d01', 'd02', 'n9', 'n10', 'n11']
+    #
+    # Duplicates will be removed, and the results will be sorted.
+    # Raises BadLLNLHostlist if the expression is malformed.
+    def llnl_hostlist_expand(hostlist)
+
+	results = []
+	bracket_level = 0
+	part = ""
+
+	(hostlist+",").each_char do |c|
+	    if c == "," && bracket_level == 0
+		# Comma at top level, split!
+		if part != ""
+		    results += NSC_Utils::__hostlist_expand_part(part)
+		end
+		part = ""
+	    else
+		part += c
+	    end
+
+	    if c == "["
+		bracket_level += 1
+	    elsif c == "]"
+		bracket_level -= 1
+	    end
+
+	    if bracket_level > 1
+		raise(NSC_Utils::BadLLNLHostlist, "nested brackets")
+	    elsif bracket_level < 0
+		raise(NSC_Utils::BadLLNLHostlist, "unbalanced brackets")
+	    end
+
+	end
+
+	if bracket_level > 0
+	    raise(NSC_Utils::BadLLNLHostlist, "unbalanced brackets")
+	end
+
+	results.uniq!
+
+	# Sort the results in a "natural" order, making sure that e.g.
+	# "n9" comes before "n10".
+	#
+	# Split names into a list of alternating numerical (decimal) and
+	# non-numerical parts, convert the numerical parts into Integers,
+	# and compare the lists.  This splitting will result in an empty
+	# string first if the name starts with a number, which means that
+	# the list comparison will always compare elements of equal types.
+	#
+	# Each sort key is computed once and paired with the original name;
+	# after sorting, the names themselves are returned.  Rebuilding the
+	# names from the keys would strip leading zeros ("n008" => "n8").
+	#
+	results.collect! { |name|
+	    [ name.split(/([0-9]+)/).collect { |piece|
+		  /^[0-9]+$/ =~ piece ? piece.to_i(10) : piece
+	      }, name ]
+	}
+	results.sort!
+	results.collect! { |_key, name| name }
+
+	return results
+
+    end
+    module_function :llnl_hostlist_expand
+
+
+    # Expand a part (e.g. "x[1-2]y[1-3][1-3]") (no outer level commas).
+    #
+    def __hostlist_expand_part(s)
+
+	# Base case: the empty part expands to the singleton list of ""
+	return [""]  if s == ""
+
+	# Split into:
+	# 1) prefix string (may be empty)
+	# 2) rangelist in brackets (may be missing)
+	# 3) the rest
+	# ("." must match newlines too, or text after one would be dropped)
+	/\A([^,\[]*)(\[[^\]]*\])?(.*)/m =~ s
+	prefix, rangelist, rest = $1, $2, $3
+
+	# Expand the rest first (here is where we recurse!)
+	rest_expanded = NSC_Utils::__hostlist_expand_part(rest)
+
+	# Expand our own part
+	if rangelist.nil?
+	    # If there is no rangelist, our own contribution is the prefix only
+	    us_expanded = [prefix]
+	else
+	    # Otherwise expand the rangelist (adding the prefix before)
+	    us_expanded = NSC_Utils::__hostlist_expand_rangelist(
+		prefix, rangelist[1..-2])
+	end
+
+	# Combine our list with the list from the expansion of the rest
+	# (but guard against too large results first)
+	if us_expanded.length * rest_expanded.length > NSC_Utils::LLNL_HOSTLIST_MAXSIZE
+	    raise(NSC_Utils::BadLLNLHostlist, "results too large")
+	end
+
+	result = us_expanded.product(rest_expanded).collect {
+	    |us_part, rest_part| us_part + rest_part
+	}
+	return result
+
+    end
+    module_function :__hostlist_expand_part
+
+
+    # Expand a rangelist (e.g. "1-10,14"), putting a prefix before.
+    # Empty fields ("[]", "[1,]") are kept so that they get rejected as bad ranges.
+    def __hostlist_expand_rangelist(prefix, rangelist)
+	ranges = rangelist.empty? ? [""] : rangelist.split(",", -1)
+	results = []
+	ranges.each do |range_|
+	    results += NSC_Utils::__hostlist_expand_range(prefix, range_)
+	end
+	return results
+    end
+    module_function :__hostlist_expand_rangelist
+
+
+    # Expand a range (e.g. 1-10 or 14), putting a prefix before.
+    # Zero-padding follows the width of the lower bound ("01-3" => 01, 02, 03).
+    def __hostlist_expand_range(prefix, range_)
+
+	# Check for a single number first (\A and \z anchor the whole string)
+	return [ prefix + range_ ]  if /\A[0-9]+\z/ =~ range_
+
+	# Otherwise split low-high
+	if /\A([0-9]+)-([0-9]+)\z/ !~ range_
+	    raise(NSC_Utils::BadLLNLHostlist, "bad range")
+	end
+	width = $1.length
+	low, high = $1.to_i, $2.to_i
+	if high < low
+	    raise(NSC_Utils::BadLLNLHostlist, "start > stop")
+	elsif high - low > NSC_Utils::LLNL_HOSTLIST_MAXSIZE
+	    raise(NSC_Utils::BadLLNLHostlist, "range too large")
+	end
+
+	results = (low .. high).collect { |i|
+	    "%s%0*d" % [ prefix, width, i ]
+	}
+	return results
+    end
+    module_function :__hostlist_expand_range
+
+end
-- 
GitLab