add utils::split_set and move split_foreach

this also improves the performance of utils::split because it no longer uses stringstreams.
2025-05-21 16:52:21 +00:00 · 2020-05-21 23:05:08 +02:00 · 2020-05-21 23:05:08 +02:00 · 89e56d8fb9
commit 89e56d8fb9
parent fa08ff291b
10 changed files with 84 additions and 63 deletions
--- a/src/ai/formula/ai.cpp
+++ b/src/ai/formula/ai.cpp
@ -675,7 +675,7 @@ void formula_ai::on_create(){
 		const t_string &inputs = func["inputs"];
 		const t_string &formula_str = func["formula"];

-		std::vector<std::string> args = utils::split(inputs);
+		std::vector<std::string> args = utils::split(inputs.str());
 		try {
 			add_formula_function(name,
 					     create_optional_formula(formula_str),
--- a/src/config_attribute_value.cpp
+++ b/src/config_attribute_value.cpp
@ -22,6 +22,7 @@

 #include "lexical_cast.hpp"
 #include "log.hpp"
+#include "serialization/string_utils.hpp"
 #include "utils/const_clone.hpp"
 #include "utils/functional.hpp"

@ -425,3 +426,10 @@ std::ostream& operator<<(std::ostream& os, const config_attribute_value& v)
 	// involved.
 	return os << v.value_;
 }
+
+namespace utils
+{
+	std::vector<std::string> split(const config_attribute_value& val) { // overload so attribute values can be split without an explicit .str() at the call site
+		return utils::split(val.str()); // only the string is passed, so the string overload's default separator and flags apply
+	}
+}
--- a/src/config_attribute_value.hpp
+++ b/src/config_attribute_value.hpp
@ -240,3 +240,9 @@ private:
 	static const std::string s_yes, s_no;
 	static const std::string s_true, s_false;
 };
+
+namespace utils
+{
+	std::vector<std::string> split(const config_attribute_value& val);
+}
+
--- a/src/game_initialization/depcheck.cpp
+++ b/src/game_initialization/depcheck.cpp
@ -183,7 +183,7 @@ std::vector<std::string> manager::get_required(const elem& e) const
 	config data = depinfo_.find_child(e.type, "id", e.id);

 	if(data.has_attribute("force_modification")) {
-		result = utils::split(data["force_modification"], ',');
+		result = utils::split(data["force_modification"].str(), ',');
 	}

 	return result;
--- a/src/preferences/game.cpp
+++ b/src/preferences/game.cpp
@ -110,8 +110,7 @@ manager::manager() :
 		}
 	}

-	const std::vector<std::string> v (utils::split(preferences::get("encountered_units")));
-	encountered_units_set.insert(v.begin(), v.end());
+	encountered_units_set = utils::split_set(preferences::get("encountered_units"));

 	const t_translation::ter_list terrain (t_translation::read_list(preferences::get("encountered_terrain_list")));
 	encountered_terrains_set.insert(terrain.begin(), terrain.end());
@ -145,8 +144,8 @@ manager::~manager()
 		campaigns.add_child("campaign", cmp);
 	}
 	preferences::set_child("completed_campaigns", campaigns);
-	std::vector<std::string> v (encountered_units_set.begin(), encountered_units_set.end());
-	preferences::set("encountered_units", utils::join(v));
+
+	preferences::set("encountered_units", utils::join(encountered_units_set));
 	t_translation::ter_list terrain (encountered_terrains_set.begin(), encountered_terrains_set.end());
 	preferences::set("encountered_terrain_list", t_translation::write_list(terrain));

--- a/src/scripting/lua_terrainfilter.cpp
+++ b/src/scripting/lua_terrainfilter.cpp
@ -22,6 +22,7 @@
 #include "scripting/lua_common.hpp"
 #include "scripting/push_check.hpp"
 #include "scripting/game_lua_kernel.hpp"
+#include "serialization/string_utils.hpp"

 #include "formula/callable_objects.hpp"
 #include "formula/formula.hpp"
@ -67,39 +68,6 @@ namespace utils {
 }
 //helper functions for parsing
 namespace {
-	bool iswhitespace(char c)
-	{
-		return c == ' ' || c == '\t' || c == '\n' || c == '\r';
-	}
-
-	void trim(string_view& s)
-	{
-		while(!s.empty() && iswhitespace(s.front())) {
-			s.remove_prefix(1);
-		}
-		while(!s.empty() && iswhitespace(s.back())) {
-			s.remove_suffix(1);
-		}
-	}
-
-	template<typename F>
-	void split_foreach(string_view s, char sep, const F& f)
-	{
-		if(s.empty()) {
-			return;
-		}
-		while(true)
-		{
-			int partend = s.find(sep);
-			if(partend == int(string_view::npos)) {
-				break;
-			}
-			f(s.substr(0, partend));
-			s.remove_prefix(partend + 1);
-		}
-		f(s);
-	}
-
 	int atoi(string_view s)
 	{
 		if(s.empty()) {
@ -128,8 +96,7 @@ namespace {
 	dynamic_bitset parse_range(string_view s)
 	{
 		dynamic_bitset res;
-		split_foreach(s, ',', [&](string_view part){
-			trim(part);
+		utils::split_foreach(s, ',', utils::STRIP_SPACES, [&](string_view part){
 			auto pair = parse_single_range(part);
 			int m = std::max(pair.first, pair.second);
 			if(m >= int(res.size())) {
@ -188,8 +155,7 @@ namespace {

 	void parse_rel_sequence(string_view s, offset_list_t& even, offset_list_t& odd)
 	{
-		split_foreach(s, ',', [&](string_view part){
-			trim(part);
+		utils::split_foreach(s, ',', utils::STRIP_SPACES, [&](string_view part){
 			parse_rel(part, even, odd);
 		});
 	}
--- a/src/serialization/schema/tag.cpp
+++ b/src/serialization/schema/tag.cpp
@ -394,7 +394,7 @@ void wml_tag::add_switch(const config& switch_cfg)
 	bool allow_missing = false;
 	for(const auto& case_cfg : switch_cfg.child_range("case")) {
 		if(case_cfg.has_attribute("value")) {
-			const std::vector<std::string> values = utils::split(case_cfg["value"], ',', utils::STRIP_SPACES);
+			const std::vector<std::string> values = utils::split(case_cfg["value"].str(), ',', utils::STRIP_SPACES);
 			config filter;
 			for(const auto& value : values) {
 				// An [or] filter only works if there's something in the main filter.
--- a/src/serialization/schema_validator.cpp
+++ b/src/serialization/schema_validator.cpp
@ -504,7 +504,7 @@ void schema_self_validator::validate_key(const config& cfg, const std::string& n
 			referenced_types_.emplace_back(cfg["link"], file, start_line, tag_name);
 		} else if(tag_name == "link" && name == "name") {
 			referenced_tag_paths_.emplace_back(cfg["name"], file, start_line, tag_name);
-			std::string link_name = utils::split(cfg["name"], '/').back();
+			std::string link_name = utils::split(cfg["name"].str(), '/').back();
 			links_.emplace(current_path() + "/" + link_name, cfg["name"]);
 		} else if(tag_name == "tag" && name == "super") {
 			for(auto super : utils::split(cfg["super"])) {
--- a/src/serialization/string_utils.cpp
+++ b/src/serialization/string_utils.cpp
@ -59,6 +59,17 @@ bool notspace(const char c)
 	return !portable_isspace(c);
 }

+void trim(string_view& s) // Narrows s in place so it neither starts nor ends with ' ', '\t', '\r' or '\n'.
+{
+	s.remove_prefix(std::min(s.find_first_not_of(" \t\r\n"), s.size())); // npos (nothing but whitespace) is clamped to size(), which empties s
+	if(s.empty()) {
+		return;
+	}
+	// s is non-empty and now starts with a non-whitespace character, so find_last_not_of cannot return npos
+	size_t first_to_trim = s.find_last_not_of(" \t\r\n") + 1; // index one past the last character that is kept
+	s = s.substr(0, first_to_trim);
+}
+
 /**
 * Splits a (comma-)separated string into a vector of pieces.
 * @param[in]  val    A (comma-)separated string.
@ -67,27 +78,22 @@ bool notspace(const char c)
 *                    This is a bit field with two settings (both on by default):
 *                    REMOVE_EMPTY causes empty pieces to be skipped/removed.
 *                    STRIP_SPACES causes the leading and trailing spaces of each piece to be ignored/stripped.
- *
- *                    Basic method taken from http://stackoverflow.com/a/236803
 */
-std::vector<std::string> split(const std::string& val, const char c, const int flags)
+std::vector<std::string> split(string_view s, const char sep, const int flags)
 {
 	std::vector<std::string> res;
+	split_foreach(s, sep, flags, [&](string_view item) {
+		res.emplace_back(item);
+	});
+	return res;
+}

-	std::stringstream ss;
-	ss.str(val);
-
-	std::string item;
-	while(std::getline(ss, item, c)) {
-		if(flags & STRIP_SPACES) {
-			boost::trim(item);
-		}
-
-		if(!(flags & REMOVE_EMPTY) || !item.empty()) {
-			res.push_back(std::move(item));
-		}
-	}
-
+std::set<std::string> split_set(string_view s, char sep, const int flags) // Like split(), but gathers the pieces into a std::set instead of a vector.
+{
+	std::set<std::string> res;
+	split_foreach(s, sep, flags, [&](string_view item) {
+		res.emplace(item); // a piece equal to one already stored is not inserted again
+	});
 	return res;
 }

--- a/src/serialization/string_utils.hpp
+++ b/src/serialization/string_utils.hpp
@ -42,8 +42,44 @@ enum {
 	STRIP_SPACES = 0x02  /** STRIP_SPACES: strips leading and trailing blank spaces. */
 };

+void trim(string_view& s);
+
+template<typename F>
+void split_foreach_impl(string_view s, char sep, const F& f) // Calls f on every sep-delimited piece of s, left to right, without trimming or filtering.
+{
+	if(s.empty()) {
+		return; // empty input produces no pieces at all, not one empty piece
+	}
+	while(true)
+	{
+		const string_view::size_type partend = s.find(sep); // not int: offsets past INT_MAX must survive intact
+		if(partend == string_view::npos) {
+			break;
+		}
+		f(s.substr(0, partend));
+		s.remove_prefix(partend + 1); // the + 1 also drops the separator itself
+	}
+	f(s); // whatever follows the last separator, possibly an empty piece
+}
+
+template<typename F>
+void split_foreach(string_view s, char sep, const int flags, const F& f) // Calls f on each sep-separated piece of s, after applying the STRIP_SPACES / REMOVE_EMPTY flags.
+{
+	split_foreach_impl(s, sep, [&](string_view item) {
+		if(flags & STRIP_SPACES) {
+			trim(item); // item is a by-value view, so this only narrows the local copy
+		}
+		if(!(flags & REMOVE_EMPTY) || !item.empty()) { // checked after trimming, so with both flags set a whitespace-only piece is skipped
+			f(item);
+		}
+	});
+}
+
+
+
 /** Splits a (comma-)separated string into a vector of pieces. */
-std::vector<std::string> split(const std::string& val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
+std::vector<std::string> split(string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
+std::set<std::string> split_set(string_view val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);

 /**
 * This function is identical to split(), except it does not split when it otherwise would if the