From 4ff1e96892fff90c317c5d46ea47d3d91c02548a Mon Sep 17 00:00:00 2001 From: SomeTroglodyte <63000004+SomeTroglodyte@users.noreply.github.com> Date: Wed, 29 Apr 2020 18:02:48 +0200 Subject: [PATCH] Make tr() a little faster (#2538) * Make tr() a little faster * Make tr() a little faster - more consistent whitespace * Make tr() a little faster - fix translation file generator * Make tr() a little faster - adapted unit tests --- .../models/translations/TranslationEntry.kt | 15 +++-- .../translations/TranslationFileWriter.kt | 15 ++++- .../unciv/models/translations/Translations.kt | 59 +++++++++++++------ .../src/com/unciv/testing/TranslationTests.kt | 36 +++++++++-- 4 files changed, 92 insertions(+), 33 deletions(-) diff --git a/core/src/com/unciv/models/translations/TranslationEntry.kt b/core/src/com/unciv/models/translations/TranslationEntry.kt index 33dd9472..64e7bc68 100644 --- a/core/src/com/unciv/models/translations/TranslationEntry.kt +++ b/core/src/com/unciv/models/translations/TranslationEntry.kt @@ -4,11 +4,10 @@ import java.util.HashMap class TranslationEntry(val entry: String) : HashMap() { - /** For memory performance on .tr(), which was atrociously memory-expensive */ - var entryWithShortenedSquareBrackets ="" - - init { - if(entry.contains('[')) - entryWithShortenedSquareBrackets=entry.replace(squareBraceRegex,"[]") - } -} \ No newline at end of file + // Now stored in the key of the hashmap storing the instances of this class +// /** For memory performance on .tr(), which was atrociously memory-expensive */ +// val entryWithShortenedSquareBrackets = +// if (entry.contains('[')) +// entry.replace(squareBraceRegex,"[]") +// else "" +} diff --git a/core/src/com/unciv/models/translations/TranslationFileWriter.kt b/core/src/com/unciv/models/translations/TranslationFileWriter.kt index 5e9ae8de..40b391a8 100644 --- a/core/src/com/unciv/models/translations/TranslationFileWriter.kt +++ b/core/src/com/unciv/models/translations/TranslationFileWriter.kt @@ -76,14 +76,22 @@ object TranslationFileWriter { } val translationKey = line.split(" = ")[0].replace("\\n", "\n") + val hashMapKey = if (translationKey.contains('[')) + translationKey.replace(squareBraceRegex,"[]") + else translationKey var translationValue = "" - val translationEntry = translations[translationKey] + val translationEntry = translations[hashMapKey] if (translationEntry != null && translationEntry.containsKey(language)) { translationValue = translationEntry[language]!! translationsOfThisLanguage++ } else stringBuilder.appendln(" # Requires translation!") + // Testing assertion only + if (translationEntry != null && translationEntry.entry != translationKey) { + println("Assertion failure in generateTranslationFiles: translationEntry.entry != translationKey") + } + val lineToWrite = translationKey.replace("\n", "\\n") + " = " + translationValue.replace("\n", "\\n") stringBuilder.appendln(lineToWrite) @@ -144,7 +152,10 @@ object TranslationFileWriter { val jsonParser = JsonParser() val folderHandler = if(modFolder!=null) getFileHandle(modFolder,"jsons") else getFileHandle(modFolder, "jsons/Civ V - Vanilla") - val listOfJSONFiles = folderHandler.list{file -> file.name.endsWith(".json", true)} + val listOfJSONFiles = folderHandler + .list{file -> file.name.endsWith(".json", true)} + .sortedBy { it.name() } // generatedStrings maintains order, so let's feed it a predictable one + for (jsonFile in listOfJSONFiles) { val filename = jsonFile.nameWithoutExtension() diff --git a/core/src/com/unciv/models/translations/Translations.kt b/core/src/com/unciv/models/translations/Translations.kt index 6c3014a2..25031e24 100644 --- a/core/src/com/unciv/models/translations/Translations.kt +++ b/core/src/com/unciv/models/translations/Translations.kt @@ -52,13 +52,18 @@ class Translations : LinkedHashMap(){ } for (translation in languageTranslations) { - if (!containsKey(translation.key)) - this[translation.key] = TranslationEntry(translation.key) + // The key for placeholder-containing entries was unused before, let's make it useful + // What was previously stored as entryWithShortenedSquareBrackets is now the key + val hashKey = if (translation.key.contains('[')) + translation.key.replace(squareBraceRegex,"[]") + else translation.key + if (!containsKey(hashKey)) + this[hashKey] = TranslationEntry(translation.key) // why not in one line, Because there were actual crashes. // I'm pretty sure I solved this already, but hey double-checking doesn't cost anything. - val entry = this[translation.key] - if(entry!=null) entry[language] = translation.value + val entry = this[hashKey] + if (entry!=null) entry[language] = translation.value } val translationFilesTime = System.currentTimeMillis() - translationStart @@ -137,14 +142,34 @@ class Translations : LinkedHashMap(){ } } -val squareBraceRegex = Regex("\\[(.*?)\\]") // we don't need to allocate different memory for this every time we .tr() -val eitherSquareBraceRegex=Regex("\\[|\\]") +// we don't need to allocate different memory for these every time we .tr() - or recompile them. + + // Expect a literal [ followed by a captured () group and a literal ]. + // The group may contain any number of any character except ] - pattern [^]] +val squareBraceRegex = Regex("""\[([^]]*)]""") + + // Just look for either [ or ] +val eitherSquareBraceRegex = Regex("""\[|]""") + + // Analogous as above: Expect a {} pair with any chars but } in between and capture that +val curlyBraceRegex = Regex("""\{([^}]*)}""") + fun String.tr(): String { + // Latest optimizations: cached precompiled curlyBraceRegex and converted loop-based placeholder + // lookup into hash key lookup. Test result: 50s -> 24s + // *Discarded optimizations*: + // String.indexOf(Char) looks like it might be better than String.contains(String), + // (Char test should be cheaper than string comparison, and contains delegates to indexOf anyway) + // but measurements are equal within margin of error. + // Making our get() overload do two hash lookups instead of up to five: Zero difference. + // val entry: TranslationEntry = this[text] ?: return text + // return entry[language] ?: text + // THIS IS INCREDIBLY INEFFICIENT and causes loads of memory problems! - if(contains("[")){ // Placeholders! + if (contains("[")) { // Placeholders! /** * I'm SURE there's an easier way to do this but I can't think of it =\ * So what's all this then? @@ -158,12 +183,12 @@ fun String.tr(): String { * We will find the german placeholder text, and replace the placeholders with what was filled in the text we got! */ - val translationStringWithSquareBracketsOnly = replace(squareBraceRegex,"[]") + // Convert "work on [building] has completed in [city]" to "work on [] has completed in []" + val translationStringWithSquareBracketsOnly = this.replace(squareBraceRegex,"[]") + // That is now the key into the translation HashMap! + val translationEntry = UncivGame.Current.translations[translationStringWithSquareBracketsOnly] - val translationEntry = UncivGame.Current.translations.values - .firstOrNull { translationStringWithSquareBracketsOnly == it.entryWithShortenedSquareBrackets } - - if(translationEntry==null || + if (translationEntry==null || !translationEntry.containsKey(UncivGame.Current.settings.language)){ // Translation placeholder doesn't exist for this language, default to English return this.replace(eitherSquareBraceRegex,"") @@ -171,18 +196,18 @@ fun String.tr(): String { val termsInMessage = squareBraceRegex.findAll(this).map { it.groups[1]!!.value }.toList() val termsInTranslationPlaceholder = squareBraceRegex.findAll(translationEntry.entry).map { it.value }.toList() - if(termsInMessage.size!=termsInTranslationPlaceholder.size) + if (termsInMessage.size!=termsInTranslationPlaceholder.size) throw Exception("Message $this has a different number of terms than the placeholder $translationEntry!") var languageSpecificPlaceholder = translationEntry[UncivGame.Current.settings.language]!! - for(i in termsInMessage.indices){ + for (i in termsInMessage.indices) { languageSpecificPlaceholder = languageSpecificPlaceholder.replace(termsInTranslationPlaceholder[i], termsInMessage[i].tr()) } - return languageSpecificPlaceholder.tr() + return languageSpecificPlaceholder // every component is already translated } - if(contains("{")){ // sentence - return Regex("\\{(.*?)\\}").replace(this) { it.groups[1]!!.value.tr() } + if (contains("{")) { // sentence + return curlyBraceRegex.replace(this) { it.groups[1]!!.value.tr() } } return UncivGame.Current.translations diff --git a/tests/src/com/unciv/testing/TranslationTests.kt b/tests/src/com/unciv/testing/TranslationTests.kt index c09e27d9..451376d5 100644 --- a/tests/src/com/unciv/testing/TranslationTests.kt +++ b/tests/src/com/unciv/testing/TranslationTests.kt @@ -46,11 +46,13 @@ class TranslationTests { } private fun allStringAreTranslated(strings: Set): Boolean { + val squareBraceRegex = Regex("""\[([^]]*)]""") var allStringsHaveTranslation = true - for (key in strings) { + for (entry in strings) { + val key = if(entry.contains('[')) entry.replace(squareBraceRegex,"[]") else entry if (!translations.containsKey(key)) { allStringsHaveTranslation = false - println(key) + println(entry) } } return allStringsHaveTranslation @@ -72,12 +74,14 @@ class TranslationTests { var allTranslationsHaveCorrectPlaceholders = true val languages = translations.getLanguages() for (key in translations.keys) { - val placeholders = placeholderPattern.findAll(key).map { it.value }.toList() + val translationEntry = translations[key]!!.entry + val placeholders = placeholderPattern.findAll(translationEntry).map { it.value }.toList() for (language in languages) { for (placeholder in placeholders) { - if (!translations.get(key, language).contains(placeholder)) { + val output = translations.get(translationEntry, language) + if (!output.contains(placeholder)) { allTranslationsHaveCorrectPlaceholders = false - println("Placeholder `$placeholder` not found in `$language` for key `$key`") + println("Placeholder `$placeholder` not found in `$language` for entry `$translationEntry`") } } } @@ -88,6 +92,26 @@ class TranslationTests { ) } + @Test + fun allPlaceholderKeysMatchEntry() { + val squareBraceRegex = Regex("""\[([^]]*)]""") + var allPlaceholderKeysMatchEntry = true + for (key in translations.keys) { + if ( !key.contains('[') ) continue + val translationEntry = translations[key]!!.entry + val keyFromEntry = translationEntry.replace(squareBraceRegex,"[]") + if (key != keyFromEntry) { + allPlaceholderKeysMatchEntry = false + println("Entry $translationEntry found under bad key $key") + break + } + } + Assert.assertTrue( + "This test will only pass when all placeholder translations'keys match their entry with shortened placeholders", + allPlaceholderKeysMatchEntry + ) + } + @Test fun allTranslationsEndWithASpace() { val templateLines = Gdx.files.internal(TranslationFileWriter.templateFileLocation).reader().readLines() @@ -100,4 +124,4 @@ class TranslationTests { } Assert.assertFalse(failed) } -} \ No newline at end of file +}