Make tr() a little faster (#2538)

* Make tr() a little faster

* Make tr() a little faster - more consistent whitespace

* Make tr() a little faster - fix translation file generator

* Make tr() a little faster - adapted unit tests
This commit is contained in:
SomeTroglodyte 2020-04-29 18:02:48 +02:00 committed by GitHub
parent cee834b062
commit 4ff1e96892
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 92 additions and 33 deletions

View file

@ -4,11 +4,10 @@ import java.util.HashMap
class TranslationEntry(val entry: String) : HashMap<String, String>() {
/** For memory performance on .tr(), which was atrociously memory-expensive */
var entryWithShortenedSquareBrackets =""
init {
if(entry.contains('['))
entryWithShortenedSquareBrackets=entry.replace(squareBraceRegex,"[]")
}
}
// Now stored in the key of the hashmap storing the instances of this class
// /** For memory performance on .tr(), which was atrociously memory-expensive */
// val entryWithShortenedSquareBrackets =
// if (entry.contains('['))
// entry.replace(squareBraceRegex,"[]")
// else ""
}

View file

@ -76,14 +76,22 @@ object TranslationFileWriter {
}
val translationKey = line.split(" = ")[0].replace("\\n", "\n")
val hashMapKey = if (translationKey.contains('['))
translationKey.replace(squareBraceRegex,"[]")
else translationKey
var translationValue = ""
val translationEntry = translations[translationKey]
val translationEntry = translations[hashMapKey]
if (translationEntry != null && translationEntry.containsKey(language)) {
translationValue = translationEntry[language]!!
translationsOfThisLanguage++
} else stringBuilder.appendln(" # Requires translation!")
// Testing assertion only
if (translationEntry != null && translationEntry.entry != translationKey) {
println("Assertion failure in generateTranslationFiles: translationEntry.entry != translationKey")
}
val lineToWrite = translationKey.replace("\n", "\\n") +
" = " + translationValue.replace("\n", "\\n")
stringBuilder.appendln(lineToWrite)
@ -144,7 +152,10 @@ object TranslationFileWriter {
val jsonParser = JsonParser()
val folderHandler = if(modFolder!=null) getFileHandle(modFolder,"jsons")
else getFileHandle(modFolder, "jsons/Civ V - Vanilla")
val listOfJSONFiles = folderHandler.list{file -> file.name.endsWith(".json", true)}
val listOfJSONFiles = folderHandler
.list{file -> file.name.endsWith(".json", true)}
.sortedBy { it.name() } // generatedStrings maintains order, so let's feed it a predictable one
for (jsonFile in listOfJSONFiles)
{
val filename = jsonFile.nameWithoutExtension()

View file

@ -52,13 +52,18 @@ class Translations : LinkedHashMap<String, TranslationEntry>(){
}
for (translation in languageTranslations) {
if (!containsKey(translation.key))
this[translation.key] = TranslationEntry(translation.key)
// The key for placeholder-containing entries was unused before, let's make it useful
// What was previously stored as entryWithShortenedSquareBrackets is now the key
val hashKey = if (translation.key.contains('['))
translation.key.replace(squareBraceRegex,"[]")
else translation.key
if (!containsKey(hashKey))
this[hashKey] = TranslationEntry(translation.key)
// why not in one line, Because there were actual crashes.
// I'm pretty sure I solved this already, but hey double-checking doesn't cost anything.
val entry = this[translation.key]
if(entry!=null) entry[language] = translation.value
val entry = this[hashKey]
if (entry!=null) entry[language] = translation.value
}
val translationFilesTime = System.currentTimeMillis() - translationStart
@ -137,14 +142,34 @@ class Translations : LinkedHashMap<String, TranslationEntry>(){
}
}
val squareBraceRegex = Regex("\\[(.*?)\\]") // we don't need to allocate different memory for this every time we .tr()
val eitherSquareBraceRegex=Regex("\\[|\\]")
// we don't need to allocate different memory for these every time we .tr() - or recompile them.
// Expect a literal [ followed by a captured () group and a literal ].
// The group may contain any number of any character except ] - pattern [^]]
val squareBraceRegex = Regex("""\[([^]]*)]""")
// Just look for either [ or ]
val eitherSquareBraceRegex = Regex("""\[|]""")
// Analogous as above: Expect a {} pair with any chars but } in between and capture that
val curlyBraceRegex = Regex("""\{([^}]*)}""")
fun String.tr(): String {
// Latest optimizations: cached precompiled curlyBraceRegex and converted loop-based placeholder
// lookup into hash key lookup. Test result: 50s -> 24s
// *Discarded optimizations*:
// String.indexOf(Char) looks like it might be better than String.contains(String),
// (Char test should be cheaper than string comparison, and contains delegates to indexOf anyway)
// but measurements are equal within margin of error.
// Making our get() overload do two hash lookups instead of up to five: Zero difference.
// val entry: TranslationEntry = this[text] ?: return text
// return entry[language] ?: text
// THIS IS INCREDIBLY INEFFICIENT and causes loads of memory problems!
if(contains("[")){ // Placeholders!
if (contains("[")) { // Placeholders!
/**
* I'm SURE there's an easier way to do this but I can't think of it =\
* So what's all this then?
@ -158,12 +183,12 @@ fun String.tr(): String {
* We will find the german placeholder text, and replace the placeholders with what was filled in the text we got!
*/
val translationStringWithSquareBracketsOnly = replace(squareBraceRegex,"[]")
// Convert "work on [building] has completed in [city]" to "work on [] has completed in []"
val translationStringWithSquareBracketsOnly = this.replace(squareBraceRegex,"[]")
// That is now the key into the translation HashMap!
val translationEntry = UncivGame.Current.translations[translationStringWithSquareBracketsOnly]
val translationEntry = UncivGame.Current.translations.values
.firstOrNull { translationStringWithSquareBracketsOnly == it.entryWithShortenedSquareBrackets }
if(translationEntry==null ||
if (translationEntry==null ||
!translationEntry.containsKey(UncivGame.Current.settings.language)){
// Translation placeholder doesn't exist for this language, default to English
return this.replace(eitherSquareBraceRegex,"")
@ -171,18 +196,18 @@ fun String.tr(): String {
val termsInMessage = squareBraceRegex.findAll(this).map { it.groups[1]!!.value }.toList()
val termsInTranslationPlaceholder = squareBraceRegex.findAll(translationEntry.entry).map { it.value }.toList()
if(termsInMessage.size!=termsInTranslationPlaceholder.size)
if (termsInMessage.size!=termsInTranslationPlaceholder.size)
throw Exception("Message $this has a different number of terms than the placeholder $translationEntry!")
var languageSpecificPlaceholder = translationEntry[UncivGame.Current.settings.language]!!
for(i in termsInMessage.indices){
for (i in termsInMessage.indices) {
languageSpecificPlaceholder = languageSpecificPlaceholder.replace(termsInTranslationPlaceholder[i], termsInMessage[i].tr())
}
return languageSpecificPlaceholder.tr()
return languageSpecificPlaceholder // every component is already translated
}
if(contains("{")){ // sentence
return Regex("\\{(.*?)\\}").replace(this) { it.groups[1]!!.value.tr() }
if (contains("{")) { // sentence
return curlyBraceRegex.replace(this) { it.groups[1]!!.value.tr() }
}
return UncivGame.Current.translations

View file

@ -46,11 +46,13 @@ class TranslationTests {
}
private fun allStringAreTranslated(strings: Set<String>): Boolean {
val squareBraceRegex = Regex("""\[([^]]*)]""")
var allStringsHaveTranslation = true
for (key in strings) {
for (entry in strings) {
val key = if(entry.contains('[')) entry.replace(squareBraceRegex,"[]") else entry
if (!translations.containsKey(key)) {
allStringsHaveTranslation = false
println(key)
println(entry)
}
}
return allStringsHaveTranslation
@ -72,12 +74,14 @@ class TranslationTests {
var allTranslationsHaveCorrectPlaceholders = true
val languages = translations.getLanguages()
for (key in translations.keys) {
val placeholders = placeholderPattern.findAll(key).map { it.value }.toList()
val translationEntry = translations[key]!!.entry
val placeholders = placeholderPattern.findAll(translationEntry).map { it.value }.toList()
for (language in languages) {
for (placeholder in placeholders) {
if (!translations.get(key, language).contains(placeholder)) {
val output = translations.get(translationEntry, language)
if (!output.contains(placeholder)) {
allTranslationsHaveCorrectPlaceholders = false
println("Placeholder `$placeholder` not found in `$language` for key `$key`")
println("Placeholder `$placeholder` not found in `$language` for entry `$translationEntry`")
}
}
}
@ -88,6 +92,26 @@ class TranslationTests {
)
}
@Test
fun allPlaceholderKeysMatchEntry() {
val squareBraceRegex = Regex("""\[([^]]*)]""")
var allPlaceholderKeysMatchEntry = true
for (key in translations.keys) {
if ( !key.contains('[') ) continue
val translationEntry = translations[key]!!.entry
val keyFromEntry = translationEntry.replace(squareBraceRegex,"[]")
if (key != keyFromEntry) {
allPlaceholderKeysMatchEntry = false
println("Entry $translationEntry found under bad key $key")
break
}
}
Assert.assertTrue(
"This test will only pass when all placeholder translations'keys match their entry with shortened placeholders",
allPlaceholderKeysMatchEntry
)
}
@Test
fun allTranslationsEndWithASpace() {
val templateLines = Gdx.files.internal(TranslationFileWriter.templateFileLocation).reader().readLines()
@ -100,4 +124,4 @@ class TranslationTests {
}
Assert.assertFalse(failed)
}
}
}