Skip to content

Commit

Permalink
Add Catalog::UsesSymbolicIDsForSource()
Browse files Browse the repository at this point in the history
Detect use of symbolic identifiers (or keys) for msgid. Gettext is
sometimes misused this way and it is prevalent in JSON.
  • Loading branch information
vslavik committed Mar 21, 2023
1 parent 69c7cfb commit 7053a1f
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 11 deletions.
49 changes: 38 additions & 11 deletions src/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ wxString FixBrokenSearchPathValue(wxString p)
return p;
}

// Detect whether source strings are just IDs instead of actual text.
//
// Returns true only if the catalog has at least one item and none of the
// items' source strings contains a space or a non-ASCII character.
// Returns false otherwise, including for a catalog with no items.
bool DetectUseOfSymbolicIDs(Catalog& cat)
{
    // Employ a simple heuristic: IDs won't contain whitespace.
    // This is not enough as is, because some (notably Asian) languages don't use
    // whitespace, so also check for use of ASCII characters only. Typical non-symbolic
    // files will fail at least one of the tests in most of their strings.
    //
    // An empty catalog offers no evidence either way, so track whether anything
    // was inspected at all; otherwise the test below would be vacuously
    // satisfied and a catalog without any strings would be reported as symbolic.
    bool inspectedAnyItem = false;

    for (auto& i: cat.items())
    {
        inspectedAnyItem = true;

        for (auto c: i->GetString())
        {
            // A space or a character outside of ASCII indicates real text.
            if (c == ' ' || c >= 0x80)
                return false;
        }
    }

    if (!inspectedAnyItem)
        return false;

    wxLogTrace("poedit", "detected use of symbolic IDs for source language");
    return true;
}

} // anonymous namespace


Expand Down Expand Up @@ -1088,18 +1109,24 @@ void Catalog::PostCreation()
{
if (!m_sourceLanguage.IsValid())
{
// detect source language from the text (ignoring plurals for simplicity,
// as we don't need 100% of the text):
wxString allText;
for (auto& i: items())
{
allText.append(i->GetString());
allText.append('\n');
}
if (!allText.empty())
if (!m_sourceIsSymbolicID)
m_sourceIsSymbolicID = DetectUseOfSymbolicIDs(*this);

if (!m_sourceIsSymbolicID)
{
m_sourceLanguage = Language::TryDetectFromText(allText.utf8_str());
wxLogTrace("poedit", "detected source language is '%s'", m_sourceLanguage.Code());
// detect source language from the text (ignoring plurals for simplicity,
// as we don't need 100% of the text):
wxString allText;
for (auto& i: items())
{
allText.append(i->GetString());
allText.append('\n');
}
if (!allText.empty())
{
m_sourceLanguage = Language::TryDetectFromText(allText.utf8_str());
wxLogTrace("poedit", "detected source language is '%s'", m_sourceLanguage.Code());
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,9 @@ class Catalog

/// Change the catalog's language and update headers accordingly
virtual void SetLanguage(Language lang);

/// Whether source text is just a symbolic identifier (key) and not actual text
bool UsesSymbolicIDsForSource() const { return m_sourceIsSymbolicID; }

/// Returns true if the catalog contains obsolete entries (~.*)
virtual bool HasDeletedItems() const = 0;
Expand Down Expand Up @@ -598,6 +601,7 @@ class Catalog
wxString m_fileName;
HeaderData m_header;
Language m_sourceLanguage;
bool m_sourceIsSymbolicID = false;

std::shared_ptr<CloudSyncDestination> m_cloudSync;
};
Expand Down

0 comments on commit 7053a1f

Please sign in to comment.