From 05dba7ad86e4bc5ddde3fcfb3faca2c4458c676b Mon Sep 17 00:00:00 2001 From: Mike Kaganski Date: Tue, 28 Jul 2020 17:52:14 +0300 Subject: [PATCH] Restructure discovery to have less apps The apps are not meant to represent MIME types of respective single file extensions; rather, they represent application/module that handles several extensions. So this groups extensions under modules (writer/calc/...). This is required for some WOPI hosts that whitelist discovery data on per-app base. The old list of MIME-type-based apps is kept for compatibility with existing integrations, until they are fixed to use new-style discovery. Extensions are removed from legacy part, to avoid duplicating actions. This also hardcodes content types, to avoid repeated parsing of discovery.xml. lint-discovery.py is updated to process new-style information (ignores legacy part). Change-Id: Ib8d3518f00510cd0788314d8a9da9a286a52e0ba Reviewed-on: https://gerrit.libreoffice.org/c/online/+/99637 Tested-by: Jenkins Tested-by: Jenkins CollaboraOffice Reviewed-by: Mike Kaganski --- discovery.xml | 315 ++++++++++++++++++++++++++++++------------ wsd/LOOLWSD.cpp | 158 ++++++++++++++++++--- wsd/lint-discovery.py | 277 ++++++++++++++++++++++--------------- 3 files changed, 528 insertions(+), 222 deletions(-) diff --git a/discovery.xml b/discovery.xml index b0ea602d4d5a..22b284621b4b 100644 --- a/discovery.xml +++ b/discovery.xml @@ -1,314 +1,451 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp index 806c22a0d0db..a901f08b88a9 100644 --- a/wsd/LOOLWSD.cpp +++ b/wsd/LOOLWSD.cpp @@ -57,6 +57,7 @@ #include #include #include +#include #if !MOBILEAPP @@ -2828,26 +2829,143 @@ class ClientRequestDispatcher : public SimpleSocketHandler static std::string getContentType(const std::string& fileName) { - const std::string nodePath = Poco::format("//[@ext='%s']", Poco::Path(fileName).getExtension()); - std::string discPath = Path(Application::instance().commandPath()).parent().toString() + "discovery.xml"; - if (!File(discPath).exists()) - { - discPath = LOOLWSD::FileServerRoot + "/discovery.xml"; - } - - InputSource input(discPath); - DOMParser domParser; - AutoPtr doc = domParser.parse(&input); - if (doc) - { - Node* node = doc->getNodeByPath(nodePath); - if (node && node->parentNode()) - { - Element* elem = dynamic_cast(node->parentNode()); - if (elem && elem->hasAttributes()) - return elem->getAttribute("name"); - } - } + static std::unordered_map aContentTypes{ + { "svg", "image/svg+xml" }, + { "pot", "application/vnd.ms-powerpoint" }, + { "xla", "application/vnd.ms-excel" }, + + // Writer documents + { "sxw", "application/vnd.sun.xml.writer" }, + { "odt", "application/vnd.oasis.opendocument.text" }, + { "fodt", "application/vnd.oasis.opendocument.text-flat-xml" }, + + // Calc documents + { "sxc", "application/vnd.sun.xml.calc" }, + { "ods", "application/vnd.oasis.opendocument.spreadsheet" }, + { "fods", "application/vnd.oasis.opendocument.spreadsheet-flat-xml" }, + + // Impress documents + { "sxi", "application/vnd.sun.xml.impress" }, + { "odp", "application/vnd.oasis.opendocument.presentation" }, + { "fodp", "application/vnd.oasis.opendocument.presentation-flat-xml" }, + + // Draw documents + { "sxd", "application/vnd.sun.xml.draw" }, + { "odg", "application/vnd.oasis.opendocument.graphics" }, + { "fodg", "application/vnd.oasis.opendocument.graphics-flat-xml" }, + + // Chart documents + { "odc", "application/vnd.oasis.opendocument.chart" }, + + // Text master documents + { "sxg", "application/vnd.sun.xml.writer.global" }, + { "odm", "application/vnd.oasis.opendocument.text-master" }, + + // Math documents + // In fact Math documents are not supported at all. + // See: https://bugs.documentfoundation.org/show_bug.cgi?id=97006 + { "sxm", "application/vnd.sun.xml.math" }, + { "odf", "application/vnd.oasis.opendocument.formula" }, + + // Text template documents + { "stw", "application/vnd.sun.xml.writer.template" }, + { "ott", "application/vnd.oasis.opendocument.text-template" }, + + // Writer master document templates + { "otm", "application/vnd.oasis.opendocument.text-master-template" }, + + // Spreadsheet template documents + { "stc", "application/vnd.sun.xml.calc.template" }, + { "ots", "application/vnd.oasis.opendocument.spreadsheet-template" }, + + // Presentation template documents + { "sti", "application/vnd.sun.xml.impress.template" }, + { "otp", "application/vnd.oasis.opendocument.presentation-template" }, + + // Drawing template documents + { "std", "application/vnd.sun.xml.draw.template" }, + { "otg", "application/vnd.oasis.opendocument.graphics-template" }, + + // MS Word + { "doc", "application/msword" }, + { "dot", "application/msword" }, + + // MS Excel + { "xls", "application/vnd.ms-excel" }, + + // MS PowerPoint + { "ppt", "application/vnd.ms-powerpoint" }, + + // OOXML wordprocessing + { "docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document" }, + { "docm", "application/vnd.ms-word.document.macroEnabled.12" }, + { "dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.template" }, + { "dotm", "application/vnd.ms-word.template.macroEnabled.12" }, + + // OOXML spreadsheet + { "xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.template" }, + { "xltm", "application/vnd.ms-excel.template.macroEnabled.12" }, + { "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" }, + { "xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12" }, + { "xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12" }, + + // OOXML presentation + { "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation" }, + { "pptm", "application/vnd.ms-powerpoint.presentation.macroEnabled.12" }, + { "potx", "application/vnd.openxmlformats-officedocument.presentationml.template" }, + { "potm", "application/vnd.ms-powerpoint.template.macroEnabled.12" }, + + // Others + { "wpd", "application/vnd.wordperfect" }, + { "pdb", "application/x-aportisdoc" }, + { "hwp", "application/x-hwp" }, + { "wps", "application/vnd.ms-works" }, + { "wri", "application/x-mswrite" }, + { "dif", "application/x-dif-document" }, + { "slk", "text/spreadsheet" }, + { "csv", "text/csv" }, + { "dbf", "application/x-dbase" }, + { "wk1", "application/vnd.lotus-1-2-3" }, + { "cgm", "image/cgm" }, + { "dxf", "image/vnd.dxf" }, + { "emf", "image/x-emf" }, + { "wmf", "image/x-wmf" }, + { "cdr", "application/coreldraw" }, + { "vsd", "application/vnd.visio2013" }, + { "vss", "application/vnd.visio" }, + { "pub", "application/x-mspublisher" }, + { "lrf", "application/x-sony-bbeb" }, + { "gnumeric", "application/x-gnumeric" }, + { "mw", "application/macwriteii" }, + { "numbers", "application/x-iwork-numbers-sffnumbers" }, + { "oth", "application/vnd.oasis.opendocument.text-web" }, + { "p65", "application/x-pagemaker" }, + { "rtf", "text/rtf" }, + { "txt", "text/plain" }, + { "fb2", "application/x-fictionbook+xml" }, + { "cwk", "application/clarisworks" }, + { "wpg", "image/x-wpg" }, + { "pages", "application/x-iwork-pages-sffpages" }, + { "ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow" }, + { "key", "application/x-iwork-keynote-sffkey" }, + { "abw", "application/x-abiword" }, + { "fh", "image/x-freehand" }, + { "sxs", "application/vnd.sun.xml.chart" }, + { "602", "application/x-t602" }, + { "bmp", "image/bmp" }, + { "png", "image/png" }, + { "gif", "image/gif" }, + { "tiff", "image/tiff" }, + { "jpg", "image/jpg" }, + { "jpeg", "image/jpeg" }, + { "pdf", "application/pdf" }, + }; + + const std::string sExt = Poco::Path(fileName).getExtension(); + + const auto it = aContentTypes.find(sExt); + if (it != aContentTypes.end()) + return it->second; return "application/octet-stream"; } diff --git a/wsd/lint-discovery.py b/wsd/lint-discovery.py index 5f381bf2f0f3..730129b4cecf 100755 --- a/wsd/lint-discovery.py +++ b/wsd/lint-discovery.py @@ -14,50 +14,50 @@ import sys import xml.sax - # Parses an online.git discovery.xml. class DiscoveryHandler(xml.sax.handler.ContentHandler): def __init__(self): - # List of app <-> action types. - self.appActions = [] - self.inApp = False + # Dict of app -> {extension -> action} + self.appActions = {} self.app = None - self.inAction = False - self.action = None - + self.allExtensions = set() def startElement(self, name, attrs): if name == "app": - self.inApp = True for k, v in list(attrs.items()): if k == "name": self.app = v - elif name == "action": - self.inAction = True + self.appActions[self.app] = {} + elif name == "action" and self.app: + action = None + ext = None for k, v in list(attrs.items()): if k == "name": - self.action = v - + action = v + elif k == "ext": + ext = v + if action and ext: + self.appActions[self.app][ext] = action + if ext in self.allExtensions: + # Potential problem: see 2de5017e329ce09efbd8f4dc6066fdba3e2c080c + # discovery.xml with duplicating "ext" is valid, but can't be + # used directly in e.g. SharePoint, unless specific extensions + # are imported using New-SPWOPIBinding's parameters, avoiding + # the duplication. + print("warning: extension '" + ext + "' exists for '" + self.app + "', " + + "but already used earlier in discovery.xml") + self.allExtensions.add(ext) def endElement(self, name): - if name == "app": - self.inApp = False - if self.app and self.action: - self.appActions.append([self.app, self.action]) - self.app = None - self.action = None - elif name == "action": - self.inAction = False - + if name == "app" and self.app: + self.app = None # Parses core.git filter/source/config/fragments/types/*.xcu. class FilterTypeHandler(xml.sax.handler.ContentHandler): def __init__(self): self.name = None - self.inMediaType = False self.inExtensions = False self.content = [] - self.mediaType = None self.extensions = None - + self.extensionsSep = " " def startElement(self, name, attrs): if name == "node": for k, v in list(attrs.items()): @@ -65,26 +65,22 @@ def startElement(self, name, attrs): self.name = v elif name == "prop": for k, v in list(attrs.items()): - if k == "oor:name" and v == "MediaType": - self.inMediaType = True - elif k == "oor:name" and v == "Extensions": + if k == "oor:name" and v == "Extensions": self.inExtensions = True - + elif name == "value" and self.inExtensions: + for k, v in list(attrs.items()): + if k == "oor:separator": + self.extensionsSep = v def endElement(self, name): - if name == "prop" and self.inMediaType: - self.inMediaType = False - self.mediaType = "".join(self.content).strip() - self.content = [] - elif name == "prop" and self.inExtensions: + if name == "prop" and self.inExtensions: self.inExtensions = False - self.extensions = "".join(self.content).strip() + self.extensions = "".join(self.content).strip().encode("utf-8").split(self.extensionsSep) + self.extensionsSep = " " self.content = [] - def characters(self, content): - if self.inMediaType or self.inExtensions: + if self.inExtensions: self.content.append(content) - # Parses core.git filter/source/config/fragments/filters/*.xcu. class FilterFragmentHandler(xml.sax.handler.ContentHandler): def __init__(self): @@ -92,8 +88,9 @@ def __init__(self): self.typeName = None self.inFlags = False self.flags = None + self.inDocumentService = False + self.documentService = None self.content = [] - def startElement(self, name, attrs): if name == "prop": for k, v in list(attrs.items()): @@ -101,7 +98,8 @@ def startElement(self, name, attrs): self.inType = True elif k == "oor:name" and v == "Flags": self.inFlags = True - + elif k == "oor:name" and v == "DocumentService": + self.inDocumentService = True def endElement(self, name): if name == "prop" and self.inType: self.inType = False @@ -112,77 +110,86 @@ def endElement(self, name): encodedFlags = "".join(self.content).strip().encode("utf-8") self.flags = encodedFlags.split(" ") self.content = [] - + elif name == "prop" and self.inDocumentService: + self.inDocumentService = False + self.documentService = "".join(self.content).strip() + self.content = [] def characters(self, content): - if self.inType or self.inFlags: + if self.inType or self.inFlags or self.inDocumentService: self.content.append(content) - -# Builds a MIME type -> filter flag dictionary. -def getFilterFlags(filterDir): - # Build a MIME type -> type name dictionary. - filterNames = {} +# Builds a 'document service' -> {'extension' -> 'filter flags'} dictionary. +def getExtensionProperties(filterDir): + # Build a 'type name' -> 'extensions' dictionary. + typeNameExtensions = {} typeFragments = os.path.join(filterDir, "types") for typeFragment in os.listdir(typeFragments): if not typeFragment.endswith(".xcu"): continue - parser = xml.sax.make_parser() filterTypeHandler = FilterTypeHandler() parser.setContentHandler(filterTypeHandler) parser.parse(os.path.join(typeFragments, typeFragment)) - # Did we find a MIME type? - if filterTypeHandler.mediaType: - v = (filterTypeHandler.name, filterTypeHandler.extensions) - filterNames[filterTypeHandler.mediaType] = v - - # core.git doesn't declares this, but probably this is the intention. - filterNames["application/x-dif-document"] = ("calc_DIF", "dif") - filterNames["application/x-dbase"] = ("calc_dBase", "dbf") - - # Build a 'type name' -> 'filter flag list' dictionary. + # Did we find some extensions? + if filterTypeHandler.extensions: + typeNameExtensions[filterTypeHandler.name] = filterTypeHandler.extensions + # Build a 'type name' -> ('filter flag list', 'document service') dictionary. typeNameFlags = {} filterFragments = os.path.join(filterDir, "filters") for filterFragment in os.listdir(filterFragments): if not filterFragment.endswith(".xcu"): continue - parser = xml.sax.make_parser() handler = FilterFragmentHandler() parser.setContentHandler(handler) parser.parse(os.path.join(filterFragments, filterFragment)) - typeNameFlags[handler.typeName] = handler.flags - - # Now build the combined MIME type -> filter flags one. - filterFlags = {} - for i in filterNames.keys(): - typeName, extensions = filterNames[i] - if typeName in typeNameFlags.keys(): - filterFlags[i] = (typeNameFlags[typeName], extensions) - - return filterFlags - -# How it's described in discovery.xml -> how core.git knows it. -mimeTypeAliases = { - 'application/coreldraw': 'application/vnd.corel-draw', - 'application/vnd.visio2013': 'application/vnd.visio', + if "IMPORT" in handler.flags: + if handler.typeName in typeNameFlags: + if "EXPORT" in typeNameFlags[handler.typeName][0]: + continue # don't modify a filetype with maximal capabilities + typeNameFlags[handler.typeName] = (handler.flags, handler.documentService) + # Now build the combined 'document service' -> {'extension' -> 'filter flags'}. + extensionProperties = {} + for typeName in typeNameExtensions: + if typeName not in typeNameFlags: + continue + flags, documentService = typeNameFlags[typeName] + if documentService not in extensionProperties: + extensionProperties[documentService] = {} + for extension in typeNameExtensions[typeName]: + extensionProperties[documentService][extension] = flags + return extensionProperties + +# Map app names to document service names +appDocumentServices = { + 'writer': 'com.sun.star.text.TextDocument', + 'writer-global': 'com.sun.star.text.GlobalDocument', + 'writer-web': 'com.sun.star.text.WebDocument', + 'calc': 'com.sun.star.sheet.SpreadsheetDocument', + 'impress': 'com.sun.star.presentation.PresentationDocument', + 'draw': 'com.sun.star.drawing.DrawingDocument', } - -# We know that these can be edited. -mimeTypeWhiteList = { - 'application/vnd.ms-excel', - 'application/vnd.oasis.opendocument.text', - 'application/msword', +documentServicesApp = {v: k for k, v in appDocumentServices.items()} + +# We know about these extensions +extensionsSkipList = { + 'xls', # we know that it can be edited + 'pdf', # it exists for draw - its entry in core.git is missing document service + 'zip', + 'htm', + 'html', + 'xhtml', + '*', # well, obvious ;-) + '', # and this :-D } - def main(): discoveryXml = "discovery.xml" repoGuess = os.path.join(os.environ["HOME"], "git/libreoffice/master") filterDir = os.path.join(repoGuess, "filter/source/config/fragments") if len(sys.argv) >= 3: discoveryXml = sys.argv[1] - filterDir = sys.arv[2] + filterDir = sys.argv[2] # Parse discovery.xml, which describes what online.git exposes at the # moment. @@ -191,20 +198,31 @@ def main(): parser.setContentHandler(discoveryHandler) parser.parse(discoveryXml) - # Parse core.git filter definitions to build a MIME type <-> filter flag - # dictionary. - filterFlags = getFilterFlags(filterDir) - - # Now look up the filter flags in core.git for the MIME type. - for i in discoveryHandler.appActions: - mimeType = i[0] - discoveryAction = i[1] - if mimeType in mimeTypeWhiteList: + # Parse core.git filter definitions to build a + # 'document service' -> {'extension' -> 'filter flags'} dictionary. + extensionProperties = getExtensionProperties(filterDir) + + proposed = {} + + # Now look up the filter flags in core.git for the extension. + for app, actions in discoveryHandler.appActions.items(): + if app not in appDocumentServices: + continue # e.g., for "Capabilities" + documentService = appDocumentServices[app] + if documentService not in extensionProperties: + # Inconsistency found. + print("warning: actions for '" + app + "' " + + "exist, but not found in core.git") continue - if mimeType in mimeTypeAliases.keys(): - mimeType = mimeTypeAliases[mimeType] - if mimeType in filterFlags.keys(): - flags, extensions = filterFlags[mimeType] + for extension, discoveryAction in actions.items(): + if extension in extensionsSkipList: + continue + if extension not in extensionProperties[documentService]: + # Inconsistency found. + print("warning: action for '" + app + ":" + extension + "' " + + "exists, but is not found in core.git") + continue + flags = extensionProperties[documentService][extension] if "IMPORT" in flags and "EXPORT" in flags: coreAction = "edit" else: @@ -212,32 +230,65 @@ def main(): if discoveryAction != coreAction: # Inconsistency found. - print("warning: action for '" + mimeType + "' " + + print("warning: action for '" + app + ":" + extension + "' " + "is '" + discoveryAction + "', " + "but it should be '" + coreAction + "'") + # Now see if there are any new extensions in the core.git filter config + # which are missing. + for extension, flags in extensionProperties[documentService].items(): + if extension not in actions: + if "IMPORT" in flags and "EXPORT" in flags: + action = "edit" + else: + action = "view" + if app not in proposed: + proposed[app] = {} + proposed[app][extension] = action + # Now see if there are any new types in the core.git filter config which # are missing. - discoveryMimeTypes = [i[0] for i in discoveryHandler.appActions] - proposed = [] - for filterMimeType in filterFlags.keys(): - if filterMimeType not in discoveryMimeTypes: - flags, extensions = filterFlags[filterMimeType] - if "IMPORT" in flags and "EXPORT" in flags: - action = "edit" - else: - action = "view" - print("warning: mime type '" + filterMimeType + "' is known, " + - "but not advertised in discovery.xml " + - "(extension would be '" + extensions + "', and " + - "action would be '"+action+"')") - proposed.append((filterMimeType, extensions, action)) + for documentService, extensions in extensionProperties.items(): + missingName = None + if documentService not in documentServicesApp: + # Inconsistency found. + print("warning: extensions for '" + documentService + "' " + + "found in core.git, without mapping to apps in discovery.xml") + missingName = documentService + else: + app = documentServicesApp[documentService] + if app not in discoveryHandler.appActions: + # Inconsistency found. + print("warning: extensions for '" + app + "' " + + "found in core.git, all missing in discovery.xml") + missingName = app + + if missingName: + for extension, flags in extensions.items(): + if "IMPORT" in flags and "EXPORT" in flags: + action = "edit" + else: + action = "view" + if missingName not in proposed: + proposed[missingName] = {} + proposed[missingName][extension] = action # Produce a copy&paste-able XML output for the proposed changes. - for proposal in proposed: - print(' ') - print(' ') + for app, extensions in proposed.items(): + newExtensions = {} + for extension, action in extensions.items(): + if extension in extensionsSkipList: + continue + if extension in discoveryHandler.allExtensions: + continue # see 2de5017e329ce09efbd8f4dc6066fdba3e2c080c + newExtensions[extension] = action + if not newExtensions: + continue # no extensions after filtering + + print(' ') + for extension, action in newExtensions.items(): + print(' ') print(' ') if __name__ == "__main__":