Skip to content

Commit

Permalink
zippkg: start breaking out decoding
Browse files Browse the repository at this point in the history
Decoding was getting fairly complicated. It seems cleaner
to have something else handle the relationships file and traversal,
and have the document just be notified via a callback when new
files have been found.
  • Loading branch information
tbaliance committed Sep 4, 2017
1 parent 01c7b2b commit 6164089
Show file tree
Hide file tree
Showing 7 changed files with 399 additions and 311 deletions.
2 changes: 2 additions & 0 deletions common/schemas.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ const (
ImageType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
CommentsType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
ThumbnailType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/thumbnail"
DrawingType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing"
ChartType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart"

ExtendedPropertiesType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"
CorePropertiesType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
Expand Down
217 changes: 95 additions & 122 deletions document/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ import (
"io/ioutil"
"log"
"os"
"path/filepath"

"baliance.com/gooxml/common"
"baliance.com/gooxml/zippkg"

dml "baliance.com/gooxml/schema/schemas.openxmlformats.org/drawingml"
st "baliance.com/gooxml/schema/schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
"baliance.com/gooxml/schema/schemas.openxmlformats.org/package/2006/relationships"
wml "baliance.com/gooxml/schema/schemas.openxmlformats.org/wordprocessingml"
)

Expand Down Expand Up @@ -317,128 +317,13 @@ func Read(r io.ReaderAt, size int64) (*Document, error) {

files := []*zip.File{}
files = append(files, zr.File...)
// first pass, identify the files that should always be there
for i, f := range files {
switch f.Name {
case zippkg.ContentTypesFilename:
if err := zippkg.Decode(f, doc.ContentTypes.X()); err != nil {
return nil, err
}
files[i] = nil
case zippkg.BaseRelsFilename:
if err := zippkg.Decode(f, doc.Rels.X()); err != nil {
return nil, err
}
files[i] = nil
}
}

basePaths := map[interface{}]string{}
decMap := make(map[string]interface{})
for _, r := range doc.Rels.Relationships() {
switch r.Type() {
case common.OfficeDocumentType:
doc.x = wml.NewDocument()
decMap[r.Target()] = doc.x

// look for the document relationships file as well
basePath, _ := filepath.Split(r.Target())
decMap[zippkg.RelationsPathFor(r.Target())] = doc.docRels.X()
basePaths[doc.docRels] = basePath
case common.CorePropertiesType:
decMap[r.Target()] = doc.CoreProperties.X()
case common.ExtendedPropertiesType:
decMap[r.Target()] = doc.AppProperties.X()
case common.ThumbnailType:
// read our thumbnail
for i, f := range files {
if f == nil {
continue
}
if f.Name == r.Target() {
rc, err := f.Open()
if err != nil {
return nil, fmt.Errorf("error reading thumbnail: %s", err)
}
doc.Thumbnail, _, err = image.Decode(rc)
rc.Close()
if err != nil {
return nil, fmt.Errorf("error decoding thumbnail: %s", err)
}
files[i] = nil
}
}
default:
log.Printf("unsupported type: %s", r.Type())
}
}

if err := zippkg.DecodeFromMap(files, decMap); err != nil {
return nil, err
}

for _, r := range doc.docRels.Relationships() {
switch r.Type() {
case common.SettingsType:
decMap[basePaths[doc.docRels]+r.Target()] = doc.Settings.X()
case common.NumberingType:
doc.Numbering = NewNumbering()
decMap[basePaths[doc.docRels]+r.Target()] = doc.Numbering.X()
case common.StylesType:
doc.Styles.Clear()
decMap[basePaths[doc.docRels]+r.Target()] = doc.Styles.X()
case common.HeaderType:
hdr := wml.NewHdr()
doc.headers = append(doc.headers, hdr)
decMap[basePaths[doc.docRels]+r.Target()] = hdr
case common.FooterType:
ftr := wml.NewFtr()
doc.footers = append(doc.footers, ftr)
decMap[basePaths[doc.docRels]+r.Target()] = ftr
case common.ThemeType:
thm := dml.NewTheme()
doc.themes = append(doc.themes, thm)
decMap[basePaths[doc.docRels]+r.Target()] = thm
case common.WebSettingsType:
doc.webSettings = wml.NewWebSettings()
decMap[basePaths[doc.docRels]+r.Target()] = doc.webSettings
case common.FontTableType:
doc.fontTable = wml.NewFonts()
decMap[basePaths[doc.docRels]+r.Target()] = doc.fontTable
case common.EndNotesType:
doc.endNotes = wml.NewEndnotes()
decMap[basePaths[doc.docRels]+r.Target()] = doc.endNotes
case common.FootNotesType:
doc.footNotes = wml.NewFootnotes()
decMap[basePaths[doc.docRels]+r.Target()] = doc.footNotes
case common.ImageType:
imgPath := basePaths[doc.docRels] + r.Target()
for i, f := range files {
if f == nil {
continue
}
if f.Name == imgPath {
path, err := zippkg.ExtractToDiskTmp(f, doc.TmpPath)
if err != nil {
return nil, err
}
img, err := ImageFromFile(path)
if err != nil {
return nil, err
}
_ = img
ref := &iref{path: img.Path}
doc.images = append(doc.images, ref)
files[i] = nil
}
}
default:
fmt.Println("unsupported document rel", r)
}
}
if err := zippkg.DecodeFromMap(files, decMap); err != nil {
return nil, err
}
decMap := zippkg.DecodeMap{}
decMap.SetOnNewRelationshipFunc(doc.onNewRelationship)
// we should discover all contents by starting with these two files
decMap.AddTarget(zippkg.ContentTypesFilename, doc.ContentTypes.X())
decMap.AddTarget(zippkg.BaseRelsFilename, doc.Rels.X())
decMap.Decode(files)

for _, f := range files {
if f == nil {
Expand Down Expand Up @@ -573,3 +458,91 @@ func (d *Document) FormFields() []FormField {
}
return ret
}

// onNewRelationship is the callback that Read registers on the
// zippkg.DecodeMap via SetOnNewRelationshipFunc. For a relationship of type
// typ pointing at target, it either registers the document-side object that
// target should be decoded into (via decMap.AddTarget), or, for thumbnails and
// images, consumes the matching zip entry from files directly.
//
// Consumed entries in files are set to nil so that later passes over files
// skip them. Unrecognized relationship types are logged and otherwise
// ignored. rel is currently unused.
//
// NOTE(review): target is compared verbatim against zip entry names, so it is
// presumably already resolved to a package-relative path by the DecodeMap;
// confirm against zippkg.
func (d *Document) onNewRelationship(decMap *zippkg.DecodeMap, target, typ string, files []*zip.File, rel *relationships.Relationship) error {
	switch typ {
	case common.OfficeDocumentType:
		d.x = wml.NewDocument()
		decMap.AddTarget(target, d.x)
		// look for the document relationships file as well
		decMap.AddTarget(zippkg.RelationsPathFor(target), d.docRels.X())
	case common.CorePropertiesType:
		decMap.AddTarget(target, d.CoreProperties.X())
	case common.ExtendedPropertiesType:
		decMap.AddTarget(target, d.AppProperties.X())
	case common.ThumbnailType:
		return d.readThumbnailFromZip(target, files)
	case common.SettingsType:
		decMap.AddTarget(target, d.Settings.X())
	case common.NumberingType:
		d.Numbering = NewNumbering()
		decMap.AddTarget(target, d.Numbering.X())
	case common.StylesType:
		d.Styles.Clear()
		decMap.AddTarget(target, d.Styles.X())
	case common.HeaderType:
		hdr := wml.NewHdr()
		d.headers = append(d.headers, hdr)
		decMap.AddTarget(target, hdr)
	case common.FooterType:
		ftr := wml.NewFtr()
		d.footers = append(d.footers, ftr)
		decMap.AddTarget(target, ftr)
	case common.ThemeType:
		thm := dml.NewTheme()
		d.themes = append(d.themes, thm)
		decMap.AddTarget(target, thm)
	case common.WebSettingsType:
		d.webSettings = wml.NewWebSettings()
		decMap.AddTarget(target, d.webSettings)
	case common.FontTableType:
		d.fontTable = wml.NewFonts()
		decMap.AddTarget(target, d.fontTable)
	case common.EndNotesType:
		d.endNotes = wml.NewEndnotes()
		decMap.AddTarget(target, d.endNotes)
	case common.FootNotesType:
		d.footNotes = wml.NewFootnotes()
		decMap.AddTarget(target, d.footNotes)
	case common.ImageType:
		return d.addImageFromZip(target, files)
	default:
		log.Printf("unsupported relationship type: %s tgt: %s", typ, target)
	}
	return nil
}

// readThumbnailFromZip decodes every entry in files named target into
// d.Thumbnail and marks each decoded entry as consumed by setting it to nil.
func (d *Document) readThumbnailFromZip(target string, files []*zip.File) error {
	for i, f := range files {
		if f == nil || f.Name != target {
			continue
		}
		rc, err := f.Open()
		if err != nil {
			return fmt.Errorf("error reading thumbnail: %s", err)
		}
		d.Thumbnail, _, err = image.Decode(rc)
		// Close before checking the decode error so the reader is released
		// on both the success and failure paths.
		rc.Close()
		if err != nil {
			return fmt.Errorf("error decoding thumbnail: %s", err)
		}
		files[i] = nil
	}
	return nil
}

// addImageFromZip extracts every entry in files named target into d.TmpPath,
// records a reference to the extracted image in d.images, and marks each
// extracted entry as consumed by setting it to nil.
func (d *Document) addImageFromZip(target string, files []*zip.File) error {
	for i, f := range files {
		if f == nil || f.Name != target {
			continue
		}
		path, err := zippkg.ExtractToDiskTmp(f, d.TmpPath)
		if err != nil {
			return err
		}
		img, err := ImageFromFile(path)
		if err != nil {
			return err
		}
		d.images = append(d.images, &iref{path: img.Path})
		files[i] = nil
	}
	return nil
}
41 changes: 41 additions & 0 deletions spreadsheet/new.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2017 Baliance. All rights reserved.
//
// Use of this source code is governed by the terms of the Affero GNU General
// Public License version 3.0 as published by the Free Software Foundation and
// appearing in the file LICENSE included in the packaging of this file. A
// commercial license can be purchased by contacting [email protected].

package spreadsheet

import (
"baliance.com/gooxml/common"
sml "baliance.com/gooxml/schema/schemas.openxmlformats.org/spreadsheetml"
"baliance.com/gooxml/zippkg"
)

// New returns an empty workbook with its properties, style sheet, shared
// strings, relationships and content-type overrides already populated.
func New() *Workbook {
	book := &Workbook{}
	book.x = sml.NewWorkbook()

	// Parts owned by the workbook.
	book.AppProperties = common.NewAppProperties()
	book.CoreProperties = common.NewCoreProperties()
	book.StyleSheet = NewStyleSheet()
	book.SharedStrings = NewSharedStrings()

	// Package-level relationships.
	book.Rels = common.NewRelationships()
	book.Rels.AddRelationship(zippkg.AppPropsFilename, common.ExtendedPropertiesType)
	book.Rels.AddRelationship(zippkg.CorePropsFilename, common.CorePropertiesType)
	book.Rels.AddRelationship("xl/workbook.xml", common.OfficeDocumentType)

	// Relationships of the workbook part itself.
	book.wbRels = common.NewRelationships()
	book.wbRels.AddRelationship("styles.xml", common.StylesType)
	book.wbRels.AddRelationship("sharedStrings.xml", common.SharedStingsType)

	// Content-type overrides for the parts registered above.
	book.ContentTypes = common.NewContentTypes()
	book.ContentTypes.AddOverride("/xl/workbook.xml", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml")
	book.ContentTypes.AddOverride("/xl/styles.xml", "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml")
	book.ContentTypes.AddOverride("/xl/sharedStrings.xml", common.SharedStringsContentType)

	return book
}
68 changes: 68 additions & 0 deletions spreadsheet/read.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright 2017 Baliance. All rights reserved.
//
// Use of this source code is governed by the terms of the Affero GNU General
// Public License version 3.0 as published by the Free Software Foundation and
// appearing in the file LICENSE included in the packaging of this file. A
// commercial license can be purchased by contacting [email protected].

package spreadsheet

import (
"archive/zip"
"fmt"
"io"
"io/ioutil"
"os"

"baliance.com/gooxml/zippkg"
)

// Read reads a workbook (.xlsx) from r, an io.ReaderAt over a zip archive
// that is size bytes long.
//
// A temporary directory is created and recorded in the workbook's TmpPath.
// If reading fails, that directory is removed again before the error is
// returned, since the caller never receives the workbook that refers to it.
func Read(r io.ReaderAt, size int64) (*Workbook, error) {
	wb := New()
	td, err := ioutil.TempDir("", "gooxml-xlsx")
	if err != nil {
		return nil, err
	}
	wb.TmpPath = td

	// Remove the temporary directory on any failure below; on success the
	// returned workbook owns it.
	succeeded := false
	defer func() {
		if !succeeded {
			// Best effort: the original error is more useful to the caller
			// than a cleanup failure.
			os.RemoveAll(td)
		}
	}()

	zr, err := zip.NewReader(r, size)
	if err != nil {
		return nil, fmt.Errorf("parsing zip: %s", err)
	}

	files := []*zip.File{}
	files = append(files, zr.File...)
	decMap := zippkg.DecodeMap{}
	decMap.SetOnNewRelationshipFunc(wb.onNewRelationship)
	// we should discover all contents by starting with these two files
	decMap.AddTarget(zippkg.ContentTypesFilename, wb.ContentTypes.X())
	decMap.AddTarget(zippkg.BaseRelsFilename, wb.Rels.X())
	// NOTE(review): any result of Decode is discarded here; if it returns an
	// error it should be checked and propagated — confirm against zippkg.
	decMap.Decode(files)

	// extra files are things we don't handle yet, or files that happened to
	// have been in the zip before. We just round-trip them.
	for _, f := range files {
		if f == nil {
			continue
		}
		if err := wb.AddExtraFileFromZip(f); err != nil {
			return nil, err
		}
	}
	succeeded = true
	return wb, nil
}

// Open opens the file at filename and reads a workbook (.xlsx) from it.
// The file is closed before Open returns.
func Open(filename string) (*Workbook, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("error opening %s: %s", filename, err)
	}
	defer f.Close()
	// Stat the open handle rather than the path, so the size always belongs
	// to the file being read even if the path is replaced concurrently.
	fi, err := f.Stat()
	if err != nil {
		return nil, fmt.Errorf("error opening %s: %s", filename, err)
	}
	return Read(f, fi.Size())
}
Loading

0 comments on commit 6164089

Please sign in to comment.