Skip to content

Commit

Permalink
add KeepElements & DropDrawingOf
Browse files Browse the repository at this point in the history
  • Loading branch information
fumiama committed Mar 9, 2023
1 parent 54e7323 commit b68d555
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 13 deletions.
4 changes: 2 additions & 2 deletions apipara.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ func (f *Docx) AddParagraph() *Paragraph {

// AddParagraph adds a new paragraph
func (c *WTableCell) AddParagraph() *Paragraph {
c.Paragraphs = append(c.Paragraphs, Paragraph{
c.Paragraphs = append(c.Paragraphs, &Paragraph{
Children: make([]interface{}, 0, 64),
file: c.file,
})

return &c.Paragraphs[len(c.Paragraphs)-1]
return c.Paragraphs[len(c.Paragraphs)-1]
}

// Justification allows to set para's horizontal alignment
Expand Down
12 changes: 9 additions & 3 deletions cmd/main/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
func main() {
fileLocation := flag.String("f", "new-file.docx", "file location")
analyzeOnly := flag.Bool("a", false, "analyze file only")
clean := flag.Bool("c", false, "clean mode (keep text and picture only)")
unm := flag.Bool("u", false, "lease unmarshalled file")
flag.Parse()
var w *docx.Docx
Expand Down Expand Up @@ -173,6 +174,9 @@ func main() {
if err != nil {
panic(err)
}
if *clean {
doc.Document.Body.DropDrawingOf("NilPicture")
}
if *unm {
i := strings.LastIndex(*fileLocation, "/")
name := (*fileLocation)[:i+1] + "unmarshal_" + (*fileLocation)[i+1:]
Expand All @@ -191,9 +195,11 @@ func main() {
}
fmt.Println("Plain text:")
for _, it := range doc.Document.Body.Items {
switch it.(type) {
case *docx.Paragraph, *docx.Table: // printable
fmt.Println(it)
switch o := it.(type) {
case *docx.Paragraph: // printable
fmt.Println(o.String())
case *docx.Table: // printable
fmt.Println(o.String())
}
}
fmt.Println("End of main")
Expand Down
64 changes: 60 additions & 4 deletions structdoc.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ package docx
import (
"encoding/xml"
"io"
"reflect"
"regexp"
"strings"
"unsafe"
)

//nolint:revive,stylecheck
Expand Down Expand Up @@ -74,19 +77,19 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
switch tt.Name.Local {
case "p":
var value Paragraph
value.file = b.file
err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err
}
value.file = b.file
b.Items = append(b.Items, &value)
case "tbl":
var value Table
value.file = b.file
err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err
}
value.file = b.file
b.Items = append(b.Items, &value)
default:
err = d.Skip() // skip unsupported tags
Expand All @@ -99,6 +102,51 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
return nil
}

// KeepElements keeps the body items whose dynamic type is named in name and
// removes all others.
//
// Each name is compared against the reflect type string of an item, for
// example "*docx.Paragraph" or "*docx.Table". The relative order of the kept
// items is preserved. Nil items never match and are removed.
func (b *Body) KeepElements(name ...string) {
	wanted := make(map[string]struct{}, len(name))
	for _, n := range name {
		wanted[n] = struct{}{}
	}
	kept := make([]interface{}, 0, len(b.Items))
	for _, item := range b.Items {
		// reflect.TypeOf returns nil for a nil interface value; checking it
		// here drops such items instead of panicking on them.
		t := reflect.TypeOf(item)
		if t == nil {
			continue
		}
		if _, ok := wanted[t.String()]; ok {
			kept = append(kept, item)
		}
	}
	b.Items = kept
}

// DropDrawingOf drops all matched drawings in the body.
//
// name selects the Paragraph method "Drop"+name, which is looked up by
// reflection and invoked on every paragraph that is a direct body item and on
// every paragraph inside a table cell. Supported names: Canvas, Shape, Group,
// ShapeAndCanvas, ShapeAndCanvasAndGroup, NilPicture.
//
// A name that does not resolve to a parameterless method is a no-op, and nil
// paragraphs are skipped.
func (b *Body) DropDrawingOf(name string) {
	method := "Drop" + name
	drop := func(p *Paragraph) {
		if p == nil {
			return
		}
		f := reflect.ValueOf(p).MethodByName(method)
		// MethodByName yields the zero Value when the method does not exist;
		// IsValid is the supported way to detect that. The arity check keeps
		// Call(nil) from panicking on a method that expects arguments.
		if !f.IsValid() || f.Type().NumIn() != 0 {
			return
		}
		f.Call(nil)
	}
	for _, item := range b.Items {
		switch o := item.(type) {
		case *Paragraph:
			drop(o)
		case *Table:
			for _, tr := range o.TableRows {
				for _, tc := range tr.TableCells {
					for _, p := range tc.Paragraphs {
						drop(p)
					}
				}
			}
		}
	}
}

// Document <w:document>
type Document struct {
XMLName xml.Name `xml:"w:document"`
Expand Down Expand Up @@ -150,6 +198,13 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
// ParagraphSplitRule reports whether the given paragraph is a separator.
type ParagraphSplitRule func(*Paragraph) bool

// SplitDocxByPlainTextRegex builds a ParagraphSplitRule that reports true for
// every paragraph whose p.String() output is matched by re.
func SplitDocxByPlainTextRegex(re *regexp.Regexp) ParagraphSplitRule {
	rule := func(p *Paragraph) bool {
		text := p.String()
		return re.MatchString(text)
	}
	return rule
}

// SplitByParagraph splits a doc to many docs by using a matched paragraph
// as the separator.
//
Expand Down Expand Up @@ -263,10 +318,11 @@ func (t *Table) copymedia(to *Docx) (nt Table) {
ntr.file = to
for _, tc := range tr.TableCells {
ntc := *tc
ntc.Paragraphs = make([]Paragraph, 0, len(tc.Paragraphs))
ntc.Paragraphs = make([]*Paragraph, 0, len(tc.Paragraphs))
ntc.file = to
for _, p := range tc.Paragraphs {
ntc.Paragraphs = append(ntc.Paragraphs, p.copymedia(to))
np := p.copymedia(to)
ntc.Paragraphs = append(ntc.Paragraphs, &np)
}
ntr.TableCells = append(ntr.TableCells, &ntc)
}
Expand Down
181 changes: 181 additions & 0 deletions structpara.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ package docx
import (
"encoding/xml"
"io"
"reflect"
"strconv"
"strings"
)
Expand Down Expand Up @@ -296,3 +297,183 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
p.Children = children
return nil
}

// KeepElements keeps the paragraph children whose dynamic type is named in
// name and removes all others.
//
// Each name is compared against the reflect type string of a child, for
// example "*docx.Hyperlink", "*docx.Run" or "*docx.RunProperties". The
// relative order of the kept children is preserved. Nil children never match
// and are removed.
func (p *Paragraph) KeepElements(name ...string) {
	wanted := make(map[string]struct{}, len(name))
	for _, n := range name {
		wanted[n] = struct{}{}
	}
	kept := make([]interface{}, 0, len(p.Children))
	for _, child := range p.Children {
		// reflect.TypeOf returns nil for a nil interface value; checking it
		// here drops such children instead of panicking on them.
		t := reflect.TypeOf(child)
		if t == nil {
			continue
		}
		if _, ok := wanted[t.String()]; ok {
			kept = append(kept, child)
		}
	}
	p.Children = kept
}

// dropDrawings rebuilds the children of every *Run in the paragraph, leaving
// out each *Drawing for which drop reports true. All other run children are
// kept in their original order; paragraph children that are not runs are not
// touched.
func (p *Paragraph) dropDrawings(drop func(d *Drawing) bool) {
	for _, pc := range p.Children {
		r, ok := pc.(*Run)
		if !ok {
			continue
		}
		kept := make([]interface{}, 0, len(r.Children))
		for _, rc := range r.Children {
			if d, ok := rc.(*Drawing); ok && drop(d) {
				continue
			}
			kept = append(kept, rc)
		}
		r.Children = kept
	}
}

// dropDrawingKinds drops every drawing whose inline or anchor graphic data
// holds at least one of the selected kinds (canvas, shape, group).
func (p *Paragraph) dropDrawingKinds(canvas, shape, group bool) {
	p.dropDrawings(func(d *Drawing) bool {
		if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData != nil {
			gd := d.Inline.Graphic.GraphicData
			if (canvas && gd.Canvas != nil) || (shape && gd.Shape != nil) || (group && gd.Group != nil) {
				return true
			}
		}
		if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData != nil {
			gd := d.Anchor.Graphic.GraphicData
			if (canvas && gd.Canvas != nil) || (shape && gd.Shape != nil) || (group && gd.Group != nil) {
				return true
			}
		}
		return false
	})
}

// DropCanvas drops all canvases in paragraph
func (p *Paragraph) DropCanvas() {
	p.dropDrawingKinds(true, false, false)
}

// DropShape drops all shapes in paragraph
func (p *Paragraph) DropShape() {
	p.dropDrawingKinds(false, true, false)
}

// DropGroup drops all groups in paragraph
func (p *Paragraph) DropGroup() {
	p.dropDrawingKinds(false, false, true)
}

// DropShapeAndCanvas drops all shapes and canvases in paragraph
func (p *Paragraph) DropShapeAndCanvas() {
	p.dropDrawingKinds(true, true, false)
}

// DropShapeAndCanvasAndGroup drops all shapes, canvases and groups in paragraph
func (p *Paragraph) DropShapeAndCanvasAndGroup() {
	p.dropDrawingKinds(true, true, true)
}

// DropNilPicture drops all drawings with nil picture in paragraph.
//
// A drawing is kept only when it has an inline or an anchor part and every
// part that is present carries a graphic, graphic data and a picture.
func (p *Paragraph) DropNilPicture() {
	p.dropDrawings(func(d *Drawing) bool {
		if d.Inline == nil && d.Anchor == nil {
			return true
		}
		if d.Inline != nil && (d.Inline.Graphic == nil ||
			d.Inline.Graphic.GraphicData == nil ||
			d.Inline.Graphic.GraphicData.Pic == nil) {
			return true
		}
		if d.Anchor != nil && (d.Anchor.Graphic == nil ||
			d.Anchor.Graphic.GraphicData == nil ||
			d.Anchor.Graphic.GraphicData.Pic == nil) {
			return true
		}
		return false
	})
}
19 changes: 19 additions & 0 deletions structrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ package docx
import (
"encoding/xml"
"io"
"reflect"
"strconv"
"strings"
)
Expand Down Expand Up @@ -175,6 +176,24 @@ func (r *Run) parse(d *xml.Decoder, tt xml.StartElement) (child interface{}, err
return
}

// KeepElements keeps the run children whose dynamic type is named in name and
// removes all others.
//
// Each name is compared against the reflect type string of a child, for
// example "*docx.Text", "*docx.Drawing", "*docx.Tab" or "*docx.BarterRabbet".
// The relative order of the kept children is preserved. Nil children never
// match and are removed.
func (r *Run) KeepElements(name ...string) {
	wanted := make(map[string]struct{}, len(name))
	for _, n := range name {
		wanted[n] = struct{}{}
	}
	kept := make([]interface{}, 0, len(r.Children))
	for _, child := range r.Children {
		// reflect.TypeOf returns nil for a nil interface value; checking it
		// here drops such children instead of panicking on them.
		t := reflect.TypeOf(child)
		if t == nil {
			continue
		}
		if _, ok := wanted[t.String()]; ok {
			kept = append(kept, child)
		}
	}
	r.Children = kept
}

// RunProperties encapsulates visual properties of a run
type RunProperties struct {
XMLName xml.Name `xml:"w:rPr,omitempty"`
Expand Down
2 changes: 1 addition & 1 deletion structshape.go
Original file line number Diff line number Diff line change
Expand Up @@ -600,11 +600,11 @@ func (c *WTextBoxContent) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e
switch tt.Name.Local {
case "p":
var value Paragraph
value.file = c.file
err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err
}
value.file = c.file
c.Paragraphs = append(c.Paragraphs, value)
default:
err = d.Skip() // skip unsupported tags
Expand Down
Loading

0 comments on commit b68d555

Please sign in to comment.