forked from watson-developer-cloud/node-sdk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdocument_conversion.v1.js
160 lines (153 loc) · 4.4 KB
/
document_conversion.v1.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
'use strict';
var DocumentConversionV1 = require('watson-developer-cloud/document-conversion/v1');
var fs = require('fs');
// Options handed to the Document Conversion client. The username and password
// values are placeholders and must be replaced before running this script.
var serviceOptions = {
  username: 'INSERT YOUR USERNAME FOR THE SERVICE HERE',
  password: 'INSERT YOUR PASSWORD FOR THE SERVICE HERE',
  version_date: '2015-12-01'
};

// Client instance shared by every example call in this script.
var document_conversion = new DocumentConversionV1(serviceOptions);
// Example: convert a single document.
// Passes a read stream of resources/example.html to convert() and asks for
// ANSWER_UNITS output.
var convertParams = {
  file: fs.createReadStream(__dirname + '/resources/example.html'),
  // (JSON) ANSWER_UNITS, NORMALIZED_HTML, or NORMALIZED_TEXT
  conversion_target: document_conversion.conversion_target.ANSWER_UNITS,
  config: {
    html_to_answer_units: {
      // split the html file by "h2", "h3" and "h4" tags
      selectors: ['h2', 'h3', 'h4']
    }
  }
};

// Prints a banner for this example, then either the error (on stderr) or the
// response pretty-printed as JSON.
function reportConvert(err, response) {
  console.log("----------\n");
  console.log("convert a single document\n");
  console.log("----------\n");
  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}

document_conversion.convert(convertParams, reportConvert);
// Example: dry run of indexing a single document.
// Passes a read stream of resources/sample-docx.docx to index() with
// retrieve_and_rank.dry_run set to true.
var dryRunIndexParams = {
  file: fs.createReadStream(__dirname + '/resources/sample-docx.docx'),
  config: {
    retrieve_and_rank: {
      dry_run: true
    }
  }
};

// Prints a banner for this example, then either the error (on stderr) or the
// response pretty-printed as JSON.
function reportDryRunIndex(err, response) {
  console.log("----------\n");
  console.log("dry run of indexing a single document\n");
  console.log("----------\n");
  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}

document_conversion.index(dryRunIndexParams, reportDryRunIndex);
// Example: dry run of indexing only metadata.
// No file is supplied; index() receives just the name/value metadata pairs
// below, with retrieve_and_rank.dry_run set to true.
var metadataOnlyEntries = [
  { name: 'id', value: '1' },
  { name: 'SomeMetadataName', value: 'SomeMetadataValue' }
];

var metadataOnlyParams = {
  metadata: {
    metadata: metadataOnlyEntries
  },
  config: {
    retrieve_and_rank: {
      dry_run: true
    }
  }
};

// Prints a banner for this example, then either the error (on stderr) or the
// response pretty-printed as JSON.
function reportMetadataOnlyDryRun(err, response) {
  console.log("----------\n");
  console.log("dry run of indexing only metadata\n");
  console.log("----------\n");
  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}

document_conversion.index(metadataOnlyParams, reportMetadataOnlyDryRun);
// Example: dry run of indexing a single document with metadata and additional
// configuration for convert_document and field mappings.
var mappedDryRunMetadata = [
  { name: 'id', value: '2' },
  { name: 'Author', value: 'IBM' },
  { name: 'Date Created', value: '2016-03-21' },
  { name: 'Category', value: 'Example' }
];

// Field handling passed under retrieve_and_rank.fields: two metadata names
// are mapped to new names, the mapped names are listed under include, and
// 'Category' is listed under exclude.
var mappedDryRunFields = {
  mappings: [
    { from: 'Author', to: 'Created By' },
    { from: 'Date Created', to: 'Created On' }
  ],
  include: ['Created By', 'Created On'],
  exclude: ['Category']
};

var mappedDryRunParams = {
  file: fs.createReadStream(__dirname + '/resources/example.html'),
  metadata: {
    metadata: mappedDryRunMetadata
  },
  config: {
    convert_document: {
      normalized_html: {
        // Exclude all anchor tags "<a>"
        exclude_tags_completely: ['a']
      }
    },
    retrieve_and_rank: {
      dry_run: true,
      fields: mappedDryRunFields
    }
  }
};

// Prints a banner for this example, then either the error (on stderr) or the
// response pretty-printed as JSON.
function reportMappedDryRun(err, response) {
  console.log("----------\n");
  console.log("dry run of indexing a single document with metadata and additional configuration for convert_document and field mappings\n");
  console.log("----------\n");
  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}

document_conversion.index(mappedDryRunParams, reportMappedDryRun);
// Example: indexing a single document with metadata and additional
// configuration for convert_document and field mappings.
// Unlike the examples above, dry_run is false here, and the three Retrieve and
// Rank identifiers are placeholders that must be replaced before running.
var liveIndexTarget = {
  dry_run: false,
  service_instance_id: 'INSERT YOUR RETRIEVE AND RANK SERVICE INSTANCE ID HERE',
  cluster_id: 'INSERT YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID HERE',
  search_collection: 'INSERT YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME HERE',
  fields: {
    mappings: [
      { from: 'SomeMetadataName', to: 'Created By' }
    ]
  }
};

var liveIndexParams = {
  file: fs.createReadStream(__dirname + '/resources/example.html'),
  metadata: {
    metadata: [
      { name: 'id', value: '3' },
      { name: 'SomeMetadataName', value: 'SomeMetadataValue' }
    ]
  },
  config: {
    convert_document: {
      normalized_html: {
        // Exclude all anchor tags "<a>"
        exclude_tags_completely: ['a']
      }
    },
    retrieve_and_rank: liveIndexTarget
  }
};

// Prints a banner for this example, then either the error (on stderr) or the
// response pretty-printed as JSON.
function reportLiveIndex(err, response) {
  console.log("----------\n");
  console.log("indexing a single document with metadata and additional configuration for convert_document and field mappings\n");
  console.log("----------\n");
  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}

document_conversion.index(liveIndexParams, reportLiveIndex);