-
Notifications
You must be signed in to change notification settings - Fork 6
Example Queries
annekroon edited this page Apr 2, 2019
·
6 revisions
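- `listofdoctypes` is a list of doctype names (strings), e.g. `['nu', 'nrc (www)']`
- `fromdate` and `todate` are date strings of the form '2011-01-31' (YYYY-MM-DD); `fromdate` is inclusive, `todate` is exclusive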
# Template: restrict results to a set of doctypes and a publication-date window.
# `listofdoctypes`, `fromdate` and `todate` must be defined before this runs.
query = {
    "query": {
        "bool": {
            "filter": [
                # At least one of the doctypes has to match. The un-analyzed
                # `doctype.keyword` sub-field compares the name as a whole; on
                # an analyzed text field a name such as 'nrc (www)' would be
                # split into tokens and could also match other '(www)' outlets.
                # NOTE(review): assumes the index exposes `doctype.keyword`
                # (Elasticsearch's default dynamic mapping for strings) - confirm.
                {"bool": {"should": [
                    {"match": {"doctype.keyword": d}} for d in listofdoctypes
                ]}},
                # fromdate is inclusive (gte), todate is exclusive (lt).
                {"range": {"publication_date": {"gte": fromdate, "lt": todate}}},
            ]
        }
    }
}
from inca import Inca

myinca = Inca()

# Publication-date window: fromdate is inclusive, todate is exclusive.
fromdate = '2015-01-01'
todate = '2018-01-02'
listofdoctypes = ['nu', 'nrc (www)', 'volkskrant (www)']

# Full-text search in the title fields, limited to the doctypes and the
# date window above. Conditions under "filter" do not influence scoring.
q = {
    "query": {
        "bool": {
            "must": [
                # `use_dis_max` is intentionally not set: it only restated the
                # old default and newer Elasticsearch versions reject the key.
                {"query_string": {
                    "fields": ["title", "title_rss"],
                    "query": '"Europese Commissie" OR "Europese Centrale Bank"',
                }},
            ],
            "filter": [
                # Exact doctype comparison via the un-analyzed sub-field, so
                # 'nrc (www)' is not tokenised and matched against other
                # '(www)' outlets.
                # NOTE(review): assumes the index exposes `doctype.keyword`
                # (default dynamic mapping for strings) - confirm.
                {"bool": {"should": [
                    {"match": {"doctype.keyword": d}} for d in listofdoctypes
                ]}},
                {"range": {"publication_date": {"gte": fromdate, "lt": todate}}},
            ],
        }
    }
}

# Iterate over the matching documents and peek at the first one.
g = myinca.database.document_generator(q)
# `first` is None when the query has no hits, instead of raising StopIteration.
first = next(g, None)
from inca import Inca

myinca = Inca()

# Publication-date window: fromdate is inclusive, todate is exclusive.
fromdate = '2017-09-13'
todate = '2018-01-15'
listofdoctypes = [
    'nu',
    'nrc (www)',
    'volkskrant (www)',
    'telegraaf (www)',
    'ad (www)',
    'trouw (www)',
    'nos (www)',
    'geenstijl (www)',
    'parool (www)',
    'metro (www)',
]
# Document fields that end up as columns in the exported CSV.
exportfields = [
    'title',
    'title_rss',
    'doctype',
    'publication_date',
    'byline',
    'teaser_rss',
    'url',
    'text',
]

# Simple variant: any of the EU-related phrases (searched in the fields
# listed under "fields" in the query below).
querystring = (
    '"Europese Raad" OR "Europese Commissie" OR "Europese Centrale Bank" '
    'OR "ECB" OR "Europese Parlement" OR "Europaparlement" OR "EU" '
    'OR "Europese Unie" OR "Eurozone" OR "Eurogroep"'
)

# Better variant: the simple variant, plus items that mention "Brussel" and
# contain one of the EU phrases in the `text` field. Boolean operators are
# written in upper case because query_string syntax treats a lower-case
# "or" as an ordinary search term rather than as an operator.
querystring2 = (
    '(' + querystring + ') OR (Brussel AND ('
    'text:"EU" OR text:"Europese Unie" OR text:"Europese Commissie" '
    'OR text:"Europees Parlement" OR text:"Europese Parlement" '
    'OR text:"Europaparlement" OR text:"Europese Centrale Bank"))'
)

q = {
    "query": {
        "bool": {
            "must": [
                {"query_string": {
                    "fields": ["title", "title_rss"],
                    "query": querystring2,
                }},
            ],
            "filter": [
                # Exact doctype comparison on the un-analyzed sub-field.
                {"bool": {"should": [
                    {"match": {"doctype.keyword": d}} for d in listofdoctypes
                ]}},
                {"range": {"publication_date": {"gte": fromdate, "lt": todate}}},
            ],
        }
    }
}

# To inspect hits interactively instead of exporting them:
# g = myinca.database.document_generator(q)
# first = next(g)

# Export the selected fields of all matching documents to CSV.
myinca.importers_exporters.export_csv(query=q, fields=exportfields)